{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8617146809895834, "eval_steps": 24576, "global_step": 84710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0172526041666666e-05, "eval_loss": 4.542403221130371, "eval_runtime": 144.6811, "eval_samples_per_second": 13.872, "eval_steps_per_second": 13.872, "step": 1 }, { "epoch": 5.0862630208333336e-05, "grad_norm": 45.55976867675781, "learning_rate": 2.5000000000000004e-07, "loss": 4.478, "step": 5 }, { "epoch": 0.00010172526041666667, "grad_norm": 44.849979400634766, "learning_rate": 5.000000000000001e-07, "loss": 4.381, "step": 10 }, { "epoch": 0.000152587890625, "grad_norm": 47.78006362915039, "learning_rate": 7.5e-07, "loss": 4.2726, "step": 15 }, { "epoch": 0.00020345052083333334, "grad_norm": 49.011878967285156, "learning_rate": 1.0000000000000002e-06, "loss": 4.4009, "step": 20 }, { "epoch": 0.0002543131510416667, "grad_norm": 40.69770050048828, "learning_rate": 1.25e-06, "loss": 4.4735, "step": 25 }, { "epoch": 0.00030517578125, "grad_norm": 34.35673522949219, "learning_rate": 1.5e-06, "loss": 4.2665, "step": 30 }, { "epoch": 0.0003560384114583333, "grad_norm": 52.81883239746094, "learning_rate": 1.75e-06, "loss": 4.423, "step": 35 }, { "epoch": 0.0004069010416666667, "grad_norm": 32.41872024536133, "learning_rate": 2.0000000000000003e-06, "loss": 4.6442, "step": 40 }, { "epoch": 0.000457763671875, "grad_norm": 37.73821258544922, "learning_rate": 2.25e-06, "loss": 4.2681, "step": 45 }, { "epoch": 0.0005086263020833334, "grad_norm": 37.740386962890625, "learning_rate": 2.5e-06, "loss": 4.6894, "step": 50 }, { "epoch": 0.0005594889322916666, "grad_norm": 50.862735748291016, "learning_rate": 2.7500000000000004e-06, "loss": 4.3243, "step": 55 }, { "epoch": 0.0006103515625, "grad_norm": 45.02497863769531, "learning_rate": 3e-06, "loss": 4.3276, "step": 60 }, { "epoch": 0.0006612141927083334, "grad_norm": 28.076095581054688, "learning_rate": 3.2500000000000002e-06, "loss": 3.8881, "step": 65 }, { "epoch": 0.0007120768229166666, "grad_norm": 27.940998077392578, "learning_rate": 3.5e-06, "loss": 4.2496, "step": 70 }, { "epoch": 0.000762939453125, "grad_norm": 26.482099533081055, "learning_rate": 3.7500000000000005e-06, "loss": 4.349, "step": 75 }, { "epoch": 0.0008138020833333334, "grad_norm": 32.36627960205078, "learning_rate": 4.000000000000001e-06, "loss": 4.423, "step": 80 }, { "epoch": 0.0008646647135416666, "grad_norm": 48.71563720703125, "learning_rate": 4.25e-06, "loss": 4.65, "step": 85 }, { "epoch": 0.00091552734375, "grad_norm": 25.62880516052246, "learning_rate": 4.5e-06, "loss": 4.2165, "step": 90 }, { "epoch": 0.0009663899739583334, "grad_norm": 33.9738655090332, "learning_rate": 4.75e-06, "loss": 4.1882, "step": 95 }, { "epoch": 0.0010172526041666667, "grad_norm": 24.544464111328125, "learning_rate": 5e-06, "loss": 3.7919, "step": 100 }, { "epoch": 0.001068115234375, "grad_norm": 38.953922271728516, "learning_rate": 4.999999968019047e-06, "loss": 4.5549, "step": 105 }, { "epoch": 0.0011189778645833333, "grad_norm": 30.3071346282959, "learning_rate": 4.999999872076186e-06, "loss": 4.3079, "step": 110 }, { "epoch": 0.0011698404947916667, "grad_norm": 26.42899513244629, "learning_rate": 4.999999712171422e-06, "loss": 4.3139, "step": 115 }, { "epoch": 0.001220703125, "grad_norm": 18.36640739440918, "learning_rate": 4.999999488304758e-06, "loss": 4.1012, "step": 120 }, { "epoch": 0.0012715657552083333, "grad_norm": 22.997697830200195, "learning_rate": 4.999999200476199e-06, "loss": 4.0616, "step": 125 }, { "epoch": 0.0013224283854166667, "grad_norm": 18.358749389648438, "learning_rate": 4.999998848685752e-06, "loss": 4.0783, "step": 130 }, { "epoch": 0.001373291015625, "grad_norm": 16.22219467163086, "learning_rate": 4.999998432933428e-06, "loss": 4.14, "step": 135 }, { "epoch": 0.0014241536458333333, "grad_norm": 31.525222778320312, "learning_rate": 4.999997953219238e-06, "loss": 4.268, "step": 140 }, { "epoch": 0.0014750162760416667, "grad_norm": 16.66249656677246, "learning_rate": 4.999997409543191e-06, "loss": 3.7311, "step": 145 }, { "epoch": 0.00152587890625, "grad_norm": 19.815828323364258, "learning_rate": 4.999996801905304e-06, "loss": 4.0011, "step": 150 }, { "epoch": 0.0015767415364583333, "grad_norm": 27.2629337310791, "learning_rate": 4.9999961303055906e-06, "loss": 4.4585, "step": 155 }, { "epoch": 0.0016276041666666667, "grad_norm": 20.656837463378906, "learning_rate": 4.99999539474407e-06, "loss": 3.6024, "step": 160 }, { "epoch": 0.001678466796875, "grad_norm": 32.12629699707031, "learning_rate": 4.999994595220758e-06, "loss": 3.6387, "step": 165 }, { "epoch": 0.0017293294270833333, "grad_norm": 17.384239196777344, "learning_rate": 4.9999937317356776e-06, "loss": 3.7885, "step": 170 }, { "epoch": 0.0017801920572916667, "grad_norm": 16.839290618896484, "learning_rate": 4.99999280428885e-06, "loss": 3.9351, "step": 175 }, { "epoch": 0.0018310546875, "grad_norm": 17.16065216064453, "learning_rate": 4.9999918128803e-06, "loss": 3.829, "step": 180 }, { "epoch": 0.0018819173177083333, "grad_norm": 19.670543670654297, "learning_rate": 4.999990757510052e-06, "loss": 3.9205, "step": 185 }, { "epoch": 0.0019327799479166667, "grad_norm": 19.776790618896484, "learning_rate": 4.999989638178131e-06, "loss": 4.0733, "step": 190 }, { "epoch": 0.001983642578125, "grad_norm": 17.121217727661133, "learning_rate": 4.99998845488457e-06, "loss": 3.9968, "step": 195 }, { "epoch": 0.0020345052083333335, "grad_norm": 18.365943908691406, "learning_rate": 4.999987207629396e-06, "loss": 3.6687, "step": 200 }, { "epoch": 0.0020853678385416665, "grad_norm": 17.672025680541992, "learning_rate": 4.9999858964126415e-06, "loss": 3.9062, "step": 205 }, { "epoch": 0.00213623046875, "grad_norm": 14.973541259765625, "learning_rate": 4.9999845212343415e-06, "loss": 3.4274, "step": 210 }, { "epoch": 0.0021870930989583335, "grad_norm": 20.310712814331055, "learning_rate": 4.999983082094529e-06, "loss": 3.8899, "step": 215 }, { "epoch": 0.0022379557291666665, "grad_norm": 30.07281494140625, "learning_rate": 4.999981578993242e-06, "loss": 4.083, "step": 220 }, { "epoch": 0.002288818359375, "grad_norm": 14.967458724975586, "learning_rate": 4.999980011930519e-06, "loss": 3.8021, "step": 225 }, { "epoch": 0.0023396809895833335, "grad_norm": 18.596702575683594, "learning_rate": 4.999978380906401e-06, "loss": 4.2005, "step": 230 }, { "epoch": 0.0023905436197916665, "grad_norm": 14.92667293548584, "learning_rate": 4.999976685920927e-06, "loss": 3.7878, "step": 235 }, { "epoch": 0.00244140625, "grad_norm": 26.372528076171875, "learning_rate": 4.999974926974142e-06, "loss": 3.906, "step": 240 }, { "epoch": 0.0024922688802083335, "grad_norm": 11.505077362060547, "learning_rate": 4.9999731040660925e-06, "loss": 3.7275, "step": 245 }, { "epoch": 0.0025431315104166665, "grad_norm": 19.219104766845703, "learning_rate": 4.999971217196824e-06, "loss": 3.6951, "step": 250 }, { "epoch": 0.002593994140625, "grad_norm": 20.686763763427734, "learning_rate": 4.999969266366383e-06, "loss": 4.2166, "step": 255 }, { "epoch": 0.0026448567708333335, "grad_norm": 14.91057300567627, "learning_rate": 4.999967251574821e-06, "loss": 4.1096, "step": 260 }, { "epoch": 0.0026957194010416665, "grad_norm": 14.4781494140625, "learning_rate": 4.99996517282219e-06, "loss": 3.9429, "step": 265 }, { "epoch": 0.00274658203125, "grad_norm": 16.21483039855957, "learning_rate": 4.9999630301085425e-06, "loss": 4.5956, "step": 270 }, { "epoch": 0.0027974446614583335, "grad_norm": 20.831764221191406, "learning_rate": 4.9999608234339336e-06, "loss": 4.0729, "step": 275 }, { "epoch": 0.0028483072916666665, "grad_norm": 16.851608276367188, "learning_rate": 4.999958552798419e-06, "loss": 3.9821, "step": 280 }, { "epoch": 0.002899169921875, "grad_norm": 16.131776809692383, "learning_rate": 4.999956218202058e-06, "loss": 3.912, "step": 285 }, { "epoch": 0.0029500325520833335, "grad_norm": 22.348773956298828, "learning_rate": 4.9999538196449096e-06, "loss": 3.7261, "step": 290 }, { "epoch": 0.0030008951822916665, "grad_norm": 27.426599502563477, "learning_rate": 4.9999513571270355e-06, "loss": 3.9633, "step": 295 }, { "epoch": 0.0030517578125, "grad_norm": 19.74297523498535, "learning_rate": 4.999948830648497e-06, "loss": 3.6241, "step": 300 }, { "epoch": 0.0031026204427083335, "grad_norm": 26.0491943359375, "learning_rate": 4.999946240209362e-06, "loss": 3.8093, "step": 305 }, { "epoch": 0.0031534830729166665, "grad_norm": 17.24481964111328, "learning_rate": 4.999943585809694e-06, "loss": 3.9762, "step": 310 }, { "epoch": 0.003204345703125, "grad_norm": 16.89327621459961, "learning_rate": 4.999940867449562e-06, "loss": 3.5947, "step": 315 }, { "epoch": 0.0032552083333333335, "grad_norm": 19.533809661865234, "learning_rate": 4.999938085129036e-06, "loss": 3.5673, "step": 320 }, { "epoch": 0.0033060709635416665, "grad_norm": 16.76618766784668, "learning_rate": 4.999935238848187e-06, "loss": 3.8095, "step": 325 }, { "epoch": 0.00335693359375, "grad_norm": 15.267281532287598, "learning_rate": 4.999932328607087e-06, "loss": 3.8809, "step": 330 }, { "epoch": 0.0034077962239583335, "grad_norm": 14.550617218017578, "learning_rate": 4.999929354405811e-06, "loss": 3.9913, "step": 335 }, { "epoch": 0.0034586588541666665, "grad_norm": 22.565776824951172, "learning_rate": 4.999926316244434e-06, "loss": 4.0828, "step": 340 }, { "epoch": 0.003509521484375, "grad_norm": 14.583137512207031, "learning_rate": 4.999923214123036e-06, "loss": 4.0128, "step": 345 }, { "epoch": 0.0035603841145833335, "grad_norm": 14.065851211547852, "learning_rate": 4.999920048041694e-06, "loss": 3.5976, "step": 350 }, { "epoch": 0.0036112467447916665, "grad_norm": 22.18216323852539, "learning_rate": 4.999916818000491e-06, "loss": 3.8117, "step": 355 }, { "epoch": 0.003662109375, "grad_norm": 21.645153045654297, "learning_rate": 4.9999135239995076e-06, "loss": 3.5906, "step": 360 }, { "epoch": 0.0037129720052083335, "grad_norm": 13.195281028747559, "learning_rate": 4.9999101660388305e-06, "loss": 3.7585, "step": 365 }, { "epoch": 0.0037638346354166665, "grad_norm": 10.816890716552734, "learning_rate": 4.999906744118545e-06, "loss": 3.7736, "step": 370 }, { "epoch": 0.003814697265625, "grad_norm": 14.775542259216309, "learning_rate": 4.999903258238736e-06, "loss": 3.6809, "step": 375 }, { "epoch": 0.0038655598958333335, "grad_norm": 22.4163761138916, "learning_rate": 4.999899708399496e-06, "loss": 3.6198, "step": 380 }, { "epoch": 0.003916422526041667, "grad_norm": 20.934917449951172, "learning_rate": 4.999896094600914e-06, "loss": 3.7003, "step": 385 }, { "epoch": 0.00396728515625, "grad_norm": 13.397461891174316, "learning_rate": 4.999892416843083e-06, "loss": 3.9415, "step": 390 }, { "epoch": 0.004018147786458333, "grad_norm": 18.704856872558594, "learning_rate": 4.999888675126097e-06, "loss": 3.7429, "step": 395 }, { "epoch": 0.004069010416666667, "grad_norm": 14.650379180908203, "learning_rate": 4.9998848694500526e-06, "loss": 3.8455, "step": 400 }, { "epoch": 0.004119873046875, "grad_norm": 22.322834014892578, "learning_rate": 4.999880999815045e-06, "loss": 3.9125, "step": 405 }, { "epoch": 0.004170735677083333, "grad_norm": 16.105815887451172, "learning_rate": 4.999877066221175e-06, "loss": 3.5135, "step": 410 }, { "epoch": 0.004221598307291667, "grad_norm": 10.558919906616211, "learning_rate": 4.999873068668544e-06, "loss": 3.6677, "step": 415 }, { "epoch": 0.0042724609375, "grad_norm": 10.016486167907715, "learning_rate": 4.999869007157252e-06, "loss": 3.5761, "step": 420 }, { "epoch": 0.004323323567708333, "grad_norm": 16.01995849609375, "learning_rate": 4.999864881687404e-06, "loss": 3.9739, "step": 425 }, { "epoch": 0.004374186197916667, "grad_norm": 21.719324111938477, "learning_rate": 4.999860692259105e-06, "loss": 3.9211, "step": 430 }, { "epoch": 0.004425048828125, "grad_norm": 42.85234069824219, "learning_rate": 4.999856438872463e-06, "loss": 3.9438, "step": 435 }, { "epoch": 0.004475911458333333, "grad_norm": 13.891044616699219, "learning_rate": 4.999852121527588e-06, "loss": 3.6916, "step": 440 }, { "epoch": 0.004526774088541667, "grad_norm": 18.872058868408203, "learning_rate": 4.999847740224587e-06, "loss": 4.0558, "step": 445 }, { "epoch": 0.00457763671875, "grad_norm": 16.054645538330078, "learning_rate": 4.999843294963576e-06, "loss": 3.8301, "step": 450 }, { "epoch": 0.004628499348958333, "grad_norm": 14.235097885131836, "learning_rate": 4.999838785744665e-06, "loss": 3.745, "step": 455 }, { "epoch": 0.004679361979166667, "grad_norm": 21.37851905822754, "learning_rate": 4.999834212567972e-06, "loss": 3.8782, "step": 460 }, { "epoch": 0.004730224609375, "grad_norm": 17.109268188476562, "learning_rate": 4.999829575433613e-06, "loss": 3.4868, "step": 465 }, { "epoch": 0.004781087239583333, "grad_norm": 16.911109924316406, "learning_rate": 4.999824874341708e-06, "loss": 3.4223, "step": 470 }, { "epoch": 0.004831949869791667, "grad_norm": 9.432239532470703, "learning_rate": 4.9998201092923746e-06, "loss": 3.3816, "step": 475 }, { "epoch": 0.0048828125, "grad_norm": 18.146076202392578, "learning_rate": 4.999815280285737e-06, "loss": 4.0282, "step": 480 }, { "epoch": 0.004933675130208333, "grad_norm": 26.35177993774414, "learning_rate": 4.999810387321917e-06, "loss": 3.2918, "step": 485 }, { "epoch": 0.004984537760416667, "grad_norm": 17.22603988647461, "learning_rate": 4.9998054304010425e-06, "loss": 3.7617, "step": 490 }, { "epoch": 0.005035400390625, "grad_norm": 15.333989143371582, "learning_rate": 4.999800409523237e-06, "loss": 4.2317, "step": 495 }, { "epoch": 0.005086263020833333, "grad_norm": 19.130863189697266, "learning_rate": 4.999795324688631e-06, "loss": 3.8223, "step": 500 }, { "epoch": 0.005137125651041667, "grad_norm": 12.130922317504883, "learning_rate": 4.999790175897355e-06, "loss": 3.7448, "step": 505 }, { "epoch": 0.00518798828125, "grad_norm": 18.195701599121094, "learning_rate": 4.999784963149539e-06, "loss": 3.8377, "step": 510 }, { "epoch": 0.005238850911458333, "grad_norm": 13.157418251037598, "learning_rate": 4.999779686445318e-06, "loss": 4.2839, "step": 515 }, { "epoch": 0.005289713541666667, "grad_norm": 15.427362442016602, "learning_rate": 4.999774345784825e-06, "loss": 3.6147, "step": 520 }, { "epoch": 0.005340576171875, "grad_norm": 18.931623458862305, "learning_rate": 4.9997689411681986e-06, "loss": 4.246, "step": 525 }, { "epoch": 0.005391438802083333, "grad_norm": 16.495134353637695, "learning_rate": 4.9997634725955756e-06, "loss": 3.811, "step": 530 }, { "epoch": 0.005442301432291667, "grad_norm": 17.250377655029297, "learning_rate": 4.999757940067098e-06, "loss": 3.9962, "step": 535 }, { "epoch": 0.0054931640625, "grad_norm": 19.960880279541016, "learning_rate": 4.999752343582905e-06, "loss": 3.8214, "step": 540 }, { "epoch": 0.005544026692708333, "grad_norm": 21.063762664794922, "learning_rate": 4.999746683143141e-06, "loss": 3.5984, "step": 545 }, { "epoch": 0.005594889322916667, "grad_norm": 15.372790336608887, "learning_rate": 4.999740958747951e-06, "loss": 3.8445, "step": 550 }, { "epoch": 0.005645751953125, "grad_norm": 12.668439865112305, "learning_rate": 4.9997351703974804e-06, "loss": 4.1112, "step": 555 }, { "epoch": 0.005696614583333333, "grad_norm": 14.557056427001953, "learning_rate": 4.999729318091878e-06, "loss": 3.8517, "step": 560 }, { "epoch": 0.005747477213541667, "grad_norm": 18.399843215942383, "learning_rate": 4.9997234018312945e-06, "loss": 3.5826, "step": 565 }, { "epoch": 0.00579833984375, "grad_norm": 16.69244956970215, "learning_rate": 4.9997174216158795e-06, "loss": 3.6708, "step": 570 }, { "epoch": 0.005849202473958333, "grad_norm": 20.53217887878418, "learning_rate": 4.9997113774457865e-06, "loss": 4.0217, "step": 575 }, { "epoch": 0.005900065104166667, "grad_norm": 15.122044563293457, "learning_rate": 4.999705269321171e-06, "loss": 3.8238, "step": 580 }, { "epoch": 0.005950927734375, "grad_norm": 9.872886657714844, "learning_rate": 4.999699097242189e-06, "loss": 3.4889, "step": 585 }, { "epoch": 0.006001790364583333, "grad_norm": 14.158080101013184, "learning_rate": 4.999692861208997e-06, "loss": 3.5411, "step": 590 }, { "epoch": 0.006052652994791667, "grad_norm": 15.669339179992676, "learning_rate": 4.999686561221756e-06, "loss": 3.936, "step": 595 }, { "epoch": 0.006103515625, "grad_norm": 11.445816040039062, "learning_rate": 4.999680197280628e-06, "loss": 3.8597, "step": 600 }, { "epoch": 0.006154378255208333, "grad_norm": 16.03506088256836, "learning_rate": 4.999673769385773e-06, "loss": 3.4291, "step": 605 }, { "epoch": 0.006205240885416667, "grad_norm": 13.63383960723877, "learning_rate": 4.999667277537358e-06, "loss": 3.8566, "step": 610 }, { "epoch": 0.006256103515625, "grad_norm": 11.830592155456543, "learning_rate": 4.999660721735547e-06, "loss": 3.8303, "step": 615 }, { "epoch": 0.006306966145833333, "grad_norm": 19.99570655822754, "learning_rate": 4.999654101980511e-06, "loss": 3.8236, "step": 620 }, { "epoch": 0.006357828776041667, "grad_norm": 12.528511047363281, "learning_rate": 4.999647418272415e-06, "loss": 4.0486, "step": 625 }, { "epoch": 0.00640869140625, "grad_norm": 15.760631561279297, "learning_rate": 4.999640670611434e-06, "loss": 3.4468, "step": 630 }, { "epoch": 0.006459554036458333, "grad_norm": 19.84269905090332, "learning_rate": 4.999633858997738e-06, "loss": 3.8804, "step": 635 }, { "epoch": 0.006510416666666667, "grad_norm": 11.239544868469238, "learning_rate": 4.999626983431503e-06, "loss": 3.5703, "step": 640 }, { "epoch": 0.006561279296875, "grad_norm": 13.65317440032959, "learning_rate": 4.999620043912904e-06, "loss": 3.7155, "step": 645 }, { "epoch": 0.006612141927083333, "grad_norm": 12.85306167602539, "learning_rate": 4.999613040442118e-06, "loss": 3.9372, "step": 650 }, { "epoch": 0.006663004557291667, "grad_norm": 16.895217895507812, "learning_rate": 4.999605973019325e-06, "loss": 4.0348, "step": 655 }, { "epoch": 0.0067138671875, "grad_norm": 15.865885734558105, "learning_rate": 4.999598841644706e-06, "loss": 3.6799, "step": 660 }, { "epoch": 0.006764729817708333, "grad_norm": 17.68973731994629, "learning_rate": 4.999591646318443e-06, "loss": 3.446, "step": 665 }, { "epoch": 0.006815592447916667, "grad_norm": 17.351646423339844, "learning_rate": 4.99958438704072e-06, "loss": 3.9662, "step": 670 }, { "epoch": 0.006866455078125, "grad_norm": 12.52562427520752, "learning_rate": 4.999577063811723e-06, "loss": 3.564, "step": 675 }, { "epoch": 0.006917317708333333, "grad_norm": 15.416642189025879, "learning_rate": 4.999569676631639e-06, "loss": 3.9313, "step": 680 }, { "epoch": 0.006968180338541667, "grad_norm": 11.908843994140625, "learning_rate": 4.999562225500658e-06, "loss": 3.7715, "step": 685 }, { "epoch": 0.00701904296875, "grad_norm": 15.781961441040039, "learning_rate": 4.999554710418969e-06, "loss": 3.467, "step": 690 }, { "epoch": 0.007069905598958333, "grad_norm": 17.706050872802734, "learning_rate": 4.999547131386766e-06, "loss": 3.442, "step": 695 }, { "epoch": 0.007120768229166667, "grad_norm": 16.651485443115234, "learning_rate": 4.999539488404242e-06, "loss": 3.7181, "step": 700 }, { "epoch": 0.007171630859375, "grad_norm": 10.602474212646484, "learning_rate": 4.9995317814715925e-06, "loss": 3.6258, "step": 705 }, { "epoch": 0.007222493489583333, "grad_norm": 20.507596969604492, "learning_rate": 4.999524010589015e-06, "loss": 3.9877, "step": 710 }, { "epoch": 0.007273356119791667, "grad_norm": 17.32587432861328, "learning_rate": 4.999516175756708e-06, "loss": 3.7221, "step": 715 }, { "epoch": 0.00732421875, "grad_norm": 11.81552791595459, "learning_rate": 4.9995082769748715e-06, "loss": 3.4813, "step": 720 }, { "epoch": 0.007375081380208333, "grad_norm": 13.437356948852539, "learning_rate": 4.9995003142437086e-06, "loss": 3.6507, "step": 725 }, { "epoch": 0.007425944010416667, "grad_norm": 25.208234786987305, "learning_rate": 4.999492287563422e-06, "loss": 3.9219, "step": 730 }, { "epoch": 0.007476806640625, "grad_norm": 13.51179313659668, "learning_rate": 4.999484196934219e-06, "loss": 3.5389, "step": 735 }, { "epoch": 0.007527669270833333, "grad_norm": 13.544529914855957, "learning_rate": 4.999476042356305e-06, "loss": 3.597, "step": 740 }, { "epoch": 0.007578531901041667, "grad_norm": 10.314176559448242, "learning_rate": 4.999467823829888e-06, "loss": 3.8758, "step": 745 }, { "epoch": 0.00762939453125, "grad_norm": 21.49091148376465, "learning_rate": 4.99945954135518e-06, "loss": 3.3777, "step": 750 }, { "epoch": 0.007680257161458333, "grad_norm": 12.480860710144043, "learning_rate": 4.999451194932392e-06, "loss": 3.511, "step": 755 }, { "epoch": 0.007731119791666667, "grad_norm": 15.50587272644043, "learning_rate": 4.999442784561737e-06, "loss": 3.6119, "step": 760 }, { "epoch": 0.007781982421875, "grad_norm": 17.906938552856445, "learning_rate": 4.9994343102434314e-06, "loss": 3.8537, "step": 765 }, { "epoch": 0.007832845052083334, "grad_norm": 18.00349998474121, "learning_rate": 4.9994257719776915e-06, "loss": 3.5682, "step": 770 }, { "epoch": 0.007883707682291666, "grad_norm": 14.669235229492188, "learning_rate": 4.999417169764735e-06, "loss": 3.4905, "step": 775 }, { "epoch": 0.0079345703125, "grad_norm": 15.826396942138672, "learning_rate": 4.999408503604783e-06, "loss": 3.9937, "step": 780 }, { "epoch": 0.007985432942708334, "grad_norm": 17.248136520385742, "learning_rate": 4.999399773498057e-06, "loss": 4.14, "step": 785 }, { "epoch": 0.008036295572916666, "grad_norm": 16.377283096313477, "learning_rate": 4.99939097944478e-06, "loss": 4.2206, "step": 790 }, { "epoch": 0.008087158203125, "grad_norm": 11.323334693908691, "learning_rate": 4.9993821214451774e-06, "loss": 4.1021, "step": 795 }, { "epoch": 0.008138020833333334, "grad_norm": 14.058307647705078, "learning_rate": 4.999373199499476e-06, "loss": 3.9362, "step": 800 }, { "epoch": 0.008188883463541666, "grad_norm": 17.746889114379883, "learning_rate": 4.9993642136079025e-06, "loss": 4.0898, "step": 805 }, { "epoch": 0.00823974609375, "grad_norm": 17.276458740234375, "learning_rate": 4.999355163770688e-06, "loss": 3.9971, "step": 810 }, { "epoch": 0.008290608723958334, "grad_norm": 20.424314498901367, "learning_rate": 4.999346049988065e-06, "loss": 3.9069, "step": 815 }, { "epoch": 0.008341471354166666, "grad_norm": 21.62326431274414, "learning_rate": 4.999336872260266e-06, "loss": 3.812, "step": 820 }, { "epoch": 0.008392333984375, "grad_norm": 12.865884780883789, "learning_rate": 4.999327630587525e-06, "loss": 3.7855, "step": 825 }, { "epoch": 0.008443196614583334, "grad_norm": 14.172892570495605, "learning_rate": 4.999318324970079e-06, "loss": 3.8214, "step": 830 }, { "epoch": 0.008494059244791666, "grad_norm": 19.895843505859375, "learning_rate": 4.999308955408166e-06, "loss": 3.5814, "step": 835 }, { "epoch": 0.008544921875, "grad_norm": 14.874984741210938, "learning_rate": 4.999299521902026e-06, "loss": 3.64, "step": 840 }, { "epoch": 0.008595784505208334, "grad_norm": 15.312943458557129, "learning_rate": 4.9992900244519e-06, "loss": 3.7124, "step": 845 }, { "epoch": 0.008646647135416666, "grad_norm": 17.663049697875977, "learning_rate": 4.999280463058031e-06, "loss": 3.3959, "step": 850 }, { "epoch": 0.008697509765625, "grad_norm": 19.949892044067383, "learning_rate": 4.999270837720663e-06, "loss": 3.4157, "step": 855 }, { "epoch": 0.008748372395833334, "grad_norm": 14.926891326904297, "learning_rate": 4.9992611484400444e-06, "loss": 3.4777, "step": 860 }, { "epoch": 0.008799235026041666, "grad_norm": 13.239298820495605, "learning_rate": 4.999251395216421e-06, "loss": 3.6932, "step": 865 }, { "epoch": 0.00885009765625, "grad_norm": 16.126522064208984, "learning_rate": 4.999241578050044e-06, "loss": 3.7468, "step": 870 }, { "epoch": 0.008900960286458334, "grad_norm": 26.55894660949707, "learning_rate": 4.999231696941162e-06, "loss": 3.2945, "step": 875 }, { "epoch": 0.008951822916666666, "grad_norm": 16.377870559692383, "learning_rate": 4.99922175189003e-06, "loss": 3.8092, "step": 880 }, { "epoch": 0.009002685546875, "grad_norm": 25.377391815185547, "learning_rate": 4.999211742896902e-06, "loss": 4.0082, "step": 885 }, { "epoch": 0.009053548177083334, "grad_norm": 16.60061264038086, "learning_rate": 4.999201669962034e-06, "loss": 3.717, "step": 890 }, { "epoch": 0.009104410807291666, "grad_norm": 14.67759895324707, "learning_rate": 4.999191533085684e-06, "loss": 3.5591, "step": 895 }, { "epoch": 0.0091552734375, "grad_norm": 10.630406379699707, "learning_rate": 4.9991813322681105e-06, "loss": 3.6146, "step": 900 }, { "epoch": 0.009206136067708334, "grad_norm": 13.745230674743652, "learning_rate": 4.999171067509575e-06, "loss": 3.5511, "step": 905 }, { "epoch": 0.009256998697916666, "grad_norm": 15.126039505004883, "learning_rate": 4.99916073881034e-06, "loss": 3.6083, "step": 910 }, { "epoch": 0.009307861328125, "grad_norm": 18.30549430847168, "learning_rate": 4.99915034617067e-06, "loss": 3.4733, "step": 915 }, { "epoch": 0.009358723958333334, "grad_norm": 12.323296546936035, "learning_rate": 4.999139889590832e-06, "loss": 3.6615, "step": 920 }, { "epoch": 0.009409586588541666, "grad_norm": 19.943538665771484, "learning_rate": 4.999129369071091e-06, "loss": 4.1944, "step": 925 }, { "epoch": 0.00946044921875, "grad_norm": 10.738496780395508, "learning_rate": 4.9991187846117175e-06, "loss": 3.4904, "step": 930 }, { "epoch": 0.009511311848958334, "grad_norm": 11.223962783813477, "learning_rate": 4.999108136212982e-06, "loss": 3.6925, "step": 935 }, { "epoch": 0.009562174479166666, "grad_norm": 13.026283264160156, "learning_rate": 4.999097423875158e-06, "loss": 4.1644, "step": 940 }, { "epoch": 0.009613037109375, "grad_norm": 16.166419982910156, "learning_rate": 4.999086647598518e-06, "loss": 3.5475, "step": 945 }, { "epoch": 0.009663899739583334, "grad_norm": 18.75146484375, "learning_rate": 4.999075807383339e-06, "loss": 3.4752, "step": 950 }, { "epoch": 0.009714762369791666, "grad_norm": 14.348006248474121, "learning_rate": 4.999064903229897e-06, "loss": 3.5193, "step": 955 }, { "epoch": 0.009765625, "grad_norm": 13.05683708190918, "learning_rate": 4.9990539351384725e-06, "loss": 3.5619, "step": 960 }, { "epoch": 0.009816487630208334, "grad_norm": 14.792659759521484, "learning_rate": 4.999042903109345e-06, "loss": 3.2695, "step": 965 }, { "epoch": 0.009867350260416666, "grad_norm": 11.70416259765625, "learning_rate": 4.999031807142798e-06, "loss": 3.7578, "step": 970 }, { "epoch": 0.009918212890625, "grad_norm": 10.735535621643066, "learning_rate": 4.999020647239114e-06, "loss": 3.8695, "step": 975 }, { "epoch": 0.009969075520833334, "grad_norm": 17.912128448486328, "learning_rate": 4.999009423398579e-06, "loss": 3.5609, "step": 980 }, { "epoch": 0.010019938151041666, "grad_norm": 14.438199043273926, "learning_rate": 4.99899813562148e-06, "loss": 3.7282, "step": 985 }, { "epoch": 0.01007080078125, "grad_norm": 15.654008865356445, "learning_rate": 4.9989867839081065e-06, "loss": 3.8585, "step": 990 }, { "epoch": 0.010121663411458334, "grad_norm": 15.130616188049316, "learning_rate": 4.998975368258749e-06, "loss": 3.4857, "step": 995 }, { "epoch": 0.010172526041666666, "grad_norm": 15.098103523254395, "learning_rate": 4.998963888673698e-06, "loss": 3.7006, "step": 1000 }, { "epoch": 0.010223388671875, "grad_norm": 14.421039581298828, "learning_rate": 4.998952345153249e-06, "loss": 3.7491, "step": 1005 }, { "epoch": 0.010274251302083334, "grad_norm": 20.087514877319336, "learning_rate": 4.998940737697695e-06, "loss": 3.7776, "step": 1010 }, { "epoch": 0.010325113932291666, "grad_norm": 18.88400650024414, "learning_rate": 4.998929066307336e-06, "loss": 3.7481, "step": 1015 }, { "epoch": 0.0103759765625, "grad_norm": 21.07455062866211, "learning_rate": 4.998917330982469e-06, "loss": 3.5294, "step": 1020 }, { "epoch": 0.010426839192708334, "grad_norm": 11.424040794372559, "learning_rate": 4.998905531723394e-06, "loss": 3.9097, "step": 1025 }, { "epoch": 0.010477701822916666, "grad_norm": 10.922247886657715, "learning_rate": 4.998893668530414e-06, "loss": 3.5514, "step": 1030 }, { "epoch": 0.010528564453125, "grad_norm": 17.195194244384766, "learning_rate": 4.99888174140383e-06, "loss": 3.616, "step": 1035 }, { "epoch": 0.010579427083333334, "grad_norm": 10.144538879394531, "learning_rate": 4.998869750343951e-06, "loss": 3.7874, "step": 1040 }, { "epoch": 0.010630289713541666, "grad_norm": 11.742879867553711, "learning_rate": 4.998857695351081e-06, "loss": 3.9271, "step": 1045 }, { "epoch": 0.01068115234375, "grad_norm": 13.740591049194336, "learning_rate": 4.998845576425529e-06, "loss": 3.7697, "step": 1050 }, { "epoch": 0.010732014973958334, "grad_norm": 15.526152610778809, "learning_rate": 4.998833393567605e-06, "loss": 3.3944, "step": 1055 }, { "epoch": 0.010782877604166666, "grad_norm": 14.3406982421875, "learning_rate": 4.998821146777622e-06, "loss": 3.8095, "step": 1060 }, { "epoch": 0.010833740234375, "grad_norm": 12.40285873413086, "learning_rate": 4.99880883605589e-06, "loss": 3.6193, "step": 1065 }, { "epoch": 0.010884602864583334, "grad_norm": 20.011245727539062, "learning_rate": 4.998796461402729e-06, "loss": 3.8485, "step": 1070 }, { "epoch": 0.010935465494791666, "grad_norm": 15.710371017456055, "learning_rate": 4.998784022818452e-06, "loss": 3.8256, "step": 1075 }, { "epoch": 0.010986328125, "grad_norm": 18.561986923217773, "learning_rate": 4.998771520303376e-06, "loss": 3.9108, "step": 1080 }, { "epoch": 0.011037190755208334, "grad_norm": 18.281129837036133, "learning_rate": 4.998758953857825e-06, "loss": 3.4311, "step": 1085 }, { "epoch": 0.011088053385416666, "grad_norm": 14.150856018066406, "learning_rate": 4.998746323482117e-06, "loss": 3.5551, "step": 1090 }, { "epoch": 0.011138916015625, "grad_norm": 13.725726127624512, "learning_rate": 4.9987336291765784e-06, "loss": 3.6229, "step": 1095 }, { "epoch": 0.011189778645833334, "grad_norm": 11.532977104187012, "learning_rate": 4.998720870941531e-06, "loss": 3.5328, "step": 1100 }, { "epoch": 0.011240641276041666, "grad_norm": 15.219062805175781, "learning_rate": 4.998708048777303e-06, "loss": 3.7603, "step": 1105 }, { "epoch": 0.01129150390625, "grad_norm": 10.167997360229492, "learning_rate": 4.9986951626842215e-06, "loss": 4.157, "step": 1110 }, { "epoch": 0.011342366536458334, "grad_norm": 25.62779998779297, "learning_rate": 4.9986822126626165e-06, "loss": 3.539, "step": 1115 }, { "epoch": 0.011393229166666666, "grad_norm": 13.080843925476074, "learning_rate": 4.998669198712819e-06, "loss": 3.9684, "step": 1120 }, { "epoch": 0.011444091796875, "grad_norm": 13.87924861907959, "learning_rate": 4.998656120835163e-06, "loss": 3.6138, "step": 1125 }, { "epoch": 0.011494954427083334, "grad_norm": 16.160778045654297, "learning_rate": 4.998642979029982e-06, "loss": 3.7762, "step": 1130 }, { "epoch": 0.011545817057291666, "grad_norm": 12.456393241882324, "learning_rate": 4.998629773297613e-06, "loss": 3.5095, "step": 1135 }, { "epoch": 0.0115966796875, "grad_norm": 18.339937210083008, "learning_rate": 4.998616503638393e-06, "loss": 3.6277, "step": 1140 }, { "epoch": 0.011647542317708334, "grad_norm": 13.26982307434082, "learning_rate": 4.998603170052662e-06, "loss": 3.9732, "step": 1145 }, { "epoch": 0.011698404947916666, "grad_norm": 12.922799110412598, "learning_rate": 4.9985897725407616e-06, "loss": 3.4633, "step": 1150 }, { "epoch": 0.011749267578125, "grad_norm": 19.501089096069336, "learning_rate": 4.998576311103033e-06, "loss": 3.7619, "step": 1155 }, { "epoch": 0.011800130208333334, "grad_norm": 12.718639373779297, "learning_rate": 4.998562785739823e-06, "loss": 3.7302, "step": 1160 }, { "epoch": 0.011850992838541666, "grad_norm": 10.442389488220215, "learning_rate": 4.998549196451475e-06, "loss": 3.5264, "step": 1165 }, { "epoch": 0.01190185546875, "grad_norm": 15.280603408813477, "learning_rate": 4.99853554323834e-06, "loss": 3.7362, "step": 1170 }, { "epoch": 0.011952718098958334, "grad_norm": 14.212106704711914, "learning_rate": 4.998521826100764e-06, "loss": 3.7874, "step": 1175 }, { "epoch": 0.012003580729166666, "grad_norm": 13.269865989685059, "learning_rate": 4.998508045039099e-06, "loss": 3.6369, "step": 1180 }, { "epoch": 0.012054443359375, "grad_norm": 12.6091890335083, "learning_rate": 4.998494200053698e-06, "loss": 3.6542, "step": 1185 }, { "epoch": 0.012105305989583334, "grad_norm": 14.155792236328125, "learning_rate": 4.998480291144916e-06, "loss": 4.0782, "step": 1190 }, { "epoch": 0.012156168619791666, "grad_norm": 19.589990615844727, "learning_rate": 4.998466318313108e-06, "loss": 3.7343, "step": 1195 }, { "epoch": 0.01220703125, "grad_norm": 13.791729927062988, "learning_rate": 4.99845228155863e-06, "loss": 3.6456, "step": 1200 }, { "epoch": 0.012257893880208334, "grad_norm": 13.561478614807129, "learning_rate": 4.998438180881844e-06, "loss": 4.0166, "step": 1205 }, { "epoch": 0.012308756510416666, "grad_norm": 18.43284034729004, "learning_rate": 4.998424016283109e-06, "loss": 3.6783, "step": 1210 }, { "epoch": 0.012359619140625, "grad_norm": 19.545591354370117, "learning_rate": 4.9984097877627865e-06, "loss": 3.7069, "step": 1215 }, { "epoch": 0.012410481770833334, "grad_norm": 16.589338302612305, "learning_rate": 4.998395495321243e-06, "loss": 3.5455, "step": 1220 }, { "epoch": 0.012461344401041666, "grad_norm": 13.449020385742188, "learning_rate": 4.998381138958843e-06, "loss": 3.6977, "step": 1225 }, { "epoch": 0.01251220703125, "grad_norm": 17.311525344848633, "learning_rate": 4.9983667186759535e-06, "loss": 3.6899, "step": 1230 }, { "epoch": 0.012563069661458334, "grad_norm": 15.647948265075684, "learning_rate": 4.998352234472944e-06, "loss": 3.1426, "step": 1235 }, { "epoch": 0.012613932291666666, "grad_norm": 22.255765914916992, "learning_rate": 4.998337686350184e-06, "loss": 3.66, "step": 1240 }, { "epoch": 0.012664794921875, "grad_norm": 15.966592788696289, "learning_rate": 4.998323074308047e-06, "loss": 3.6801, "step": 1245 }, { "epoch": 0.012715657552083334, "grad_norm": 10.439255714416504, "learning_rate": 4.998308398346906e-06, "loss": 3.6352, "step": 1250 }, { "epoch": 0.012766520182291666, "grad_norm": 14.009291648864746, "learning_rate": 4.998293658467137e-06, "loss": 3.5148, "step": 1255 }, { "epoch": 0.0128173828125, "grad_norm": 9.830448150634766, "learning_rate": 4.998278854669117e-06, "loss": 3.405, "step": 1260 }, { "epoch": 0.012868245442708334, "grad_norm": 18.137758255004883, "learning_rate": 4.998263986953224e-06, "loss": 3.2475, "step": 1265 }, { "epoch": 0.012919108072916666, "grad_norm": 13.296918869018555, "learning_rate": 4.99824905531984e-06, "loss": 3.5983, "step": 1270 }, { "epoch": 0.012969970703125, "grad_norm": 17.331226348876953, "learning_rate": 4.9982340597693455e-06, "loss": 3.4618, "step": 1275 }, { "epoch": 0.013020833333333334, "grad_norm": 16.383100509643555, "learning_rate": 4.998219000302125e-06, "loss": 3.5292, "step": 1280 }, { "epoch": 0.013071695963541666, "grad_norm": 10.87190055847168, "learning_rate": 4.998203876918564e-06, "loss": 3.6293, "step": 1285 }, { "epoch": 0.01312255859375, "grad_norm": 12.597033500671387, "learning_rate": 4.998188689619048e-06, "loss": 3.7316, "step": 1290 }, { "epoch": 0.013173421223958334, "grad_norm": 12.681578636169434, "learning_rate": 4.998173438403966e-06, "loss": 3.478, "step": 1295 }, { "epoch": 0.013224283854166666, "grad_norm": 16.530866622924805, "learning_rate": 4.99815812327371e-06, "loss": 3.7343, "step": 1300 }, { "epoch": 0.013275146484375, "grad_norm": 17.380083084106445, "learning_rate": 4.99814274422867e-06, "loss": 3.5332, "step": 1305 }, { "epoch": 0.013326009114583334, "grad_norm": 15.711562156677246, "learning_rate": 4.998127301269241e-06, "loss": 3.4281, "step": 1310 }, { "epoch": 0.013376871744791666, "grad_norm": 13.467732429504395, "learning_rate": 4.998111794395816e-06, "loss": 3.588, "step": 1315 }, { "epoch": 0.013427734375, "grad_norm": 16.989633560180664, "learning_rate": 4.998096223608792e-06, "loss": 3.304, "step": 1320 }, { "epoch": 0.013478597005208334, "grad_norm": 12.060418128967285, "learning_rate": 4.998080588908571e-06, "loss": 3.121, "step": 1325 }, { "epoch": 0.013529459635416666, "grad_norm": 15.241015434265137, "learning_rate": 4.9980648902955475e-06, "loss": 3.6774, "step": 1330 }, { "epoch": 0.013580322265625, "grad_norm": 15.760343551635742, "learning_rate": 4.998049127770127e-06, "loss": 3.2477, "step": 1335 }, { "epoch": 0.013631184895833334, "grad_norm": 15.907750129699707, "learning_rate": 4.998033301332712e-06, "loss": 3.3698, "step": 1340 }, { "epoch": 0.013682047526041666, "grad_norm": 17.1384334564209, "learning_rate": 4.9980174109837065e-06, "loss": 3.7272, "step": 1345 }, { "epoch": 0.01373291015625, "grad_norm": 16.641490936279297, "learning_rate": 4.998001456723518e-06, "loss": 3.6683, "step": 1350 }, { "epoch": 0.013783772786458334, "grad_norm": 8.19157600402832, "learning_rate": 4.997985438552554e-06, "loss": 3.6485, "step": 1355 }, { "epoch": 0.013834635416666666, "grad_norm": 13.124740600585938, "learning_rate": 4.997969356471225e-06, "loss": 3.6489, "step": 1360 }, { "epoch": 0.013885498046875, "grad_norm": 11.489811897277832, "learning_rate": 4.997953210479941e-06, "loss": 3.9778, "step": 1365 }, { "epoch": 0.013936360677083334, "grad_norm": 18.886642456054688, "learning_rate": 4.997937000579118e-06, "loss": 3.7343, "step": 1370 }, { "epoch": 0.013987223307291666, "grad_norm": 9.009153366088867, "learning_rate": 4.997920726769168e-06, "loss": 3.5347, "step": 1375 }, { "epoch": 0.0140380859375, "grad_norm": 14.774568557739258, "learning_rate": 4.997904389050508e-06, "loss": 3.7011, "step": 1380 }, { "epoch": 0.014088948567708334, "grad_norm": 26.565807342529297, "learning_rate": 4.997887987423556e-06, "loss": 4.026, "step": 1385 }, { "epoch": 0.014139811197916666, "grad_norm": 13.156881332397461, "learning_rate": 4.997871521888733e-06, "loss": 3.645, "step": 1390 }, { "epoch": 0.014190673828125, "grad_norm": 13.961007118225098, "learning_rate": 4.9978549924464595e-06, "loss": 3.638, "step": 1395 }, { "epoch": 0.014241536458333334, "grad_norm": 12.008218765258789, "learning_rate": 4.997838399097157e-06, "loss": 3.7365, "step": 1400 }, { "epoch": 0.014292399088541666, "grad_norm": 16.033838272094727, "learning_rate": 4.997821741841251e-06, "loss": 3.8982, "step": 1405 }, { "epoch": 0.01434326171875, "grad_norm": 10.620261192321777, "learning_rate": 4.997805020679169e-06, "loss": 3.5443, "step": 1410 }, { "epoch": 0.014394124348958334, "grad_norm": 13.818375587463379, "learning_rate": 4.997788235611336e-06, "loss": 3.8064, "step": 1415 }, { "epoch": 0.014444986979166666, "grad_norm": 14.587963104248047, "learning_rate": 4.997771386638184e-06, "loss": 3.599, "step": 1420 }, { "epoch": 0.014495849609375, "grad_norm": 11.063785552978516, "learning_rate": 4.997754473760143e-06, "loss": 3.6159, "step": 1425 }, { "epoch": 0.014546712239583334, "grad_norm": 11.75071907043457, "learning_rate": 4.997737496977645e-06, "loss": 4.4565, "step": 1430 }, { "epoch": 0.014597574869791666, "grad_norm": 14.461017608642578, "learning_rate": 4.997720456291126e-06, "loss": 3.7098, "step": 1435 }, { "epoch": 0.0146484375, "grad_norm": 11.124221801757812, "learning_rate": 4.997703351701021e-06, "loss": 3.5637, "step": 1440 }, { "epoch": 0.014699300130208334, "grad_norm": 13.678889274597168, "learning_rate": 4.997686183207767e-06, "loss": 3.5194, "step": 1445 }, { "epoch": 0.014750162760416666, "grad_norm": 11.239283561706543, "learning_rate": 4.9976689508118055e-06, "loss": 3.6568, "step": 1450 }, { "epoch": 0.014801025390625, "grad_norm": 12.112907409667969, "learning_rate": 4.997651654513575e-06, "loss": 3.6608, "step": 1455 }, { "epoch": 0.014851888020833334, "grad_norm": 13.270151138305664, "learning_rate": 4.997634294313519e-06, "loss": 3.5079, "step": 1460 }, { "epoch": 0.014902750651041666, "grad_norm": 13.076834678649902, "learning_rate": 4.997616870212082e-06, "loss": 3.3748, "step": 1465 }, { "epoch": 0.01495361328125, "grad_norm": 10.513318061828613, "learning_rate": 4.997599382209709e-06, "loss": 3.6342, "step": 1470 }, { "epoch": 0.015004475911458334, "grad_norm": 18.05800437927246, "learning_rate": 4.997581830306848e-06, "loss": 3.6844, "step": 1475 }, { "epoch": 0.015055338541666666, "grad_norm": 10.047406196594238, "learning_rate": 4.997564214503947e-06, "loss": 4.0638, "step": 1480 }, { "epoch": 0.015106201171875, "grad_norm": 15.92277717590332, "learning_rate": 4.997546534801459e-06, "loss": 3.7102, "step": 1485 }, { "epoch": 0.015157063802083334, "grad_norm": 15.660829544067383, "learning_rate": 4.997528791199834e-06, "loss": 4.0566, "step": 1490 }, { "epoch": 0.015207926432291666, "grad_norm": 21.788442611694336, "learning_rate": 4.997510983699527e-06, "loss": 4.1864, "step": 1495 }, { "epoch": 0.0152587890625, "grad_norm": 10.556007385253906, "learning_rate": 4.997493112300994e-06, "loss": 3.621, "step": 1500 }, { "epoch": 0.015309651692708334, "grad_norm": 13.028473854064941, "learning_rate": 4.99747517700469e-06, "loss": 4.406, "step": 1505 }, { "epoch": 0.015360514322916666, "grad_norm": 14.941851615905762, "learning_rate": 4.997457177811077e-06, "loss": 3.1199, "step": 1510 }, { "epoch": 0.015411376953125, "grad_norm": 18.102357864379883, "learning_rate": 4.997439114720614e-06, "loss": 3.7058, "step": 1515 }, { "epoch": 0.015462239583333334, "grad_norm": 22.9876651763916, "learning_rate": 4.997420987733763e-06, "loss": 3.5268, "step": 1520 }, { "epoch": 0.015513102213541666, "grad_norm": 11.794015884399414, "learning_rate": 4.997402796850989e-06, "loss": 3.6392, "step": 1525 }, { "epoch": 0.01556396484375, "grad_norm": 11.62551212310791, "learning_rate": 4.997384542072755e-06, "loss": 3.4675, "step": 1530 }, { "epoch": 0.015614827473958334, "grad_norm": 14.436004638671875, "learning_rate": 4.99736622339953e-06, "loss": 3.657, "step": 1535 }, { "epoch": 0.015665690104166668, "grad_norm": 13.236321449279785, "learning_rate": 4.997347840831782e-06, "loss": 3.2599, "step": 1540 }, { "epoch": 0.015716552734375, "grad_norm": 11.505661010742188, "learning_rate": 4.997329394369981e-06, "loss": 3.6238, "step": 1545 }, { "epoch": 0.015767415364583332, "grad_norm": 14.669344902038574, "learning_rate": 4.997310884014599e-06, "loss": 3.4919, "step": 1550 }, { "epoch": 0.015818277994791668, "grad_norm": 9.981671333312988, "learning_rate": 4.997292309766111e-06, "loss": 3.4323, "step": 1555 }, { "epoch": 0.015869140625, "grad_norm": 13.35000228881836, "learning_rate": 4.997273671624991e-06, "loss": 3.5778, "step": 1560 }, { "epoch": 0.015920003255208332, "grad_norm": 13.411084175109863, "learning_rate": 4.997254969591716e-06, "loss": 3.8477, "step": 1565 }, { "epoch": 0.015970865885416668, "grad_norm": 13.634716033935547, "learning_rate": 4.997236203666764e-06, "loss": 3.4397, "step": 1570 }, { "epoch": 0.016021728515625, "grad_norm": 9.219624519348145, "learning_rate": 4.997217373850617e-06, "loss": 3.5114, "step": 1575 }, { "epoch": 0.016072591145833332, "grad_norm": 12.118866920471191, "learning_rate": 4.997198480143755e-06, "loss": 3.7325, "step": 1580 }, { "epoch": 0.016123453776041668, "grad_norm": 12.5546875, "learning_rate": 4.99717952254666e-06, "loss": 3.6924, "step": 1585 }, { "epoch": 0.01617431640625, "grad_norm": 15.333346366882324, "learning_rate": 4.99716050105982e-06, "loss": 3.4589, "step": 1590 }, { "epoch": 0.016225179036458332, "grad_norm": 11.861115455627441, "learning_rate": 4.997141415683721e-06, "loss": 3.7009, "step": 1595 }, { "epoch": 0.016276041666666668, "grad_norm": 15.62850570678711, "learning_rate": 4.99712226641885e-06, "loss": 4.2749, "step": 1600 }, { "epoch": 0.016326904296875, "grad_norm": 16.968338012695312, "learning_rate": 4.997103053265698e-06, "loss": 3.458, "step": 1605 }, { "epoch": 0.016377766927083332, "grad_norm": 15.927022933959961, "learning_rate": 4.997083776224757e-06, "loss": 3.5739, "step": 1610 }, { "epoch": 0.016428629557291668, "grad_norm": 12.973907470703125, "learning_rate": 4.997064435296518e-06, "loss": 3.5018, "step": 1615 }, { "epoch": 0.0164794921875, "grad_norm": 16.26294708251953, "learning_rate": 4.997045030481478e-06, "loss": 3.8331, "step": 1620 }, { "epoch": 0.016530354817708332, "grad_norm": 14.90145492553711, "learning_rate": 4.997025561780133e-06, "loss": 3.7663, "step": 1625 }, { "epoch": 0.016581217447916668, "grad_norm": 10.96834945678711, "learning_rate": 4.9970060291929816e-06, "loss": 3.2283, "step": 1630 }, { "epoch": 0.016632080078125, "grad_norm": 14.805373191833496, "learning_rate": 4.996986432720521e-06, "loss": 3.5404, "step": 1635 }, { "epoch": 0.016682942708333332, "grad_norm": 14.054951667785645, "learning_rate": 4.996966772363255e-06, "loss": 3.7508, "step": 1640 }, { "epoch": 0.016733805338541668, "grad_norm": 18.448801040649414, "learning_rate": 4.996947048121686e-06, "loss": 3.1579, "step": 1645 }, { "epoch": 0.01678466796875, "grad_norm": 11.387920379638672, "learning_rate": 4.996927259996319e-06, "loss": 3.1547, "step": 1650 }, { "epoch": 0.016835530598958332, "grad_norm": 17.80491828918457, "learning_rate": 4.99690740798766e-06, "loss": 3.5202, "step": 1655 }, { "epoch": 0.016886393229166668, "grad_norm": 15.007222175598145, "learning_rate": 4.9968874920962165e-06, "loss": 3.6235, "step": 1660 }, { "epoch": 0.016937255859375, "grad_norm": 10.149649620056152, "learning_rate": 4.996867512322499e-06, "loss": 3.6799, "step": 1665 }, { "epoch": 0.016988118489583332, "grad_norm": 17.945354461669922, "learning_rate": 4.996847468667016e-06, "loss": 3.9798, "step": 1670 }, { "epoch": 0.017038981119791668, "grad_norm": 15.979263305664062, "learning_rate": 4.9968273611302845e-06, "loss": 3.3488, "step": 1675 }, { "epoch": 0.01708984375, "grad_norm": 14.131532669067383, "learning_rate": 4.996807189712815e-06, "loss": 3.3508, "step": 1680 }, { "epoch": 0.017140706380208332, "grad_norm": 19.903356552124023, "learning_rate": 4.996786954415127e-06, "loss": 3.5976, "step": 1685 }, { "epoch": 0.017191569010416668, "grad_norm": 13.6408052444458, "learning_rate": 4.996766655237736e-06, "loss": 3.4822, "step": 1690 }, { "epoch": 0.017242431640625, "grad_norm": 9.797231674194336, "learning_rate": 4.9967462921811614e-06, "loss": 3.9847, "step": 1695 }, { "epoch": 0.017293294270833332, "grad_norm": 13.015440940856934, "learning_rate": 4.996725865245926e-06, "loss": 3.5052, "step": 1700 }, { "epoch": 0.017344156901041668, "grad_norm": 17.70994758605957, "learning_rate": 4.99670537443255e-06, "loss": 3.9466, "step": 1705 }, { "epoch": 0.01739501953125, "grad_norm": 9.877345085144043, "learning_rate": 4.996684819741559e-06, "loss": 3.6346, "step": 1710 }, { "epoch": 0.017445882161458332, "grad_norm": 12.302515983581543, "learning_rate": 4.996664201173478e-06, "loss": 3.3808, "step": 1715 }, { "epoch": 0.017496744791666668, "grad_norm": 12.325230598449707, "learning_rate": 4.9966435187288365e-06, "loss": 3.8128, "step": 1720 }, { "epoch": 0.017547607421875, "grad_norm": 14.4461669921875, "learning_rate": 4.996622772408162e-06, "loss": 4.3367, "step": 1725 }, { "epoch": 0.017598470052083332, "grad_norm": 15.207497596740723, "learning_rate": 4.996601962211985e-06, "loss": 3.5895, "step": 1730 }, { "epoch": 0.017649332682291668, "grad_norm": 10.924785614013672, "learning_rate": 4.9965810881408384e-06, "loss": 3.3813, "step": 1735 }, { "epoch": 0.0177001953125, "grad_norm": 19.209896087646484, "learning_rate": 4.996560150195257e-06, "loss": 3.4489, "step": 1740 }, { "epoch": 0.017751057942708332, "grad_norm": 14.56915283203125, "learning_rate": 4.9965391483757765e-06, "loss": 3.4132, "step": 1745 }, { "epoch": 0.017801920572916668, "grad_norm": 9.958040237426758, "learning_rate": 4.996518082682933e-06, "loss": 3.5184, "step": 1750 }, { "epoch": 0.017852783203125, "grad_norm": 14.741584777832031, "learning_rate": 4.9964969531172656e-06, "loss": 3.7564, "step": 1755 }, { "epoch": 0.017903645833333332, "grad_norm": 12.061718940734863, "learning_rate": 4.996475759679316e-06, "loss": 3.5494, "step": 1760 }, { "epoch": 0.017954508463541668, "grad_norm": 19.455101013183594, "learning_rate": 4.9964545023696255e-06, "loss": 3.5547, "step": 1765 }, { "epoch": 0.01800537109375, "grad_norm": 15.922080993652344, "learning_rate": 4.996433181188739e-06, "loss": 3.6292, "step": 1770 }, { "epoch": 0.018056233723958332, "grad_norm": 10.75054931640625, "learning_rate": 4.996411796137201e-06, "loss": 3.3277, "step": 1775 }, { "epoch": 0.018107096354166668, "grad_norm": 14.0524263381958, "learning_rate": 4.996390347215558e-06, "loss": 3.671, "step": 1780 }, { "epoch": 0.018157958984375, "grad_norm": 17.705394744873047, "learning_rate": 4.9963688344243605e-06, "loss": 3.4854, "step": 1785 }, { "epoch": 0.018208821614583332, "grad_norm": 14.042203903198242, "learning_rate": 4.996347257764158e-06, "loss": 3.3586, "step": 1790 }, { "epoch": 0.018259684244791668, "grad_norm": 15.437379837036133, "learning_rate": 4.996325617235502e-06, "loss": 3.1949, "step": 1795 }, { "epoch": 0.018310546875, "grad_norm": 14.990214347839355, "learning_rate": 4.996303912838948e-06, "loss": 3.2657, "step": 1800 }, { "epoch": 0.018361409505208332, "grad_norm": 20.377416610717773, "learning_rate": 4.9962821445750485e-06, "loss": 3.5048, "step": 1805 }, { "epoch": 0.018412272135416668, "grad_norm": 11.091985702514648, "learning_rate": 4.996260312444363e-06, "loss": 3.5013, "step": 1810 }, { "epoch": 0.018463134765625, "grad_norm": 17.564790725708008, "learning_rate": 4.9962384164474495e-06, "loss": 3.9346, "step": 1815 }, { "epoch": 0.018513997395833332, "grad_norm": 11.695920944213867, "learning_rate": 4.996216456584867e-06, "loss": 3.7216, "step": 1820 }, { "epoch": 0.018564860026041668, "grad_norm": 12.64533519744873, "learning_rate": 4.9961944328571785e-06, "loss": 3.4994, "step": 1825 }, { "epoch": 0.01861572265625, "grad_norm": 11.813600540161133, "learning_rate": 4.9961723452649465e-06, "loss": 3.3742, "step": 1830 }, { "epoch": 0.018666585286458332, "grad_norm": 13.247116088867188, "learning_rate": 4.9961501938087375e-06, "loss": 3.7691, "step": 1835 }, { "epoch": 0.018717447916666668, "grad_norm": 10.45460319519043, "learning_rate": 4.996127978489117e-06, "loss": 3.6143, "step": 1840 }, { "epoch": 0.018768310546875, "grad_norm": 19.198963165283203, "learning_rate": 4.996105699306654e-06, "loss": 3.4559, "step": 1845 }, { "epoch": 0.018819173177083332, "grad_norm": 15.408687591552734, "learning_rate": 4.996083356261918e-06, "loss": 3.6425, "step": 1850 }, { "epoch": 0.018870035807291668, "grad_norm": 16.426103591918945, "learning_rate": 4.996060949355481e-06, "loss": 3.6617, "step": 1855 }, { "epoch": 0.0189208984375, "grad_norm": 9.62375259399414, "learning_rate": 4.996038478587916e-06, "loss": 4.2817, "step": 1860 }, { "epoch": 0.018971761067708332, "grad_norm": 13.927205085754395, "learning_rate": 4.9960159439598e-06, "loss": 3.1512, "step": 1865 }, { "epoch": 0.019022623697916668, "grad_norm": 16.292879104614258, "learning_rate": 4.995993345471706e-06, "loss": 3.7999, "step": 1870 }, { "epoch": 0.019073486328125, "grad_norm": 16.1326904296875, "learning_rate": 4.995970683124214e-06, "loss": 3.0987, "step": 1875 }, { "epoch": 0.019124348958333332, "grad_norm": 17.09796142578125, "learning_rate": 4.995947956917904e-06, "loss": 3.6616, "step": 1880 }, { "epoch": 0.019175211588541668, "grad_norm": 8.162797927856445, "learning_rate": 4.995925166853357e-06, "loss": 3.5208, "step": 1885 }, { "epoch": 0.01922607421875, "grad_norm": 35.92325210571289, "learning_rate": 4.995902312931156e-06, "loss": 3.819, "step": 1890 }, { "epoch": 0.019276936848958332, "grad_norm": 15.114096641540527, "learning_rate": 4.995879395151886e-06, "loss": 3.5895, "step": 1895 }, { "epoch": 0.019327799479166668, "grad_norm": 18.872722625732422, "learning_rate": 4.995856413516134e-06, "loss": 3.731, "step": 1900 }, { "epoch": 0.019378662109375, "grad_norm": 15.703900337219238, "learning_rate": 4.9958333680244865e-06, "loss": 3.8427, "step": 1905 }, { "epoch": 0.019429524739583332, "grad_norm": 14.129450798034668, "learning_rate": 4.9958102586775334e-06, "loss": 3.6527, "step": 1910 }, { "epoch": 0.019480387369791668, "grad_norm": 11.964359283447266, "learning_rate": 4.9957870854758675e-06, "loss": 3.3432, "step": 1915 }, { "epoch": 0.01953125, "grad_norm": 19.071924209594727, "learning_rate": 4.99576384842008e-06, "loss": 3.7882, "step": 1920 }, { "epoch": 0.019582112630208332, "grad_norm": 14.704536437988281, "learning_rate": 4.995740547510766e-06, "loss": 3.2511, "step": 1925 }, { "epoch": 0.019632975260416668, "grad_norm": 20.858272552490234, "learning_rate": 4.9957171827485215e-06, "loss": 4.266, "step": 1930 }, { "epoch": 0.019683837890625, "grad_norm": 12.940155982971191, "learning_rate": 4.995693754133944e-06, "loss": 3.7349, "step": 1935 }, { "epoch": 0.019734700520833332, "grad_norm": 17.16631507873535, "learning_rate": 4.995670261667635e-06, "loss": 3.6617, "step": 1940 }, { "epoch": 0.019785563151041668, "grad_norm": 12.754393577575684, "learning_rate": 4.995646705350193e-06, "loss": 3.6483, "step": 1945 }, { "epoch": 0.01983642578125, "grad_norm": 12.014126777648926, "learning_rate": 4.995623085182221e-06, "loss": 3.4977, "step": 1950 }, { "epoch": 0.019887288411458332, "grad_norm": 16.564720153808594, "learning_rate": 4.995599401164325e-06, "loss": 3.7256, "step": 1955 }, { "epoch": 0.019938151041666668, "grad_norm": 11.473775863647461, "learning_rate": 4.995575653297109e-06, "loss": 3.416, "step": 1960 }, { "epoch": 0.019989013671875, "grad_norm": 14.353362083435059, "learning_rate": 4.995551841581181e-06, "loss": 3.4641, "step": 1965 }, { "epoch": 0.020039876302083332, "grad_norm": 16.31317901611328, "learning_rate": 4.9955279660171514e-06, "loss": 3.7767, "step": 1970 }, { "epoch": 0.020090738932291668, "grad_norm": 8.309894561767578, "learning_rate": 4.99550402660563e-06, "loss": 3.6317, "step": 1975 }, { "epoch": 0.0201416015625, "grad_norm": 18.78270149230957, "learning_rate": 4.99548002334723e-06, "loss": 3.2542, "step": 1980 }, { "epoch": 0.020192464192708332, "grad_norm": 10.488119125366211, "learning_rate": 4.9954559562425654e-06, "loss": 3.3925, "step": 1985 }, { "epoch": 0.020243326822916668, "grad_norm": 19.56174087524414, "learning_rate": 4.99543182529225e-06, "loss": 3.4966, "step": 1990 }, { "epoch": 0.020294189453125, "grad_norm": 11.951705932617188, "learning_rate": 4.995407630496905e-06, "loss": 3.6632, "step": 1995 }, { "epoch": 0.020345052083333332, "grad_norm": 15.856471061706543, "learning_rate": 4.995383371857145e-06, "loss": 3.3239, "step": 2000 }, { "epoch": 0.020395914713541668, "grad_norm": 12.084221839904785, "learning_rate": 4.9953590493735945e-06, "loss": 3.5615, "step": 2005 }, { "epoch": 0.02044677734375, "grad_norm": 27.98715591430664, "learning_rate": 4.995334663046874e-06, "loss": 3.6218, "step": 2010 }, { "epoch": 0.020497639973958332, "grad_norm": 9.576325416564941, "learning_rate": 4.995310212877608e-06, "loss": 3.4569, "step": 2015 }, { "epoch": 0.020548502604166668, "grad_norm": 13.834068298339844, "learning_rate": 4.9952856988664205e-06, "loss": 3.2584, "step": 2020 }, { "epoch": 0.020599365234375, "grad_norm": 17.03768539428711, "learning_rate": 4.99526112101394e-06, "loss": 3.7883, "step": 2025 }, { "epoch": 0.020650227864583332, "grad_norm": 11.945119857788086, "learning_rate": 4.995236479320796e-06, "loss": 3.3385, "step": 2030 }, { "epoch": 0.020701090494791668, "grad_norm": 13.012072563171387, "learning_rate": 4.995211773787617e-06, "loss": 3.5075, "step": 2035 }, { "epoch": 0.020751953125, "grad_norm": 11.229742050170898, "learning_rate": 4.995187004415038e-06, "loss": 3.1753, "step": 2040 }, { "epoch": 0.020802815755208332, "grad_norm": 15.254374504089355, "learning_rate": 4.995162171203689e-06, "loss": 3.4893, "step": 2045 }, { "epoch": 0.020853678385416668, "grad_norm": 12.569649696350098, "learning_rate": 4.9951372741542084e-06, "loss": 3.7053, "step": 2050 }, { "epoch": 0.020904541015625, "grad_norm": 13.927783966064453, "learning_rate": 4.995112313267231e-06, "loss": 3.43, "step": 2055 }, { "epoch": 0.020955403645833332, "grad_norm": 10.135937690734863, "learning_rate": 4.995087288543397e-06, "loss": 3.3201, "step": 2060 }, { "epoch": 0.021006266276041668, "grad_norm": 17.4373722076416, "learning_rate": 4.995062199983346e-06, "loss": 3.9556, "step": 2065 }, { "epoch": 0.02105712890625, "grad_norm": 8.249671936035156, "learning_rate": 4.9950370475877204e-06, "loss": 3.4842, "step": 2070 }, { "epoch": 0.021107991536458332, "grad_norm": 7.946985244750977, "learning_rate": 4.995011831357164e-06, "loss": 3.7847, "step": 2075 }, { "epoch": 0.021158854166666668, "grad_norm": 14.219249725341797, "learning_rate": 4.99498655129232e-06, "loss": 3.6362, "step": 2080 }, { "epoch": 0.021209716796875, "grad_norm": 16.812952041625977, "learning_rate": 4.994961207393837e-06, "loss": 3.7761, "step": 2085 }, { "epoch": 0.021260579427083332, "grad_norm": 12.385375022888184, "learning_rate": 4.994935799662363e-06, "loss": 3.5996, "step": 2090 }, { "epoch": 0.021311442057291668, "grad_norm": 8.916481971740723, "learning_rate": 4.994910328098548e-06, "loss": 3.6023, "step": 2095 }, { "epoch": 0.0213623046875, "grad_norm": 14.721908569335938, "learning_rate": 4.994884792703043e-06, "loss": 3.3011, "step": 2100 }, { "epoch": 0.021413167317708332, "grad_norm": 15.370878219604492, "learning_rate": 4.9948591934765025e-06, "loss": 3.5518, "step": 2105 }, { "epoch": 0.021464029947916668, "grad_norm": 14.788433074951172, "learning_rate": 4.994833530419581e-06, "loss": 3.5008, "step": 2110 }, { "epoch": 0.021514892578125, "grad_norm": 12.971417427062988, "learning_rate": 4.994807803532934e-06, "loss": 3.5467, "step": 2115 }, { "epoch": 0.021565755208333332, "grad_norm": 14.517382621765137, "learning_rate": 4.994782012817221e-06, "loss": 3.6258, "step": 2120 }, { "epoch": 0.021616617838541668, "grad_norm": 10.307328224182129, "learning_rate": 4.994756158273102e-06, "loss": 4.0953, "step": 2125 }, { "epoch": 0.02166748046875, "grad_norm": 15.38490104675293, "learning_rate": 4.994730239901238e-06, "loss": 4.2029, "step": 2130 }, { "epoch": 0.021718343098958332, "grad_norm": 12.546719551086426, "learning_rate": 4.994704257702292e-06, "loss": 3.4173, "step": 2135 }, { "epoch": 0.021769205729166668, "grad_norm": 11.977869033813477, "learning_rate": 4.994678211676929e-06, "loss": 3.4095, "step": 2140 }, { "epoch": 0.021820068359375, "grad_norm": 10.538824081420898, "learning_rate": 4.994652101825815e-06, "loss": 3.607, "step": 2145 }, { "epoch": 0.021870930989583332, "grad_norm": 18.2629337310791, "learning_rate": 4.994625928149619e-06, "loss": 3.9144, "step": 2150 }, { "epoch": 0.021921793619791668, "grad_norm": 12.87021255493164, "learning_rate": 4.994599690649009e-06, "loss": 3.9643, "step": 2155 }, { "epoch": 0.02197265625, "grad_norm": 16.46050453186035, "learning_rate": 4.994573389324657e-06, "loss": 3.7189, "step": 2160 }, { "epoch": 0.022023518880208332, "grad_norm": 14.574606895446777, "learning_rate": 4.994547024177236e-06, "loss": 3.906, "step": 2165 }, { "epoch": 0.022074381510416668, "grad_norm": 10.178772926330566, "learning_rate": 4.994520595207422e-06, "loss": 3.7685, "step": 2170 }, { "epoch": 0.022125244140625, "grad_norm": 16.5206298828125, "learning_rate": 4.994494102415889e-06, "loss": 3.6191, "step": 2175 }, { "epoch": 0.022176106770833332, "grad_norm": 13.858648300170898, "learning_rate": 4.9944675458033156e-06, "loss": 3.5425, "step": 2180 }, { "epoch": 0.022226969401041668, "grad_norm": 15.840536117553711, "learning_rate": 4.994440925370382e-06, "loss": 3.5171, "step": 2185 }, { "epoch": 0.02227783203125, "grad_norm": 13.754470825195312, "learning_rate": 4.9944142411177675e-06, "loss": 3.3044, "step": 2190 }, { "epoch": 0.022328694661458332, "grad_norm": 13.027044296264648, "learning_rate": 4.994387493046157e-06, "loss": 3.3715, "step": 2195 }, { "epoch": 0.022379557291666668, "grad_norm": 10.115904808044434, "learning_rate": 4.994360681156233e-06, "loss": 3.8424, "step": 2200 }, { "epoch": 0.022430419921875, "grad_norm": 16.806591033935547, "learning_rate": 4.994333805448682e-06, "loss": 4.0637, "step": 2205 }, { "epoch": 0.022481282552083332, "grad_norm": 9.8330078125, "learning_rate": 4.994306865924192e-06, "loss": 3.383, "step": 2210 }, { "epoch": 0.022532145182291668, "grad_norm": 13.583646774291992, "learning_rate": 4.994279862583453e-06, "loss": 3.3756, "step": 2215 }, { "epoch": 0.0225830078125, "grad_norm": 12.496264457702637, "learning_rate": 4.994252795427153e-06, "loss": 3.5715, "step": 2220 }, { "epoch": 0.022633870442708332, "grad_norm": 9.020492553710938, "learning_rate": 4.994225664455989e-06, "loss": 3.3013, "step": 2225 }, { "epoch": 0.022684733072916668, "grad_norm": 14.377449989318848, "learning_rate": 4.99419846967065e-06, "loss": 3.3859, "step": 2230 }, { "epoch": 0.022735595703125, "grad_norm": 13.713812828063965, "learning_rate": 4.994171211071836e-06, "loss": 3.3353, "step": 2235 }, { "epoch": 0.022786458333333332, "grad_norm": 18.0495548248291, "learning_rate": 4.994143888660242e-06, "loss": 3.5454, "step": 2240 }, { "epoch": 0.022837320963541668, "grad_norm": 13.879693031311035, "learning_rate": 4.994116502436568e-06, "loss": 3.9231, "step": 2245 }, { "epoch": 0.02288818359375, "grad_norm": 14.874258041381836, "learning_rate": 4.994089052401515e-06, "loss": 3.114, "step": 2250 }, { "epoch": 0.022939046223958332, "grad_norm": 17.684362411499023, "learning_rate": 4.994061538555784e-06, "loss": 3.5418, "step": 2255 }, { "epoch": 0.022989908854166668, "grad_norm": 20.040653228759766, "learning_rate": 4.9940339609000796e-06, "loss": 4.0738, "step": 2260 }, { "epoch": 0.023040771484375, "grad_norm": 8.416106224060059, "learning_rate": 4.994006319435108e-06, "loss": 3.7515, "step": 2265 }, { "epoch": 0.023091634114583332, "grad_norm": 14.56326675415039, "learning_rate": 4.9939786141615754e-06, "loss": 3.4834, "step": 2270 }, { "epoch": 0.023142496744791668, "grad_norm": 9.106837272644043, "learning_rate": 4.993950845080191e-06, "loss": 3.363, "step": 2275 }, { "epoch": 0.023193359375, "grad_norm": 16.284809112548828, "learning_rate": 4.993923012191666e-06, "loss": 3.4758, "step": 2280 }, { "epoch": 0.023244222005208332, "grad_norm": 12.630521774291992, "learning_rate": 4.993895115496712e-06, "loss": 3.5318, "step": 2285 }, { "epoch": 0.023295084635416668, "grad_norm": 11.593666076660156, "learning_rate": 4.993867154996042e-06, "loss": 3.7887, "step": 2290 }, { "epoch": 0.023345947265625, "grad_norm": 14.084927558898926, "learning_rate": 4.993839130690372e-06, "loss": 3.5525, "step": 2295 }, { "epoch": 0.023396809895833332, "grad_norm": 15.547626495361328, "learning_rate": 4.993811042580419e-06, "loss": 3.3359, "step": 2300 }, { "epoch": 0.023447672526041668, "grad_norm": 17.31133460998535, "learning_rate": 4.993782890666902e-06, "loss": 3.6608, "step": 2305 }, { "epoch": 0.02349853515625, "grad_norm": 10.99673080444336, "learning_rate": 4.99375467495054e-06, "loss": 3.4517, "step": 2310 }, { "epoch": 0.023549397786458332, "grad_norm": 10.245965957641602, "learning_rate": 4.993726395432056e-06, "loss": 3.5961, "step": 2315 }, { "epoch": 0.023600260416666668, "grad_norm": 11.301896095275879, "learning_rate": 4.993698052112174e-06, "loss": 3.2348, "step": 2320 }, { "epoch": 0.023651123046875, "grad_norm": 16.186355590820312, "learning_rate": 4.993669644991617e-06, "loss": 3.6148, "step": 2325 }, { "epoch": 0.023701985677083332, "grad_norm": 15.257999420166016, "learning_rate": 4.993641174071115e-06, "loss": 3.7732, "step": 2330 }, { "epoch": 0.023752848307291668, "grad_norm": 17.36763572692871, "learning_rate": 4.993612639351393e-06, "loss": 3.5761, "step": 2335 }, { "epoch": 0.0238037109375, "grad_norm": 12.962672233581543, "learning_rate": 4.993584040833183e-06, "loss": 3.6535, "step": 2340 }, { "epoch": 0.023854573567708332, "grad_norm": 13.28327751159668, "learning_rate": 4.993555378517217e-06, "loss": 3.5254, "step": 2345 }, { "epoch": 0.023905436197916668, "grad_norm": 14.348062515258789, "learning_rate": 4.993526652404227e-06, "loss": 3.6133, "step": 2350 }, { "epoch": 0.023956298828125, "grad_norm": 15.535712242126465, "learning_rate": 4.993497862494949e-06, "loss": 3.5402, "step": 2355 }, { "epoch": 0.024007161458333332, "grad_norm": 10.898641586303711, "learning_rate": 4.993469008790119e-06, "loss": 3.5233, "step": 2360 }, { "epoch": 0.024058024088541668, "grad_norm": 9.476696968078613, "learning_rate": 4.993440091290476e-06, "loss": 4.3757, "step": 2365 }, { "epoch": 0.02410888671875, "grad_norm": 16.282838821411133, "learning_rate": 4.993411109996759e-06, "loss": 3.8592, "step": 2370 }, { "epoch": 0.024159749348958332, "grad_norm": 15.315332412719727, "learning_rate": 4.99338206490971e-06, "loss": 3.626, "step": 2375 }, { "epoch": 0.024210611979166668, "grad_norm": 11.935681343078613, "learning_rate": 4.993352956030071e-06, "loss": 3.5642, "step": 2380 }, { "epoch": 0.024261474609375, "grad_norm": 14.131113052368164, "learning_rate": 4.993323783358588e-06, "loss": 3.6816, "step": 2385 }, { "epoch": 0.024312337239583332, "grad_norm": 17.588123321533203, "learning_rate": 4.993294546896007e-06, "loss": 3.574, "step": 2390 }, { "epoch": 0.024363199869791668, "grad_norm": 11.560687065124512, "learning_rate": 4.993265246643076e-06, "loss": 3.6406, "step": 2395 }, { "epoch": 0.0244140625, "grad_norm": 9.761478424072266, "learning_rate": 4.993235882600545e-06, "loss": 3.5061, "step": 2400 }, { "epoch": 0.024464925130208332, "grad_norm": 13.649985313415527, "learning_rate": 4.993206454769165e-06, "loss": 3.4024, "step": 2405 }, { "epoch": 0.024515787760416668, "grad_norm": 8.24244499206543, "learning_rate": 4.993176963149689e-06, "loss": 3.6048, "step": 2410 }, { "epoch": 0.024566650390625, "grad_norm": 16.1616268157959, "learning_rate": 4.99314740774287e-06, "loss": 3.5386, "step": 2415 }, { "epoch": 0.024617513020833332, "grad_norm": 9.706306457519531, "learning_rate": 4.993117788549466e-06, "loss": 3.5126, "step": 2420 }, { "epoch": 0.024668375651041668, "grad_norm": 12.5220365524292, "learning_rate": 4.993088105570235e-06, "loss": 4.1297, "step": 2425 }, { "epoch": 0.02471923828125, "grad_norm": 12.254471778869629, "learning_rate": 4.993058358805935e-06, "loss": 3.4553, "step": 2430 }, { "epoch": 0.024770100911458332, "grad_norm": 14.104937553405762, "learning_rate": 4.993028548257328e-06, "loss": 4.2029, "step": 2435 }, { "epoch": 0.024820963541666668, "grad_norm": 14.170137405395508, "learning_rate": 4.992998673925177e-06, "loss": 3.45, "step": 2440 }, { "epoch": 0.024871826171875, "grad_norm": 16.678157806396484, "learning_rate": 4.9929687358102455e-06, "loss": 3.464, "step": 2445 }, { "epoch": 0.024922688802083332, "grad_norm": 11.808859825134277, "learning_rate": 4.9929387339133e-06, "loss": 4.1151, "step": 2450 }, { "epoch": 0.024973551432291668, "grad_norm": 13.544400215148926, "learning_rate": 4.992908668235107e-06, "loss": 3.4802, "step": 2455 }, { "epoch": 0.0250244140625, "grad_norm": 10.196426391601562, "learning_rate": 4.992878538776438e-06, "loss": 3.6122, "step": 2460 }, { "epoch": 0.025075276692708332, "grad_norm": 24.040620803833008, "learning_rate": 4.992848345538062e-06, "loss": 3.7206, "step": 2465 }, { "epoch": 0.025126139322916668, "grad_norm": 13.72429370880127, "learning_rate": 4.992818088520751e-06, "loss": 3.7046, "step": 2470 }, { "epoch": 0.025177001953125, "grad_norm": 17.09827995300293, "learning_rate": 4.992787767725281e-06, "loss": 3.6007, "step": 2475 }, { "epoch": 0.025227864583333332, "grad_norm": 17.559282302856445, "learning_rate": 4.992757383152427e-06, "loss": 3.5262, "step": 2480 }, { "epoch": 0.025278727213541668, "grad_norm": 12.092161178588867, "learning_rate": 4.992726934802965e-06, "loss": 3.4942, "step": 2485 }, { "epoch": 0.02532958984375, "grad_norm": 12.694467544555664, "learning_rate": 4.992696422677677e-06, "loss": 3.6092, "step": 2490 }, { "epoch": 0.025380452473958332, "grad_norm": 19.91484832763672, "learning_rate": 4.99266584677734e-06, "loss": 3.6935, "step": 2495 }, { "epoch": 0.025431315104166668, "grad_norm": 12.521865844726562, "learning_rate": 4.99263520710274e-06, "loss": 3.509, "step": 2500 }, { "epoch": 0.025482177734375, "grad_norm": 14.746912002563477, "learning_rate": 4.9926045036546576e-06, "loss": 3.4715, "step": 2505 }, { "epoch": 0.025533040364583332, "grad_norm": 13.788453102111816, "learning_rate": 4.99257373643388e-06, "loss": 3.7318, "step": 2510 }, { "epoch": 0.025583902994791668, "grad_norm": 15.496885299682617, "learning_rate": 4.992542905441194e-06, "loss": 3.6001, "step": 2515 }, { "epoch": 0.025634765625, "grad_norm": 11.678740501403809, "learning_rate": 4.992512010677389e-06, "loss": 3.3945, "step": 2520 }, { "epoch": 0.025685628255208332, "grad_norm": 11.555937767028809, "learning_rate": 4.992481052143256e-06, "loss": 3.5856, "step": 2525 }, { "epoch": 0.025736490885416668, "grad_norm": 11.76779556274414, "learning_rate": 4.992450029839584e-06, "loss": 3.6207, "step": 2530 }, { "epoch": 0.025787353515625, "grad_norm": 11.902135848999023, "learning_rate": 4.99241894376717e-06, "loss": 3.6504, "step": 2535 }, { "epoch": 0.025838216145833332, "grad_norm": 13.56098461151123, "learning_rate": 4.992387793926808e-06, "loss": 3.561, "step": 2540 }, { "epoch": 0.025889078776041668, "grad_norm": 9.4614839553833, "learning_rate": 4.9923565803192945e-06, "loss": 3.7419, "step": 2545 }, { "epoch": 0.02593994140625, "grad_norm": 15.035961151123047, "learning_rate": 4.9923253029454295e-06, "loss": 3.4019, "step": 2550 }, { "epoch": 0.025990804036458332, "grad_norm": 14.106694221496582, "learning_rate": 4.992293961806012e-06, "loss": 3.6503, "step": 2555 }, { "epoch": 0.026041666666666668, "grad_norm": 13.903535842895508, "learning_rate": 4.992262556901844e-06, "loss": 3.5632, "step": 2560 }, { "epoch": 0.026092529296875, "grad_norm": 21.27461814880371, "learning_rate": 4.99223108823373e-06, "loss": 3.3556, "step": 2565 }, { "epoch": 0.026143391927083332, "grad_norm": 19.755544662475586, "learning_rate": 4.992199555802473e-06, "loss": 3.9516, "step": 2570 }, { "epoch": 0.026194254557291668, "grad_norm": 10.459436416625977, "learning_rate": 4.992167959608882e-06, "loss": 4.184, "step": 2575 }, { "epoch": 0.0262451171875, "grad_norm": 15.970342636108398, "learning_rate": 4.992136299653763e-06, "loss": 3.4005, "step": 2580 }, { "epoch": 0.026295979817708332, "grad_norm": 12.037581443786621, "learning_rate": 4.992104575937929e-06, "loss": 4.3835, "step": 2585 }, { "epoch": 0.026346842447916668, "grad_norm": 9.693264961242676, "learning_rate": 4.99207278846219e-06, "loss": 3.7904, "step": 2590 }, { "epoch": 0.026397705078125, "grad_norm": 18.028060913085938, "learning_rate": 4.99204093722736e-06, "loss": 3.4888, "step": 2595 }, { "epoch": 0.026448567708333332, "grad_norm": 14.927176475524902, "learning_rate": 4.992009022234252e-06, "loss": 3.7141, "step": 2600 }, { "epoch": 0.026499430338541668, "grad_norm": 10.663310050964355, "learning_rate": 4.991977043483684e-06, "loss": 3.3668, "step": 2605 }, { "epoch": 0.02655029296875, "grad_norm": 21.105947494506836, "learning_rate": 4.991945000976475e-06, "loss": 3.7438, "step": 2610 }, { "epoch": 0.026601155598958332, "grad_norm": 11.864397048950195, "learning_rate": 4.991912894713443e-06, "loss": 3.4406, "step": 2615 }, { "epoch": 0.026652018229166668, "grad_norm": 16.927186965942383, "learning_rate": 4.99188072469541e-06, "loss": 3.8746, "step": 2620 }, { "epoch": 0.026702880859375, "grad_norm": 10.140044212341309, "learning_rate": 4.9918484909232e-06, "loss": 3.7149, "step": 2625 }, { "epoch": 0.026753743489583332, "grad_norm": 10.26709270477295, "learning_rate": 4.991816193397637e-06, "loss": 3.4153, "step": 2630 }, { "epoch": 0.026804606119791668, "grad_norm": 18.33658790588379, "learning_rate": 4.991783832119547e-06, "loss": 3.6604, "step": 2635 }, { "epoch": 0.02685546875, "grad_norm": 15.353517532348633, "learning_rate": 4.991751407089759e-06, "loss": 3.4237, "step": 2640 }, { "epoch": 0.026906331380208332, "grad_norm": 13.645818710327148, "learning_rate": 4.991718918309101e-06, "loss": 3.3133, "step": 2645 }, { "epoch": 0.026957194010416668, "grad_norm": 8.63716983795166, "learning_rate": 4.991686365778405e-06, "loss": 3.3966, "step": 2650 }, { "epoch": 0.027008056640625, "grad_norm": 12.624420166015625, "learning_rate": 4.991653749498504e-06, "loss": 3.3418, "step": 2655 }, { "epoch": 0.027058919270833332, "grad_norm": 16.91732406616211, "learning_rate": 4.991621069470233e-06, "loss": 4.1244, "step": 2660 }, { "epoch": 0.027109781901041668, "grad_norm": 15.740944862365723, "learning_rate": 4.991588325694426e-06, "loss": 3.4994, "step": 2665 }, { "epoch": 0.02716064453125, "grad_norm": 12.525944709777832, "learning_rate": 4.9915555181719235e-06, "loss": 3.7722, "step": 2670 }, { "epoch": 0.027211507161458332, "grad_norm": 13.604511260986328, "learning_rate": 4.991522646903564e-06, "loss": 3.4386, "step": 2675 }, { "epoch": 0.027262369791666668, "grad_norm": 10.147235870361328, "learning_rate": 4.991489711890188e-06, "loss": 3.5642, "step": 2680 }, { "epoch": 0.027313232421875, "grad_norm": 14.985891342163086, "learning_rate": 4.991456713132637e-06, "loss": 3.5542, "step": 2685 }, { "epoch": 0.027364095052083332, "grad_norm": 9.699203491210938, "learning_rate": 4.991423650631758e-06, "loss": 3.4597, "step": 2690 }, { "epoch": 0.027414957682291668, "grad_norm": 19.256378173828125, "learning_rate": 4.991390524388394e-06, "loss": 3.9099, "step": 2695 }, { "epoch": 0.0274658203125, "grad_norm": 13.979656219482422, "learning_rate": 4.991357334403396e-06, "loss": 3.4309, "step": 2700 }, { "epoch": 0.027516682942708332, "grad_norm": 15.490063667297363, "learning_rate": 4.9913240806776095e-06, "loss": 3.6231, "step": 2705 }, { "epoch": 0.027567545572916668, "grad_norm": 11.414307594299316, "learning_rate": 4.991290763211887e-06, "loss": 3.5332, "step": 2710 }, { "epoch": 0.027618408203125, "grad_norm": 14.588093757629395, "learning_rate": 4.991257382007081e-06, "loss": 3.8702, "step": 2715 }, { "epoch": 0.027669270833333332, "grad_norm": 11.244452476501465, "learning_rate": 4.9912239370640455e-06, "loss": 4.1972, "step": 2720 }, { "epoch": 0.027720133463541668, "grad_norm": 18.345767974853516, "learning_rate": 4.991190428383637e-06, "loss": 3.4974, "step": 2725 }, { "epoch": 0.02777099609375, "grad_norm": 16.70025634765625, "learning_rate": 4.99115685596671e-06, "loss": 3.4827, "step": 2730 }, { "epoch": 0.027821858723958332, "grad_norm": 9.72400951385498, "learning_rate": 4.9911232198141266e-06, "loss": 3.7941, "step": 2735 }, { "epoch": 0.027872721354166668, "grad_norm": 18.471965789794922, "learning_rate": 4.991089519926746e-06, "loss": 3.3165, "step": 2740 }, { "epoch": 0.027923583984375, "grad_norm": 8.832108497619629, "learning_rate": 4.9910557563054295e-06, "loss": 4.046, "step": 2745 }, { "epoch": 0.027974446614583332, "grad_norm": 13.375359535217285, "learning_rate": 4.991021928951043e-06, "loss": 3.7558, "step": 2750 }, { "epoch": 0.028025309244791668, "grad_norm": 14.425735473632812, "learning_rate": 4.99098803786445e-06, "loss": 3.7207, "step": 2755 }, { "epoch": 0.028076171875, "grad_norm": 11.840872764587402, "learning_rate": 4.99095408304652e-06, "loss": 3.8429, "step": 2760 }, { "epoch": 0.028127034505208332, "grad_norm": 11.43602466583252, "learning_rate": 4.990920064498119e-06, "loss": 3.3477, "step": 2765 }, { "epoch": 0.028177897135416668, "grad_norm": 21.40333366394043, "learning_rate": 4.9908859822201186e-06, "loss": 3.3127, "step": 2770 }, { "epoch": 0.028228759765625, "grad_norm": 9.050609588623047, "learning_rate": 4.990851836213391e-06, "loss": 3.5749, "step": 2775 }, { "epoch": 0.028279622395833332, "grad_norm": 18.72174644470215, "learning_rate": 4.990817626478809e-06, "loss": 3.4886, "step": 2780 }, { "epoch": 0.028330485026041668, "grad_norm": 17.724315643310547, "learning_rate": 4.990783353017249e-06, "loss": 3.6107, "step": 2785 }, { "epoch": 0.02838134765625, "grad_norm": 17.255958557128906, "learning_rate": 4.990749015829587e-06, "loss": 3.4667, "step": 2790 }, { "epoch": 0.028432210286458332, "grad_norm": 15.389385223388672, "learning_rate": 4.9907146149167025e-06, "loss": 3.6194, "step": 2795 }, { "epoch": 0.028483072916666668, "grad_norm": 16.21523666381836, "learning_rate": 4.990680150279474e-06, "loss": 3.7462, "step": 2800 }, { "epoch": 0.028533935546875, "grad_norm": 10.564112663269043, "learning_rate": 4.990645621918785e-06, "loss": 3.5578, "step": 2805 }, { "epoch": 0.028584798177083332, "grad_norm": 11.502965927124023, "learning_rate": 4.990611029835518e-06, "loss": 3.3145, "step": 2810 }, { "epoch": 0.028635660807291668, "grad_norm": 11.729395866394043, "learning_rate": 4.990576374030558e-06, "loss": 3.5022, "step": 2815 }, { "epoch": 0.0286865234375, "grad_norm": 17.083139419555664, "learning_rate": 4.9905416545047914e-06, "loss": 3.1965, "step": 2820 }, { "epoch": 0.028737386067708332, "grad_norm": 12.707961082458496, "learning_rate": 4.990506871259107e-06, "loss": 3.3708, "step": 2825 }, { "epoch": 0.028788248697916668, "grad_norm": 12.2274808883667, "learning_rate": 4.990472024294395e-06, "loss": 3.6543, "step": 2830 }, { "epoch": 0.028839111328125, "grad_norm": 10.415914535522461, "learning_rate": 4.990437113611546e-06, "loss": 3.3149, "step": 2835 }, { "epoch": 0.028889973958333332, "grad_norm": 14.803680419921875, "learning_rate": 4.990402139211454e-06, "loss": 3.6954, "step": 2840 }, { "epoch": 0.028940836588541668, "grad_norm": 9.584573745727539, "learning_rate": 4.990367101095014e-06, "loss": 3.2306, "step": 2845 }, { "epoch": 0.02899169921875, "grad_norm": 9.19983196258545, "learning_rate": 4.9903319992631215e-06, "loss": 3.5329, "step": 2850 }, { "epoch": 0.029042561848958332, "grad_norm": 13.915033340454102, "learning_rate": 4.990296833716676e-06, "loss": 3.5633, "step": 2855 }, { "epoch": 0.029093424479166668, "grad_norm": 8.67843246459961, "learning_rate": 4.990261604456575e-06, "loss": 3.5621, "step": 2860 }, { "epoch": 0.029144287109375, "grad_norm": 14.383540153503418, "learning_rate": 4.990226311483721e-06, "loss": 3.6706, "step": 2865 }, { "epoch": 0.029195149739583332, "grad_norm": 15.184710502624512, "learning_rate": 4.990190954799018e-06, "loss": 3.5497, "step": 2870 }, { "epoch": 0.029246012369791668, "grad_norm": 17.983572006225586, "learning_rate": 4.990155534403369e-06, "loss": 3.2129, "step": 2875 }, { "epoch": 0.029296875, "grad_norm": 19.197294235229492, "learning_rate": 4.9901200502976825e-06, "loss": 3.7564, "step": 2880 }, { "epoch": 0.029347737630208332, "grad_norm": 14.85810661315918, "learning_rate": 4.990084502482863e-06, "loss": 3.4496, "step": 2885 }, { "epoch": 0.029398600260416668, "grad_norm": 12.596855163574219, "learning_rate": 4.990048890959822e-06, "loss": 3.7283, "step": 2890 }, { "epoch": 0.029449462890625, "grad_norm": 17.732402801513672, "learning_rate": 4.99001321572947e-06, "loss": 3.8592, "step": 2895 }, { "epoch": 0.029500325520833332, "grad_norm": 12.990941047668457, "learning_rate": 4.989977476792721e-06, "loss": 3.5972, "step": 2900 }, { "epoch": 0.029551188151041668, "grad_norm": 15.059063911437988, "learning_rate": 4.989941674150488e-06, "loss": 3.6941, "step": 2905 }, { "epoch": 0.02960205078125, "grad_norm": 13.130831718444824, "learning_rate": 4.989905807803688e-06, "loss": 3.5952, "step": 2910 }, { "epoch": 0.029652913411458332, "grad_norm": 15.897665977478027, "learning_rate": 4.989869877753237e-06, "loss": 3.5691, "step": 2915 }, { "epoch": 0.029703776041666668, "grad_norm": 11.824736595153809, "learning_rate": 4.989833884000056e-06, "loss": 3.7098, "step": 2920 }, { "epoch": 0.029754638671875, "grad_norm": 14.641777038574219, "learning_rate": 4.989797826545065e-06, "loss": 3.6694, "step": 2925 }, { "epoch": 0.029805501302083332, "grad_norm": 12.433199882507324, "learning_rate": 4.989761705389187e-06, "loss": 3.5255, "step": 2930 }, { "epoch": 0.029856363932291668, "grad_norm": 12.532072067260742, "learning_rate": 4.989725520533346e-06, "loss": 3.5487, "step": 2935 }, { "epoch": 0.0299072265625, "grad_norm": 16.237571716308594, "learning_rate": 4.9896892719784675e-06, "loss": 3.562, "step": 2940 }, { "epoch": 0.029958089192708332, "grad_norm": 15.376089096069336, "learning_rate": 4.989652959725479e-06, "loss": 3.4396, "step": 2945 }, { "epoch": 0.030008951822916668, "grad_norm": 13.294553756713867, "learning_rate": 4.98961658377531e-06, "loss": 3.8229, "step": 2950 }, { "epoch": 0.030059814453125, "grad_norm": 15.016079902648926, "learning_rate": 4.98958014412889e-06, "loss": 3.4993, "step": 2955 }, { "epoch": 0.030110677083333332, "grad_norm": 16.527847290039062, "learning_rate": 4.989543640787153e-06, "loss": 3.6308, "step": 2960 }, { "epoch": 0.030161539713541668, "grad_norm": 15.960955619812012, "learning_rate": 4.989507073751032e-06, "loss": 3.5736, "step": 2965 }, { "epoch": 0.03021240234375, "grad_norm": 19.20722198486328, "learning_rate": 4.989470443021462e-06, "loss": 3.3009, "step": 2970 }, { "epoch": 0.030263264973958332, "grad_norm": 11.201433181762695, "learning_rate": 4.989433748599381e-06, "loss": 3.6235, "step": 2975 }, { "epoch": 0.030314127604166668, "grad_norm": 13.62901782989502, "learning_rate": 4.989396990485727e-06, "loss": 4.1232, "step": 2980 }, { "epoch": 0.030364990234375, "grad_norm": 15.175191879272461, "learning_rate": 4.989360168681442e-06, "loss": 3.6938, "step": 2985 }, { "epoch": 0.030415852864583332, "grad_norm": 12.873221397399902, "learning_rate": 4.9893232831874676e-06, "loss": 3.7898, "step": 2990 }, { "epoch": 0.030466715494791668, "grad_norm": 10.560770034790039, "learning_rate": 4.989286334004746e-06, "loss": 3.1075, "step": 2995 }, { "epoch": 0.030517578125, "grad_norm": 16.364398956298828, "learning_rate": 4.9892493211342235e-06, "loss": 3.8729, "step": 3000 }, { "epoch": 0.030568440755208332, "grad_norm": 14.276408195495605, "learning_rate": 4.989212244576848e-06, "loss": 3.4278, "step": 3005 }, { "epoch": 0.030619303385416668, "grad_norm": 17.391857147216797, "learning_rate": 4.989175104333567e-06, "loss": 3.5136, "step": 3010 }, { "epoch": 0.030670166015625, "grad_norm": 16.542964935302734, "learning_rate": 4.98913790040533e-06, "loss": 3.7648, "step": 3015 }, { "epoch": 0.030721028645833332, "grad_norm": 12.984308242797852, "learning_rate": 4.9891006327930905e-06, "loss": 3.2495, "step": 3020 }, { "epoch": 0.030771891276041668, "grad_norm": 16.2092227935791, "learning_rate": 4.989063301497801e-06, "loss": 3.2186, "step": 3025 }, { "epoch": 0.03082275390625, "grad_norm": 9.466002464294434, "learning_rate": 4.989025906520417e-06, "loss": 3.5156, "step": 3030 }, { "epoch": 0.030873616536458332, "grad_norm": 11.886150360107422, "learning_rate": 4.988988447861895e-06, "loss": 3.5858, "step": 3035 }, { "epoch": 0.030924479166666668, "grad_norm": 10.326323509216309, "learning_rate": 4.988950925523194e-06, "loss": 3.3724, "step": 3040 }, { "epoch": 0.030975341796875, "grad_norm": 15.874045372009277, "learning_rate": 4.988913339505274e-06, "loss": 3.5452, "step": 3045 }, { "epoch": 0.031026204427083332, "grad_norm": 11.754904747009277, "learning_rate": 4.988875689809095e-06, "loss": 3.4911, "step": 3050 }, { "epoch": 0.031077067057291668, "grad_norm": 17.041898727416992, "learning_rate": 4.988837976435622e-06, "loss": 3.6784, "step": 3055 }, { "epoch": 0.0311279296875, "grad_norm": 11.09570026397705, "learning_rate": 4.988800199385819e-06, "loss": 3.366, "step": 3060 }, { "epoch": 0.031178792317708332, "grad_norm": 14.231475830078125, "learning_rate": 4.988762358660654e-06, "loss": 3.3146, "step": 3065 }, { "epoch": 0.031229654947916668, "grad_norm": 11.312032699584961, "learning_rate": 4.988724454261092e-06, "loss": 3.6764, "step": 3070 }, { "epoch": 0.031280517578125, "grad_norm": 15.22202205657959, "learning_rate": 4.988686486188105e-06, "loss": 3.4084, "step": 3075 }, { "epoch": 0.031331380208333336, "grad_norm": 14.88944149017334, "learning_rate": 4.988648454442666e-06, "loss": 3.1168, "step": 3080 }, { "epoch": 0.031382242838541664, "grad_norm": 10.730203628540039, "learning_rate": 4.988610359025745e-06, "loss": 3.3345, "step": 3085 }, { "epoch": 0.03143310546875, "grad_norm": 13.363783836364746, "learning_rate": 4.988572199938317e-06, "loss": 3.0878, "step": 3090 }, { "epoch": 0.031483968098958336, "grad_norm": 21.714414596557617, "learning_rate": 4.9885339771813604e-06, "loss": 3.6447, "step": 3095 }, { "epoch": 0.031534830729166664, "grad_norm": 18.616336822509766, "learning_rate": 4.9884956907558515e-06, "loss": 3.5968, "step": 3100 }, { "epoch": 0.031585693359375, "grad_norm": 12.650989532470703, "learning_rate": 4.98845734066277e-06, "loss": 3.4597, "step": 3105 }, { "epoch": 0.031636555989583336, "grad_norm": 14.19465446472168, "learning_rate": 4.988418926903098e-06, "loss": 3.399, "step": 3110 }, { "epoch": 0.031687418619791664, "grad_norm": 9.14580249786377, "learning_rate": 4.9883804494778165e-06, "loss": 3.6816, "step": 3115 }, { "epoch": 0.03173828125, "grad_norm": 10.480582237243652, "learning_rate": 4.988341908387912e-06, "loss": 3.128, "step": 3120 }, { "epoch": 0.031789143880208336, "grad_norm": 10.952704429626465, "learning_rate": 4.988303303634368e-06, "loss": 3.4517, "step": 3125 }, { "epoch": 0.031840006510416664, "grad_norm": 12.522828102111816, "learning_rate": 4.988264635218175e-06, "loss": 3.9186, "step": 3130 }, { "epoch": 0.031890869140625, "grad_norm": 12.782024383544922, "learning_rate": 4.988225903140321e-06, "loss": 3.725, "step": 3135 }, { "epoch": 0.031941731770833336, "grad_norm": 11.72850227355957, "learning_rate": 4.988187107401797e-06, "loss": 3.7264, "step": 3140 }, { "epoch": 0.031992594401041664, "grad_norm": 12.133509635925293, "learning_rate": 4.988148248003595e-06, "loss": 3.4472, "step": 3145 }, { "epoch": 0.03204345703125, "grad_norm": 12.93433666229248, "learning_rate": 4.98810932494671e-06, "loss": 3.9443, "step": 3150 }, { "epoch": 0.032094319661458336, "grad_norm": 16.135568618774414, "learning_rate": 4.988070338232138e-06, "loss": 3.4966, "step": 3155 }, { "epoch": 0.032145182291666664, "grad_norm": 18.191791534423828, "learning_rate": 4.988031287860877e-06, "loss": 3.5644, "step": 3160 }, { "epoch": 0.032196044921875, "grad_norm": 11.924603462219238, "learning_rate": 4.987992173833924e-06, "loss": 3.4058, "step": 3165 }, { "epoch": 0.032246907552083336, "grad_norm": 12.82744026184082, "learning_rate": 4.987952996152281e-06, "loss": 3.492, "step": 3170 }, { "epoch": 0.032297770182291664, "grad_norm": 8.64533519744873, "learning_rate": 4.987913754816951e-06, "loss": 3.9099, "step": 3175 }, { "epoch": 0.0323486328125, "grad_norm": 16.365373611450195, "learning_rate": 4.987874449828937e-06, "loss": 3.1102, "step": 3180 }, { "epoch": 0.032399495442708336, "grad_norm": 15.942273139953613, "learning_rate": 4.987835081189245e-06, "loss": 3.7335, "step": 3185 }, { "epoch": 0.032450358072916664, "grad_norm": 17.56052589416504, "learning_rate": 4.987795648898882e-06, "loss": 3.8561, "step": 3190 }, { "epoch": 0.032501220703125, "grad_norm": 9.97986125946045, "learning_rate": 4.987756152958857e-06, "loss": 3.3722, "step": 3195 }, { "epoch": 0.032552083333333336, "grad_norm": 13.236599922180176, "learning_rate": 4.98771659337018e-06, "loss": 3.5341, "step": 3200 }, { "epoch": 0.032602945963541664, "grad_norm": 15.567898750305176, "learning_rate": 4.987676970133864e-06, "loss": 3.6612, "step": 3205 }, { "epoch": 0.03265380859375, "grad_norm": 14.741514205932617, "learning_rate": 4.987637283250923e-06, "loss": 3.7577, "step": 3210 }, { "epoch": 0.032704671223958336, "grad_norm": 12.727431297302246, "learning_rate": 4.98759753272237e-06, "loss": 4.3001, "step": 3215 }, { "epoch": 0.032755533854166664, "grad_norm": 11.071520805358887, "learning_rate": 4.987557718549225e-06, "loss": 3.61, "step": 3220 }, { "epoch": 0.032806396484375, "grad_norm": 10.508591651916504, "learning_rate": 4.987517840732505e-06, "loss": 3.3211, "step": 3225 }, { "epoch": 0.032857259114583336, "grad_norm": 12.959603309631348, "learning_rate": 4.987477899273232e-06, "loss": 3.5145, "step": 3230 }, { "epoch": 0.032908121744791664, "grad_norm": 14.553711891174316, "learning_rate": 4.987437894172426e-06, "loss": 3.9318, "step": 3235 }, { "epoch": 0.032958984375, "grad_norm": 14.899251937866211, "learning_rate": 4.987397825431109e-06, "loss": 3.5583, "step": 3240 }, { "epoch": 0.033009847005208336, "grad_norm": 11.648773193359375, "learning_rate": 4.98735769305031e-06, "loss": 3.2714, "step": 3245 }, { "epoch": 0.033060709635416664, "grad_norm": 13.525915145874023, "learning_rate": 4.987317497031055e-06, "loss": 3.3922, "step": 3250 }, { "epoch": 0.033111572265625, "grad_norm": 16.7314453125, "learning_rate": 4.987277237374369e-06, "loss": 3.6725, "step": 3255 }, { "epoch": 0.033162434895833336, "grad_norm": 16.31707191467285, "learning_rate": 4.987236914081286e-06, "loss": 3.3669, "step": 3260 }, { "epoch": 0.033213297526041664, "grad_norm": 11.438323974609375, "learning_rate": 4.987196527152835e-06, "loss": 3.4222, "step": 3265 }, { "epoch": 0.03326416015625, "grad_norm": 14.705175399780273, "learning_rate": 4.987156076590051e-06, "loss": 3.6076, "step": 3270 }, { "epoch": 0.033315022786458336, "grad_norm": 14.206781387329102, "learning_rate": 4.987115562393969e-06, "loss": 3.4704, "step": 3275 }, { "epoch": 0.033365885416666664, "grad_norm": 10.712113380432129, "learning_rate": 4.987074984565624e-06, "loss": 3.575, "step": 3280 }, { "epoch": 0.033416748046875, "grad_norm": 12.62575626373291, "learning_rate": 4.987034343106055e-06, "loss": 3.2375, "step": 3285 }, { "epoch": 0.033467610677083336, "grad_norm": 13.155769348144531, "learning_rate": 4.986993638016302e-06, "loss": 3.0587, "step": 3290 }, { "epoch": 0.033518473307291664, "grad_norm": 15.919364929199219, "learning_rate": 4.986952869297407e-06, "loss": 3.569, "step": 3295 }, { "epoch": 0.0335693359375, "grad_norm": 16.994239807128906, "learning_rate": 4.986912036950411e-06, "loss": 3.7481, "step": 3300 }, { "epoch": 0.033620198567708336, "grad_norm": 13.510692596435547, "learning_rate": 4.986871140976361e-06, "loss": 3.5751, "step": 3305 }, { "epoch": 0.033671061197916664, "grad_norm": 14.510315895080566, "learning_rate": 4.986830181376302e-06, "loss": 3.2262, "step": 3310 }, { "epoch": 0.033721923828125, "grad_norm": 13.225347518920898, "learning_rate": 4.986789158151282e-06, "loss": 3.3544, "step": 3315 }, { "epoch": 0.033772786458333336, "grad_norm": 15.174870491027832, "learning_rate": 4.9867480713023506e-06, "loss": 3.39, "step": 3320 }, { "epoch": 0.033823649088541664, "grad_norm": 13.420331954956055, "learning_rate": 4.98670692083056e-06, "loss": 3.4115, "step": 3325 }, { "epoch": 0.03387451171875, "grad_norm": 14.904714584350586, "learning_rate": 4.986665706736962e-06, "loss": 3.5199, "step": 3330 }, { "epoch": 0.033925374348958336, "grad_norm": 9.973340034484863, "learning_rate": 4.986624429022611e-06, "loss": 3.5716, "step": 3335 }, { "epoch": 0.033976236979166664, "grad_norm": 12.208763122558594, "learning_rate": 4.986583087688563e-06, "loss": 3.6914, "step": 3340 }, { "epoch": 0.034027099609375, "grad_norm": 13.434621810913086, "learning_rate": 4.986541682735877e-06, "loss": 3.2363, "step": 3345 }, { "epoch": 0.034077962239583336, "grad_norm": 17.55096435546875, "learning_rate": 4.986500214165611e-06, "loss": 3.4762, "step": 3350 }, { "epoch": 0.034128824869791664, "grad_norm": 11.070765495300293, "learning_rate": 4.986458681978826e-06, "loss": 3.6086, "step": 3355 }, { "epoch": 0.0341796875, "grad_norm": 12.782002449035645, "learning_rate": 4.986417086176586e-06, "loss": 3.3807, "step": 3360 }, { "epoch": 0.034230550130208336, "grad_norm": 11.93525218963623, "learning_rate": 4.9863754267599535e-06, "loss": 3.6002, "step": 3365 }, { "epoch": 0.034281412760416664, "grad_norm": 12.5736083984375, "learning_rate": 4.986333703729995e-06, "loss": 3.6075, "step": 3370 }, { "epoch": 0.034332275390625, "grad_norm": 14.359274864196777, "learning_rate": 4.986291917087778e-06, "loss": 3.461, "step": 3375 }, { "epoch": 0.034383138020833336, "grad_norm": 12.868420600891113, "learning_rate": 4.9862500668343714e-06, "loss": 3.9434, "step": 3380 }, { "epoch": 0.034434000651041664, "grad_norm": 18.13227653503418, "learning_rate": 4.986208152970847e-06, "loss": 3.2659, "step": 3385 }, { "epoch": 0.03448486328125, "grad_norm": 8.368807792663574, "learning_rate": 4.986166175498276e-06, "loss": 3.6618, "step": 3390 }, { "epoch": 0.034535725911458336, "grad_norm": 11.16234302520752, "learning_rate": 4.986124134417732e-06, "loss": 3.3186, "step": 3395 }, { "epoch": 0.034586588541666664, "grad_norm": 14.434364318847656, "learning_rate": 4.986082029730292e-06, "loss": 3.8224, "step": 3400 }, { "epoch": 0.034637451171875, "grad_norm": 11.538227081298828, "learning_rate": 4.9860398614370324e-06, "loss": 3.3252, "step": 3405 }, { "epoch": 0.034688313802083336, "grad_norm": 15.10848617553711, "learning_rate": 4.985997629539032e-06, "loss": 3.3582, "step": 3410 }, { "epoch": 0.034739176432291664, "grad_norm": 12.740103721618652, "learning_rate": 4.985955334037372e-06, "loss": 3.672, "step": 3415 }, { "epoch": 0.0347900390625, "grad_norm": 15.706042289733887, "learning_rate": 4.985912974933134e-06, "loss": 3.379, "step": 3420 }, { "epoch": 0.034840901692708336, "grad_norm": 21.957704544067383, "learning_rate": 4.985870552227401e-06, "loss": 3.8408, "step": 3425 }, { "epoch": 0.034891764322916664, "grad_norm": 8.741256713867188, "learning_rate": 4.9858280659212595e-06, "loss": 3.4575, "step": 3430 }, { "epoch": 0.034942626953125, "grad_norm": 16.37959098815918, "learning_rate": 4.9857855160157965e-06, "loss": 3.7038, "step": 3435 }, { "epoch": 0.034993489583333336, "grad_norm": 15.630584716796875, "learning_rate": 4.9857429025120996e-06, "loss": 3.3838, "step": 3440 }, { "epoch": 0.035044352213541664, "grad_norm": 13.591779708862305, "learning_rate": 4.98570022541126e-06, "loss": 3.4661, "step": 3445 }, { "epoch": 0.03509521484375, "grad_norm": 14.534747123718262, "learning_rate": 4.985657484714369e-06, "loss": 3.487, "step": 3450 }, { "epoch": 0.035146077473958336, "grad_norm": 10.500582695007324, "learning_rate": 4.985614680422521e-06, "loss": 3.6731, "step": 3455 }, { "epoch": 0.035196940104166664, "grad_norm": 10.115405082702637, "learning_rate": 4.9855718125368105e-06, "loss": 3.4601, "step": 3460 }, { "epoch": 0.035247802734375, "grad_norm": 17.56122398376465, "learning_rate": 4.985528881058334e-06, "loss": 3.6548, "step": 3465 }, { "epoch": 0.035298665364583336, "grad_norm": 9.477646827697754, "learning_rate": 4.9854858859881905e-06, "loss": 3.3522, "step": 3470 }, { "epoch": 0.035349527994791664, "grad_norm": 12.787845611572266, "learning_rate": 4.985442827327479e-06, "loss": 3.3005, "step": 3475 }, { "epoch": 0.035400390625, "grad_norm": 9.624972343444824, "learning_rate": 4.985399705077303e-06, "loss": 3.6494, "step": 3480 }, { "epoch": 0.035451253255208336, "grad_norm": 16.32085609436035, "learning_rate": 4.985356519238764e-06, "loss": 3.6078, "step": 3485 }, { "epoch": 0.035502115885416664, "grad_norm": 14.953262329101562, "learning_rate": 4.985313269812968e-06, "loss": 3.256, "step": 3490 }, { "epoch": 0.035552978515625, "grad_norm": 19.811403274536133, "learning_rate": 4.985269956801021e-06, "loss": 3.6117, "step": 3495 }, { "epoch": 0.035603841145833336, "grad_norm": 15.88729190826416, "learning_rate": 4.985226580204031e-06, "loss": 3.5138, "step": 3500 }, { "epoch": 0.035654703776041664, "grad_norm": 16.489349365234375, "learning_rate": 4.9851831400231075e-06, "loss": 3.5093, "step": 3505 }, { "epoch": 0.03570556640625, "grad_norm": 11.760025024414062, "learning_rate": 4.985139636259363e-06, "loss": 4.247, "step": 3510 }, { "epoch": 0.035756429036458336, "grad_norm": 12.459917068481445, "learning_rate": 4.98509606891391e-06, "loss": 3.4997, "step": 3515 }, { "epoch": 0.035807291666666664, "grad_norm": 10.306233406066895, "learning_rate": 4.985052437987863e-06, "loss": 3.4573, "step": 3520 }, { "epoch": 0.035858154296875, "grad_norm": 16.409082412719727, "learning_rate": 4.9850087434823384e-06, "loss": 3.4351, "step": 3525 }, { "epoch": 0.035909016927083336, "grad_norm": 9.501917839050293, "learning_rate": 4.984964985398454e-06, "loss": 3.4707, "step": 3530 }, { "epoch": 0.035959879557291664, "grad_norm": 13.083230972290039, "learning_rate": 4.98492116373733e-06, "loss": 3.3439, "step": 3535 }, { "epoch": 0.0360107421875, "grad_norm": 11.03459358215332, "learning_rate": 4.984877278500087e-06, "loss": 3.3296, "step": 3540 }, { "epoch": 0.036061604817708336, "grad_norm": 15.221756935119629, "learning_rate": 4.984833329687847e-06, "loss": 3.5049, "step": 3545 }, { "epoch": 0.036112467447916664, "grad_norm": 12.892142295837402, "learning_rate": 4.9847893173017345e-06, "loss": 3.6549, "step": 3550 }, { "epoch": 0.036163330078125, "grad_norm": 13.091279029846191, "learning_rate": 4.984745241342877e-06, "loss": 3.6493, "step": 3555 }, { "epoch": 0.036214192708333336, "grad_norm": 14.84507942199707, "learning_rate": 4.984701101812402e-06, "loss": 3.5608, "step": 3560 }, { "epoch": 0.036265055338541664, "grad_norm": 12.539996147155762, "learning_rate": 4.984656898711438e-06, "loss": 3.5464, "step": 3565 }, { "epoch": 0.03631591796875, "grad_norm": 12.54378604888916, "learning_rate": 4.984612632041117e-06, "loss": 3.7036, "step": 3570 }, { "epoch": 0.036366780598958336, "grad_norm": 15.650618553161621, "learning_rate": 4.98456830180257e-06, "loss": 3.7464, "step": 3575 }, { "epoch": 0.036417643229166664, "grad_norm": 12.167283058166504, "learning_rate": 4.984523907996932e-06, "loss": 3.6591, "step": 3580 }, { "epoch": 0.036468505859375, "grad_norm": 13.527252197265625, "learning_rate": 4.984479450625338e-06, "loss": 3.5705, "step": 3585 }, { "epoch": 0.036519368489583336, "grad_norm": 16.410930633544922, "learning_rate": 4.9844349296889275e-06, "loss": 3.7471, "step": 3590 }, { "epoch": 0.036570231119791664, "grad_norm": 10.030250549316406, "learning_rate": 4.984390345188838e-06, "loss": 3.841, "step": 3595 }, { "epoch": 0.03662109375, "grad_norm": 11.255728721618652, "learning_rate": 4.9843456971262095e-06, "loss": 3.5435, "step": 3600 }, { "epoch": 0.036671956380208336, "grad_norm": 9.756819725036621, "learning_rate": 4.984300985502185e-06, "loss": 3.3497, "step": 3605 }, { "epoch": 0.036722819010416664, "grad_norm": 12.754555702209473, "learning_rate": 4.984256210317909e-06, "loss": 3.723, "step": 3610 }, { "epoch": 0.036773681640625, "grad_norm": 15.393765449523926, "learning_rate": 4.984211371574527e-06, "loss": 3.99, "step": 3615 }, { "epoch": 0.036824544270833336, "grad_norm": 16.75415802001953, "learning_rate": 4.984166469273186e-06, "loss": 3.1881, "step": 3620 }, { "epoch": 0.036875406901041664, "grad_norm": 15.176877975463867, "learning_rate": 4.984121503415034e-06, "loss": 3.6833, "step": 3625 }, { "epoch": 0.03692626953125, "grad_norm": 95.34676361083984, "learning_rate": 4.9840764740012225e-06, "loss": 3.5113, "step": 3630 }, { "epoch": 0.036977132161458336, "grad_norm": 12.578669548034668, "learning_rate": 4.984031381032903e-06, "loss": 4.0164, "step": 3635 }, { "epoch": 0.037027994791666664, "grad_norm": 12.883451461791992, "learning_rate": 4.98398622451123e-06, "loss": 3.2015, "step": 3640 }, { "epoch": 0.037078857421875, "grad_norm": 14.403608322143555, "learning_rate": 4.983941004437358e-06, "loss": 3.2212, "step": 3645 }, { "epoch": 0.037129720052083336, "grad_norm": 14.960567474365234, "learning_rate": 4.983895720812444e-06, "loss": 3.2845, "step": 3650 }, { "epoch": 0.037180582682291664, "grad_norm": 9.842466354370117, "learning_rate": 4.9838503736376465e-06, "loss": 3.4744, "step": 3655 }, { "epoch": 0.0372314453125, "grad_norm": 9.656144142150879, "learning_rate": 4.983804962914126e-06, "loss": 3.6732, "step": 3660 }, { "epoch": 0.037282307942708336, "grad_norm": 15.675665855407715, "learning_rate": 4.983759488643045e-06, "loss": 3.4493, "step": 3665 }, { "epoch": 0.037333170572916664, "grad_norm": 11.249836921691895, "learning_rate": 4.983713950825565e-06, "loss": 3.6472, "step": 3670 }, { "epoch": 0.037384033203125, "grad_norm": 16.177907943725586, "learning_rate": 4.983668349462853e-06, "loss": 3.3561, "step": 3675 }, { "epoch": 0.037434895833333336, "grad_norm": 14.318443298339844, "learning_rate": 4.983622684556075e-06, "loss": 3.4628, "step": 3680 }, { "epoch": 0.037485758463541664, "grad_norm": 14.132331848144531, "learning_rate": 4.9835769561064e-06, "loss": 3.4866, "step": 3685 }, { "epoch": 0.03753662109375, "grad_norm": 12.774168014526367, "learning_rate": 4.9835311641149955e-06, "loss": 3.2058, "step": 3690 }, { "epoch": 0.037587483723958336, "grad_norm": 10.149826049804688, "learning_rate": 4.983485308583036e-06, "loss": 3.2685, "step": 3695 }, { "epoch": 0.037638346354166664, "grad_norm": 12.236494064331055, "learning_rate": 4.983439389511693e-06, "loss": 3.3043, "step": 3700 }, { "epoch": 0.037689208984375, "grad_norm": 13.76052188873291, "learning_rate": 4.983393406902142e-06, "loss": 3.6717, "step": 3705 }, { "epoch": 0.037740071614583336, "grad_norm": 16.32668113708496, "learning_rate": 4.983347360755559e-06, "loss": 3.6747, "step": 3710 }, { "epoch": 0.037790934244791664, "grad_norm": 14.244294166564941, "learning_rate": 4.983301251073124e-06, "loss": 3.8085, "step": 3715 }, { "epoch": 0.037841796875, "grad_norm": 15.147490501403809, "learning_rate": 4.983255077856014e-06, "loss": 3.2321, "step": 3720 }, { "epoch": 0.037892659505208336, "grad_norm": 12.199444770812988, "learning_rate": 4.983208841105411e-06, "loss": 3.4871, "step": 3725 }, { "epoch": 0.037943522135416664, "grad_norm": 18.730581283569336, "learning_rate": 4.983162540822498e-06, "loss": 4.198, "step": 3730 }, { "epoch": 0.037994384765625, "grad_norm": 14.874238967895508, "learning_rate": 4.983116177008461e-06, "loss": 3.5318, "step": 3735 }, { "epoch": 0.038045247395833336, "grad_norm": 10.15190601348877, "learning_rate": 4.9830697496644855e-06, "loss": 3.3821, "step": 3740 }, { "epoch": 0.038096110026041664, "grad_norm": 17.800188064575195, "learning_rate": 4.983023258791758e-06, "loss": 3.5971, "step": 3745 }, { "epoch": 0.03814697265625, "grad_norm": 45.00802993774414, "learning_rate": 4.98297670439147e-06, "loss": 3.0957, "step": 3750 }, { "epoch": 0.038197835286458336, "grad_norm": 11.17233657836914, "learning_rate": 4.9829300864648104e-06, "loss": 3.6894, "step": 3755 }, { "epoch": 0.038248697916666664, "grad_norm": 16.200355529785156, "learning_rate": 4.982883405012974e-06, "loss": 3.2511, "step": 3760 }, { "epoch": 0.038299560546875, "grad_norm": 11.851147651672363, "learning_rate": 4.982836660037154e-06, "loss": 3.31, "step": 3765 }, { "epoch": 0.038350423177083336, "grad_norm": 8.351678848266602, "learning_rate": 4.982789851538545e-06, "loss": 3.1318, "step": 3770 }, { "epoch": 0.038401285807291664, "grad_norm": 12.307997703552246, "learning_rate": 4.982742979518348e-06, "loss": 3.3283, "step": 3775 }, { "epoch": 0.0384521484375, "grad_norm": 11.954642295837402, "learning_rate": 4.98269604397776e-06, "loss": 3.7551, "step": 3780 }, { "epoch": 0.038503011067708336, "grad_norm": 11.246746063232422, "learning_rate": 4.982649044917982e-06, "loss": 3.5243, "step": 3785 }, { "epoch": 0.038553873697916664, "grad_norm": 19.319927215576172, "learning_rate": 4.982601982340216e-06, "loss": 3.546, "step": 3790 }, { "epoch": 0.038604736328125, "grad_norm": 15.717957496643066, "learning_rate": 4.982554856245668e-06, "loss": 3.9242, "step": 3795 }, { "epoch": 0.038655598958333336, "grad_norm": 10.940032005310059, "learning_rate": 4.982507666635541e-06, "loss": 3.5011, "step": 3800 }, { "epoch": 0.038706461588541664, "grad_norm": 17.505874633789062, "learning_rate": 4.982460413511045e-06, "loss": 3.4926, "step": 3805 }, { "epoch": 0.03875732421875, "grad_norm": 13.361002922058105, "learning_rate": 4.9824130968733875e-06, "loss": 3.431, "step": 3810 }, { "epoch": 0.038808186848958336, "grad_norm": 11.022466659545898, "learning_rate": 4.982365716723779e-06, "loss": 3.4269, "step": 3815 }, { "epoch": 0.038859049479166664, "grad_norm": 13.40270709991455, "learning_rate": 4.982318273063432e-06, "loss": 3.4141, "step": 3820 }, { "epoch": 0.038909912109375, "grad_norm": 12.13808536529541, "learning_rate": 4.98227076589356e-06, "loss": 3.4167, "step": 3825 }, { "epoch": 0.038960774739583336, "grad_norm": 13.911450386047363, "learning_rate": 4.98222319521538e-06, "loss": 3.5935, "step": 3830 }, { "epoch": 0.039011637369791664, "grad_norm": 9.00546646118164, "learning_rate": 4.982175561030107e-06, "loss": 3.4011, "step": 3835 }, { "epoch": 0.0390625, "grad_norm": 15.89461612701416, "learning_rate": 4.982127863338961e-06, "loss": 3.3942, "step": 3840 }, { "epoch": 0.039113362630208336, "grad_norm": 15.606123924255371, "learning_rate": 4.982080102143161e-06, "loss": 3.4825, "step": 3845 }, { "epoch": 0.039164225260416664, "grad_norm": 16.339702606201172, "learning_rate": 4.982032277443931e-06, "loss": 3.5663, "step": 3850 }, { "epoch": 0.039215087890625, "grad_norm": 9.590949058532715, "learning_rate": 4.981984389242493e-06, "loss": 3.5181, "step": 3855 }, { "epoch": 0.039265950520833336, "grad_norm": 8.50133228302002, "learning_rate": 4.981936437540073e-06, "loss": 3.5247, "step": 3860 }, { "epoch": 0.039316813151041664, "grad_norm": 15.226415634155273, "learning_rate": 4.981888422337897e-06, "loss": 3.5207, "step": 3865 }, { "epoch": 0.03936767578125, "grad_norm": 15.351889610290527, "learning_rate": 4.981840343637194e-06, "loss": 3.6821, "step": 3870 }, { "epoch": 0.039418538411458336, "grad_norm": 11.84135627746582, "learning_rate": 4.981792201439195e-06, "loss": 3.4474, "step": 3875 }, { "epoch": 0.039469401041666664, "grad_norm": 19.106542587280273, "learning_rate": 4.9817439957451295e-06, "loss": 3.4201, "step": 3880 }, { "epoch": 0.039520263671875, "grad_norm": 14.762589454650879, "learning_rate": 4.981695726556233e-06, "loss": 3.6159, "step": 3885 }, { "epoch": 0.039571126302083336, "grad_norm": 10.755833625793457, "learning_rate": 4.98164739387374e-06, "loss": 3.6952, "step": 3890 }, { "epoch": 0.039621988932291664, "grad_norm": 7.459182262420654, "learning_rate": 4.9815989976988856e-06, "loss": 3.3008, "step": 3895 }, { "epoch": 0.0396728515625, "grad_norm": 16.29890251159668, "learning_rate": 4.98155053803291e-06, "loss": 3.7614, "step": 3900 }, { "epoch": 0.039723714192708336, "grad_norm": 15.238051414489746, "learning_rate": 4.981502014877051e-06, "loss": 3.5197, "step": 3905 }, { "epoch": 0.039774576822916664, "grad_norm": 16.749492645263672, "learning_rate": 4.981453428232551e-06, "loss": 4.1579, "step": 3910 }, { "epoch": 0.039825439453125, "grad_norm": 14.327499389648438, "learning_rate": 4.981404778100654e-06, "loss": 3.1206, "step": 3915 }, { "epoch": 0.039876302083333336, "grad_norm": 18.08525848388672, "learning_rate": 4.981356064482604e-06, "loss": 3.5667, "step": 3920 }, { "epoch": 0.039927164713541664, "grad_norm": 9.953446388244629, "learning_rate": 4.981307287379647e-06, "loss": 3.1772, "step": 3925 }, { "epoch": 0.03997802734375, "grad_norm": 12.199675559997559, "learning_rate": 4.9812584467930315e-06, "loss": 3.3682, "step": 3930 }, { "epoch": 0.040028889973958336, "grad_norm": 14.886872291564941, "learning_rate": 4.981209542724006e-06, "loss": 3.3351, "step": 3935 }, { "epoch": 0.040079752604166664, "grad_norm": 15.33847427368164, "learning_rate": 4.981160575173823e-06, "loss": 3.2065, "step": 3940 }, { "epoch": 0.040130615234375, "grad_norm": 10.76516056060791, "learning_rate": 4.981111544143735e-06, "loss": 3.6167, "step": 3945 }, { "epoch": 0.040181477864583336, "grad_norm": 10.835091590881348, "learning_rate": 4.981062449634996e-06, "loss": 3.3896, "step": 3950 }, { "epoch": 0.040232340494791664, "grad_norm": 8.555797576904297, "learning_rate": 4.981013291648861e-06, "loss": 3.3618, "step": 3955 }, { "epoch": 0.040283203125, "grad_norm": 20.06720733642578, "learning_rate": 4.980964070186591e-06, "loss": 3.1369, "step": 3960 }, { "epoch": 0.040334065755208336, "grad_norm": 10.057026863098145, "learning_rate": 4.9809147852494425e-06, "loss": 3.4979, "step": 3965 }, { "epoch": 0.040384928385416664, "grad_norm": 9.190557479858398, "learning_rate": 4.980865436838677e-06, "loss": 3.5271, "step": 3970 }, { "epoch": 0.040435791015625, "grad_norm": 13.96591854095459, "learning_rate": 4.9808160249555585e-06, "loss": 3.8185, "step": 3975 }, { "epoch": 0.040486653645833336, "grad_norm": 13.018309593200684, "learning_rate": 4.980766549601349e-06, "loss": 3.3418, "step": 3980 }, { "epoch": 0.040537516276041664, "grad_norm": 12.482734680175781, "learning_rate": 4.9807170107773155e-06, "loss": 3.7106, "step": 3985 }, { "epoch": 0.04058837890625, "grad_norm": 19.169965744018555, "learning_rate": 4.980667408484725e-06, "loss": 3.7609, "step": 3990 }, { "epoch": 0.040639241536458336, "grad_norm": 16.576343536376953, "learning_rate": 4.980617742724847e-06, "loss": 3.564, "step": 3995 }, { "epoch": 0.040690104166666664, "grad_norm": 13.268610954284668, "learning_rate": 4.980568013498952e-06, "loss": 3.5274, "step": 4000 }, { "epoch": 0.040740966796875, "grad_norm": 11.195540428161621, "learning_rate": 4.980518220808312e-06, "loss": 3.6116, "step": 4005 }, { "epoch": 0.040791829427083336, "grad_norm": 10.44593620300293, "learning_rate": 4.980468364654202e-06, "loss": 3.4021, "step": 4010 }, { "epoch": 0.040842692057291664, "grad_norm": 49.621131896972656, "learning_rate": 4.980418445037897e-06, "loss": 3.6859, "step": 4015 }, { "epoch": 0.0408935546875, "grad_norm": 14.462821006774902, "learning_rate": 4.980368461960673e-06, "loss": 3.4476, "step": 4020 }, { "epoch": 0.040944417317708336, "grad_norm": 10.35971736907959, "learning_rate": 4.98031841542381e-06, "loss": 3.6455, "step": 4025 }, { "epoch": 0.040995279947916664, "grad_norm": 16.127132415771484, "learning_rate": 4.980268305428589e-06, "loss": 3.4884, "step": 4030 }, { "epoch": 0.041046142578125, "grad_norm": 11.943824768066406, "learning_rate": 4.980218131976291e-06, "loss": 3.7037, "step": 4035 }, { "epoch": 0.041097005208333336, "grad_norm": 13.393162727355957, "learning_rate": 4.9801678950682e-06, "loss": 3.5249, "step": 4040 }, { "epoch": 0.041147867838541664, "grad_norm": 13.681143760681152, "learning_rate": 4.9801175947056005e-06, "loss": 3.4255, "step": 4045 }, { "epoch": 0.04119873046875, "grad_norm": 14.354728698730469, "learning_rate": 4.980067230889781e-06, "loss": 3.3633, "step": 4050 }, { "epoch": 0.041249593098958336, "grad_norm": 15.5615873336792, "learning_rate": 4.9800168036220295e-06, "loss": 3.5337, "step": 4055 }, { "epoch": 0.041300455729166664, "grad_norm": 8.801639556884766, "learning_rate": 4.9799663129036354e-06, "loss": 3.5424, "step": 4060 }, { "epoch": 0.041351318359375, "grad_norm": 11.887249946594238, "learning_rate": 4.9799157587358905e-06, "loss": 3.595, "step": 4065 }, { "epoch": 0.041402180989583336, "grad_norm": 10.93061637878418, "learning_rate": 4.979865141120089e-06, "loss": 3.2786, "step": 4070 }, { "epoch": 0.041453043619791664, "grad_norm": 13.5011625289917, "learning_rate": 4.979814460057527e-06, "loss": 3.3899, "step": 4075 }, { "epoch": 0.04150390625, "grad_norm": 13.714580535888672, "learning_rate": 4.979763715549498e-06, "loss": 3.3754, "step": 4080 }, { "epoch": 0.041554768880208336, "grad_norm": 8.235960960388184, "learning_rate": 4.9797129075973025e-06, "loss": 3.4213, "step": 4085 }, { "epoch": 0.041605631510416664, "grad_norm": 9.007630348205566, "learning_rate": 4.979662036202241e-06, "loss": 3.5357, "step": 4090 }, { "epoch": 0.041656494140625, "grad_norm": 11.23054313659668, "learning_rate": 4.979611101365613e-06, "loss": 3.7044, "step": 4095 }, { "epoch": 0.041707356770833336, "grad_norm": 12.494560241699219, "learning_rate": 4.979560103088723e-06, "loss": 3.5578, "step": 4100 }, { "epoch": 0.041758219401041664, "grad_norm": 15.440023422241211, "learning_rate": 4.979509041372876e-06, "loss": 3.3444, "step": 4105 }, { "epoch": 0.04180908203125, "grad_norm": 14.82787799835205, "learning_rate": 4.979457916219378e-06, "loss": 3.7682, "step": 4110 }, { "epoch": 0.041859944661458336, "grad_norm": 13.421795845031738, "learning_rate": 4.979406727629536e-06, "loss": 3.673, "step": 4115 }, { "epoch": 0.041910807291666664, "grad_norm": 19.039793014526367, "learning_rate": 4.979355475604661e-06, "loss": 3.5433, "step": 4120 }, { "epoch": 0.041961669921875, "grad_norm": 16.655475616455078, "learning_rate": 4.979304160146064e-06, "loss": 3.2191, "step": 4125 }, { "epoch": 0.042012532552083336, "grad_norm": 13.381031036376953, "learning_rate": 4.979252781255057e-06, "loss": 3.1412, "step": 4130 }, { "epoch": 0.042063395182291664, "grad_norm": 11.062989234924316, "learning_rate": 4.979201338932956e-06, "loss": 3.8812, "step": 4135 }, { "epoch": 0.0421142578125, "grad_norm": 11.04766845703125, "learning_rate": 4.979149833181076e-06, "loss": 3.1701, "step": 4140 }, { "epoch": 0.042165120442708336, "grad_norm": 12.870123863220215, "learning_rate": 4.979098264000736e-06, "loss": 3.4735, "step": 4145 }, { "epoch": 0.042215983072916664, "grad_norm": 11.865944862365723, "learning_rate": 4.979046631393253e-06, "loss": 3.666, "step": 4150 }, { "epoch": 0.042266845703125, "grad_norm": 10.82459831237793, "learning_rate": 4.97899493535995e-06, "loss": 3.6551, "step": 4155 }, { "epoch": 0.042317708333333336, "grad_norm": 12.600262641906738, "learning_rate": 4.97894317590215e-06, "loss": 3.8571, "step": 4160 }, { "epoch": 0.042368570963541664, "grad_norm": 9.6876220703125, "learning_rate": 4.978891353021176e-06, "loss": 3.5783, "step": 4165 }, { "epoch": 0.04241943359375, "grad_norm": 13.637721061706543, "learning_rate": 4.978839466718354e-06, "loss": 3.7054, "step": 4170 }, { "epoch": 0.042470296223958336, "grad_norm": 10.315239906311035, "learning_rate": 4.978787516995012e-06, "loss": 3.442, "step": 4175 }, { "epoch": 0.042521158854166664, "grad_norm": 12.303563117980957, "learning_rate": 4.9787355038524785e-06, "loss": 3.425, "step": 4180 }, { "epoch": 0.042572021484375, "grad_norm": 8.549363136291504, "learning_rate": 4.978683427292086e-06, "loss": 3.3095, "step": 4185 }, { "epoch": 0.042622884114583336, "grad_norm": 14.406485557556152, "learning_rate": 4.978631287315165e-06, "loss": 3.4881, "step": 4190 }, { "epoch": 0.042673746744791664, "grad_norm": 13.812122344970703, "learning_rate": 4.978579083923049e-06, "loss": 3.5369, "step": 4195 }, { "epoch": 0.042724609375, "grad_norm": 14.43535041809082, "learning_rate": 4.978526817117075e-06, "loss": 3.5955, "step": 4200 }, { "epoch": 0.042775472005208336, "grad_norm": 14.955559730529785, "learning_rate": 4.97847448689858e-06, "loss": 3.4783, "step": 4205 }, { "epoch": 0.042826334635416664, "grad_norm": 15.662449836730957, "learning_rate": 4.978422093268903e-06, "loss": 3.0456, "step": 4210 }, { "epoch": 0.042877197265625, "grad_norm": 21.45525360107422, "learning_rate": 4.978369636229383e-06, "loss": 3.3702, "step": 4215 }, { "epoch": 0.042928059895833336, "grad_norm": 11.133001327514648, "learning_rate": 4.978317115781365e-06, "loss": 3.7719, "step": 4220 }, { "epoch": 0.042978922526041664, "grad_norm": 14.71216869354248, "learning_rate": 4.97826453192619e-06, "loss": 3.2268, "step": 4225 }, { "epoch": 0.04302978515625, "grad_norm": 8.537089347839355, "learning_rate": 4.978211884665205e-06, "loss": 3.3107, "step": 4230 }, { "epoch": 0.043080647786458336, "grad_norm": 20.554950714111328, "learning_rate": 4.978159173999756e-06, "loss": 3.9013, "step": 4235 }, { "epoch": 0.043131510416666664, "grad_norm": 16.465560913085938, "learning_rate": 4.9781063999311914e-06, "loss": 3.5264, "step": 4240 }, { "epoch": 0.043182373046875, "grad_norm": 12.345917701721191, "learning_rate": 4.978053562460863e-06, "loss": 3.6222, "step": 4245 }, { "epoch": 0.043233235677083336, "grad_norm": 14.286460876464844, "learning_rate": 4.978000661590121e-06, "loss": 3.55, "step": 4250 }, { "epoch": 0.043284098307291664, "grad_norm": 16.970375061035156, "learning_rate": 4.97794769732032e-06, "loss": 3.7756, "step": 4255 }, { "epoch": 0.0433349609375, "grad_norm": 12.45829963684082, "learning_rate": 4.977894669652814e-06, "loss": 3.5234, "step": 4260 }, { "epoch": 0.043385823567708336, "grad_norm": 7.817399501800537, "learning_rate": 4.97784157858896e-06, "loss": 3.658, "step": 4265 }, { "epoch": 0.043436686197916664, "grad_norm": 16.19847869873047, "learning_rate": 4.9777884241301165e-06, "loss": 3.5021, "step": 4270 }, { "epoch": 0.043487548828125, "grad_norm": 8.416207313537598, "learning_rate": 4.977735206277644e-06, "loss": 3.3046, "step": 4275 }, { "epoch": 0.043538411458333336, "grad_norm": 13.35494613647461, "learning_rate": 4.977681925032902e-06, "loss": 3.3918, "step": 4280 }, { "epoch": 0.043589274088541664, "grad_norm": 10.832646369934082, "learning_rate": 4.977628580397257e-06, "loss": 3.4338, "step": 4285 }, { "epoch": 0.04364013671875, "grad_norm": 9.665356636047363, "learning_rate": 4.977575172372072e-06, "loss": 3.5508, "step": 4290 }, { "epoch": 0.043690999348958336, "grad_norm": 11.935736656188965, "learning_rate": 4.977521700958712e-06, "loss": 3.463, "step": 4295 }, { "epoch": 0.043741861979166664, "grad_norm": 16.61982536315918, "learning_rate": 4.977468166158548e-06, "loss": 3.7634, "step": 4300 }, { "epoch": 0.043792724609375, "grad_norm": 14.604238510131836, "learning_rate": 4.977414567972948e-06, "loss": 3.3686, "step": 4305 }, { "epoch": 0.043843587239583336, "grad_norm": 13.716455459594727, "learning_rate": 4.977360906403283e-06, "loss": 3.374, "step": 4310 }, { "epoch": 0.043894449869791664, "grad_norm": 11.446566581726074, "learning_rate": 4.977307181450926e-06, "loss": 3.3746, "step": 4315 }, { "epoch": 0.0439453125, "grad_norm": 9.788915634155273, "learning_rate": 4.977253393117253e-06, "loss": 3.5568, "step": 4320 }, { "epoch": 0.043996175130208336, "grad_norm": 203.79078674316406, "learning_rate": 4.977199541403638e-06, "loss": 3.7115, "step": 4325 }, { "epoch": 0.044047037760416664, "grad_norm": 14.938610076904297, "learning_rate": 4.97714562631146e-06, "loss": 3.6643, "step": 4330 }, { "epoch": 0.044097900390625, "grad_norm": 15.485106468200684, "learning_rate": 4.977091647842099e-06, "loss": 3.5269, "step": 4335 }, { "epoch": 0.044148763020833336, "grad_norm": 12.620315551757812, "learning_rate": 4.977037605996936e-06, "loss": 4.1156, "step": 4340 }, { "epoch": 0.044199625651041664, "grad_norm": 16.161775588989258, "learning_rate": 4.976983500777352e-06, "loss": 3.845, "step": 4345 }, { "epoch": 0.04425048828125, "grad_norm": 14.427170753479004, "learning_rate": 4.976929332184732e-06, "loss": 3.7344, "step": 4350 }, { "epoch": 0.044301350911458336, "grad_norm": 15.274232864379883, "learning_rate": 4.976875100220462e-06, "loss": 3.3062, "step": 4355 }, { "epoch": 0.044352213541666664, "grad_norm": 8.53735637664795, "learning_rate": 4.97682080488593e-06, "loss": 3.272, "step": 4360 }, { "epoch": 0.044403076171875, "grad_norm": 20.58081817626953, "learning_rate": 4.9767664461825246e-06, "loss": 3.3835, "step": 4365 }, { "epoch": 0.044453938802083336, "grad_norm": 8.867008209228516, "learning_rate": 4.976712024111637e-06, "loss": 3.36, "step": 4370 }, { "epoch": 0.044504801432291664, "grad_norm": 13.777779579162598, "learning_rate": 4.976657538674659e-06, "loss": 3.6504, "step": 4375 }, { "epoch": 0.0445556640625, "grad_norm": 13.533966064453125, "learning_rate": 4.9766029898729865e-06, "loss": 3.6923, "step": 4380 }, { "epoch": 0.044606526692708336, "grad_norm": 16.9124698638916, "learning_rate": 4.976548377708011e-06, "loss": 3.7102, "step": 4385 }, { "epoch": 0.044657389322916664, "grad_norm": 14.597479820251465, "learning_rate": 4.9764937021811345e-06, "loss": 3.6415, "step": 4390 }, { "epoch": 0.044708251953125, "grad_norm": 12.073945999145508, "learning_rate": 4.976438963293753e-06, "loss": 3.5683, "step": 4395 }, { "epoch": 0.044759114583333336, "grad_norm": 10.953951835632324, "learning_rate": 4.976384161047266e-06, "loss": 3.4114, "step": 4400 }, { "epoch": 0.044809977213541664, "grad_norm": 11.895225524902344, "learning_rate": 4.976329295443079e-06, "loss": 3.5981, "step": 4405 }, { "epoch": 0.04486083984375, "grad_norm": 13.530261039733887, "learning_rate": 4.976274366482593e-06, "loss": 3.416, "step": 4410 }, { "epoch": 0.044911702473958336, "grad_norm": 12.748217582702637, "learning_rate": 4.9762193741672145e-06, "loss": 3.6393, "step": 4415 }, { "epoch": 0.044962565104166664, "grad_norm": 34.55634307861328, "learning_rate": 4.976164318498351e-06, "loss": 3.6435, "step": 4420 }, { "epoch": 0.045013427734375, "grad_norm": 9.771318435668945, "learning_rate": 4.9761091994774095e-06, "loss": 3.8975, "step": 4425 }, { "epoch": 0.045064290364583336, "grad_norm": 10.606549263000488, "learning_rate": 4.976054017105801e-06, "loss": 3.8694, "step": 4430 }, { "epoch": 0.045115152994791664, "grad_norm": 18.156574249267578, "learning_rate": 4.975998771384938e-06, "loss": 3.8872, "step": 4435 }, { "epoch": 0.045166015625, "grad_norm": 12.91622257232666, "learning_rate": 4.9759434623162325e-06, "loss": 3.5453, "step": 4440 }, { "epoch": 0.045216878255208336, "grad_norm": 12.400276184082031, "learning_rate": 4.975888089901101e-06, "loss": 3.7277, "step": 4445 }, { "epoch": 0.045267740885416664, "grad_norm": 11.56432056427002, "learning_rate": 4.97583265414096e-06, "loss": 3.62, "step": 4450 }, { "epoch": 0.045318603515625, "grad_norm": 12.74065113067627, "learning_rate": 4.975777155037226e-06, "loss": 3.2927, "step": 4455 }, { "epoch": 0.045369466145833336, "grad_norm": 16.921592712402344, "learning_rate": 4.975721592591321e-06, "loss": 3.4098, "step": 4460 }, { "epoch": 0.045420328776041664, "grad_norm": 10.564414024353027, "learning_rate": 4.975665966804666e-06, "loss": 3.5746, "step": 4465 }, { "epoch": 0.04547119140625, "grad_norm": 11.815017700195312, "learning_rate": 4.9756102776786845e-06, "loss": 3.0929, "step": 4470 }, { "epoch": 0.045522054036458336, "grad_norm": 11.405243873596191, "learning_rate": 4.9755545252147995e-06, "loss": 3.8489, "step": 4475 }, { "epoch": 0.045572916666666664, "grad_norm": 14.713571548461914, "learning_rate": 4.97549870941444e-06, "loss": 3.7178, "step": 4480 }, { "epoch": 0.045623779296875, "grad_norm": 11.61874771118164, "learning_rate": 4.9754428302790325e-06, "loss": 3.3223, "step": 4485 }, { "epoch": 0.045674641927083336, "grad_norm": 8.922009468078613, "learning_rate": 4.975386887810007e-06, "loss": 3.7321, "step": 4490 }, { "epoch": 0.045725504557291664, "grad_norm": 8.890037536621094, "learning_rate": 4.975330882008794e-06, "loss": 3.4178, "step": 4495 }, { "epoch": 0.0457763671875, "grad_norm": 11.523167610168457, "learning_rate": 4.9752748128768275e-06, "loss": 3.8556, "step": 4500 }, { "epoch": 0.045827229817708336, "grad_norm": 15.356710433959961, "learning_rate": 4.975218680415541e-06, "loss": 3.4063, "step": 4505 }, { "epoch": 0.045878092447916664, "grad_norm": 12.003191947937012, "learning_rate": 4.9751624846263725e-06, "loss": 3.727, "step": 4510 }, { "epoch": 0.045928955078125, "grad_norm": 11.115232467651367, "learning_rate": 4.9751062255107575e-06, "loss": 3.3798, "step": 4515 }, { "epoch": 0.045979817708333336, "grad_norm": 12.786600112915039, "learning_rate": 4.975049903070137e-06, "loss": 3.8232, "step": 4520 }, { "epoch": 0.046030680338541664, "grad_norm": 9.778971672058105, "learning_rate": 4.974993517305952e-06, "loss": 3.5046, "step": 4525 }, { "epoch": 0.04608154296875, "grad_norm": 11.696513175964355, "learning_rate": 4.974937068219643e-06, "loss": 3.5063, "step": 4530 }, { "epoch": 0.046132405598958336, "grad_norm": 10.245550155639648, "learning_rate": 4.974880555812656e-06, "loss": 3.2886, "step": 4535 }, { "epoch": 0.046183268229166664, "grad_norm": 15.772186279296875, "learning_rate": 4.9748239800864375e-06, "loss": 3.482, "step": 4540 }, { "epoch": 0.046234130859375, "grad_norm": 11.759252548217773, "learning_rate": 4.974767341042433e-06, "loss": 3.663, "step": 4545 }, { "epoch": 0.046284993489583336, "grad_norm": 9.798601150512695, "learning_rate": 4.9747106386820934e-06, "loss": 3.3534, "step": 4550 }, { "epoch": 0.046335856119791664, "grad_norm": 13.721288681030273, "learning_rate": 4.9746538730068684e-06, "loss": 3.4893, "step": 4555 }, { "epoch": 0.04638671875, "grad_norm": 11.46217155456543, "learning_rate": 4.974597044018211e-06, "loss": 3.3706, "step": 4560 }, { "epoch": 0.046437581380208336, "grad_norm": 8.599750518798828, "learning_rate": 4.974540151717574e-06, "loss": 4.0034, "step": 4565 }, { "epoch": 0.046488444010416664, "grad_norm": 15.226737022399902, "learning_rate": 4.974483196106415e-06, "loss": 3.591, "step": 4570 }, { "epoch": 0.046539306640625, "grad_norm": 8.270947456359863, "learning_rate": 4.9744261771861894e-06, "loss": 3.3649, "step": 4575 }, { "epoch": 0.046590169270833336, "grad_norm": 13.826691627502441, "learning_rate": 4.974369094958356e-06, "loss": 3.7075, "step": 4580 }, { "epoch": 0.046641031901041664, "grad_norm": 15.241434097290039, "learning_rate": 4.974311949424376e-06, "loss": 3.9796, "step": 4585 }, { "epoch": 0.04669189453125, "grad_norm": 9.045734405517578, "learning_rate": 4.974254740585712e-06, "loss": 3.3453, "step": 4590 }, { "epoch": 0.046742757161458336, "grad_norm": 12.359314918518066, "learning_rate": 4.974197468443826e-06, "loss": 3.5554, "step": 4595 }, { "epoch": 0.046793619791666664, "grad_norm": 16.416982650756836, "learning_rate": 4.974140133000184e-06, "loss": 3.4799, "step": 4600 }, { "epoch": 0.046844482421875, "grad_norm": 12.722060203552246, "learning_rate": 4.974082734256254e-06, "loss": 3.8829, "step": 4605 }, { "epoch": 0.046895345052083336, "grad_norm": 14.915850639343262, "learning_rate": 4.9740252722135035e-06, "loss": 3.0652, "step": 4610 }, { "epoch": 0.046946207682291664, "grad_norm": 15.305362701416016, "learning_rate": 4.973967746873403e-06, "loss": 3.2942, "step": 4615 }, { "epoch": 0.0469970703125, "grad_norm": 18.63602638244629, "learning_rate": 4.973910158237423e-06, "loss": 3.1882, "step": 4620 }, { "epoch": 0.047047932942708336, "grad_norm": 15.647112846374512, "learning_rate": 4.973852506307039e-06, "loss": 3.2307, "step": 4625 }, { "epoch": 0.047098795572916664, "grad_norm": 13.300541877746582, "learning_rate": 4.973794791083725e-06, "loss": 3.6601, "step": 4630 }, { "epoch": 0.047149658203125, "grad_norm": 12.027398109436035, "learning_rate": 4.9737370125689575e-06, "loss": 3.5073, "step": 4635 }, { "epoch": 0.047200520833333336, "grad_norm": 9.269347190856934, "learning_rate": 4.973679170764214e-06, "loss": 4.1268, "step": 4640 }, { "epoch": 0.047251383463541664, "grad_norm": 9.16490650177002, "learning_rate": 4.973621265670976e-06, "loss": 3.2929, "step": 4645 }, { "epoch": 0.04730224609375, "grad_norm": 11.5504789352417, "learning_rate": 4.973563297290724e-06, "loss": 3.369, "step": 4650 }, { "epoch": 0.047353108723958336, "grad_norm": 15.80301284790039, "learning_rate": 4.973505265624942e-06, "loss": 3.4352, "step": 4655 }, { "epoch": 0.047403971354166664, "grad_norm": 9.65170669555664, "learning_rate": 4.9734471706751135e-06, "loss": 3.8803, "step": 4660 }, { "epoch": 0.047454833984375, "grad_norm": 11.459211349487305, "learning_rate": 4.9733890124427255e-06, "loss": 3.5879, "step": 4665 }, { "epoch": 0.047505696614583336, "grad_norm": 16.0178279876709, "learning_rate": 4.973330790929266e-06, "loss": 3.6492, "step": 4670 }, { "epoch": 0.047556559244791664, "grad_norm": 16.536197662353516, "learning_rate": 4.973272506136224e-06, "loss": 3.2745, "step": 4675 }, { "epoch": 0.047607421875, "grad_norm": 10.868372917175293, "learning_rate": 4.973214158065092e-06, "loss": 3.4472, "step": 4680 }, { "epoch": 0.047658284505208336, "grad_norm": 14.260408401489258, "learning_rate": 4.973155746717361e-06, "loss": 3.4165, "step": 4685 }, { "epoch": 0.047709147135416664, "grad_norm": 10.171429634094238, "learning_rate": 4.973097272094527e-06, "loss": 3.209, "step": 4690 }, { "epoch": 0.047760009765625, "grad_norm": 13.096430778503418, "learning_rate": 4.973038734198086e-06, "loss": 3.4693, "step": 4695 }, { "epoch": 0.047810872395833336, "grad_norm": 15.2460355758667, "learning_rate": 4.972980133029534e-06, "loss": 3.4677, "step": 4700 }, { "epoch": 0.047861735026041664, "grad_norm": 13.545559883117676, "learning_rate": 4.9729214685903725e-06, "loss": 3.4694, "step": 4705 }, { "epoch": 0.04791259765625, "grad_norm": 17.726600646972656, "learning_rate": 4.9728627408821e-06, "loss": 3.5932, "step": 4710 }, { "epoch": 0.047963460286458336, "grad_norm": 13.018586158752441, "learning_rate": 4.972803949906222e-06, "loss": 3.4267, "step": 4715 }, { "epoch": 0.048014322916666664, "grad_norm": 15.820887565612793, "learning_rate": 4.9727450956642395e-06, "loss": 3.3878, "step": 4720 }, { "epoch": 0.048065185546875, "grad_norm": 10.09266471862793, "learning_rate": 4.972686178157661e-06, "loss": 3.7088, "step": 4725 }, { "epoch": 0.048116048177083336, "grad_norm": 14.535094261169434, "learning_rate": 4.972627197387993e-06, "loss": 3.492, "step": 4730 }, { "epoch": 0.048166910807291664, "grad_norm": 16.890783309936523, "learning_rate": 4.972568153356744e-06, "loss": 3.3656, "step": 4735 }, { "epoch": 0.0482177734375, "grad_norm": 8.608535766601562, "learning_rate": 4.972509046065423e-06, "loss": 3.4594, "step": 4740 }, { "epoch": 0.048268636067708336, "grad_norm": 12.446272850036621, "learning_rate": 4.9724498755155455e-06, "loss": 3.3436, "step": 4745 }, { "epoch": 0.048319498697916664, "grad_norm": 11.247904777526855, "learning_rate": 4.972390641708625e-06, "loss": 3.7488, "step": 4750 }, { "epoch": 0.048370361328125, "grad_norm": 10.85303783416748, "learning_rate": 4.972331344646175e-06, "loss": 3.4678, "step": 4755 }, { "epoch": 0.048421223958333336, "grad_norm": 12.21154499053955, "learning_rate": 4.972271984329713e-06, "loss": 3.4773, "step": 4760 }, { "epoch": 0.048472086588541664, "grad_norm": 12.006597518920898, "learning_rate": 4.9722125607607595e-06, "loss": 3.5087, "step": 4765 }, { "epoch": 0.04852294921875, "grad_norm": 9.981345176696777, "learning_rate": 4.972153073940833e-06, "loss": 3.5338, "step": 4770 }, { "epoch": 0.048573811848958336, "grad_norm": 13.28870964050293, "learning_rate": 4.972093523871456e-06, "loss": 3.2124, "step": 4775 }, { "epoch": 0.048624674479166664, "grad_norm": 11.399847984313965, "learning_rate": 4.972033910554151e-06, "loss": 3.251, "step": 4780 }, { "epoch": 0.048675537109375, "grad_norm": 11.87983226776123, "learning_rate": 4.971974233990447e-06, "loss": 3.2554, "step": 4785 }, { "epoch": 0.048726399739583336, "grad_norm": 10.418863296508789, "learning_rate": 4.971914494181866e-06, "loss": 3.8199, "step": 4790 }, { "epoch": 0.048777262369791664, "grad_norm": 13.546000480651855, "learning_rate": 4.971854691129939e-06, "loss": 3.6464, "step": 4795 }, { "epoch": 0.048828125, "grad_norm": 16.547273635864258, "learning_rate": 4.9717948248361954e-06, "loss": 3.3086, "step": 4800 }, { "epoch": 0.048878987630208336, "grad_norm": 10.11819839477539, "learning_rate": 4.971734895302168e-06, "loss": 3.5591, "step": 4805 }, { "epoch": 0.048929850260416664, "grad_norm": 11.45274829864502, "learning_rate": 4.971674902529389e-06, "loss": 3.666, "step": 4810 }, { "epoch": 0.048980712890625, "grad_norm": 12.099120140075684, "learning_rate": 4.971614846519393e-06, "loss": 3.6187, "step": 4815 }, { "epoch": 0.049031575520833336, "grad_norm": 9.340841293334961, "learning_rate": 4.971554727273718e-06, "loss": 3.4518, "step": 4820 }, { "epoch": 0.049082438151041664, "grad_norm": 14.398073196411133, "learning_rate": 4.9714945447939e-06, "loss": 3.514, "step": 4825 }, { "epoch": 0.04913330078125, "grad_norm": 10.84057331085205, "learning_rate": 4.97143429908148e-06, "loss": 4.0011, "step": 4830 }, { "epoch": 0.049184163411458336, "grad_norm": 14.066889762878418, "learning_rate": 4.971373990137999e-06, "loss": 3.4912, "step": 4835 }, { "epoch": 0.049235026041666664, "grad_norm": 13.424138069152832, "learning_rate": 4.971313617965001e-06, "loss": 3.4337, "step": 4840 }, { "epoch": 0.049285888671875, "grad_norm": 15.70202922821045, "learning_rate": 4.971253182564029e-06, "loss": 3.5523, "step": 4845 }, { "epoch": 0.049336751302083336, "grad_norm": 15.583520889282227, "learning_rate": 4.971192683936631e-06, "loss": 3.548, "step": 4850 }, { "epoch": 0.049387613932291664, "grad_norm": 16.691743850708008, "learning_rate": 4.9711321220843535e-06, "loss": 3.3919, "step": 4855 }, { "epoch": 0.0494384765625, "grad_norm": 15.311267852783203, "learning_rate": 4.971071497008746e-06, "loss": 3.275, "step": 4860 }, { "epoch": 0.049489339192708336, "grad_norm": 11.667826652526855, "learning_rate": 4.971010808711361e-06, "loss": 3.3042, "step": 4865 }, { "epoch": 0.049540201822916664, "grad_norm": 12.931231498718262, "learning_rate": 4.9709500571937485e-06, "loss": 3.6733, "step": 4870 }, { "epoch": 0.049591064453125, "grad_norm": 14.819002151489258, "learning_rate": 4.970889242457466e-06, "loss": 3.3424, "step": 4875 }, { "epoch": 0.049641927083333336, "grad_norm": 11.588568687438965, "learning_rate": 4.9708283645040675e-06, "loss": 3.2318, "step": 4880 }, { "epoch": 0.049692789713541664, "grad_norm": 14.38076400756836, "learning_rate": 4.970767423335111e-06, "loss": 3.5042, "step": 4885 }, { "epoch": 0.04974365234375, "grad_norm": 11.927538871765137, "learning_rate": 4.970706418952155e-06, "loss": 3.4414, "step": 4890 }, { "epoch": 0.049794514973958336, "grad_norm": 17.747844696044922, "learning_rate": 4.970645351356761e-06, "loss": 3.9686, "step": 4895 }, { "epoch": 0.049845377604166664, "grad_norm": 13.796122550964355, "learning_rate": 4.970584220550492e-06, "loss": 4.094, "step": 4900 }, { "epoch": 0.049896240234375, "grad_norm": 13.095043182373047, "learning_rate": 4.97052302653491e-06, "loss": 3.7906, "step": 4905 }, { "epoch": 0.049947102864583336, "grad_norm": 12.416662216186523, "learning_rate": 4.970461769311583e-06, "loss": 3.4955, "step": 4910 }, { "epoch": 0.049997965494791664, "grad_norm": 12.235672950744629, "learning_rate": 4.970400448882078e-06, "loss": 3.0274, "step": 4915 }, { "epoch": 0.050048828125, "grad_norm": 12.012406349182129, "learning_rate": 4.9703390652479615e-06, "loss": 3.3271, "step": 4920 }, { "epoch": 0.050099690755208336, "grad_norm": 11.558436393737793, "learning_rate": 4.970277618410806e-06, "loss": 3.4439, "step": 4925 }, { "epoch": 0.050150553385416664, "grad_norm": 10.99045467376709, "learning_rate": 4.970216108372184e-06, "loss": 3.4762, "step": 4930 }, { "epoch": 0.050201416015625, "grad_norm": 14.257964134216309, "learning_rate": 4.970154535133667e-06, "loss": 4.1728, "step": 4935 }, { "epoch": 0.050252278645833336, "grad_norm": 14.24341106414795, "learning_rate": 4.970092898696832e-06, "loss": 3.3222, "step": 4940 }, { "epoch": 0.050303141276041664, "grad_norm": 17.90021324157715, "learning_rate": 4.9700311990632565e-06, "loss": 3.2803, "step": 4945 }, { "epoch": 0.05035400390625, "grad_norm": 13.78532600402832, "learning_rate": 4.9699694362345175e-06, "loss": 3.6847, "step": 4950 }, { "epoch": 0.050404866536458336, "grad_norm": 9.49715518951416, "learning_rate": 4.969907610212197e-06, "loss": 3.4094, "step": 4955 }, { "epoch": 0.050455729166666664, "grad_norm": 16.413652420043945, "learning_rate": 4.969845720997874e-06, "loss": 3.4125, "step": 4960 }, { "epoch": 0.050506591796875, "grad_norm": 11.681251525878906, "learning_rate": 4.969783768593135e-06, "loss": 3.7634, "step": 4965 }, { "epoch": 0.050557454427083336, "grad_norm": 12.140727043151855, "learning_rate": 4.969721752999563e-06, "loss": 3.3175, "step": 4970 }, { "epoch": 0.050608317057291664, "grad_norm": 17.70606231689453, "learning_rate": 4.9696596742187455e-06, "loss": 3.7512, "step": 4975 }, { "epoch": 0.0506591796875, "grad_norm": 14.161558151245117, "learning_rate": 4.969597532252271e-06, "loss": 3.2145, "step": 4980 }, { "epoch": 0.050710042317708336, "grad_norm": 9.752479553222656, "learning_rate": 4.969535327101729e-06, "loss": 3.3454, "step": 4985 }, { "epoch": 0.050760904947916664, "grad_norm": 10.96493911743164, "learning_rate": 4.96947305876871e-06, "loss": 3.493, "step": 4990 }, { "epoch": 0.050811767578125, "grad_norm": 10.895770072937012, "learning_rate": 4.969410727254809e-06, "loss": 3.1966, "step": 4995 }, { "epoch": 0.050862630208333336, "grad_norm": 14.102956771850586, "learning_rate": 4.96934833256162e-06, "loss": 3.4336, "step": 5000 }, { "epoch": 0.050913492838541664, "grad_norm": 10.122415542602539, "learning_rate": 4.9692858746907395e-06, "loss": 3.275, "step": 5005 }, { "epoch": 0.05096435546875, "grad_norm": 16.442476272583008, "learning_rate": 4.969223353643764e-06, "loss": 3.7156, "step": 5010 }, { "epoch": 0.051015218098958336, "grad_norm": 12.107405662536621, "learning_rate": 4.969160769422294e-06, "loss": 3.645, "step": 5015 }, { "epoch": 0.051066080729166664, "grad_norm": 18.26190948486328, "learning_rate": 4.969098122027932e-06, "loss": 3.5743, "step": 5020 }, { "epoch": 0.051116943359375, "grad_norm": 11.404193878173828, "learning_rate": 4.96903541146228e-06, "loss": 3.2735, "step": 5025 }, { "epoch": 0.051167805989583336, "grad_norm": 14.428089141845703, "learning_rate": 4.968972637726942e-06, "loss": 3.7864, "step": 5030 }, { "epoch": 0.051218668619791664, "grad_norm": 10.478737831115723, "learning_rate": 4.9689098008235235e-06, "loss": 3.8017, "step": 5035 }, { "epoch": 0.05126953125, "grad_norm": 15.701272010803223, "learning_rate": 4.968846900753634e-06, "loss": 3.4754, "step": 5040 }, { "epoch": 0.051320393880208336, "grad_norm": 11.944446563720703, "learning_rate": 4.968783937518882e-06, "loss": 3.5693, "step": 5045 }, { "epoch": 0.051371256510416664, "grad_norm": 9.105219841003418, "learning_rate": 4.968720911120876e-06, "loss": 3.3752, "step": 5050 }, { "epoch": 0.051422119140625, "grad_norm": 17.28725814819336, "learning_rate": 4.968657821561233e-06, "loss": 3.542, "step": 5055 }, { "epoch": 0.051472981770833336, "grad_norm": 9.695812225341797, "learning_rate": 4.9685946688415635e-06, "loss": 3.3903, "step": 5060 }, { "epoch": 0.051523844401041664, "grad_norm": 11.877903938293457, "learning_rate": 4.968531452963485e-06, "loss": 3.8497, "step": 5065 }, { "epoch": 0.05157470703125, "grad_norm": 14.348855018615723, "learning_rate": 4.968468173928614e-06, "loss": 3.2782, "step": 5070 }, { "epoch": 0.051625569661458336, "grad_norm": 14.756049156188965, "learning_rate": 4.96840483173857e-06, "loss": 4.0242, "step": 5075 }, { "epoch": 0.051676432291666664, "grad_norm": 14.059425354003906, "learning_rate": 4.968341426394974e-06, "loss": 3.2894, "step": 5080 }, { "epoch": 0.051727294921875, "grad_norm": 11.158661842346191, "learning_rate": 4.968277957899446e-06, "loss": 3.6611, "step": 5085 }, { "epoch": 0.051778157552083336, "grad_norm": 18.206151962280273, "learning_rate": 4.968214426253613e-06, "loss": 3.4665, "step": 5090 }, { "epoch": 0.051829020182291664, "grad_norm": 8.762948989868164, "learning_rate": 4.968150831459099e-06, "loss": 3.4422, "step": 5095 }, { "epoch": 0.0518798828125, "grad_norm": 14.994571685791016, "learning_rate": 4.968087173517531e-06, "loss": 3.602, "step": 5100 }, { "epoch": 0.051930745442708336, "grad_norm": 12.887874603271484, "learning_rate": 4.968023452430537e-06, "loss": 3.12, "step": 5105 }, { "epoch": 0.051981608072916664, "grad_norm": 15.499711990356445, "learning_rate": 4.967959668199748e-06, "loss": 3.7486, "step": 5110 }, { "epoch": 0.052032470703125, "grad_norm": 12.390300750732422, "learning_rate": 4.967895820826796e-06, "loss": 3.4817, "step": 5115 }, { "epoch": 0.052083333333333336, "grad_norm": 13.03585433959961, "learning_rate": 4.967831910313314e-06, "loss": 3.1236, "step": 5120 }, { "epoch": 0.052134195963541664, "grad_norm": 11.844696998596191, "learning_rate": 4.967767936660939e-06, "loss": 3.4756, "step": 5125 }, { "epoch": 0.05218505859375, "grad_norm": 12.35226821899414, "learning_rate": 4.967703899871304e-06, "loss": 3.481, "step": 5130 }, { "epoch": 0.052235921223958336, "grad_norm": 10.799506187438965, "learning_rate": 4.967639799946052e-06, "loss": 3.7089, "step": 5135 }, { "epoch": 0.052286783854166664, "grad_norm": 11.297710418701172, "learning_rate": 4.967575636886819e-06, "loss": 3.5326, "step": 5140 }, { "epoch": 0.052337646484375, "grad_norm": 9.424179077148438, "learning_rate": 4.967511410695249e-06, "loss": 3.7654, "step": 5145 }, { "epoch": 0.052388509114583336, "grad_norm": 11.021271705627441, "learning_rate": 4.9674471213729836e-06, "loss": 3.278, "step": 5150 }, { "epoch": 0.052439371744791664, "grad_norm": 12.35921573638916, "learning_rate": 4.9673827689216695e-06, "loss": 3.4304, "step": 5155 }, { "epoch": 0.052490234375, "grad_norm": 12.476812362670898, "learning_rate": 4.967318353342952e-06, "loss": 3.3936, "step": 5160 }, { "epoch": 0.052541097005208336, "grad_norm": 10.683613777160645, "learning_rate": 4.967253874638478e-06, "loss": 3.1559, "step": 5165 }, { "epoch": 0.052591959635416664, "grad_norm": 13.43266773223877, "learning_rate": 4.967189332809899e-06, "loss": 3.8937, "step": 5170 }, { "epoch": 0.052642822265625, "grad_norm": 17.731548309326172, "learning_rate": 4.967124727858867e-06, "loss": 3.4165, "step": 5175 }, { "epoch": 0.052693684895833336, "grad_norm": 15.984275817871094, "learning_rate": 4.967060059787032e-06, "loss": 3.6395, "step": 5180 }, { "epoch": 0.052744547526041664, "grad_norm": 18.11372184753418, "learning_rate": 4.96699532859605e-06, "loss": 3.495, "step": 5185 }, { "epoch": 0.05279541015625, "grad_norm": 13.443792343139648, "learning_rate": 4.9669305342875785e-06, "loss": 4.1017, "step": 5190 }, { "epoch": 0.052846272786458336, "grad_norm": 16.402788162231445, "learning_rate": 4.9668656768632725e-06, "loss": 3.4839, "step": 5195 }, { "epoch": 0.052897135416666664, "grad_norm": 14.646184921264648, "learning_rate": 4.966800756324794e-06, "loss": 3.7131, "step": 5200 }, { "epoch": 0.052947998046875, "grad_norm": 16.304649353027344, "learning_rate": 4.966735772673803e-06, "loss": 3.4049, "step": 5205 }, { "epoch": 0.052998860677083336, "grad_norm": 17.611019134521484, "learning_rate": 4.966670725911962e-06, "loss": 3.2992, "step": 5210 }, { "epoch": 0.053049723307291664, "grad_norm": 9.94887638092041, "learning_rate": 4.966605616040935e-06, "loss": 3.498, "step": 5215 }, { "epoch": 0.0531005859375, "grad_norm": 14.684574127197266, "learning_rate": 4.9665404430623874e-06, "loss": 3.6487, "step": 5220 }, { "epoch": 0.053151448567708336, "grad_norm": 14.74726676940918, "learning_rate": 4.9664752069779875e-06, "loss": 3.3533, "step": 5225 }, { "epoch": 0.053202311197916664, "grad_norm": 16.72749137878418, "learning_rate": 4.966409907789403e-06, "loss": 3.203, "step": 5230 }, { "epoch": 0.053253173828125, "grad_norm": 12.44540786743164, "learning_rate": 4.966344545498307e-06, "loss": 3.4401, "step": 5235 }, { "epoch": 0.053304036458333336, "grad_norm": 10.099539756774902, "learning_rate": 4.96627912010637e-06, "loss": 3.5915, "step": 5240 }, { "epoch": 0.053354899088541664, "grad_norm": 11.097341537475586, "learning_rate": 4.966213631615266e-06, "loss": 3.9558, "step": 5245 }, { "epoch": 0.05340576171875, "grad_norm": 13.878585815429688, "learning_rate": 4.966148080026671e-06, "loss": 3.4617, "step": 5250 }, { "epoch": 0.053456624348958336, "grad_norm": 10.348923683166504, "learning_rate": 4.966082465342263e-06, "loss": 3.6141, "step": 5255 }, { "epoch": 0.053507486979166664, "grad_norm": 9.475302696228027, "learning_rate": 4.966016787563719e-06, "loss": 3.2901, "step": 5260 }, { "epoch": 0.053558349609375, "grad_norm": 16.38394546508789, "learning_rate": 4.965951046692719e-06, "loss": 3.1904, "step": 5265 }, { "epoch": 0.053609212239583336, "grad_norm": 14.318309783935547, "learning_rate": 4.965885242730947e-06, "loss": 3.3258, "step": 5270 }, { "epoch": 0.053660074869791664, "grad_norm": 8.593515396118164, "learning_rate": 4.965819375680085e-06, "loss": 3.341, "step": 5275 }, { "epoch": 0.0537109375, "grad_norm": 14.667823791503906, "learning_rate": 4.9657534455418186e-06, "loss": 3.5297, "step": 5280 }, { "epoch": 0.053761800130208336, "grad_norm": 15.408576965332031, "learning_rate": 4.965687452317836e-06, "loss": 3.3244, "step": 5285 }, { "epoch": 0.053812662760416664, "grad_norm": 15.13183307647705, "learning_rate": 4.9656213960098235e-06, "loss": 3.3597, "step": 5290 }, { "epoch": 0.053863525390625, "grad_norm": 10.050660133361816, "learning_rate": 4.965555276619471e-06, "loss": 3.2173, "step": 5295 }, { "epoch": 0.053914388020833336, "grad_norm": 11.996289253234863, "learning_rate": 4.965489094148473e-06, "loss": 3.258, "step": 5300 }, { "epoch": 0.053965250651041664, "grad_norm": 13.994873046875, "learning_rate": 4.965422848598519e-06, "loss": 3.9475, "step": 5305 }, { "epoch": 0.05401611328125, "grad_norm": 13.173226356506348, "learning_rate": 4.965356539971307e-06, "loss": 3.5109, "step": 5310 }, { "epoch": 0.054066975911458336, "grad_norm": 11.68813419342041, "learning_rate": 4.965290168268532e-06, "loss": 3.7385, "step": 5315 }, { "epoch": 0.054117838541666664, "grad_norm": 13.431297302246094, "learning_rate": 4.965223733491893e-06, "loss": 3.4336, "step": 5320 }, { "epoch": 0.054168701171875, "grad_norm": 14.174638748168945, "learning_rate": 4.965157235643088e-06, "loss": 3.2346, "step": 5325 }, { "epoch": 0.054219563802083336, "grad_norm": 14.978144645690918, "learning_rate": 4.96509067472382e-06, "loss": 3.6034, "step": 5330 }, { "epoch": 0.054270426432291664, "grad_norm": 11.926872253417969, "learning_rate": 4.965024050735792e-06, "loss": 3.4996, "step": 5335 }, { "epoch": 0.0543212890625, "grad_norm": 9.099247932434082, "learning_rate": 4.9649573636807065e-06, "loss": 3.1218, "step": 5340 }, { "epoch": 0.054372151692708336, "grad_norm": 17.655933380126953, "learning_rate": 4.964890613560272e-06, "loss": 3.6185, "step": 5345 }, { "epoch": 0.054423014322916664, "grad_norm": 10.588809967041016, "learning_rate": 4.964823800376195e-06, "loss": 3.5396, "step": 5350 }, { "epoch": 0.054473876953125, "grad_norm": 12.350024223327637, "learning_rate": 4.964756924130186e-06, "loss": 3.7472, "step": 5355 }, { "epoch": 0.054524739583333336, "grad_norm": 13.325206756591797, "learning_rate": 4.964689984823955e-06, "loss": 3.1619, "step": 5360 }, { "epoch": 0.054575602213541664, "grad_norm": 12.419147491455078, "learning_rate": 4.964622982459214e-06, "loss": 3.3821, "step": 5365 }, { "epoch": 0.05462646484375, "grad_norm": 11.1918306350708, "learning_rate": 4.964555917037679e-06, "loss": 3.2978, "step": 5370 }, { "epoch": 0.054677327473958336, "grad_norm": 8.211668014526367, "learning_rate": 4.964488788561066e-06, "loss": 3.1058, "step": 5375 }, { "epoch": 0.054728190104166664, "grad_norm": 14.706374168395996, "learning_rate": 4.96442159703109e-06, "loss": 3.5759, "step": 5380 }, { "epoch": 0.054779052734375, "grad_norm": 12.363343238830566, "learning_rate": 4.964354342449472e-06, "loss": 3.3521, "step": 5385 }, { "epoch": 0.054829915364583336, "grad_norm": 11.750041007995605, "learning_rate": 4.964287024817933e-06, "loss": 3.7942, "step": 5390 }, { "epoch": 0.054880777994791664, "grad_norm": 15.14163589477539, "learning_rate": 4.964219644138194e-06, "loss": 3.7889, "step": 5395 }, { "epoch": 0.054931640625, "grad_norm": 14.809743881225586, "learning_rate": 4.964152200411979e-06, "loss": 3.1966, "step": 5400 }, { "epoch": 0.054982503255208336, "grad_norm": 10.114827156066895, "learning_rate": 4.964084693641014e-06, "loss": 3.5248, "step": 5405 }, { "epoch": 0.055033365885416664, "grad_norm": 12.720572471618652, "learning_rate": 4.964017123827027e-06, "loss": 3.1197, "step": 5410 }, { "epoch": 0.055084228515625, "grad_norm": 8.508932113647461, "learning_rate": 4.963949490971746e-06, "loss": 3.8478, "step": 5415 }, { "epoch": 0.055135091145833336, "grad_norm": 7.770327091217041, "learning_rate": 4.963881795076901e-06, "loss": 3.0364, "step": 5420 }, { "epoch": 0.055185953776041664, "grad_norm": 13.418745994567871, "learning_rate": 4.963814036144223e-06, "loss": 3.1178, "step": 5425 }, { "epoch": 0.05523681640625, "grad_norm": 17.769771575927734, "learning_rate": 4.963746214175448e-06, "loss": 3.5998, "step": 5430 }, { "epoch": 0.055287679036458336, "grad_norm": 12.966880798339844, "learning_rate": 4.96367832917231e-06, "loss": 3.2851, "step": 5435 }, { "epoch": 0.055338541666666664, "grad_norm": 13.409180641174316, "learning_rate": 4.9636103811365464e-06, "loss": 3.7049, "step": 5440 }, { "epoch": 0.055389404296875, "grad_norm": 13.363730430603027, "learning_rate": 4.963542370069895e-06, "loss": 3.6024, "step": 5445 }, { "epoch": 0.055440266927083336, "grad_norm": 13.596270561218262, "learning_rate": 4.963474295974095e-06, "loss": 3.5165, "step": 5450 }, { "epoch": 0.055491129557291664, "grad_norm": 10.22996711730957, "learning_rate": 4.96340615885089e-06, "loss": 3.6675, "step": 5455 }, { "epoch": 0.0555419921875, "grad_norm": 14.241738319396973, "learning_rate": 4.963337958702022e-06, "loss": 3.3507, "step": 5460 }, { "epoch": 0.055592854817708336, "grad_norm": 9.234009742736816, "learning_rate": 4.963269695529236e-06, "loss": 3.4202, "step": 5465 }, { "epoch": 0.055643717447916664, "grad_norm": 13.222739219665527, "learning_rate": 4.963201369334279e-06, "loss": 3.7927, "step": 5470 }, { "epoch": 0.055694580078125, "grad_norm": 11.704004287719727, "learning_rate": 4.963132980118899e-06, "loss": 3.3365, "step": 5475 }, { "epoch": 0.055745442708333336, "grad_norm": 11.018651008605957, "learning_rate": 4.963064527884845e-06, "loss": 3.6051, "step": 5480 }, { "epoch": 0.055796305338541664, "grad_norm": 14.942146301269531, "learning_rate": 4.96299601263387e-06, "loss": 3.2443, "step": 5485 }, { "epoch": 0.05584716796875, "grad_norm": 9.587372779846191, "learning_rate": 4.962927434367724e-06, "loss": 3.637, "step": 5490 }, { "epoch": 0.055898030598958336, "grad_norm": 9.663505554199219, "learning_rate": 4.9628587930881646e-06, "loss": 3.5114, "step": 5495 }, { "epoch": 0.055948893229166664, "grad_norm": 15.723515510559082, "learning_rate": 4.962790088796946e-06, "loss": 3.5283, "step": 5500 }, { "epoch": 0.055999755859375, "grad_norm": 11.564032554626465, "learning_rate": 4.962721321495827e-06, "loss": 3.523, "step": 5505 }, { "epoch": 0.056050618489583336, "grad_norm": 15.196375846862793, "learning_rate": 4.962652491186567e-06, "loss": 3.4245, "step": 5510 }, { "epoch": 0.056101481119791664, "grad_norm": 11.132039070129395, "learning_rate": 4.962583597870927e-06, "loss": 3.6306, "step": 5515 }, { "epoch": 0.05615234375, "grad_norm": 13.807767868041992, "learning_rate": 4.962514641550668e-06, "loss": 3.284, "step": 5520 }, { "epoch": 0.056203206380208336, "grad_norm": 8.87157154083252, "learning_rate": 4.962445622227558e-06, "loss": 3.5285, "step": 5525 }, { "epoch": 0.056254069010416664, "grad_norm": 16.512109756469727, "learning_rate": 4.962376539903359e-06, "loss": 3.5056, "step": 5530 }, { "epoch": 0.056304931640625, "grad_norm": 10.745555877685547, "learning_rate": 4.962307394579839e-06, "loss": 3.2763, "step": 5535 }, { "epoch": 0.056355794270833336, "grad_norm": 10.458619117736816, "learning_rate": 4.9622381862587685e-06, "loss": 3.3848, "step": 5540 }, { "epoch": 0.056406656901041664, "grad_norm": 10.014710426330566, "learning_rate": 4.962168914941919e-06, "loss": 3.4247, "step": 5545 }, { "epoch": 0.05645751953125, "grad_norm": 15.231337547302246, "learning_rate": 4.96209958063106e-06, "loss": 3.368, "step": 5550 }, { "epoch": 0.056508382161458336, "grad_norm": 14.728731155395508, "learning_rate": 4.962030183327967e-06, "loss": 3.4259, "step": 5555 }, { "epoch": 0.056559244791666664, "grad_norm": 14.27651596069336, "learning_rate": 4.961960723034415e-06, "loss": 3.3908, "step": 5560 }, { "epoch": 0.056610107421875, "grad_norm": 15.396354675292969, "learning_rate": 4.961891199752182e-06, "loss": 3.6155, "step": 5565 }, { "epoch": 0.056660970052083336, "grad_norm": 13.059240341186523, "learning_rate": 4.961821613483047e-06, "loss": 3.737, "step": 5570 }, { "epoch": 0.056711832682291664, "grad_norm": 21.01386833190918, "learning_rate": 4.961751964228788e-06, "loss": 3.4121, "step": 5575 }, { "epoch": 0.0567626953125, "grad_norm": 12.324559211730957, "learning_rate": 4.961682251991189e-06, "loss": 3.1513, "step": 5580 }, { "epoch": 0.056813557942708336, "grad_norm": 11.327527046203613, "learning_rate": 4.961612476772033e-06, "loss": 3.4368, "step": 5585 }, { "epoch": 0.056864420572916664, "grad_norm": 9.986822128295898, "learning_rate": 4.961542638573106e-06, "loss": 3.5226, "step": 5590 }, { "epoch": 0.056915283203125, "grad_norm": 16.936189651489258, "learning_rate": 4.961472737396193e-06, "loss": 3.698, "step": 5595 }, { "epoch": 0.056966145833333336, "grad_norm": 15.606807708740234, "learning_rate": 4.9614027732430835e-06, "loss": 3.4487, "step": 5600 }, { "epoch": 0.057017008463541664, "grad_norm": 11.489161491394043, "learning_rate": 4.961332746115568e-06, "loss": 3.5364, "step": 5605 }, { "epoch": 0.05706787109375, "grad_norm": 13.7288818359375, "learning_rate": 4.9612626560154375e-06, "loss": 3.6168, "step": 5610 }, { "epoch": 0.057118733723958336, "grad_norm": 8.421143531799316, "learning_rate": 4.961192502944485e-06, "loss": 3.4713, "step": 5615 }, { "epoch": 0.057169596354166664, "grad_norm": 17.100997924804688, "learning_rate": 4.961122286904506e-06, "loss": 3.392, "step": 5620 }, { "epoch": 0.057220458984375, "grad_norm": 17.024621963500977, "learning_rate": 4.961052007897297e-06, "loss": 3.6999, "step": 5625 }, { "epoch": 0.057271321614583336, "grad_norm": 8.612135887145996, "learning_rate": 4.960981665924655e-06, "loss": 3.2406, "step": 5630 }, { "epoch": 0.057322184244791664, "grad_norm": 11.135616302490234, "learning_rate": 4.9609112609883816e-06, "loss": 3.9177, "step": 5635 }, { "epoch": 0.057373046875, "grad_norm": 13.029520988464355, "learning_rate": 4.960840793090276e-06, "loss": 3.4563, "step": 5640 }, { "epoch": 0.057423909505208336, "grad_norm": 12.987259864807129, "learning_rate": 4.960770262232141e-06, "loss": 3.4741, "step": 5645 }, { "epoch": 0.057474772135416664, "grad_norm": 8.016860008239746, "learning_rate": 4.960699668415784e-06, "loss": 3.423, "step": 5650 }, { "epoch": 0.057525634765625, "grad_norm": 11.892767906188965, "learning_rate": 4.960629011643008e-06, "loss": 3.0988, "step": 5655 }, { "epoch": 0.057576497395833336, "grad_norm": 7.135873794555664, "learning_rate": 4.960558291915622e-06, "loss": 3.2616, "step": 5660 }, { "epoch": 0.057627360026041664, "grad_norm": 10.335238456726074, "learning_rate": 4.960487509235435e-06, "loss": 3.5319, "step": 5665 }, { "epoch": 0.05767822265625, "grad_norm": 10.540165901184082, "learning_rate": 4.96041666360426e-06, "loss": 3.3135, "step": 5670 }, { "epoch": 0.057729085286458336, "grad_norm": 17.09685516357422, "learning_rate": 4.9603457550239065e-06, "loss": 3.5976, "step": 5675 }, { "epoch": 0.057779947916666664, "grad_norm": 13.739051818847656, "learning_rate": 4.96027478349619e-06, "loss": 3.5974, "step": 5680 }, { "epoch": 0.057830810546875, "grad_norm": 13.99421501159668, "learning_rate": 4.960203749022927e-06, "loss": 3.2972, "step": 5685 }, { "epoch": 0.057881673177083336, "grad_norm": 12.529129028320312, "learning_rate": 4.960132651605934e-06, "loss": 3.4988, "step": 5690 }, { "epoch": 0.057932535807291664, "grad_norm": 13.570207595825195, "learning_rate": 4.96006149124703e-06, "loss": 3.2342, "step": 5695 }, { "epoch": 0.0579833984375, "grad_norm": 8.2381591796875, "learning_rate": 4.959990267948035e-06, "loss": 3.6295, "step": 5700 }, { "epoch": 0.058034261067708336, "grad_norm": 10.554726600646973, "learning_rate": 4.959918981710773e-06, "loss": 3.6647, "step": 5705 }, { "epoch": 0.058085123697916664, "grad_norm": 10.161996841430664, "learning_rate": 4.959847632537067e-06, "loss": 3.3671, "step": 5710 }, { "epoch": 0.058135986328125, "grad_norm": 10.479255676269531, "learning_rate": 4.959776220428743e-06, "loss": 3.6848, "step": 5715 }, { "epoch": 0.058186848958333336, "grad_norm": 13.689743995666504, "learning_rate": 4.959704745387626e-06, "loss": 3.6923, "step": 5720 }, { "epoch": 0.058237711588541664, "grad_norm": 15.075189590454102, "learning_rate": 4.9596332074155465e-06, "loss": 3.1929, "step": 5725 }, { "epoch": 0.05828857421875, "grad_norm": 10.657966613769531, "learning_rate": 4.959561606514335e-06, "loss": 3.3856, "step": 5730 }, { "epoch": 0.058339436848958336, "grad_norm": 17.425458908081055, "learning_rate": 4.959489942685822e-06, "loss": 3.5663, "step": 5735 }, { "epoch": 0.058390299479166664, "grad_norm": 10.390463829040527, "learning_rate": 4.959418215931843e-06, "loss": 4.1035, "step": 5740 }, { "epoch": 0.058441162109375, "grad_norm": 9.901952743530273, "learning_rate": 4.959346426254231e-06, "loss": 3.8645, "step": 5745 }, { "epoch": 0.058492024739583336, "grad_norm": 12.191425323486328, "learning_rate": 4.9592745736548235e-06, "loss": 3.5104, "step": 5750 }, { "epoch": 0.058542887369791664, "grad_norm": 8.743103981018066, "learning_rate": 4.959202658135459e-06, "loss": 3.4598, "step": 5755 }, { "epoch": 0.05859375, "grad_norm": 12.222476959228516, "learning_rate": 4.959130679697978e-06, "loss": 3.4976, "step": 5760 }, { "epoch": 0.058644612630208336, "grad_norm": 12.773666381835938, "learning_rate": 4.95905863834422e-06, "loss": 3.3259, "step": 5765 }, { "epoch": 0.058695475260416664, "grad_norm": 12.655311584472656, "learning_rate": 4.958986534076031e-06, "loss": 3.6635, "step": 5770 }, { "epoch": 0.058746337890625, "grad_norm": 10.425118446350098, "learning_rate": 4.9589143668952536e-06, "loss": 3.3051, "step": 5775 }, { "epoch": 0.058797200520833336, "grad_norm": 12.868205070495605, "learning_rate": 4.958842136803735e-06, "loss": 3.6249, "step": 5780 }, { "epoch": 0.058848063151041664, "grad_norm": 15.800631523132324, "learning_rate": 4.958769843803324e-06, "loss": 3.5991, "step": 5785 }, { "epoch": 0.05889892578125, "grad_norm": 10.733988761901855, "learning_rate": 4.958697487895869e-06, "loss": 3.6806, "step": 5790 }, { "epoch": 0.058949788411458336, "grad_norm": 14.783018112182617, "learning_rate": 4.9586250690832214e-06, "loss": 3.6191, "step": 5795 }, { "epoch": 0.059000651041666664, "grad_norm": 9.55103874206543, "learning_rate": 4.958552587367233e-06, "loss": 3.2202, "step": 5800 }, { "epoch": 0.059051513671875, "grad_norm": 12.465963363647461, "learning_rate": 4.958480042749762e-06, "loss": 3.0866, "step": 5805 }, { "epoch": 0.059102376302083336, "grad_norm": 10.959587097167969, "learning_rate": 4.958407435232659e-06, "loss": 3.6214, "step": 5810 }, { "epoch": 0.059153238932291664, "grad_norm": 20.63323974609375, "learning_rate": 4.958334764817786e-06, "loss": 4.0361, "step": 5815 }, { "epoch": 0.0592041015625, "grad_norm": 7.549029350280762, "learning_rate": 4.9582620315070005e-06, "loss": 3.3141, "step": 5820 }, { "epoch": 0.059254964192708336, "grad_norm": 12.9930419921875, "learning_rate": 4.958189235302164e-06, "loss": 3.397, "step": 5825 }, { "epoch": 0.059305826822916664, "grad_norm": 15.772438049316406, "learning_rate": 4.958116376205138e-06, "loss": 3.375, "step": 5830 }, { "epoch": 0.059356689453125, "grad_norm": 14.877005577087402, "learning_rate": 4.9580434542177875e-06, "loss": 3.2812, "step": 5835 }, { "epoch": 0.059407552083333336, "grad_norm": 14.17616081237793, "learning_rate": 4.957970469341977e-06, "loss": 3.4907, "step": 5840 }, { "epoch": 0.059458414713541664, "grad_norm": 16.345909118652344, "learning_rate": 4.957897421579576e-06, "loss": 3.3758, "step": 5845 }, { "epoch": 0.05950927734375, "grad_norm": 15.003165245056152, "learning_rate": 4.957824310932451e-06, "loss": 3.0737, "step": 5850 }, { "epoch": 0.059560139973958336, "grad_norm": 14.897906303405762, "learning_rate": 4.957751137402475e-06, "loss": 3.2275, "step": 5855 }, { "epoch": 0.059611002604166664, "grad_norm": 13.421442031860352, "learning_rate": 4.957677900991516e-06, "loss": 3.2209, "step": 5860 }, { "epoch": 0.059661865234375, "grad_norm": 9.91036319732666, "learning_rate": 4.957604601701453e-06, "loss": 3.9609, "step": 5865 }, { "epoch": 0.059712727864583336, "grad_norm": 18.146198272705078, "learning_rate": 4.957531239534158e-06, "loss": 3.4241, "step": 5870 }, { "epoch": 0.059763590494791664, "grad_norm": 13.376401901245117, "learning_rate": 4.957457814491509e-06, "loss": 3.4611, "step": 5875 }, { "epoch": 0.059814453125, "grad_norm": 9.991517066955566, "learning_rate": 4.957384326575383e-06, "loss": 3.1355, "step": 5880 }, { "epoch": 0.059865315755208336, "grad_norm": 8.1071138381958, "learning_rate": 4.9573107757876625e-06, "loss": 3.4358, "step": 5885 }, { "epoch": 0.059916178385416664, "grad_norm": 16.746749877929688, "learning_rate": 4.9572371621302284e-06, "loss": 3.6471, "step": 5890 }, { "epoch": 0.059967041015625, "grad_norm": 16.02521514892578, "learning_rate": 4.957163485604963e-06, "loss": 3.5407, "step": 5895 }, { "epoch": 0.060017903645833336, "grad_norm": 8.261488914489746, "learning_rate": 4.957089746213753e-06, "loss": 3.2416, "step": 5900 }, { "epoch": 0.060068766276041664, "grad_norm": 12.552051544189453, "learning_rate": 4.957015943958484e-06, "loss": 3.3061, "step": 5905 }, { "epoch": 0.06011962890625, "grad_norm": 15.044920921325684, "learning_rate": 4.956942078841045e-06, "loss": 3.8459, "step": 5910 }, { "epoch": 0.060170491536458336, "grad_norm": 13.593306541442871, "learning_rate": 4.9568681508633246e-06, "loss": 3.2017, "step": 5915 }, { "epoch": 0.060221354166666664, "grad_norm": 12.529535293579102, "learning_rate": 4.956794160027215e-06, "loss": 3.7657, "step": 5920 }, { "epoch": 0.060272216796875, "grad_norm": 13.670187950134277, "learning_rate": 4.9567201063346096e-06, "loss": 3.5618, "step": 5925 }, { "epoch": 0.060323079427083336, "grad_norm": 14.781356811523438, "learning_rate": 4.956645989787402e-06, "loss": 3.3705, "step": 5930 }, { "epoch": 0.060373942057291664, "grad_norm": 12.808194160461426, "learning_rate": 4.95657181038749e-06, "loss": 3.5778, "step": 5935 }, { "epoch": 0.0604248046875, "grad_norm": 9.7361421585083, "learning_rate": 4.956497568136769e-06, "loss": 3.9065, "step": 5940 }, { "epoch": 0.060475667317708336, "grad_norm": 8.544392585754395, "learning_rate": 4.9564232630371414e-06, "loss": 3.0755, "step": 5945 }, { "epoch": 0.060526529947916664, "grad_norm": 16.483707427978516, "learning_rate": 4.956348895090506e-06, "loss": 3.2558, "step": 5950 }, { "epoch": 0.060577392578125, "grad_norm": 17.052963256835938, "learning_rate": 4.956274464298766e-06, "loss": 3.6055, "step": 5955 }, { "epoch": 0.060628255208333336, "grad_norm": 13.169681549072266, "learning_rate": 4.956199970663827e-06, "loss": 3.6049, "step": 5960 }, { "epoch": 0.060679117838541664, "grad_norm": 11.642385482788086, "learning_rate": 4.956125414187594e-06, "loss": 3.3229, "step": 5965 }, { "epoch": 0.06072998046875, "grad_norm": 9.508110046386719, "learning_rate": 4.956050794871974e-06, "loss": 3.2438, "step": 5970 }, { "epoch": 0.060780843098958336, "grad_norm": 9.767243385314941, "learning_rate": 4.955976112718876e-06, "loss": 3.623, "step": 5975 }, { "epoch": 0.060831705729166664, "grad_norm": 13.47176456451416, "learning_rate": 4.955901367730212e-06, "loss": 3.5425, "step": 5980 }, { "epoch": 0.060882568359375, "grad_norm": 13.689672470092773, "learning_rate": 4.9558265599078935e-06, "loss": 3.4059, "step": 5985 }, { "epoch": 0.060933430989583336, "grad_norm": 14.70511531829834, "learning_rate": 4.955751689253834e-06, "loss": 3.2993, "step": 5990 }, { "epoch": 0.060984293619791664, "grad_norm": 10.678279876708984, "learning_rate": 4.955676755769951e-06, "loss": 3.8429, "step": 5995 }, { "epoch": 0.06103515625, "grad_norm": 9.907837867736816, "learning_rate": 4.955601759458158e-06, "loss": 3.6905, "step": 6000 }, { "epoch": 0.061086018880208336, "grad_norm": 14.26388931274414, "learning_rate": 4.955526700320378e-06, "loss": 3.8103, "step": 6005 }, { "epoch": 0.061136881510416664, "grad_norm": 13.094128608703613, "learning_rate": 4.955451578358529e-06, "loss": 3.5114, "step": 6010 }, { "epoch": 0.061187744140625, "grad_norm": 9.898237228393555, "learning_rate": 4.955376393574533e-06, "loss": 3.0525, "step": 6015 }, { "epoch": 0.061238606770833336, "grad_norm": 20.232635498046875, "learning_rate": 4.955301145970314e-06, "loss": 3.3842, "step": 6020 }, { "epoch": 0.061289469401041664, "grad_norm": 10.714902877807617, "learning_rate": 4.955225835547798e-06, "loss": 3.2338, "step": 6025 }, { "epoch": 0.06134033203125, "grad_norm": 16.868967056274414, "learning_rate": 4.95515046230891e-06, "loss": 3.3336, "step": 6030 }, { "epoch": 0.061391194661458336, "grad_norm": 10.545510292053223, "learning_rate": 4.9550750262555795e-06, "loss": 3.4626, "step": 6035 }, { "epoch": 0.061442057291666664, "grad_norm": 16.030223846435547, "learning_rate": 4.9549995273897365e-06, "loss": 3.746, "step": 6040 }, { "epoch": 0.061492919921875, "grad_norm": 16.585622787475586, "learning_rate": 4.954923965713312e-06, "loss": 3.5239, "step": 6045 }, { "epoch": 0.061543782552083336, "grad_norm": 10.373279571533203, "learning_rate": 4.95484834122824e-06, "loss": 3.4698, "step": 6050 }, { "epoch": 0.061594645182291664, "grad_norm": 11.033610343933105, "learning_rate": 4.954772653936455e-06, "loss": 3.521, "step": 6055 }, { "epoch": 0.0616455078125, "grad_norm": 12.441916465759277, "learning_rate": 4.954696903839894e-06, "loss": 3.5568, "step": 6060 }, { "epoch": 0.061696370442708336, "grad_norm": 12.675573348999023, "learning_rate": 4.954621090940495e-06, "loss": 3.199, "step": 6065 }, { "epoch": 0.061747233072916664, "grad_norm": 11.934941291809082, "learning_rate": 4.9545452152401965e-06, "loss": 3.5181, "step": 6070 }, { "epoch": 0.061798095703125, "grad_norm": 12.990299224853516, "learning_rate": 4.95446927674094e-06, "loss": 3.1343, "step": 6075 }, { "epoch": 0.061848958333333336, "grad_norm": 15.312989234924316, "learning_rate": 4.954393275444669e-06, "loss": 2.912, "step": 6080 }, { "epoch": 0.061899820963541664, "grad_norm": 16.04402732849121, "learning_rate": 4.954317211353328e-06, "loss": 3.5654, "step": 6085 }, { "epoch": 0.06195068359375, "grad_norm": 16.57796287536621, "learning_rate": 4.954241084468863e-06, "loss": 3.4681, "step": 6090 }, { "epoch": 0.062001546223958336, "grad_norm": 12.278860092163086, "learning_rate": 4.954164894793222e-06, "loss": 3.4626, "step": 6095 }, { "epoch": 0.062052408854166664, "grad_norm": 85.06364440917969, "learning_rate": 4.954088642328353e-06, "loss": 3.5676, "step": 6100 }, { "epoch": 0.062103271484375, "grad_norm": 9.45626163482666, "learning_rate": 4.954012327076207e-06, "loss": 3.7756, "step": 6105 }, { "epoch": 0.062154134114583336, "grad_norm": 13.716742515563965, "learning_rate": 4.95393594903874e-06, "loss": 3.5243, "step": 6110 }, { "epoch": 0.062204996744791664, "grad_norm": 19.727203369140625, "learning_rate": 4.953859508217901e-06, "loss": 3.479, "step": 6115 }, { "epoch": 0.062255859375, "grad_norm": 14.221769332885742, "learning_rate": 4.953783004615649e-06, "loss": 3.3731, "step": 6120 }, { "epoch": 0.062306722005208336, "grad_norm": 8.173396110534668, "learning_rate": 4.953706438233941e-06, "loss": 3.2895, "step": 6125 }, { "epoch": 0.062357584635416664, "grad_norm": 11.394742965698242, "learning_rate": 4.953629809074734e-06, "loss": 3.3431, "step": 6130 }, { "epoch": 0.062408447265625, "grad_norm": 7.780871391296387, "learning_rate": 4.953553117139991e-06, "loss": 3.6523, "step": 6135 }, { "epoch": 0.062459309895833336, "grad_norm": 12.438297271728516, "learning_rate": 4.953476362431672e-06, "loss": 3.4039, "step": 6140 }, { "epoch": 0.06251017252604167, "grad_norm": 12.829780578613281, "learning_rate": 4.953399544951742e-06, "loss": 3.5707, "step": 6145 }, { "epoch": 0.06256103515625, "grad_norm": 11.045709609985352, "learning_rate": 4.953322664702167e-06, "loss": 3.1758, "step": 6150 }, { "epoch": 0.06261189778645833, "grad_norm": 13.500697135925293, "learning_rate": 4.953245721684913e-06, "loss": 3.5634, "step": 6155 }, { "epoch": 0.06266276041666667, "grad_norm": 13.87531852722168, "learning_rate": 4.953168715901949e-06, "loss": 3.0295, "step": 6160 }, { "epoch": 0.062713623046875, "grad_norm": 10.557450294494629, "learning_rate": 4.953091647355244e-06, "loss": 3.6556, "step": 6165 }, { "epoch": 0.06276448567708333, "grad_norm": 14.243014335632324, "learning_rate": 4.953014516046771e-06, "loss": 3.493, "step": 6170 }, { "epoch": 0.06281534830729167, "grad_norm": 14.641172409057617, "learning_rate": 4.952937321978502e-06, "loss": 3.5743, "step": 6175 }, { "epoch": 0.0628662109375, "grad_norm": 14.950688362121582, "learning_rate": 4.952860065152415e-06, "loss": 3.3458, "step": 6180 }, { "epoch": 0.06291707356770833, "grad_norm": 10.996521949768066, "learning_rate": 4.952782745570483e-06, "loss": 3.3334, "step": 6185 }, { "epoch": 0.06296793619791667, "grad_norm": 10.796847343444824, "learning_rate": 4.952705363234687e-06, "loss": 3.6746, "step": 6190 }, { "epoch": 0.063018798828125, "grad_norm": 16.92947006225586, "learning_rate": 4.952627918147005e-06, "loss": 3.3581, "step": 6195 }, { "epoch": 0.06306966145833333, "grad_norm": 9.009227752685547, "learning_rate": 4.952550410309419e-06, "loss": 3.3688, "step": 6200 }, { "epoch": 0.06312052408854167, "grad_norm": 9.859138488769531, "learning_rate": 4.952472839723912e-06, "loss": 3.2627, "step": 6205 }, { "epoch": 0.06317138671875, "grad_norm": 16.701183319091797, "learning_rate": 4.952395206392469e-06, "loss": 3.6775, "step": 6210 }, { "epoch": 0.06322224934895833, "grad_norm": 17.42451286315918, "learning_rate": 4.952317510317076e-06, "loss": 3.5911, "step": 6215 }, { "epoch": 0.06327311197916667, "grad_norm": 12.738759994506836, "learning_rate": 4.952239751499721e-06, "loss": 3.2518, "step": 6220 }, { "epoch": 0.063323974609375, "grad_norm": 12.116572380065918, "learning_rate": 4.952161929942393e-06, "loss": 3.6424, "step": 6225 }, { "epoch": 0.06337483723958333, "grad_norm": 15.029314041137695, "learning_rate": 4.952084045647083e-06, "loss": 3.4677, "step": 6230 }, { "epoch": 0.06342569986979167, "grad_norm": 16.844175338745117, "learning_rate": 4.952006098615784e-06, "loss": 3.0719, "step": 6235 }, { "epoch": 0.0634765625, "grad_norm": 16.886533737182617, "learning_rate": 4.95192808885049e-06, "loss": 3.4529, "step": 6240 }, { "epoch": 0.06352742513020833, "grad_norm": 15.081376075744629, "learning_rate": 4.9518500163531966e-06, "loss": 3.1723, "step": 6245 }, { "epoch": 0.06357828776041667, "grad_norm": 9.575623512268066, "learning_rate": 4.951771881125903e-06, "loss": 3.5933, "step": 6250 }, { "epoch": 0.063629150390625, "grad_norm": 9.829601287841797, "learning_rate": 4.951693683170606e-06, "loss": 3.7078, "step": 6255 }, { "epoch": 0.06368001302083333, "grad_norm": 10.027034759521484, "learning_rate": 4.951615422489308e-06, "loss": 3.3214, "step": 6260 }, { "epoch": 0.06373087565104167, "grad_norm": 14.713983535766602, "learning_rate": 4.9515370990840095e-06, "loss": 3.9297, "step": 6265 }, { "epoch": 0.06378173828125, "grad_norm": 10.196232795715332, "learning_rate": 4.951458712956716e-06, "loss": 3.3136, "step": 6270 }, { "epoch": 0.06383260091145833, "grad_norm": 8.880327224731445, "learning_rate": 4.9513802641094325e-06, "loss": 3.4526, "step": 6275 }, { "epoch": 0.06388346354166667, "grad_norm": 10.190267562866211, "learning_rate": 4.951301752544165e-06, "loss": 3.8351, "step": 6280 }, { "epoch": 0.063934326171875, "grad_norm": 13.929616928100586, "learning_rate": 4.951223178262924e-06, "loss": 3.3708, "step": 6285 }, { "epoch": 0.06398518880208333, "grad_norm": 12.750432014465332, "learning_rate": 4.951144541267719e-06, "loss": 3.617, "step": 6290 }, { "epoch": 0.06403605143229167, "grad_norm": 17.89909553527832, "learning_rate": 4.951065841560561e-06, "loss": 3.6072, "step": 6295 }, { "epoch": 0.0640869140625, "grad_norm": 14.314258575439453, "learning_rate": 4.950987079143465e-06, "loss": 3.4483, "step": 6300 }, { "epoch": 0.06413777669270833, "grad_norm": 12.541434288024902, "learning_rate": 4.950908254018446e-06, "loss": 3.9403, "step": 6305 }, { "epoch": 0.06418863932291667, "grad_norm": 10.588662147521973, "learning_rate": 4.9508293661875205e-06, "loss": 3.3919, "step": 6310 }, { "epoch": 0.064239501953125, "grad_norm": 14.57321834564209, "learning_rate": 4.950750415652706e-06, "loss": 3.6334, "step": 6315 }, { "epoch": 0.06429036458333333, "grad_norm": 12.940896034240723, "learning_rate": 4.950671402416023e-06, "loss": 3.5378, "step": 6320 }, { "epoch": 0.06434122721354167, "grad_norm": 14.000555992126465, "learning_rate": 4.9505923264794935e-06, "loss": 3.7651, "step": 6325 }, { "epoch": 0.06439208984375, "grad_norm": 12.165482521057129, "learning_rate": 4.95051318784514e-06, "loss": 3.3013, "step": 6330 }, { "epoch": 0.06444295247395833, "grad_norm": 10.64943790435791, "learning_rate": 4.950433986514986e-06, "loss": 3.6277, "step": 6335 }, { "epoch": 0.06449381510416667, "grad_norm": 9.537559509277344, "learning_rate": 4.9503547224910605e-06, "loss": 3.3196, "step": 6340 }, { "epoch": 0.064544677734375, "grad_norm": 12.809176445007324, "learning_rate": 4.9502753957753905e-06, "loss": 3.5917, "step": 6345 }, { "epoch": 0.06459554036458333, "grad_norm": 9.395776748657227, "learning_rate": 4.950196006370005e-06, "loss": 3.4648, "step": 6350 }, { "epoch": 0.06464640299479167, "grad_norm": 14.865406036376953, "learning_rate": 4.950116554276936e-06, "loss": 3.5405, "step": 6355 }, { "epoch": 0.064697265625, "grad_norm": 13.822293281555176, "learning_rate": 4.950037039498215e-06, "loss": 3.2149, "step": 6360 }, { "epoch": 0.06474812825520833, "grad_norm": 10.368663787841797, "learning_rate": 4.949957462035877e-06, "loss": 3.2916, "step": 6365 }, { "epoch": 0.06479899088541667, "grad_norm": 15.785028457641602, "learning_rate": 4.949877821891958e-06, "loss": 3.2872, "step": 6370 }, { "epoch": 0.064849853515625, "grad_norm": 9.993640899658203, "learning_rate": 4.949798119068495e-06, "loss": 3.3167, "step": 6375 }, { "epoch": 0.06490071614583333, "grad_norm": 14.402359962463379, "learning_rate": 4.949718353567529e-06, "loss": 3.7203, "step": 6380 }, { "epoch": 0.06495157877604167, "grad_norm": 13.888138771057129, "learning_rate": 4.9496385253910996e-06, "loss": 3.4504, "step": 6385 }, { "epoch": 0.06500244140625, "grad_norm": 19.04587745666504, "learning_rate": 4.949558634541249e-06, "loss": 3.4553, "step": 6390 }, { "epoch": 0.06505330403645833, "grad_norm": 17.151155471801758, "learning_rate": 4.94947868102002e-06, "loss": 3.7058, "step": 6395 }, { "epoch": 0.06510416666666667, "grad_norm": 13.129286766052246, "learning_rate": 4.949398664829461e-06, "loss": 3.5657, "step": 6400 }, { "epoch": 0.065155029296875, "grad_norm": 12.586851119995117, "learning_rate": 4.949318585971617e-06, "loss": 3.2961, "step": 6405 }, { "epoch": 0.06520589192708333, "grad_norm": 15.264113426208496, "learning_rate": 4.949238444448539e-06, "loss": 3.7745, "step": 6410 }, { "epoch": 0.06525675455729167, "grad_norm": 12.554951667785645, "learning_rate": 4.949158240262274e-06, "loss": 3.455, "step": 6415 }, { "epoch": 0.0653076171875, "grad_norm": 11.962057113647461, "learning_rate": 4.949077973414877e-06, "loss": 3.2963, "step": 6420 }, { "epoch": 0.06535847981770833, "grad_norm": 12.596719741821289, "learning_rate": 4.9489976439084e-06, "loss": 3.7278, "step": 6425 }, { "epoch": 0.06540934244791667, "grad_norm": 10.47852897644043, "learning_rate": 4.948917251744899e-06, "loss": 3.6283, "step": 6430 }, { "epoch": 0.065460205078125, "grad_norm": 9.309669494628906, "learning_rate": 4.9488367969264304e-06, "loss": 3.3136, "step": 6435 }, { "epoch": 0.06551106770833333, "grad_norm": 13.344404220581055, "learning_rate": 4.9487562794550535e-06, "loss": 3.3655, "step": 6440 }, { "epoch": 0.06556193033854167, "grad_norm": 13.794371604919434, "learning_rate": 4.948675699332827e-06, "loss": 3.2034, "step": 6445 }, { "epoch": 0.06561279296875, "grad_norm": 14.009343147277832, "learning_rate": 4.9485950565618134e-06, "loss": 3.4136, "step": 6450 }, { "epoch": 0.06566365559895833, "grad_norm": 10.496328353881836, "learning_rate": 4.9485143511440754e-06, "loss": 3.5469, "step": 6455 }, { "epoch": 0.06571451822916667, "grad_norm": 18.860055923461914, "learning_rate": 4.948433583081678e-06, "loss": 4.3312, "step": 6460 }, { "epoch": 0.065765380859375, "grad_norm": 12.226606369018555, "learning_rate": 4.948352752376689e-06, "loss": 3.187, "step": 6465 }, { "epoch": 0.06581624348958333, "grad_norm": 8.17588996887207, "learning_rate": 4.948271859031173e-06, "loss": 3.4183, "step": 6470 }, { "epoch": 0.06586710611979167, "grad_norm": 18.25055694580078, "learning_rate": 4.948190903047203e-06, "loss": 3.9294, "step": 6475 }, { "epoch": 0.06591796875, "grad_norm": 15.113214492797852, "learning_rate": 4.948109884426849e-06, "loss": 3.2971, "step": 6480 }, { "epoch": 0.06596883138020833, "grad_norm": 10.636944770812988, "learning_rate": 4.9480288031721835e-06, "loss": 3.1685, "step": 6485 }, { "epoch": 0.06601969401041667, "grad_norm": 12.23661994934082, "learning_rate": 4.947947659285281e-06, "loss": 3.2994, "step": 6490 }, { "epoch": 0.066070556640625, "grad_norm": 14.394771575927734, "learning_rate": 4.9478664527682194e-06, "loss": 3.5549, "step": 6495 }, { "epoch": 0.06612141927083333, "grad_norm": 12.141425132751465, "learning_rate": 4.947785183623074e-06, "loss": 3.5396, "step": 6500 }, { "epoch": 0.06617228190104167, "grad_norm": 11.212850570678711, "learning_rate": 4.9477038518519235e-06, "loss": 3.3503, "step": 6505 }, { "epoch": 0.06622314453125, "grad_norm": 13.835770606994629, "learning_rate": 4.947622457456852e-06, "loss": 3.1723, "step": 6510 }, { "epoch": 0.06627400716145833, "grad_norm": 13.431824684143066, "learning_rate": 4.94754100043994e-06, "loss": 3.339, "step": 6515 }, { "epoch": 0.06632486979166667, "grad_norm": 12.01131820678711, "learning_rate": 4.94745948080327e-06, "loss": 3.2717, "step": 6520 }, { "epoch": 0.066375732421875, "grad_norm": 14.991222381591797, "learning_rate": 4.947377898548931e-06, "loss": 3.52, "step": 6525 }, { "epoch": 0.06642659505208333, "grad_norm": 14.108927726745605, "learning_rate": 4.947296253679008e-06, "loss": 3.1859, "step": 6530 }, { "epoch": 0.06647745768229167, "grad_norm": 9.38899040222168, "learning_rate": 4.94721454619559e-06, "loss": 3.6626, "step": 6535 }, { "epoch": 0.0665283203125, "grad_norm": 12.356621742248535, "learning_rate": 4.947132776100768e-06, "loss": 3.8692, "step": 6540 }, { "epoch": 0.06657918294270833, "grad_norm": 16.474075317382812, "learning_rate": 4.947050943396634e-06, "loss": 3.0192, "step": 6545 }, { "epoch": 0.06663004557291667, "grad_norm": 12.742081642150879, "learning_rate": 4.9469690480852824e-06, "loss": 3.3703, "step": 6550 }, { "epoch": 0.066680908203125, "grad_norm": 13.639100074768066, "learning_rate": 4.946887090168807e-06, "loss": 3.8247, "step": 6555 }, { "epoch": 0.06673177083333333, "grad_norm": 9.607970237731934, "learning_rate": 4.946805069649305e-06, "loss": 3.4566, "step": 6560 }, { "epoch": 0.06678263346354167, "grad_norm": 13.177118301391602, "learning_rate": 4.946722986528876e-06, "loss": 3.5717, "step": 6565 }, { "epoch": 0.06683349609375, "grad_norm": 12.43066120147705, "learning_rate": 4.946640840809619e-06, "loss": 3.5391, "step": 6570 }, { "epoch": 0.06688435872395833, "grad_norm": 13.892637252807617, "learning_rate": 4.946558632493636e-06, "loss": 3.802, "step": 6575 }, { "epoch": 0.06693522135416667, "grad_norm": 13.632702827453613, "learning_rate": 4.94647636158303e-06, "loss": 3.3285, "step": 6580 }, { "epoch": 0.066986083984375, "grad_norm": 12.024981498718262, "learning_rate": 4.946394028079907e-06, "loss": 3.2208, "step": 6585 }, { "epoch": 0.06703694661458333, "grad_norm": 15.032366752624512, "learning_rate": 4.946311631986372e-06, "loss": 3.374, "step": 6590 }, { "epoch": 0.06708780924479167, "grad_norm": 10.841226577758789, "learning_rate": 4.946229173304535e-06, "loss": 3.2525, "step": 6595 }, { "epoch": 0.067138671875, "grad_norm": 10.338508605957031, "learning_rate": 4.946146652036502e-06, "loss": 3.5958, "step": 6600 }, { "epoch": 0.06718953450520833, "grad_norm": 10.029006004333496, "learning_rate": 4.9460640681843885e-06, "loss": 3.6985, "step": 6605 }, { "epoch": 0.06724039713541667, "grad_norm": 9.308262825012207, "learning_rate": 4.945981421750305e-06, "loss": 3.3633, "step": 6610 }, { "epoch": 0.067291259765625, "grad_norm": 8.373737335205078, "learning_rate": 4.945898712736366e-06, "loss": 3.5412, "step": 6615 }, { "epoch": 0.06734212239583333, "grad_norm": 11.022754669189453, "learning_rate": 4.94581594114469e-06, "loss": 4.3451, "step": 6620 }, { "epoch": 0.06739298502604167, "grad_norm": 7.635653972625732, "learning_rate": 4.945733106977391e-06, "loss": 3.1132, "step": 6625 }, { "epoch": 0.06744384765625, "grad_norm": 11.958892822265625, "learning_rate": 4.945650210236591e-06, "loss": 3.736, "step": 6630 }, { "epoch": 0.06749471028645833, "grad_norm": 17.302410125732422, "learning_rate": 4.94556725092441e-06, "loss": 3.2335, "step": 6635 }, { "epoch": 0.06754557291666667, "grad_norm": 10.765392303466797, "learning_rate": 4.945484229042971e-06, "loss": 3.0814, "step": 6640 }, { "epoch": 0.067596435546875, "grad_norm": 10.385746002197266, "learning_rate": 4.945401144594397e-06, "loss": 3.4643, "step": 6645 }, { "epoch": 0.06764729817708333, "grad_norm": 14.88101577758789, "learning_rate": 4.945317997580814e-06, "loss": 3.7192, "step": 6650 }, { "epoch": 0.06769816080729167, "grad_norm": 10.534723281860352, "learning_rate": 4.9452347880043505e-06, "loss": 3.4338, "step": 6655 }, { "epoch": 0.0677490234375, "grad_norm": 10.487231254577637, "learning_rate": 4.945151515867134e-06, "loss": 3.6227, "step": 6660 }, { "epoch": 0.06779988606770833, "grad_norm": 10.452747344970703, "learning_rate": 4.9450681811712954e-06, "loss": 3.2158, "step": 6665 }, { "epoch": 0.06785074869791667, "grad_norm": 12.921323776245117, "learning_rate": 4.944984783918968e-06, "loss": 3.5547, "step": 6670 }, { "epoch": 0.067901611328125, "grad_norm": 12.540404319763184, "learning_rate": 4.944901324112283e-06, "loss": 3.5022, "step": 6675 }, { "epoch": 0.06795247395833333, "grad_norm": 11.059189796447754, "learning_rate": 4.9448178017533775e-06, "loss": 3.3226, "step": 6680 }, { "epoch": 0.06800333658854167, "grad_norm": 19.822628021240234, "learning_rate": 4.944734216844388e-06, "loss": 3.7347, "step": 6685 }, { "epoch": 0.06805419921875, "grad_norm": 12.81627082824707, "learning_rate": 4.944650569387453e-06, "loss": 3.9766, "step": 6690 }, { "epoch": 0.06810506184895833, "grad_norm": 14.65715217590332, "learning_rate": 4.944566859384714e-06, "loss": 3.2335, "step": 6695 }, { "epoch": 0.06815592447916667, "grad_norm": 9.100480079650879, "learning_rate": 4.94448308683831e-06, "loss": 3.3709, "step": 6700 }, { "epoch": 0.068206787109375, "grad_norm": 12.44413948059082, "learning_rate": 4.944399251750386e-06, "loss": 3.705, "step": 6705 }, { "epoch": 0.06825764973958333, "grad_norm": 612.3552856445312, "learning_rate": 4.944315354123086e-06, "loss": 3.851, "step": 6710 }, { "epoch": 0.06830851236979167, "grad_norm": 11.150618553161621, "learning_rate": 4.944231393958558e-06, "loss": 3.3883, "step": 6715 }, { "epoch": 0.068359375, "grad_norm": 15.825736999511719, "learning_rate": 4.944147371258948e-06, "loss": 3.1653, "step": 6720 }, { "epoch": 0.06841023763020833, "grad_norm": 13.351125717163086, "learning_rate": 4.944063286026408e-06, "loss": 3.6262, "step": 6725 }, { "epoch": 0.06846110026041667, "grad_norm": 10.806169509887695, "learning_rate": 4.9439791382630875e-06, "loss": 3.4346, "step": 6730 }, { "epoch": 0.068511962890625, "grad_norm": 14.80085277557373, "learning_rate": 4.94389492797114e-06, "loss": 4.8293, "step": 6735 }, { "epoch": 0.06856282552083333, "grad_norm": 15.626360893249512, "learning_rate": 4.94381065515272e-06, "loss": 3.5732, "step": 6740 }, { "epoch": 0.06861368815104167, "grad_norm": 12.220220565795898, "learning_rate": 4.943726319809984e-06, "loss": 3.1158, "step": 6745 }, { "epoch": 0.06866455078125, "grad_norm": 11.84122371673584, "learning_rate": 4.943641921945089e-06, "loss": 3.2894, "step": 6750 }, { "epoch": 0.06871541341145833, "grad_norm": 11.213668823242188, "learning_rate": 4.943557461560195e-06, "loss": 3.5569, "step": 6755 }, { "epoch": 0.06876627604166667, "grad_norm": 10.223577499389648, "learning_rate": 4.943472938657462e-06, "loss": 3.4687, "step": 6760 }, { "epoch": 0.068817138671875, "grad_norm": 11.476598739624023, "learning_rate": 4.943388353239053e-06, "loss": 3.3717, "step": 6765 }, { "epoch": 0.06886800130208333, "grad_norm": 12.814501762390137, "learning_rate": 4.943303705307133e-06, "loss": 3.4168, "step": 6770 }, { "epoch": 0.06891886393229167, "grad_norm": 15.519283294677734, "learning_rate": 4.943218994863866e-06, "loss": 3.4865, "step": 6775 }, { "epoch": 0.0689697265625, "grad_norm": 10.789101600646973, "learning_rate": 4.943134221911421e-06, "loss": 3.3488, "step": 6780 }, { "epoch": 0.06902058919270833, "grad_norm": 16.44157600402832, "learning_rate": 4.943049386451964e-06, "loss": 3.7548, "step": 6785 }, { "epoch": 0.06907145182291667, "grad_norm": 12.07598876953125, "learning_rate": 4.942964488487669e-06, "loss": 3.6223, "step": 6790 }, { "epoch": 0.069122314453125, "grad_norm": 13.612014770507812, "learning_rate": 4.942879528020707e-06, "loss": 3.9421, "step": 6795 }, { "epoch": 0.06917317708333333, "grad_norm": 10.37803840637207, "learning_rate": 4.9427945050532515e-06, "loss": 3.6429, "step": 6800 }, { "epoch": 0.06922403971354167, "grad_norm": 10.748969078063965, "learning_rate": 4.942709419587476e-06, "loss": 3.4308, "step": 6805 }, { "epoch": 0.06927490234375, "grad_norm": 8.533020973205566, "learning_rate": 4.9426242716255605e-06, "loss": 3.2213, "step": 6810 }, { "epoch": 0.06932576497395833, "grad_norm": 14.239413261413574, "learning_rate": 4.942539061169681e-06, "loss": 3.5663, "step": 6815 }, { "epoch": 0.06937662760416667, "grad_norm": 12.664621353149414, "learning_rate": 4.942453788222019e-06, "loss": 3.5002, "step": 6820 }, { "epoch": 0.069427490234375, "grad_norm": 12.779163360595703, "learning_rate": 4.942368452784756e-06, "loss": 3.2068, "step": 6825 }, { "epoch": 0.06947835286458333, "grad_norm": 16.653684616088867, "learning_rate": 4.9422830548600745e-06, "loss": 3.3543, "step": 6830 }, { "epoch": 0.06952921549479167, "grad_norm": 16.349224090576172, "learning_rate": 4.94219759445016e-06, "loss": 3.5099, "step": 6835 }, { "epoch": 0.069580078125, "grad_norm": 8.445566177368164, "learning_rate": 4.942112071557199e-06, "loss": 3.793, "step": 6840 }, { "epoch": 0.06963094075520833, "grad_norm": 14.703461647033691, "learning_rate": 4.94202648618338e-06, "loss": 3.5832, "step": 6845 }, { "epoch": 0.06968180338541667, "grad_norm": 14.559622764587402, "learning_rate": 4.941940838330891e-06, "loss": 3.6992, "step": 6850 }, { "epoch": 0.069732666015625, "grad_norm": 11.245277404785156, "learning_rate": 4.941855128001925e-06, "loss": 2.9931, "step": 6855 }, { "epoch": 0.06978352864583333, "grad_norm": 17.25091552734375, "learning_rate": 4.941769355198675e-06, "loss": 3.243, "step": 6860 }, { "epoch": 0.06983439127604167, "grad_norm": 11.740355491638184, "learning_rate": 4.941683519923335e-06, "loss": 3.3695, "step": 6865 }, { "epoch": 0.06988525390625, "grad_norm": 12.884564399719238, "learning_rate": 4.9415976221781e-06, "loss": 3.3783, "step": 6870 }, { "epoch": 0.06993611653645833, "grad_norm": 14.360733032226562, "learning_rate": 4.9415116619651685e-06, "loss": 3.332, "step": 6875 }, { "epoch": 0.06998697916666667, "grad_norm": 15.234169960021973, "learning_rate": 4.94142563928674e-06, "loss": 3.0307, "step": 6880 }, { "epoch": 0.070037841796875, "grad_norm": 12.016648292541504, "learning_rate": 4.941339554145015e-06, "loss": 3.607, "step": 6885 }, { "epoch": 0.07008870442708333, "grad_norm": 10.132488250732422, "learning_rate": 4.941253406542197e-06, "loss": 3.7322, "step": 6890 }, { "epoch": 0.07013956705729167, "grad_norm": 11.677488327026367, "learning_rate": 4.941167196480489e-06, "loss": 3.7035, "step": 6895 }, { "epoch": 0.0701904296875, "grad_norm": 8.434168815612793, "learning_rate": 4.941080923962096e-06, "loss": 3.3046, "step": 6900 }, { "epoch": 0.07024129231770833, "grad_norm": 18.53049659729004, "learning_rate": 4.940994588989227e-06, "loss": 3.9959, "step": 6905 }, { "epoch": 0.07029215494791667, "grad_norm": 14.731730461120605, "learning_rate": 4.94090819156409e-06, "loss": 3.5238, "step": 6910 }, { "epoch": 0.070343017578125, "grad_norm": 9.122929573059082, "learning_rate": 4.940821731688895e-06, "loss": 3.525, "step": 6915 }, { "epoch": 0.07039388020833333, "grad_norm": 11.580704689025879, "learning_rate": 4.940735209365855e-06, "loss": 3.9973, "step": 6920 }, { "epoch": 0.07044474283854167, "grad_norm": 13.921919822692871, "learning_rate": 4.940648624597183e-06, "loss": 3.3295, "step": 6925 }, { "epoch": 0.07049560546875, "grad_norm": 11.508909225463867, "learning_rate": 4.9405619773850944e-06, "loss": 3.3846, "step": 6930 }, { "epoch": 0.07054646809895833, "grad_norm": 15.52819538116455, "learning_rate": 4.940475267731806e-06, "loss": 3.7001, "step": 6935 }, { "epoch": 0.07059733072916667, "grad_norm": 12.931517601013184, "learning_rate": 4.940388495639537e-06, "loss": 3.4707, "step": 6940 }, { "epoch": 0.070648193359375, "grad_norm": 16.401697158813477, "learning_rate": 4.9403016611105055e-06, "loss": 3.5479, "step": 6945 }, { "epoch": 0.07069905598958333, "grad_norm": 14.115461349487305, "learning_rate": 4.940214764146935e-06, "loss": 3.4651, "step": 6950 }, { "epoch": 0.07074991861979167, "grad_norm": 13.501302719116211, "learning_rate": 4.940127804751048e-06, "loss": 3.2613, "step": 6955 }, { "epoch": 0.07080078125, "grad_norm": 9.428580284118652, "learning_rate": 4.94004078292507e-06, "loss": 3.528, "step": 6960 }, { "epoch": 0.07085164388020833, "grad_norm": 15.635141372680664, "learning_rate": 4.939953698671227e-06, "loss": 3.413, "step": 6965 }, { "epoch": 0.07090250651041667, "grad_norm": 18.93190574645996, "learning_rate": 4.939866551991746e-06, "loss": 3.6146, "step": 6970 }, { "epoch": 0.070953369140625, "grad_norm": 13.07038688659668, "learning_rate": 4.939779342888858e-06, "loss": 3.6089, "step": 6975 }, { "epoch": 0.07100423177083333, "grad_norm": 11.98829174041748, "learning_rate": 4.939692071364794e-06, "loss": 3.2366, "step": 6980 }, { "epoch": 0.07105509440104167, "grad_norm": 13.00400161743164, "learning_rate": 4.939604737421787e-06, "loss": 3.6997, "step": 6985 }, { "epoch": 0.07110595703125, "grad_norm": 10.635180473327637, "learning_rate": 4.9395173410620714e-06, "loss": 3.5263, "step": 6990 }, { "epoch": 0.07115681966145833, "grad_norm": 8.874784469604492, "learning_rate": 4.939429882287881e-06, "loss": 3.7435, "step": 6995 }, { "epoch": 0.07120768229166667, "grad_norm": 17.223342895507812, "learning_rate": 4.939342361101457e-06, "loss": 3.248, "step": 7000 }, { "epoch": 0.071258544921875, "grad_norm": 15.709144592285156, "learning_rate": 4.939254777505037e-06, "loss": 3.3038, "step": 7005 }, { "epoch": 0.07130940755208333, "grad_norm": 10.639139175415039, "learning_rate": 4.93916713150086e-06, "loss": 3.433, "step": 7010 }, { "epoch": 0.07136027018229167, "grad_norm": 13.915593147277832, "learning_rate": 4.9390794230911715e-06, "loss": 3.4058, "step": 7015 }, { "epoch": 0.0714111328125, "grad_norm": 8.267925262451172, "learning_rate": 4.938991652278213e-06, "loss": 3.5928, "step": 7020 }, { "epoch": 0.07146199544270833, "grad_norm": 16.01654624938965, "learning_rate": 4.938903819064232e-06, "loss": 3.1889, "step": 7025 }, { "epoch": 0.07151285807291667, "grad_norm": 13.366415977478027, "learning_rate": 4.938815923451476e-06, "loss": 3.3296, "step": 7030 }, { "epoch": 0.071563720703125, "grad_norm": 12.922379493713379, "learning_rate": 4.9387279654421905e-06, "loss": 3.4845, "step": 7035 }, { "epoch": 0.07161458333333333, "grad_norm": 10.66179084777832, "learning_rate": 4.938639945038629e-06, "loss": 3.3688, "step": 7040 }, { "epoch": 0.07166544596354167, "grad_norm": 9.114995002746582, "learning_rate": 4.938551862243042e-06, "loss": 3.2822, "step": 7045 }, { "epoch": 0.07171630859375, "grad_norm": 10.946172714233398, "learning_rate": 4.9384637170576844e-06, "loss": 3.5603, "step": 7050 }, { "epoch": 0.07176717122395833, "grad_norm": 11.345202445983887, "learning_rate": 4.93837550948481e-06, "loss": 3.4276, "step": 7055 }, { "epoch": 0.07181803385416667, "grad_norm": 9.533265113830566, "learning_rate": 4.938287239526676e-06, "loss": 3.1513, "step": 7060 }, { "epoch": 0.071868896484375, "grad_norm": 9.234315872192383, "learning_rate": 4.938198907185542e-06, "loss": 3.3517, "step": 7065 }, { "epoch": 0.07191975911458333, "grad_norm": 10.09774398803711, "learning_rate": 4.938110512463666e-06, "loss": 3.5838, "step": 7070 }, { "epoch": 0.07197062174479167, "grad_norm": 14.220784187316895, "learning_rate": 4.938022055363311e-06, "loss": 3.4819, "step": 7075 }, { "epoch": 0.072021484375, "grad_norm": 10.781584739685059, "learning_rate": 4.9379335358867384e-06, "loss": 3.7672, "step": 7080 }, { "epoch": 0.07207234700520833, "grad_norm": 9.52526569366455, "learning_rate": 4.937844954036215e-06, "loss": 3.3561, "step": 7085 }, { "epoch": 0.07212320963541667, "grad_norm": 12.460453033447266, "learning_rate": 4.9377563098140065e-06, "loss": 3.4052, "step": 7090 }, { "epoch": 0.072174072265625, "grad_norm": 14.343060493469238, "learning_rate": 4.9376676032223805e-06, "loss": 3.214, "step": 7095 }, { "epoch": 0.07222493489583333, "grad_norm": 12.104986190795898, "learning_rate": 4.937578834263607e-06, "loss": 3.472, "step": 7100 }, { "epoch": 0.07227579752604167, "grad_norm": 12.275232315063477, "learning_rate": 4.9374900029399555e-06, "loss": 3.7637, "step": 7105 }, { "epoch": 0.07232666015625, "grad_norm": 12.134095191955566, "learning_rate": 4.937401109253701e-06, "loss": 3.6845, "step": 7110 }, { "epoch": 0.07237752278645833, "grad_norm": 15.203348159790039, "learning_rate": 4.937312153207117e-06, "loss": 3.5803, "step": 7115 }, { "epoch": 0.07242838541666667, "grad_norm": 17.58793830871582, "learning_rate": 4.937223134802478e-06, "loss": 3.1135, "step": 7120 }, { "epoch": 0.072479248046875, "grad_norm": 9.813260078430176, "learning_rate": 4.937134054042064e-06, "loss": 3.487, "step": 7125 }, { "epoch": 0.07253011067708333, "grad_norm": 12.799726486206055, "learning_rate": 4.9370449109281524e-06, "loss": 3.5097, "step": 7130 }, { "epoch": 0.07258097330729167, "grad_norm": 10.807867050170898, "learning_rate": 4.936955705463025e-06, "loss": 3.5704, "step": 7135 }, { "epoch": 0.0726318359375, "grad_norm": 13.212722778320312, "learning_rate": 4.936866437648963e-06, "loss": 3.2637, "step": 7140 }, { "epoch": 0.07268269856770833, "grad_norm": 10.858726501464844, "learning_rate": 4.936777107488251e-06, "loss": 3.3638, "step": 7145 }, { "epoch": 0.07273356119791667, "grad_norm": 14.568108558654785, "learning_rate": 4.936687714983174e-06, "loss": 3.2916, "step": 7150 }, { "epoch": 0.072784423828125, "grad_norm": 12.920387268066406, "learning_rate": 4.9365982601360194e-06, "loss": 3.4835, "step": 7155 }, { "epoch": 0.07283528645833333, "grad_norm": 13.888976097106934, "learning_rate": 4.9365087429490765e-06, "loss": 3.4057, "step": 7160 }, { "epoch": 0.07288614908854167, "grad_norm": 8.182246208190918, "learning_rate": 4.936419163424634e-06, "loss": 3.4935, "step": 7165 }, { "epoch": 0.07293701171875, "grad_norm": 9.68957805633545, "learning_rate": 4.936329521564986e-06, "loss": 3.4725, "step": 7170 }, { "epoch": 0.07298787434895833, "grad_norm": 8.523253440856934, "learning_rate": 4.936239817372423e-06, "loss": 3.3035, "step": 7175 }, { "epoch": 0.07303873697916667, "grad_norm": 15.208540916442871, "learning_rate": 4.936150050849242e-06, "loss": 3.3124, "step": 7180 }, { "epoch": 0.073089599609375, "grad_norm": 9.686799049377441, "learning_rate": 4.93606022199774e-06, "loss": 3.1586, "step": 7185 }, { "epoch": 0.07314046223958333, "grad_norm": 17.809568405151367, "learning_rate": 4.935970330820215e-06, "loss": 3.5217, "step": 7190 }, { "epoch": 0.07319132486979167, "grad_norm": 9.103455543518066, "learning_rate": 4.935880377318965e-06, "loss": 3.3448, "step": 7195 }, { "epoch": 0.0732421875, "grad_norm": 17.965932846069336, "learning_rate": 4.935790361496295e-06, "loss": 3.4703, "step": 7200 }, { "epoch": 0.07329305013020833, "grad_norm": 17.913076400756836, "learning_rate": 4.935700283354504e-06, "loss": 3.7618, "step": 7205 }, { "epoch": 0.07334391276041667, "grad_norm": 10.30252456665039, "learning_rate": 4.9356101428959e-06, "loss": 3.0109, "step": 7210 }, { "epoch": 0.073394775390625, "grad_norm": 9.662969589233398, "learning_rate": 4.935519940122787e-06, "loss": 3.4018, "step": 7215 }, { "epoch": 0.07344563802083333, "grad_norm": 15.70295524597168, "learning_rate": 4.935429675037474e-06, "loss": 3.083, "step": 7220 }, { "epoch": 0.07349650065104167, "grad_norm": 8.53459644317627, "learning_rate": 4.935339347642269e-06, "loss": 3.0109, "step": 7225 }, { "epoch": 0.07354736328125, "grad_norm": 15.32571792602539, "learning_rate": 4.935248957939486e-06, "loss": 3.5678, "step": 7230 }, { "epoch": 0.07359822591145833, "grad_norm": 15.416007995605469, "learning_rate": 4.935158505931434e-06, "loss": 3.2472, "step": 7235 }, { "epoch": 0.07364908854166667, "grad_norm": 12.033348083496094, "learning_rate": 4.93506799162043e-06, "loss": 3.3393, "step": 7240 }, { "epoch": 0.073699951171875, "grad_norm": 12.217211723327637, "learning_rate": 4.934977415008787e-06, "loss": 3.7256, "step": 7245 }, { "epoch": 0.07375081380208333, "grad_norm": 13.5538969039917, "learning_rate": 4.934886776098825e-06, "loss": 3.6471, "step": 7250 }, { "epoch": 0.07380167643229167, "grad_norm": 11.107625961303711, "learning_rate": 4.934796074892862e-06, "loss": 3.4801, "step": 7255 }, { "epoch": 0.0738525390625, "grad_norm": 8.796854972839355, "learning_rate": 4.934705311393219e-06, "loss": 2.999, "step": 7260 }, { "epoch": 0.07390340169270833, "grad_norm": 15.173271179199219, "learning_rate": 4.934614485602217e-06, "loss": 3.2074, "step": 7265 }, { "epoch": 0.07395426432291667, "grad_norm": 14.630196571350098, "learning_rate": 4.9345235975221804e-06, "loss": 3.2888, "step": 7270 }, { "epoch": 0.074005126953125, "grad_norm": 10.86033821105957, "learning_rate": 4.934432647155435e-06, "loss": 3.2936, "step": 7275 }, { "epoch": 0.07405598958333333, "grad_norm": 13.982941627502441, "learning_rate": 4.934341634504307e-06, "loss": 3.7184, "step": 7280 }, { "epoch": 0.07410685221354167, "grad_norm": 9.288113594055176, "learning_rate": 4.934250559571126e-06, "loss": 3.1865, "step": 7285 }, { "epoch": 0.07415771484375, "grad_norm": 11.010740280151367, "learning_rate": 4.93415942235822e-06, "loss": 3.442, "step": 7290 }, { "epoch": 0.07420857747395833, "grad_norm": 15.357698440551758, "learning_rate": 4.934068222867923e-06, "loss": 3.6836, "step": 7295 }, { "epoch": 0.07425944010416667, "grad_norm": 14.378852844238281, "learning_rate": 4.9339769611025675e-06, "loss": 3.4716, "step": 7300 }, { "epoch": 0.074310302734375, "grad_norm": 10.144874572753906, "learning_rate": 4.933885637064489e-06, "loss": 3.7662, "step": 7305 }, { "epoch": 0.07436116536458333, "grad_norm": 15.959395408630371, "learning_rate": 4.933794250756022e-06, "loss": 3.2475, "step": 7310 }, { "epoch": 0.07441202799479167, "grad_norm": 15.771014213562012, "learning_rate": 4.933702802179506e-06, "loss": 3.5408, "step": 7315 }, { "epoch": 0.074462890625, "grad_norm": 16.440584182739258, "learning_rate": 4.933611291337282e-06, "loss": 3.5703, "step": 7320 }, { "epoch": 0.07451375325520833, "grad_norm": 13.77549934387207, "learning_rate": 4.933519718231689e-06, "loss": 3.7564, "step": 7325 }, { "epoch": 0.07456461588541667, "grad_norm": 13.803625106811523, "learning_rate": 4.9334280828650714e-06, "loss": 3.9605, "step": 7330 }, { "epoch": 0.074615478515625, "grad_norm": 16.266210556030273, "learning_rate": 4.933336385239772e-06, "loss": 3.6834, "step": 7335 }, { "epoch": 0.07466634114583333, "grad_norm": 12.476020812988281, "learning_rate": 4.933244625358139e-06, "loss": 3.2462, "step": 7340 }, { "epoch": 0.07471720377604167, "grad_norm": 14.898695945739746, "learning_rate": 4.9331528032225186e-06, "loss": 4.0511, "step": 7345 }, { "epoch": 0.07476806640625, "grad_norm": 13.865912437438965, "learning_rate": 4.933060918835261e-06, "loss": 3.5242, "step": 7350 }, { "epoch": 0.07481892903645833, "grad_norm": 15.214755058288574, "learning_rate": 4.932968972198715e-06, "loss": 3.6132, "step": 7355 }, { "epoch": 0.07486979166666667, "grad_norm": 14.871356010437012, "learning_rate": 4.932876963315236e-06, "loss": 3.3851, "step": 7360 }, { "epoch": 0.074920654296875, "grad_norm": 12.997641563415527, "learning_rate": 4.932784892187176e-06, "loss": 3.1629, "step": 7365 }, { "epoch": 0.07497151692708333, "grad_norm": 10.09203052520752, "learning_rate": 4.932692758816892e-06, "loss": 3.3311, "step": 7370 }, { "epoch": 0.07502237955729167, "grad_norm": 8.960355758666992, "learning_rate": 4.932600563206739e-06, "loss": 3.1715, "step": 7375 }, { "epoch": 0.0750732421875, "grad_norm": 16.078197479248047, "learning_rate": 4.932508305359078e-06, "loss": 3.5975, "step": 7380 }, { "epoch": 0.07512410481770833, "grad_norm": 15.18477725982666, "learning_rate": 4.9324159852762685e-06, "loss": 3.341, "step": 7385 }, { "epoch": 0.07517496744791667, "grad_norm": 15.12987995147705, "learning_rate": 4.932323602960673e-06, "loss": 3.5689, "step": 7390 }, { "epoch": 0.075225830078125, "grad_norm": 13.448582649230957, "learning_rate": 4.932231158414653e-06, "loss": 3.6534, "step": 7395 }, { "epoch": 0.07527669270833333, "grad_norm": 9.764447212219238, "learning_rate": 4.932138651640577e-06, "loss": 3.5477, "step": 7400 }, { "epoch": 0.07532755533854167, "grad_norm": 11.549277305603027, "learning_rate": 4.932046082640809e-06, "loss": 3.0506, "step": 7405 }, { "epoch": 0.07537841796875, "grad_norm": 9.322796821594238, "learning_rate": 4.9319534514177196e-06, "loss": 3.2946, "step": 7410 }, { "epoch": 0.07542928059895833, "grad_norm": 10.205277442932129, "learning_rate": 4.931860757973676e-06, "loss": 3.3928, "step": 7415 }, { "epoch": 0.07548014322916667, "grad_norm": 8.97710132598877, "learning_rate": 4.931768002311052e-06, "loss": 3.5807, "step": 7420 }, { "epoch": 0.075531005859375, "grad_norm": 15.988985061645508, "learning_rate": 4.931675184432221e-06, "loss": 3.5203, "step": 7425 }, { "epoch": 0.07558186848958333, "grad_norm": 12.848153114318848, "learning_rate": 4.931582304339556e-06, "loss": 3.1023, "step": 7430 }, { "epoch": 0.07563273111979167, "grad_norm": 14.775106430053711, "learning_rate": 4.931489362035434e-06, "loss": 3.6487, "step": 7435 }, { "epoch": 0.07568359375, "grad_norm": 15.638808250427246, "learning_rate": 4.931396357522233e-06, "loss": 3.4362, "step": 7440 }, { "epoch": 0.07573445638020833, "grad_norm": 17.059715270996094, "learning_rate": 4.931303290802333e-06, "loss": 4.0533, "step": 7445 }, { "epoch": 0.07578531901041667, "grad_norm": 13.491350173950195, "learning_rate": 4.931210161878114e-06, "loss": 3.1253, "step": 7450 }, { "epoch": 0.075836181640625, "grad_norm": 11.535757064819336, "learning_rate": 4.93111697075196e-06, "loss": 3.4529, "step": 7455 }, { "epoch": 0.07588704427083333, "grad_norm": 8.205851554870605, "learning_rate": 4.9310237174262535e-06, "loss": 3.3011, "step": 7460 }, { "epoch": 0.07593790690104167, "grad_norm": 10.262289047241211, "learning_rate": 4.930930401903382e-06, "loss": 3.2739, "step": 7465 }, { "epoch": 0.07598876953125, "grad_norm": 11.151719093322754, "learning_rate": 4.930837024185732e-06, "loss": 3.6304, "step": 7470 }, { "epoch": 0.07603963216145833, "grad_norm": 13.860788345336914, "learning_rate": 4.930743584275694e-06, "loss": 3.3064, "step": 7475 }, { "epoch": 0.07609049479166667, "grad_norm": 12.631816864013672, "learning_rate": 4.930650082175656e-06, "loss": 3.3838, "step": 7480 }, { "epoch": 0.076141357421875, "grad_norm": 15.496430397033691, "learning_rate": 4.930556517888013e-06, "loss": 3.3085, "step": 7485 }, { "epoch": 0.07619222005208333, "grad_norm": 15.001185417175293, "learning_rate": 4.930462891415156e-06, "loss": 3.1146, "step": 7490 }, { "epoch": 0.07624308268229167, "grad_norm": 9.253118515014648, "learning_rate": 4.930369202759484e-06, "loss": 3.5206, "step": 7495 }, { "epoch": 0.0762939453125, "grad_norm": 9.574434280395508, "learning_rate": 4.9302754519233905e-06, "loss": 3.0834, "step": 7500 }, { "epoch": 0.07634480794270833, "grad_norm": 8.028079986572266, "learning_rate": 4.9301816389092775e-06, "loss": 3.3453, "step": 7505 }, { "epoch": 0.07639567057291667, "grad_norm": 15.80912971496582, "learning_rate": 4.930087763719541e-06, "loss": 3.3046, "step": 7510 }, { "epoch": 0.076446533203125, "grad_norm": 12.27437686920166, "learning_rate": 4.929993826356586e-06, "loss": 3.3498, "step": 7515 }, { "epoch": 0.07649739583333333, "grad_norm": 8.595794677734375, "learning_rate": 4.9298998268228154e-06, "loss": 3.4443, "step": 7520 }, { "epoch": 0.07654825846354167, "grad_norm": 14.03144645690918, "learning_rate": 4.929805765120633e-06, "loss": 3.4373, "step": 7525 }, { "epoch": 0.07659912109375, "grad_norm": 16.364065170288086, "learning_rate": 4.929711641252446e-06, "loss": 3.3485, "step": 7530 }, { "epoch": 0.07664998372395833, "grad_norm": 13.637460708618164, "learning_rate": 4.929617455220664e-06, "loss": 3.1559, "step": 7535 }, { "epoch": 0.07670084635416667, "grad_norm": 9.677896499633789, "learning_rate": 4.929523207027693e-06, "loss": 3.1611, "step": 7540 }, { "epoch": 0.076751708984375, "grad_norm": 11.03842830657959, "learning_rate": 4.929428896675949e-06, "loss": 3.468, "step": 7545 }, { "epoch": 0.07680257161458333, "grad_norm": 8.96399974822998, "learning_rate": 4.92933452416784e-06, "loss": 3.2743, "step": 7550 }, { "epoch": 0.07685343424479167, "grad_norm": 14.87559986114502, "learning_rate": 4.929240089505785e-06, "loss": 3.4153, "step": 7555 }, { "epoch": 0.076904296875, "grad_norm": 16.70171356201172, "learning_rate": 4.929145592692197e-06, "loss": 3.3959, "step": 7560 }, { "epoch": 0.07695515950520833, "grad_norm": 10.184401512145996, "learning_rate": 4.929051033729495e-06, "loss": 3.3099, "step": 7565 }, { "epoch": 0.07700602213541667, "grad_norm": 16.423912048339844, "learning_rate": 4.928956412620098e-06, "loss": 3.5852, "step": 7570 }, { "epoch": 0.077056884765625, "grad_norm": 12.25012493133545, "learning_rate": 4.928861729366427e-06, "loss": 3.3323, "step": 7575 }, { "epoch": 0.07710774739583333, "grad_norm": 14.227832794189453, "learning_rate": 4.928766983970905e-06, "loss": 3.7213, "step": 7580 }, { "epoch": 0.07715861002604167, "grad_norm": 9.725828170776367, "learning_rate": 4.928672176435955e-06, "loss": 3.8787, "step": 7585 }, { "epoch": 0.07720947265625, "grad_norm": 13.200632095336914, "learning_rate": 4.928577306764003e-06, "loss": 3.3488, "step": 7590 }, { "epoch": 0.07726033528645833, "grad_norm": 16.116952896118164, "learning_rate": 4.928482374957476e-06, "loss": 3.5104, "step": 7595 }, { "epoch": 0.07731119791666667, "grad_norm": 14.31235408782959, "learning_rate": 4.928387381018803e-06, "loss": 3.4802, "step": 7600 }, { "epoch": 0.077362060546875, "grad_norm": 14.306567192077637, "learning_rate": 4.928292324950415e-06, "loss": 3.1986, "step": 7605 }, { "epoch": 0.07741292317708333, "grad_norm": 10.679328918457031, "learning_rate": 4.9281972067547435e-06, "loss": 3.4726, "step": 7610 }, { "epoch": 0.07746378580729167, "grad_norm": 13.975683212280273, "learning_rate": 4.928102026434221e-06, "loss": 3.5155, "step": 7615 }, { "epoch": 0.0775146484375, "grad_norm": 11.450372695922852, "learning_rate": 4.928006783991285e-06, "loss": 3.26, "step": 7620 }, { "epoch": 0.07756551106770833, "grad_norm": 9.844697952270508, "learning_rate": 4.92791147942837e-06, "loss": 3.552, "step": 7625 }, { "epoch": 0.07761637369791667, "grad_norm": 13.510926246643066, "learning_rate": 4.927816112747915e-06, "loss": 3.2648, "step": 7630 }, { "epoch": 0.077667236328125, "grad_norm": 16.180030822753906, "learning_rate": 4.927720683952361e-06, "loss": 3.8344, "step": 7635 }, { "epoch": 0.07771809895833333, "grad_norm": 10.086830139160156, "learning_rate": 4.9276251930441485e-06, "loss": 3.5012, "step": 7640 }, { "epoch": 0.07776896158854167, "grad_norm": 16.06332778930664, "learning_rate": 4.927529640025721e-06, "loss": 3.5488, "step": 7645 }, { "epoch": 0.07781982421875, "grad_norm": 9.544757843017578, "learning_rate": 4.927434024899522e-06, "loss": 3.3311, "step": 7650 }, { "epoch": 0.07787068684895833, "grad_norm": 10.502907752990723, "learning_rate": 4.927338347668e-06, "loss": 3.7151, "step": 7655 }, { "epoch": 0.07792154947916667, "grad_norm": 21.776485443115234, "learning_rate": 4.927242608333601e-06, "loss": 3.6689, "step": 7660 }, { "epoch": 0.077972412109375, "grad_norm": 9.892070770263672, "learning_rate": 4.927146806898776e-06, "loss": 3.4572, "step": 7665 }, { "epoch": 0.07802327473958333, "grad_norm": 10.099709510803223, "learning_rate": 4.927050943365974e-06, "loss": 3.1367, "step": 7670 }, { "epoch": 0.07807413736979167, "grad_norm": 11.609224319458008, "learning_rate": 4.92695501773765e-06, "loss": 3.6545, "step": 7675 }, { "epoch": 0.078125, "grad_norm": 12.737848281860352, "learning_rate": 4.926859030016257e-06, "loss": 3.4818, "step": 7680 }, { "epoch": 0.07817586263020833, "grad_norm": 11.716086387634277, "learning_rate": 4.926762980204251e-06, "loss": 3.3399, "step": 7685 }, { "epoch": 0.07822672526041667, "grad_norm": 15.350922584533691, "learning_rate": 4.926666868304089e-06, "loss": 3.2643, "step": 7690 }, { "epoch": 0.078277587890625, "grad_norm": 12.929101943969727, "learning_rate": 4.9265706943182305e-06, "loss": 3.1085, "step": 7695 }, { "epoch": 0.07832845052083333, "grad_norm": 11.21131706237793, "learning_rate": 4.926474458249137e-06, "loss": 3.3425, "step": 7700 }, { "epoch": 0.07837931315104167, "grad_norm": 16.155336380004883, "learning_rate": 4.9263781600992675e-06, "loss": 3.7774, "step": 7705 }, { "epoch": 0.07843017578125, "grad_norm": 16.52438735961914, "learning_rate": 4.926281799871089e-06, "loss": 3.6957, "step": 7710 }, { "epoch": 0.07848103841145833, "grad_norm": 15.890446662902832, "learning_rate": 4.926185377567065e-06, "loss": 3.5257, "step": 7715 }, { "epoch": 0.07853190104166667, "grad_norm": 12.618369102478027, "learning_rate": 4.926088893189665e-06, "loss": 3.5028, "step": 7720 }, { "epoch": 0.078582763671875, "grad_norm": 8.554431915283203, "learning_rate": 4.925992346741354e-06, "loss": 3.0891, "step": 7725 }, { "epoch": 0.07863362630208333, "grad_norm": 15.57974624633789, "learning_rate": 4.9258957382246045e-06, "loss": 3.4668, "step": 7730 }, { "epoch": 0.07868448893229167, "grad_norm": 13.180612564086914, "learning_rate": 4.925799067641888e-06, "loss": 3.6996, "step": 7735 }, { "epoch": 0.0787353515625, "grad_norm": 10.547351837158203, "learning_rate": 4.9257023349956765e-06, "loss": 3.8278, "step": 7740 }, { "epoch": 0.07878621419270833, "grad_norm": 18.47353744506836, "learning_rate": 4.925605540288445e-06, "loss": 3.5644, "step": 7745 }, { "epoch": 0.07883707682291667, "grad_norm": 14.555137634277344, "learning_rate": 4.925508683522673e-06, "loss": 3.3353, "step": 7750 }, { "epoch": 0.078887939453125, "grad_norm": 14.03541088104248, "learning_rate": 4.925411764700834e-06, "loss": 3.3763, "step": 7755 }, { "epoch": 0.07893880208333333, "grad_norm": 13.177834510803223, "learning_rate": 4.925314783825411e-06, "loss": 3.6113, "step": 7760 }, { "epoch": 0.07898966471354167, "grad_norm": 12.112380027770996, "learning_rate": 4.925217740898884e-06, "loss": 3.3866, "step": 7765 }, { "epoch": 0.07904052734375, "grad_norm": 12.616769790649414, "learning_rate": 4.925120635923736e-06, "loss": 3.7202, "step": 7770 }, { "epoch": 0.07909138997395833, "grad_norm": 12.70201587677002, "learning_rate": 4.925023468902451e-06, "loss": 3.316, "step": 7775 }, { "epoch": 0.07914225260416667, "grad_norm": 15.856217384338379, "learning_rate": 4.924926239837515e-06, "loss": 3.36, "step": 7780 }, { "epoch": 0.079193115234375, "grad_norm": 12.738685607910156, "learning_rate": 4.9248289487314174e-06, "loss": 3.3463, "step": 7785 }, { "epoch": 0.07924397786458333, "grad_norm": 15.38477897644043, "learning_rate": 4.924731595586645e-06, "loss": 3.3254, "step": 7790 }, { "epoch": 0.07929484049479167, "grad_norm": 12.949041366577148, "learning_rate": 4.924634180405689e-06, "loss": 3.2682, "step": 7795 }, { "epoch": 0.079345703125, "grad_norm": 17.46603775024414, "learning_rate": 4.924536703191043e-06, "loss": 3.5482, "step": 7800 }, { "epoch": 0.07939656575520833, "grad_norm": 15.399724960327148, "learning_rate": 4.9244391639451995e-06, "loss": 3.5545, "step": 7805 }, { "epoch": 0.07944742838541667, "grad_norm": 13.765381813049316, "learning_rate": 4.924341562670655e-06, "loss": 3.2777, "step": 7810 }, { "epoch": 0.079498291015625, "grad_norm": 13.219882011413574, "learning_rate": 4.924243899369906e-06, "loss": 3.4312, "step": 7815 }, { "epoch": 0.07954915364583333, "grad_norm": 15.765204429626465, "learning_rate": 4.924146174045451e-06, "loss": 3.4005, "step": 7820 }, { "epoch": 0.07960001627604167, "grad_norm": 14.722639083862305, "learning_rate": 4.924048386699792e-06, "loss": 3.4344, "step": 7825 }, { "epoch": 0.07965087890625, "grad_norm": 14.24759292602539, "learning_rate": 4.923950537335429e-06, "loss": 3.5187, "step": 7830 }, { "epoch": 0.07970174153645833, "grad_norm": 14.9491548538208, "learning_rate": 4.923852625954866e-06, "loss": 3.3459, "step": 7835 }, { "epoch": 0.07975260416666667, "grad_norm": 10.572219848632812, "learning_rate": 4.9237546525606075e-06, "loss": 3.6514, "step": 7840 }, { "epoch": 0.079803466796875, "grad_norm": 13.39721393585205, "learning_rate": 4.923656617155162e-06, "loss": 3.4159, "step": 7845 }, { "epoch": 0.07985432942708333, "grad_norm": 13.185128211975098, "learning_rate": 4.923558519741035e-06, "loss": 3.3885, "step": 7850 }, { "epoch": 0.07990519205729167, "grad_norm": 10.458941459655762, "learning_rate": 4.923460360320738e-06, "loss": 3.8603, "step": 7855 }, { "epoch": 0.0799560546875, "grad_norm": 16.00554084777832, "learning_rate": 4.923362138896782e-06, "loss": 3.8007, "step": 7860 }, { "epoch": 0.08000691731770833, "grad_norm": 19.089359283447266, "learning_rate": 4.923263855471681e-06, "loss": 3.4031, "step": 7865 }, { "epoch": 0.08005777994791667, "grad_norm": 16.948728561401367, "learning_rate": 4.923165510047948e-06, "loss": 3.4621, "step": 7870 }, { "epoch": 0.080108642578125, "grad_norm": 15.898715019226074, "learning_rate": 4.9230671026281e-06, "loss": 3.2493, "step": 7875 }, { "epoch": 0.08015950520833333, "grad_norm": 15.451108932495117, "learning_rate": 4.922968633214654e-06, "loss": 3.5485, "step": 7880 }, { "epoch": 0.08021036783854167, "grad_norm": 8.44896411895752, "learning_rate": 4.922870101810131e-06, "loss": 3.6708, "step": 7885 }, { "epoch": 0.08026123046875, "grad_norm": 15.81263542175293, "learning_rate": 4.92277150841705e-06, "loss": 3.0769, "step": 7890 }, { "epoch": 0.08031209309895833, "grad_norm": 11.589963912963867, "learning_rate": 4.922672853037934e-06, "loss": 3.1524, "step": 7895 }, { "epoch": 0.08036295572916667, "grad_norm": 11.018848419189453, "learning_rate": 4.922574135675308e-06, "loss": 3.4289, "step": 7900 }, { "epoch": 0.080413818359375, "grad_norm": 14.702198028564453, "learning_rate": 4.922475356331696e-06, "loss": 3.7443, "step": 7905 }, { "epoch": 0.08046468098958333, "grad_norm": 7.636882305145264, "learning_rate": 4.922376515009627e-06, "loss": 3.2372, "step": 7910 }, { "epoch": 0.08051554361979167, "grad_norm": 14.174872398376465, "learning_rate": 4.922277611711629e-06, "loss": 3.2774, "step": 7915 }, { "epoch": 0.08056640625, "grad_norm": 7.712305068969727, "learning_rate": 4.922178646440232e-06, "loss": 2.9999, "step": 7920 }, { "epoch": 0.08061726888020833, "grad_norm": 13.299323081970215, "learning_rate": 4.922079619197968e-06, "loss": 3.4508, "step": 7925 }, { "epoch": 0.08066813151041667, "grad_norm": 12.260127067565918, "learning_rate": 4.9219805299873715e-06, "loss": 3.5971, "step": 7930 }, { "epoch": 0.080718994140625, "grad_norm": 11.661309242248535, "learning_rate": 4.9218813788109776e-06, "loss": 3.2826, "step": 7935 }, { "epoch": 0.08076985677083333, "grad_norm": 14.099932670593262, "learning_rate": 4.921782165671322e-06, "loss": 3.2441, "step": 7940 }, { "epoch": 0.08082071940104167, "grad_norm": 11.960383415222168, "learning_rate": 4.9216828905709445e-06, "loss": 3.2706, "step": 7945 }, { "epoch": 0.08087158203125, "grad_norm": 14.964332580566406, "learning_rate": 4.921583553512384e-06, "loss": 2.997, "step": 7950 }, { "epoch": 0.08092244466145833, "grad_norm": 11.150708198547363, "learning_rate": 4.9214841544981826e-06, "loss": 3.3165, "step": 7955 }, { "epoch": 0.08097330729166667, "grad_norm": 9.034783363342285, "learning_rate": 4.9213846935308816e-06, "loss": 3.4451, "step": 7960 }, { "epoch": 0.081024169921875, "grad_norm": 12.717357635498047, "learning_rate": 4.921285170613029e-06, "loss": 3.4678, "step": 7965 }, { "epoch": 0.08107503255208333, "grad_norm": 15.22363567352295, "learning_rate": 4.921185585747168e-06, "loss": 3.3836, "step": 7970 }, { "epoch": 0.08112589518229167, "grad_norm": 12.619773864746094, "learning_rate": 4.9210859389358475e-06, "loss": 3.3497, "step": 7975 }, { "epoch": 0.0811767578125, "grad_norm": 10.314453125, "learning_rate": 4.920986230181618e-06, "loss": 3.3937, "step": 7980 }, { "epoch": 0.08122762044270833, "grad_norm": 16.396963119506836, "learning_rate": 4.920886459487029e-06, "loss": 3.2124, "step": 7985 }, { "epoch": 0.08127848307291667, "grad_norm": 10.935754776000977, "learning_rate": 4.920786626854634e-06, "loss": 3.3134, "step": 7990 }, { "epoch": 0.081329345703125, "grad_norm": 11.221820831298828, "learning_rate": 4.920686732286988e-06, "loss": 3.3703, "step": 7995 }, { "epoch": 0.08138020833333333, "grad_norm": 15.557806968688965, "learning_rate": 4.9205867757866445e-06, "loss": 3.2063, "step": 8000 }, { "epoch": 0.08143107096354167, "grad_norm": 14.617751121520996, "learning_rate": 4.920486757356162e-06, "loss": 3.4361, "step": 8005 }, { "epoch": 0.08148193359375, "grad_norm": 7.648261070251465, "learning_rate": 4.9203866769981e-06, "loss": 3.1502, "step": 8010 }, { "epoch": 0.08153279622395833, "grad_norm": 13.730716705322266, "learning_rate": 4.920286534715018e-06, "loss": 3.7119, "step": 8015 }, { "epoch": 0.08158365885416667, "grad_norm": 14.1985502243042, "learning_rate": 4.9201863305094786e-06, "loss": 3.4887, "step": 8020 }, { "epoch": 0.081634521484375, "grad_norm": 13.51031494140625, "learning_rate": 4.920086064384046e-06, "loss": 3.0307, "step": 8025 }, { "epoch": 0.08168538411458333, "grad_norm": 11.816555976867676, "learning_rate": 4.919985736341286e-06, "loss": 3.4726, "step": 8030 }, { "epoch": 0.08173624674479167, "grad_norm": 15.351333618164062, "learning_rate": 4.919885346383764e-06, "loss": 3.6725, "step": 8035 }, { "epoch": 0.081787109375, "grad_norm": 17.565916061401367, "learning_rate": 4.919784894514048e-06, "loss": 3.551, "step": 8040 }, { "epoch": 0.08183797200520833, "grad_norm": 11.794846534729004, "learning_rate": 4.91968438073471e-06, "loss": 3.7001, "step": 8045 }, { "epoch": 0.08188883463541667, "grad_norm": 13.469988822937012, "learning_rate": 4.919583805048321e-06, "loss": 3.2706, "step": 8050 }, { "epoch": 0.081939697265625, "grad_norm": 9.399026870727539, "learning_rate": 4.919483167457452e-06, "loss": 3.476, "step": 8055 }, { "epoch": 0.08199055989583333, "grad_norm": 10.455305099487305, "learning_rate": 4.919382467964681e-06, "loss": 3.78, "step": 8060 }, { "epoch": 0.08204142252604167, "grad_norm": 13.494087219238281, "learning_rate": 4.919281706572583e-06, "loss": 3.5696, "step": 8065 }, { "epoch": 0.08209228515625, "grad_norm": 9.601789474487305, "learning_rate": 4.919180883283735e-06, "loss": 3.4697, "step": 8070 }, { "epoch": 0.08214314778645833, "grad_norm": 8.843679428100586, "learning_rate": 4.919079998100719e-06, "loss": 3.3273, "step": 8075 }, { "epoch": 0.08219401041666667, "grad_norm": 17.03862953186035, "learning_rate": 4.918979051026113e-06, "loss": 3.3333, "step": 8080 }, { "epoch": 0.082244873046875, "grad_norm": 9.111615180969238, "learning_rate": 4.918878042062503e-06, "loss": 3.1043, "step": 8085 }, { "epoch": 0.08229573567708333, "grad_norm": 9.032851219177246, "learning_rate": 4.918776971212471e-06, "loss": 3.1896, "step": 8090 }, { "epoch": 0.08234659830729167, "grad_norm": 15.128028869628906, "learning_rate": 4.918675838478603e-06, "loss": 3.5589, "step": 8095 }, { "epoch": 0.0823974609375, "grad_norm": 9.789177894592285, "learning_rate": 4.918574643863488e-06, "loss": 3.4767, "step": 8100 }, { "epoch": 0.08244832356770833, "grad_norm": 8.648837089538574, "learning_rate": 4.918473387369713e-06, "loss": 3.6547, "step": 8105 }, { "epoch": 0.08249918619791667, "grad_norm": 12.925644874572754, "learning_rate": 4.91837206899987e-06, "loss": 3.4412, "step": 8110 }, { "epoch": 0.082550048828125, "grad_norm": 10.411079406738281, "learning_rate": 4.918270688756551e-06, "loss": 3.501, "step": 8115 }, { "epoch": 0.08260091145833333, "grad_norm": 16.97960662841797, "learning_rate": 4.918169246642349e-06, "loss": 3.9448, "step": 8120 }, { "epoch": 0.08265177408854167, "grad_norm": 13.027390480041504, "learning_rate": 4.91806774265986e-06, "loss": 3.2829, "step": 8125 }, { "epoch": 0.08270263671875, "grad_norm": 11.88217544555664, "learning_rate": 4.9179661768116815e-06, "loss": 3.6066, "step": 8130 }, { "epoch": 0.08275349934895833, "grad_norm": 9.799572944641113, "learning_rate": 4.9178645491004115e-06, "loss": 3.2738, "step": 8135 }, { "epoch": 0.08280436197916667, "grad_norm": 12.099909782409668, "learning_rate": 4.91776285952865e-06, "loss": 3.2961, "step": 8140 }, { "epoch": 0.082855224609375, "grad_norm": 11.036294937133789, "learning_rate": 4.917661108098999e-06, "loss": 3.5535, "step": 8145 }, { "epoch": 0.08290608723958333, "grad_norm": 16.11235237121582, "learning_rate": 4.9175592948140614e-06, "loss": 3.5957, "step": 8150 }, { "epoch": 0.08295694986979167, "grad_norm": 12.630952835083008, "learning_rate": 4.917457419676443e-06, "loss": 3.1787, "step": 8155 }, { "epoch": 0.0830078125, "grad_norm": 10.421707153320312, "learning_rate": 4.9173554826887485e-06, "loss": 3.5609, "step": 8160 }, { "epoch": 0.08305867513020833, "grad_norm": 14.865351676940918, "learning_rate": 4.917253483853587e-06, "loss": 3.4347, "step": 8165 }, { "epoch": 0.08310953776041667, "grad_norm": 10.349051475524902, "learning_rate": 4.917151423173568e-06, "loss": 3.3103, "step": 8170 }, { "epoch": 0.083160400390625, "grad_norm": 12.763856887817383, "learning_rate": 4.917049300651303e-06, "loss": 3.2145, "step": 8175 }, { "epoch": 0.08321126302083333, "grad_norm": 13.702238082885742, "learning_rate": 4.916947116289405e-06, "loss": 3.4389, "step": 8180 }, { "epoch": 0.08326212565104167, "grad_norm": 8.125384330749512, "learning_rate": 4.916844870090487e-06, "loss": 3.1778, "step": 8185 }, { "epoch": 0.08331298828125, "grad_norm": 13.532679557800293, "learning_rate": 4.916742562057166e-06, "loss": 3.5005, "step": 8190 }, { "epoch": 0.08336385091145833, "grad_norm": 11.276693344116211, "learning_rate": 4.91664019219206e-06, "loss": 3.1357, "step": 8195 }, { "epoch": 0.08341471354166667, "grad_norm": 10.653390884399414, "learning_rate": 4.916537760497787e-06, "loss": 2.9572, "step": 8200 }, { "epoch": 0.083465576171875, "grad_norm": 19.044883728027344, "learning_rate": 4.9164352669769685e-06, "loss": 3.4532, "step": 8205 }, { "epoch": 0.08351643880208333, "grad_norm": 9.898783683776855, "learning_rate": 4.916332711632227e-06, "loss": 3.3115, "step": 8210 }, { "epoch": 0.08356730143229167, "grad_norm": 7.111556053161621, "learning_rate": 4.916230094466185e-06, "loss": 3.5531, "step": 8215 }, { "epoch": 0.0836181640625, "grad_norm": 15.385550498962402, "learning_rate": 4.916127415481469e-06, "loss": 3.5578, "step": 8220 }, { "epoch": 0.08366902669270833, "grad_norm": 9.553897857666016, "learning_rate": 4.916024674680705e-06, "loss": 3.4616, "step": 8225 }, { "epoch": 0.08371988932291667, "grad_norm": 9.77919864654541, "learning_rate": 4.915921872066524e-06, "loss": 3.5612, "step": 8230 }, { "epoch": 0.083770751953125, "grad_norm": 13.859053611755371, "learning_rate": 4.915819007641553e-06, "loss": 3.3759, "step": 8235 }, { "epoch": 0.08382161458333333, "grad_norm": 13.282938003540039, "learning_rate": 4.915716081408426e-06, "loss": 3.1999, "step": 8240 }, { "epoch": 0.08387247721354167, "grad_norm": 8.742036819458008, "learning_rate": 4.9156130933697756e-06, "loss": 3.6131, "step": 8245 }, { "epoch": 0.08392333984375, "grad_norm": 10.683076858520508, "learning_rate": 4.915510043528237e-06, "loss": 3.6148, "step": 8250 }, { "epoch": 0.08397420247395833, "grad_norm": 13.600173950195312, "learning_rate": 4.915406931886446e-06, "loss": 3.0754, "step": 8255 }, { "epoch": 0.08402506510416667, "grad_norm": 9.838703155517578, "learning_rate": 4.915303758447041e-06, "loss": 3.422, "step": 8260 }, { "epoch": 0.084075927734375, "grad_norm": 17.08401107788086, "learning_rate": 4.915200523212662e-06, "loss": 3.5272, "step": 8265 }, { "epoch": 0.08412679036458333, "grad_norm": 11.889626502990723, "learning_rate": 4.91509722618595e-06, "loss": 3.244, "step": 8270 }, { "epoch": 0.08417765299479167, "grad_norm": 13.750388145446777, "learning_rate": 4.914993867369549e-06, "loss": 3.0886, "step": 8275 }, { "epoch": 0.084228515625, "grad_norm": 12.008491516113281, "learning_rate": 4.914890446766101e-06, "loss": 3.4445, "step": 8280 }, { "epoch": 0.08427937825520833, "grad_norm": 8.624137878417969, "learning_rate": 4.914786964378253e-06, "loss": 3.1456, "step": 8285 }, { "epoch": 0.08433024088541667, "grad_norm": 13.373948097229004, "learning_rate": 4.914683420208654e-06, "loss": 3.1974, "step": 8290 }, { "epoch": 0.084381103515625, "grad_norm": 11.786771774291992, "learning_rate": 4.914579814259952e-06, "loss": 3.7054, "step": 8295 }, { "epoch": 0.08443196614583333, "grad_norm": 12.221219062805176, "learning_rate": 4.914476146534797e-06, "loss": 3.3905, "step": 8300 }, { "epoch": 0.08448282877604167, "grad_norm": 11.495316505432129, "learning_rate": 4.914372417035843e-06, "loss": 3.9629, "step": 8305 }, { "epoch": 0.08453369140625, "grad_norm": 13.219930648803711, "learning_rate": 4.914268625765742e-06, "loss": 3.359, "step": 8310 }, { "epoch": 0.08458455403645833, "grad_norm": 11.233851432800293, "learning_rate": 4.9141647727271515e-06, "loss": 3.6808, "step": 8315 }, { "epoch": 0.08463541666666667, "grad_norm": 10.176496505737305, "learning_rate": 4.914060857922727e-06, "loss": 3.3135, "step": 8320 }, { "epoch": 0.084686279296875, "grad_norm": 10.016979217529297, "learning_rate": 4.9139568813551275e-06, "loss": 3.8656, "step": 8325 }, { "epoch": 0.08473714192708333, "grad_norm": 14.729958534240723, "learning_rate": 4.913852843027013e-06, "loss": 3.4418, "step": 8330 }, { "epoch": 0.08478800455729167, "grad_norm": 16.3724422454834, "learning_rate": 4.913748742941046e-06, "loss": 3.3672, "step": 8335 }, { "epoch": 0.0848388671875, "grad_norm": 9.82769775390625, "learning_rate": 4.91364458109989e-06, "loss": 3.0876, "step": 8340 }, { "epoch": 0.08488972981770833, "grad_norm": 12.202662467956543, "learning_rate": 4.913540357506209e-06, "loss": 3.3755, "step": 8345 }, { "epoch": 0.08494059244791667, "grad_norm": 13.47652530670166, "learning_rate": 4.913436072162671e-06, "loss": 3.3207, "step": 8350 }, { "epoch": 0.084991455078125, "grad_norm": 13.997584342956543, "learning_rate": 4.913331725071942e-06, "loss": 3.4016, "step": 8355 }, { "epoch": 0.08504231770833333, "grad_norm": 10.103306770324707, "learning_rate": 4.9132273162366926e-06, "loss": 3.9784, "step": 8360 }, { "epoch": 0.08509318033854167, "grad_norm": 13.56583023071289, "learning_rate": 4.913122845659595e-06, "loss": 3.5657, "step": 8365 }, { "epoch": 0.08514404296875, "grad_norm": 16.747276306152344, "learning_rate": 4.913018313343322e-06, "loss": 3.3329, "step": 8370 }, { "epoch": 0.08519490559895833, "grad_norm": 15.131979942321777, "learning_rate": 4.912913719290546e-06, "loss": 3.7669, "step": 8375 }, { "epoch": 0.08524576822916667, "grad_norm": 13.406637191772461, "learning_rate": 4.912809063503945e-06, "loss": 3.8842, "step": 8380 }, { "epoch": 0.085296630859375, "grad_norm": 10.718530654907227, "learning_rate": 4.912704345986196e-06, "loss": 3.3867, "step": 8385 }, { "epoch": 0.08534749348958333, "grad_norm": 10.112154006958008, "learning_rate": 4.912599566739979e-06, "loss": 3.1978, "step": 8390 }, { "epoch": 0.08539835611979167, "grad_norm": 9.753875732421875, "learning_rate": 4.912494725767972e-06, "loss": 3.2426, "step": 8395 }, { "epoch": 0.08544921875, "grad_norm": 13.011728286743164, "learning_rate": 4.9123898230728616e-06, "loss": 3.689, "step": 8400 }, { "epoch": 0.08550008138020833, "grad_norm": 10.342684745788574, "learning_rate": 4.912284858657328e-06, "loss": 3.2583, "step": 8405 }, { "epoch": 0.08555094401041667, "grad_norm": 15.096055030822754, "learning_rate": 4.9121798325240574e-06, "loss": 3.797, "step": 8410 }, { "epoch": 0.085601806640625, "grad_norm": 16.6988582611084, "learning_rate": 4.912074744675739e-06, "loss": 3.7339, "step": 8415 }, { "epoch": 0.08565266927083333, "grad_norm": 9.644712448120117, "learning_rate": 4.911969595115059e-06, "loss": 3.6449, "step": 8420 }, { "epoch": 0.08570353190104167, "grad_norm": 9.427045822143555, "learning_rate": 4.911864383844709e-06, "loss": 3.7292, "step": 8425 }, { "epoch": 0.08575439453125, "grad_norm": 12.242361068725586, "learning_rate": 4.9117591108673815e-06, "loss": 3.6643, "step": 8430 }, { "epoch": 0.08580525716145833, "grad_norm": 15.966888427734375, "learning_rate": 4.911653776185768e-06, "loss": 3.7233, "step": 8435 }, { "epoch": 0.08585611979166667, "grad_norm": 13.699934005737305, "learning_rate": 4.9115483798025635e-06, "loss": 3.2923, "step": 8440 }, { "epoch": 0.085906982421875, "grad_norm": 18.62406349182129, "learning_rate": 4.911442921720465e-06, "loss": 3.3712, "step": 8445 }, { "epoch": 0.08595784505208333, "grad_norm": 12.284649848937988, "learning_rate": 4.911337401942172e-06, "loss": 3.0775, "step": 8450 }, { "epoch": 0.08600870768229167, "grad_norm": 15.336729049682617, "learning_rate": 4.911231820470383e-06, "loss": 3.1454, "step": 8455 }, { "epoch": 0.0860595703125, "grad_norm": 12.336068153381348, "learning_rate": 4.911126177307799e-06, "loss": 3.5428, "step": 8460 }, { "epoch": 0.08611043294270833, "grad_norm": 9.04693603515625, "learning_rate": 4.911020472457124e-06, "loss": 4.0488, "step": 8465 }, { "epoch": 0.08616129557291667, "grad_norm": 8.606358528137207, "learning_rate": 4.91091470592106e-06, "loss": 3.2326, "step": 8470 }, { "epoch": 0.086212158203125, "grad_norm": 16.552690505981445, "learning_rate": 4.910808877702317e-06, "loss": 3.6028, "step": 8475 }, { "epoch": 0.08626302083333333, "grad_norm": 10.208168029785156, "learning_rate": 4.910702987803599e-06, "loss": 3.4335, "step": 8480 }, { "epoch": 0.08631388346354167, "grad_norm": 12.139317512512207, "learning_rate": 4.910597036227617e-06, "loss": 3.2395, "step": 8485 }, { "epoch": 0.08636474609375, "grad_norm": 10.915331840515137, "learning_rate": 4.91049102297708e-06, "loss": 3.4295, "step": 8490 }, { "epoch": 0.08641560872395833, "grad_norm": 9.047663688659668, "learning_rate": 4.910384948054703e-06, "loss": 3.7313, "step": 8495 }, { "epoch": 0.08646647135416667, "grad_norm": 11.79930305480957, "learning_rate": 4.910278811463197e-06, "loss": 3.8923, "step": 8500 }, { "epoch": 0.086517333984375, "grad_norm": 9.152587890625, "learning_rate": 4.91017261320528e-06, "loss": 3.4595, "step": 8505 }, { "epoch": 0.08656819661458333, "grad_norm": 10.824846267700195, "learning_rate": 4.910066353283668e-06, "loss": 3.2886, "step": 8510 }, { "epoch": 0.08661905924479167, "grad_norm": 6.95084285736084, "learning_rate": 4.909960031701079e-06, "loss": 3.3848, "step": 8515 }, { "epoch": 0.086669921875, "grad_norm": 16.21605682373047, "learning_rate": 4.9098536484602334e-06, "loss": 3.2412, "step": 8520 }, { "epoch": 0.08672078450520833, "grad_norm": 8.85698127746582, "learning_rate": 4.909747203563855e-06, "loss": 3.5511, "step": 8525 }, { "epoch": 0.08677164713541667, "grad_norm": 16.60267448425293, "learning_rate": 4.909640697014664e-06, "loss": 3.9667, "step": 8530 }, { "epoch": 0.086822509765625, "grad_norm": 14.518190383911133, "learning_rate": 4.909534128815387e-06, "loss": 3.3415, "step": 8535 }, { "epoch": 0.08687337239583333, "grad_norm": 10.557718276977539, "learning_rate": 4.909427498968752e-06, "loss": 3.4739, "step": 8540 }, { "epoch": 0.08692423502604167, "grad_norm": 11.117448806762695, "learning_rate": 4.909320807477485e-06, "loss": 3.345, "step": 8545 }, { "epoch": 0.08697509765625, "grad_norm": 13.413477897644043, "learning_rate": 4.9092140543443145e-06, "loss": 3.8153, "step": 8550 }, { "epoch": 0.08702596028645833, "grad_norm": 11.934691429138184, "learning_rate": 4.909107239571975e-06, "loss": 3.548, "step": 8555 }, { "epoch": 0.08707682291666667, "grad_norm": 10.76050853729248, "learning_rate": 4.9090003631631975e-06, "loss": 3.3043, "step": 8560 }, { "epoch": 0.087127685546875, "grad_norm": 10.168484687805176, "learning_rate": 4.9088934251207165e-06, "loss": 3.8714, "step": 8565 }, { "epoch": 0.08717854817708333, "grad_norm": 13.013615608215332, "learning_rate": 4.908786425447269e-06, "loss": 3.5501, "step": 8570 }, { "epoch": 0.08722941080729167, "grad_norm": 19.23887062072754, "learning_rate": 4.908679364145591e-06, "loss": 3.5044, "step": 8575 }, { "epoch": 0.0872802734375, "grad_norm": 14.924409866333008, "learning_rate": 4.908572241218422e-06, "loss": 3.2358, "step": 8580 }, { "epoch": 0.08733113606770833, "grad_norm": 12.832475662231445, "learning_rate": 4.908465056668504e-06, "loss": 3.6383, "step": 8585 }, { "epoch": 0.08738199869791667, "grad_norm": 15.433663368225098, "learning_rate": 4.908357810498578e-06, "loss": 3.4119, "step": 8590 }, { "epoch": 0.087432861328125, "grad_norm": 8.936971664428711, "learning_rate": 4.908250502711388e-06, "loss": 3.2385, "step": 8595 }, { "epoch": 0.08748372395833333, "grad_norm": 15.772163391113281, "learning_rate": 4.9081431333096805e-06, "loss": 3.5685, "step": 8600 }, { "epoch": 0.08753458658854167, "grad_norm": 11.028748512268066, "learning_rate": 4.908035702296201e-06, "loss": 3.4128, "step": 8605 }, { "epoch": 0.08758544921875, "grad_norm": 12.614755630493164, "learning_rate": 4.907928209673699e-06, "loss": 3.2888, "step": 8610 }, { "epoch": 0.08763631184895833, "grad_norm": 12.599555969238281, "learning_rate": 4.907820655444924e-06, "loss": 3.1925, "step": 8615 }, { "epoch": 0.08768717447916667, "grad_norm": 8.425646781921387, "learning_rate": 4.907713039612629e-06, "loss": 3.2201, "step": 8620 }, { "epoch": 0.087738037109375, "grad_norm": 11.59953784942627, "learning_rate": 4.907605362179566e-06, "loss": 3.2422, "step": 8625 }, { "epoch": 0.08778889973958333, "grad_norm": 7.5655131340026855, "learning_rate": 4.907497623148491e-06, "loss": 3.5319, "step": 8630 }, { "epoch": 0.08783976236979167, "grad_norm": 15.107267379760742, "learning_rate": 4.90738982252216e-06, "loss": 3.5978, "step": 8635 }, { "epoch": 0.087890625, "grad_norm": 15.67873764038086, "learning_rate": 4.90728196030333e-06, "loss": 3.2055, "step": 8640 }, { "epoch": 0.08794148763020833, "grad_norm": 13.261436462402344, "learning_rate": 4.907174036494763e-06, "loss": 3.0959, "step": 8645 }, { "epoch": 0.08799235026041667, "grad_norm": 8.268048286437988, "learning_rate": 4.907066051099219e-06, "loss": 4.1154, "step": 8650 }, { "epoch": 0.088043212890625, "grad_norm": 10.855912208557129, "learning_rate": 4.906958004119459e-06, "loss": 3.7448, "step": 8655 }, { "epoch": 0.08809407552083333, "grad_norm": 11.688394546508789, "learning_rate": 4.90684989555825e-06, "loss": 3.5907, "step": 8660 }, { "epoch": 0.08814493815104167, "grad_norm": 10.814862251281738, "learning_rate": 4.906741725418357e-06, "loss": 3.0817, "step": 8665 }, { "epoch": 0.08819580078125, "grad_norm": 15.317708969116211, "learning_rate": 4.906633493702547e-06, "loss": 3.3432, "step": 8670 }, { "epoch": 0.08824666341145833, "grad_norm": 9.729673385620117, "learning_rate": 4.9065252004135896e-06, "loss": 3.2816, "step": 8675 }, { "epoch": 0.08829752604166667, "grad_norm": 16.496000289916992, "learning_rate": 4.906416845554255e-06, "loss": 3.511, "step": 8680 }, { "epoch": 0.088348388671875, "grad_norm": 16.304975509643555, "learning_rate": 4.906308429127317e-06, "loss": 3.3911, "step": 8685 }, { "epoch": 0.08839925130208333, "grad_norm": 9.595266342163086, "learning_rate": 4.906199951135547e-06, "loss": 3.5494, "step": 8690 }, { "epoch": 0.08845011393229167, "grad_norm": 13.496864318847656, "learning_rate": 4.906091411581722e-06, "loss": 3.0696, "step": 8695 }, { "epoch": 0.0885009765625, "grad_norm": 9.127147674560547, "learning_rate": 4.905982810468619e-06, "loss": 3.5201, "step": 8700 }, { "epoch": 0.08855183919270833, "grad_norm": 17.793336868286133, "learning_rate": 4.905874147799015e-06, "loss": 3.9616, "step": 8705 }, { "epoch": 0.08860270182291667, "grad_norm": 12.619993209838867, "learning_rate": 4.905765423575692e-06, "loss": 3.6193, "step": 8710 }, { "epoch": 0.088653564453125, "grad_norm": 13.0260648727417, "learning_rate": 4.90565663780143e-06, "loss": 3.1035, "step": 8715 }, { "epoch": 0.08870442708333333, "grad_norm": 11.968487739562988, "learning_rate": 4.905547790479015e-06, "loss": 3.8811, "step": 8720 }, { "epoch": 0.08875528971354167, "grad_norm": 14.09224796295166, "learning_rate": 4.905438881611228e-06, "loss": 3.2689, "step": 8725 }, { "epoch": 0.08880615234375, "grad_norm": 15.479565620422363, "learning_rate": 4.905329911200858e-06, "loss": 3.3083, "step": 8730 }, { "epoch": 0.08885701497395833, "grad_norm": 12.190298080444336, "learning_rate": 4.905220879250693e-06, "loss": 3.0763, "step": 8735 }, { "epoch": 0.08890787760416667, "grad_norm": 13.899861335754395, "learning_rate": 4.905111785763521e-06, "loss": 3.5266, "step": 8740 }, { "epoch": 0.088958740234375, "grad_norm": 11.89075756072998, "learning_rate": 4.905002630742135e-06, "loss": 3.9321, "step": 8745 }, { "epoch": 0.08900960286458333, "grad_norm": 13.489803314208984, "learning_rate": 4.904893414189326e-06, "loss": 3.4766, "step": 8750 }, { "epoch": 0.08906046549479167, "grad_norm": 13.605609893798828, "learning_rate": 4.904784136107888e-06, "loss": 3.1691, "step": 8755 }, { "epoch": 0.089111328125, "grad_norm": 10.71879768371582, "learning_rate": 4.90467479650062e-06, "loss": 3.2934, "step": 8760 }, { "epoch": 0.08916219075520833, "grad_norm": 10.595026016235352, "learning_rate": 4.9045653953703156e-06, "loss": 3.1218, "step": 8765 }, { "epoch": 0.08921305338541667, "grad_norm": 11.80540943145752, "learning_rate": 4.9044559327197764e-06, "loss": 3.2589, "step": 8770 }, { "epoch": 0.089263916015625, "grad_norm": 14.13592529296875, "learning_rate": 4.9043464085518026e-06, "loss": 3.3531, "step": 8775 }, { "epoch": 0.08931477864583333, "grad_norm": 12.386794090270996, "learning_rate": 4.904236822869195e-06, "loss": 3.529, "step": 8780 }, { "epoch": 0.08936564127604167, "grad_norm": 15.09712028503418, "learning_rate": 4.904127175674758e-06, "loss": 3.5851, "step": 8785 }, { "epoch": 0.08941650390625, "grad_norm": 22.025758743286133, "learning_rate": 4.904017466971297e-06, "loss": 3.4247, "step": 8790 }, { "epoch": 0.08946736653645833, "grad_norm": 12.594602584838867, "learning_rate": 4.9039076967616196e-06, "loss": 3.5087, "step": 8795 }, { "epoch": 0.08951822916666667, "grad_norm": 12.804073333740234, "learning_rate": 4.903797865048533e-06, "loss": 3.3348, "step": 8800 }, { "epoch": 0.089569091796875, "grad_norm": 10.385623931884766, "learning_rate": 4.903687971834848e-06, "loss": 3.4176, "step": 8805 }, { "epoch": 0.08961995442708333, "grad_norm": 9.570121765136719, "learning_rate": 4.903578017123376e-06, "loss": 3.3923, "step": 8810 }, { "epoch": 0.08967081705729167, "grad_norm": 12.22430419921875, "learning_rate": 4.90346800091693e-06, "loss": 3.6975, "step": 8815 }, { "epoch": 0.0897216796875, "grad_norm": 14.948432922363281, "learning_rate": 4.9033579232183256e-06, "loss": 3.483, "step": 8820 }, { "epoch": 0.08977254231770833, "grad_norm": 14.922759056091309, "learning_rate": 4.903247784030377e-06, "loss": 3.4358, "step": 8825 }, { "epoch": 0.08982340494791667, "grad_norm": 11.016508102416992, "learning_rate": 4.903137583355905e-06, "loss": 3.4084, "step": 8830 }, { "epoch": 0.089874267578125, "grad_norm": 17.078842163085938, "learning_rate": 4.903027321197726e-06, "loss": 3.5926, "step": 8835 }, { "epoch": 0.08992513020833333, "grad_norm": 13.634777069091797, "learning_rate": 4.902916997558665e-06, "loss": 3.5905, "step": 8840 }, { "epoch": 0.08997599283854167, "grad_norm": 8.89342212677002, "learning_rate": 4.902806612441539e-06, "loss": 3.2723, "step": 8845 }, { "epoch": 0.09002685546875, "grad_norm": 14.009095191955566, "learning_rate": 4.902696165849178e-06, "loss": 3.5151, "step": 8850 }, { "epoch": 0.09007771809895833, "grad_norm": 16.494112014770508, "learning_rate": 4.902585657784404e-06, "loss": 3.229, "step": 8855 }, { "epoch": 0.09012858072916667, "grad_norm": 9.716590881347656, "learning_rate": 4.902475088250045e-06, "loss": 3.4538, "step": 8860 }, { "epoch": 0.090179443359375, "grad_norm": 14.583306312561035, "learning_rate": 4.90236445724893e-06, "loss": 3.6892, "step": 8865 }, { "epoch": 0.09023030598958333, "grad_norm": 11.482134819030762, "learning_rate": 4.902253764783891e-06, "loss": 3.3962, "step": 8870 }, { "epoch": 0.09028116861979167, "grad_norm": 14.702147483825684, "learning_rate": 4.902143010857758e-06, "loss": 3.2805, "step": 8875 }, { "epoch": 0.09033203125, "grad_norm": 16.604808807373047, "learning_rate": 4.902032195473366e-06, "loss": 3.2619, "step": 8880 }, { "epoch": 0.09038289388020833, "grad_norm": 12.936920166015625, "learning_rate": 4.901921318633549e-06, "loss": 3.4328, "step": 8885 }, { "epoch": 0.09043375651041667, "grad_norm": 7.750491142272949, "learning_rate": 4.901810380341145e-06, "loss": 3.7113, "step": 8890 }, { "epoch": 0.090484619140625, "grad_norm": 17.840023040771484, "learning_rate": 4.901699380598992e-06, "loss": 3.1851, "step": 8895 }, { "epoch": 0.09053548177083333, "grad_norm": 9.541340827941895, "learning_rate": 4.901588319409929e-06, "loss": 3.5305, "step": 8900 }, { "epoch": 0.09058634440104167, "grad_norm": 15.900449752807617, "learning_rate": 4.901477196776798e-06, "loss": 3.084, "step": 8905 }, { "epoch": 0.09063720703125, "grad_norm": 11.218430519104004, "learning_rate": 4.901366012702443e-06, "loss": 3.5674, "step": 8910 }, { "epoch": 0.09068806966145833, "grad_norm": 12.750350952148438, "learning_rate": 4.901254767189707e-06, "loss": 3.8215, "step": 8915 }, { "epoch": 0.09073893229166667, "grad_norm": 13.617959976196289, "learning_rate": 4.901143460241437e-06, "loss": 3.3986, "step": 8920 }, { "epoch": 0.090789794921875, "grad_norm": 13.11312484741211, "learning_rate": 4.90103209186048e-06, "loss": 3.453, "step": 8925 }, { "epoch": 0.09084065755208333, "grad_norm": 9.750372886657715, "learning_rate": 4.9009206620496875e-06, "loss": 3.348, "step": 8930 }, { "epoch": 0.09089152018229167, "grad_norm": 13.289474487304688, "learning_rate": 4.900809170811908e-06, "loss": 3.284, "step": 8935 }, { "epoch": 0.0909423828125, "grad_norm": 10.578423500061035, "learning_rate": 4.900697618149995e-06, "loss": 3.8172, "step": 8940 }, { "epoch": 0.09099324544270833, "grad_norm": 14.269426345825195, "learning_rate": 4.900586004066803e-06, "loss": 3.8127, "step": 8945 }, { "epoch": 0.09104410807291667, "grad_norm": 11.517721176147461, "learning_rate": 4.900474328565186e-06, "loss": 3.2798, "step": 8950 }, { "epoch": 0.091094970703125, "grad_norm": 12.702051162719727, "learning_rate": 4.900362591648003e-06, "loss": 3.1521, "step": 8955 }, { "epoch": 0.09114583333333333, "grad_norm": 13.737582206726074, "learning_rate": 4.900250793318112e-06, "loss": 3.3586, "step": 8960 }, { "epoch": 0.09119669596354167, "grad_norm": 14.892784118652344, "learning_rate": 4.900138933578373e-06, "loss": 3.4807, "step": 8965 }, { "epoch": 0.09124755859375, "grad_norm": 11.482332229614258, "learning_rate": 4.9000270124316495e-06, "loss": 3.0206, "step": 8970 }, { "epoch": 0.09129842122395833, "grad_norm": 13.81802749633789, "learning_rate": 4.899915029880803e-06, "loss": 3.3766, "step": 8975 }, { "epoch": 0.09134928385416667, "grad_norm": 11.666979789733887, "learning_rate": 4.899802985928699e-06, "loss": 3.2013, "step": 8980 }, { "epoch": 0.091400146484375, "grad_norm": 14.300288200378418, "learning_rate": 4.899690880578205e-06, "loss": 3.1857, "step": 8985 }, { "epoch": 0.09145100911458333, "grad_norm": 9.281865119934082, "learning_rate": 4.899578713832188e-06, "loss": 3.4702, "step": 8990 }, { "epoch": 0.09150187174479167, "grad_norm": 13.91549015045166, "learning_rate": 4.899466485693518e-06, "loss": 3.9119, "step": 8995 }, { "epoch": 0.091552734375, "grad_norm": 8.730186462402344, "learning_rate": 4.899354196165068e-06, "loss": 3.136, "step": 9000 }, { "epoch": 0.09160359700520833, "grad_norm": 9.029413223266602, "learning_rate": 4.899241845249708e-06, "loss": 3.6015, "step": 9005 }, { "epoch": 0.09165445963541667, "grad_norm": 10.518607139587402, "learning_rate": 4.899129432950316e-06, "loss": 3.5339, "step": 9010 }, { "epoch": 0.091705322265625, "grad_norm": 13.978117942810059, "learning_rate": 4.899016959269764e-06, "loss": 3.6865, "step": 9015 }, { "epoch": 0.09175618489583333, "grad_norm": 13.56008243560791, "learning_rate": 4.898904424210934e-06, "loss": 3.1692, "step": 9020 }, { "epoch": 0.09180704752604167, "grad_norm": 11.420557975769043, "learning_rate": 4.898791827776701e-06, "loss": 3.3184, "step": 9025 }, { "epoch": 0.09185791015625, "grad_norm": 11.740954399108887, "learning_rate": 4.898679169969949e-06, "loss": 3.5797, "step": 9030 }, { "epoch": 0.09190877278645833, "grad_norm": 11.412757873535156, "learning_rate": 4.898566450793558e-06, "loss": 3.4047, "step": 9035 }, { "epoch": 0.09195963541666667, "grad_norm": 7.887825965881348, "learning_rate": 4.898453670250413e-06, "loss": 3.3213, "step": 9040 }, { "epoch": 0.092010498046875, "grad_norm": 10.620329856872559, "learning_rate": 4.8983408283433995e-06, "loss": 3.3956, "step": 9045 }, { "epoch": 0.09206136067708333, "grad_norm": 10.935351371765137, "learning_rate": 4.898227925075405e-06, "loss": 3.1719, "step": 9050 }, { "epoch": 0.09211222330729167, "grad_norm": 13.198664665222168, "learning_rate": 4.898114960449317e-06, "loss": 3.2274, "step": 9055 }, { "epoch": 0.0921630859375, "grad_norm": 7.661378383636475, "learning_rate": 4.8980019344680255e-06, "loss": 3.4533, "step": 9060 }, { "epoch": 0.09221394856770833, "grad_norm": 14.63359546661377, "learning_rate": 4.897888847134424e-06, "loss": 3.2218, "step": 9065 }, { "epoch": 0.09226481119791667, "grad_norm": 10.786942481994629, "learning_rate": 4.897775698451404e-06, "loss": 3.0604, "step": 9070 }, { "epoch": 0.092315673828125, "grad_norm": 13.290276527404785, "learning_rate": 4.897662488421861e-06, "loss": 3.4856, "step": 9075 }, { "epoch": 0.09236653645833333, "grad_norm": 13.736598014831543, "learning_rate": 4.897549217048692e-06, "loss": 3.4483, "step": 9080 }, { "epoch": 0.09241739908854167, "grad_norm": 13.418255805969238, "learning_rate": 4.897435884334795e-06, "loss": 3.3985, "step": 9085 }, { "epoch": 0.09246826171875, "grad_norm": 11.04183578491211, "learning_rate": 4.897322490283069e-06, "loss": 3.746, "step": 9090 }, { "epoch": 0.09251912434895833, "grad_norm": 14.375475883483887, "learning_rate": 4.897209034896414e-06, "loss": 3.3322, "step": 9095 }, { "epoch": 0.09256998697916667, "grad_norm": 12.415508270263672, "learning_rate": 4.897095518177735e-06, "loss": 3.221, "step": 9100 }, { "epoch": 0.092620849609375, "grad_norm": 13.276843070983887, "learning_rate": 4.896981940129935e-06, "loss": 3.1526, "step": 9105 }, { "epoch": 0.09267171223958333, "grad_norm": 16.24017333984375, "learning_rate": 4.8968683007559204e-06, "loss": 3.1622, "step": 9110 }, { "epoch": 0.09272257486979167, "grad_norm": 13.676992416381836, "learning_rate": 4.8967546000585985e-06, "loss": 3.4367, "step": 9115 }, { "epoch": 0.0927734375, "grad_norm": 12.518017768859863, "learning_rate": 4.896640838040878e-06, "loss": 3.149, "step": 9120 }, { "epoch": 0.09282430013020833, "grad_norm": 9.173798561096191, "learning_rate": 4.89652701470567e-06, "loss": 3.4939, "step": 9125 }, { "epoch": 0.09287516276041667, "grad_norm": 9.206432342529297, "learning_rate": 4.896413130055887e-06, "loss": 3.2798, "step": 9130 }, { "epoch": 0.092926025390625, "grad_norm": 10.400619506835938, "learning_rate": 4.896299184094441e-06, "loss": 3.5009, "step": 9135 }, { "epoch": 0.09297688802083333, "grad_norm": 14.5686616897583, "learning_rate": 4.896185176824249e-06, "loss": 3.0607, "step": 9140 }, { "epoch": 0.09302775065104167, "grad_norm": 15.614046096801758, "learning_rate": 4.8960711082482275e-06, "loss": 3.5764, "step": 9145 }, { "epoch": 0.09307861328125, "grad_norm": 11.736412048339844, "learning_rate": 4.895956978369294e-06, "loss": 3.2503, "step": 9150 }, { "epoch": 0.09312947591145833, "grad_norm": 8.38943862915039, "learning_rate": 4.895842787190369e-06, "loss": 3.5593, "step": 9155 }, { "epoch": 0.09318033854166667, "grad_norm": 10.80936050415039, "learning_rate": 4.895728534714375e-06, "loss": 3.7306, "step": 9160 }, { "epoch": 0.093231201171875, "grad_norm": 8.9891939163208, "learning_rate": 4.895614220944233e-06, "loss": 3.5101, "step": 9165 }, { "epoch": 0.09328206380208333, "grad_norm": 10.55257511138916, "learning_rate": 4.895499845882869e-06, "loss": 3.642, "step": 9170 }, { "epoch": 0.09333292643229167, "grad_norm": 11.787149429321289, "learning_rate": 4.895385409533211e-06, "loss": 3.3967, "step": 9175 }, { "epoch": 0.0933837890625, "grad_norm": 14.418353080749512, "learning_rate": 4.895270911898183e-06, "loss": 3.3822, "step": 9180 }, { "epoch": 0.09343465169270833, "grad_norm": 10.53858470916748, "learning_rate": 4.895156352980718e-06, "loss": 3.5411, "step": 9185 }, { "epoch": 0.09348551432291667, "grad_norm": 14.017937660217285, "learning_rate": 4.895041732783745e-06, "loss": 3.2294, "step": 9190 }, { "epoch": 0.093536376953125, "grad_norm": 9.317304611206055, "learning_rate": 4.8949270513101965e-06, "loss": 3.0859, "step": 9195 }, { "epoch": 0.09358723958333333, "grad_norm": 14.010085105895996, "learning_rate": 4.894812308563007e-06, "loss": 3.3879, "step": 9200 }, { "epoch": 0.09363810221354167, "grad_norm": 14.181611061096191, "learning_rate": 4.8946975045451125e-06, "loss": 3.4383, "step": 9205 }, { "epoch": 0.09368896484375, "grad_norm": 17.099210739135742, "learning_rate": 4.894582639259451e-06, "loss": 3.2286, "step": 9210 }, { "epoch": 0.09373982747395833, "grad_norm": 15.619363784790039, "learning_rate": 4.894467712708959e-06, "loss": 3.5737, "step": 9215 }, { "epoch": 0.09379069010416667, "grad_norm": 8.870353698730469, "learning_rate": 4.8943527248965786e-06, "loss": 3.2734, "step": 9220 }, { "epoch": 0.093841552734375, "grad_norm": 8.589174270629883, "learning_rate": 4.894237675825251e-06, "loss": 3.5581, "step": 9225 }, { "epoch": 0.09389241536458333, "grad_norm": 16.112144470214844, "learning_rate": 4.89412256549792e-06, "loss": 3.4103, "step": 9230 }, { "epoch": 0.09394327799479167, "grad_norm": 10.410552978515625, "learning_rate": 4.89400739391753e-06, "loss": 3.4895, "step": 9235 }, { "epoch": 0.093994140625, "grad_norm": 13.545750617980957, "learning_rate": 4.89389216108703e-06, "loss": 3.2598, "step": 9240 }, { "epoch": 0.09404500325520833, "grad_norm": 18.490036010742188, "learning_rate": 4.893776867009365e-06, "loss": 3.3705, "step": 9245 }, { "epoch": 0.09409586588541667, "grad_norm": 12.499258041381836, "learning_rate": 4.893661511687487e-06, "loss": 3.3089, "step": 9250 }, { "epoch": 0.094146728515625, "grad_norm": 46.41202163696289, "learning_rate": 4.893546095124346e-06, "loss": 3.46, "step": 9255 }, { "epoch": 0.09419759114583333, "grad_norm": 13.263053894042969, "learning_rate": 4.893430617322895e-06, "loss": 3.4889, "step": 9260 }, { "epoch": 0.09424845377604167, "grad_norm": 7.173794269561768, "learning_rate": 4.8933150782860905e-06, "loss": 3.4028, "step": 9265 }, { "epoch": 0.09429931640625, "grad_norm": 15.863984107971191, "learning_rate": 4.893199478016886e-06, "loss": 3.1168, "step": 9270 }, { "epoch": 0.09435017903645833, "grad_norm": 9.970428466796875, "learning_rate": 4.8930838165182405e-06, "loss": 3.3857, "step": 9275 }, { "epoch": 0.09440104166666667, "grad_norm": 6.910197734832764, "learning_rate": 4.892968093793112e-06, "loss": 3.6812, "step": 9280 }, { "epoch": 0.094451904296875, "grad_norm": 12.154581069946289, "learning_rate": 4.892852309844462e-06, "loss": 3.459, "step": 9285 }, { "epoch": 0.09450276692708333, "grad_norm": 13.089887619018555, "learning_rate": 4.892736464675254e-06, "loss": 3.75, "step": 9290 }, { "epoch": 0.09455362955729167, "grad_norm": 12.283695220947266, "learning_rate": 4.89262055828845e-06, "loss": 3.5142, "step": 9295 }, { "epoch": 0.0946044921875, "grad_norm": 16.34111976623535, "learning_rate": 4.892504590687016e-06, "loss": 3.4612, "step": 9300 }, { "epoch": 0.09465535481770833, "grad_norm": 14.332940101623535, "learning_rate": 4.89238856187392e-06, "loss": 2.9819, "step": 9305 }, { "epoch": 0.09470621744791667, "grad_norm": 9.912870407104492, "learning_rate": 4.892272471852128e-06, "loss": 3.3375, "step": 9310 }, { "epoch": 0.094757080078125, "grad_norm": 18.02761459350586, "learning_rate": 4.892156320624613e-06, "loss": 3.5184, "step": 9315 }, { "epoch": 0.09480794270833333, "grad_norm": 14.656006813049316, "learning_rate": 4.892040108194346e-06, "loss": 3.7168, "step": 9320 }, { "epoch": 0.09485880533854167, "grad_norm": 14.194197654724121, "learning_rate": 4.8919238345643e-06, "loss": 3.7077, "step": 9325 }, { "epoch": 0.09490966796875, "grad_norm": 14.731765747070312, "learning_rate": 4.891807499737449e-06, "loss": 3.526, "step": 9330 }, { "epoch": 0.09496053059895833, "grad_norm": 11.650579452514648, "learning_rate": 4.891691103716769e-06, "loss": 3.2138, "step": 9335 }, { "epoch": 0.09501139322916667, "grad_norm": 8.169177055358887, "learning_rate": 4.89157464650524e-06, "loss": 3.4209, "step": 9340 }, { "epoch": 0.095062255859375, "grad_norm": 13.967549324035645, "learning_rate": 4.89145812810584e-06, "loss": 3.523, "step": 9345 }, { "epoch": 0.09511311848958333, "grad_norm": 15.903494834899902, "learning_rate": 4.891341548521552e-06, "loss": 3.5998, "step": 9350 }, { "epoch": 0.09516398111979167, "grad_norm": 18.27981185913086, "learning_rate": 4.8912249077553566e-06, "loss": 4.1232, "step": 9355 }, { "epoch": 0.09521484375, "grad_norm": 12.90363597869873, "learning_rate": 4.8911082058102375e-06, "loss": 3.6707, "step": 9360 }, { "epoch": 0.09526570638020833, "grad_norm": 15.968925476074219, "learning_rate": 4.890991442689184e-06, "loss": 3.1965, "step": 9365 }, { "epoch": 0.09531656901041667, "grad_norm": 12.801103591918945, "learning_rate": 4.890874618395179e-06, "loss": 3.2281, "step": 9370 }, { "epoch": 0.095367431640625, "grad_norm": 9.642354011535645, "learning_rate": 4.890757732931215e-06, "loss": 3.6309, "step": 9375 }, { "epoch": 0.09541829427083333, "grad_norm": 18.670930862426758, "learning_rate": 4.8906407863002805e-06, "loss": 3.7841, "step": 9380 }, { "epoch": 0.09546915690104167, "grad_norm": 12.024273872375488, "learning_rate": 4.8905237785053675e-06, "loss": 3.3917, "step": 9385 }, { "epoch": 0.09552001953125, "grad_norm": 8.89647102355957, "learning_rate": 4.8904067095494714e-06, "loss": 3.703, "step": 9390 }, { "epoch": 0.09557088216145833, "grad_norm": 11.106717109680176, "learning_rate": 4.890289579435585e-06, "loss": 3.2822, "step": 9395 }, { "epoch": 0.09562174479166667, "grad_norm": 14.37429428100586, "learning_rate": 4.8901723881667075e-06, "loss": 3.3098, "step": 9400 }, { "epoch": 0.095672607421875, "grad_norm": 12.311078071594238, "learning_rate": 4.890055135745835e-06, "loss": 3.1674, "step": 9405 }, { "epoch": 0.09572347005208333, "grad_norm": 13.66702938079834, "learning_rate": 4.88993782217597e-06, "loss": 3.0287, "step": 9410 }, { "epoch": 0.09577433268229167, "grad_norm": 11.145536422729492, "learning_rate": 4.889820447460111e-06, "loss": 3.3768, "step": 9415 }, { "epoch": 0.0958251953125, "grad_norm": 12.304450988769531, "learning_rate": 4.889703011601262e-06, "loss": 3.5019, "step": 9420 }, { "epoch": 0.09587605794270833, "grad_norm": 9.890800476074219, "learning_rate": 4.889585514602429e-06, "loss": 3.1514, "step": 9425 }, { "epoch": 0.09592692057291667, "grad_norm": 9.260703086853027, "learning_rate": 4.889467956466616e-06, "loss": 3.4432, "step": 9430 }, { "epoch": 0.095977783203125, "grad_norm": 13.874801635742188, "learning_rate": 4.889350337196832e-06, "loss": 3.2378, "step": 9435 }, { "epoch": 0.09602864583333333, "grad_norm": 13.591817855834961, "learning_rate": 4.889232656796086e-06, "loss": 3.4383, "step": 9440 }, { "epoch": 0.09607950846354167, "grad_norm": 9.10405445098877, "learning_rate": 4.8891149152673875e-06, "loss": 3.4487, "step": 9445 }, { "epoch": 0.09613037109375, "grad_norm": 8.167961120605469, "learning_rate": 4.888997112613752e-06, "loss": 3.6359, "step": 9450 }, { "epoch": 0.09618123372395833, "grad_norm": 14.467639923095703, "learning_rate": 4.888879248838191e-06, "loss": 3.1424, "step": 9455 }, { "epoch": 0.09623209635416667, "grad_norm": 12.446633338928223, "learning_rate": 4.888761323943721e-06, "loss": 3.5426, "step": 9460 }, { "epoch": 0.096282958984375, "grad_norm": 11.570879936218262, "learning_rate": 4.888643337933358e-06, "loss": 3.5324, "step": 9465 }, { "epoch": 0.09633382161458333, "grad_norm": 13.078673362731934, "learning_rate": 4.8885252908101226e-06, "loss": 3.3334, "step": 9470 }, { "epoch": 0.09638468424479167, "grad_norm": 10.936042785644531, "learning_rate": 4.888407182577032e-06, "loss": 3.2394, "step": 9475 }, { "epoch": 0.096435546875, "grad_norm": 13.004395484924316, "learning_rate": 4.888289013237112e-06, "loss": 3.3966, "step": 9480 }, { "epoch": 0.09648640950520833, "grad_norm": 11.469902992248535, "learning_rate": 4.888170782793382e-06, "loss": 3.717, "step": 9485 }, { "epoch": 0.09653727213541667, "grad_norm": 14.799452781677246, "learning_rate": 4.888052491248869e-06, "loss": 3.4483, "step": 9490 }, { "epoch": 0.096588134765625, "grad_norm": 10.995068550109863, "learning_rate": 4.887934138606599e-06, "loss": 3.3911, "step": 9495 }, { "epoch": 0.09663899739583333, "grad_norm": 13.323307037353516, "learning_rate": 4.8878157248696e-06, "loss": 3.749, "step": 9500 }, { "epoch": 0.09668986002604167, "grad_norm": 20.117530822753906, "learning_rate": 4.887697250040901e-06, "loss": 3.5721, "step": 9505 }, { "epoch": 0.09674072265625, "grad_norm": 10.738547325134277, "learning_rate": 4.887578714123536e-06, "loss": 3.3881, "step": 9510 }, { "epoch": 0.09679158528645833, "grad_norm": 12.129579544067383, "learning_rate": 4.887460117120533e-06, "loss": 3.5233, "step": 9515 }, { "epoch": 0.09684244791666667, "grad_norm": 14.902997970581055, "learning_rate": 4.88734145903493e-06, "loss": 3.5717, "step": 9520 }, { "epoch": 0.096893310546875, "grad_norm": 11.737396240234375, "learning_rate": 4.887222739869761e-06, "loss": 3.175, "step": 9525 }, { "epoch": 0.09694417317708333, "grad_norm": 6.6605730056762695, "learning_rate": 4.8871039596280654e-06, "loss": 3.4191, "step": 9530 }, { "epoch": 0.09699503580729167, "grad_norm": 11.67459487915039, "learning_rate": 4.88698511831288e-06, "loss": 3.4298, "step": 9535 }, { "epoch": 0.0970458984375, "grad_norm": 7.061823844909668, "learning_rate": 4.886866215927246e-06, "loss": 3.4257, "step": 9540 }, { "epoch": 0.09709676106770833, "grad_norm": 14.894327163696289, "learning_rate": 4.8867472524742055e-06, "loss": 3.4476, "step": 9545 }, { "epoch": 0.09714762369791667, "grad_norm": 14.296971321105957, "learning_rate": 4.8866282279568024e-06, "loss": 3.7858, "step": 9550 }, { "epoch": 0.097198486328125, "grad_norm": 12.031661033630371, "learning_rate": 4.886509142378082e-06, "loss": 3.5772, "step": 9555 }, { "epoch": 0.09724934895833333, "grad_norm": 13.860501289367676, "learning_rate": 4.88638999574109e-06, "loss": 3.259, "step": 9560 }, { "epoch": 0.09730021158854167, "grad_norm": 12.739351272583008, "learning_rate": 4.886270788048877e-06, "loss": 3.358, "step": 9565 }, { "epoch": 0.09735107421875, "grad_norm": 8.996005058288574, "learning_rate": 4.8861515193044905e-06, "loss": 3.0429, "step": 9570 }, { "epoch": 0.09740193684895833, "grad_norm": 10.95101261138916, "learning_rate": 4.886032189510983e-06, "loss": 3.8154, "step": 9575 }, { "epoch": 0.09745279947916667, "grad_norm": 13.15895938873291, "learning_rate": 4.885912798671408e-06, "loss": 3.3369, "step": 9580 }, { "epoch": 0.097503662109375, "grad_norm": 13.110836029052734, "learning_rate": 4.885793346788819e-06, "loss": 3.5893, "step": 9585 }, { "epoch": 0.09755452473958333, "grad_norm": 13.61912727355957, "learning_rate": 4.885673833866273e-06, "loss": 3.3841, "step": 9590 }, { "epoch": 0.09760538736979167, "grad_norm": 8.887752532958984, "learning_rate": 4.885554259906827e-06, "loss": 3.7583, "step": 9595 }, { "epoch": 0.09765625, "grad_norm": 14.288346290588379, "learning_rate": 4.885434624913541e-06, "loss": 3.0627, "step": 9600 }, { "epoch": 0.09770711263020833, "grad_norm": 13.13482666015625, "learning_rate": 4.8853149288894765e-06, "loss": 3.6814, "step": 9605 }, { "epoch": 0.09775797526041667, "grad_norm": 10.950799942016602, "learning_rate": 4.885195171837694e-06, "loss": 3.0417, "step": 9610 }, { "epoch": 0.097808837890625, "grad_norm": 15.985264778137207, "learning_rate": 4.885075353761258e-06, "loss": 3.4947, "step": 9615 }, { "epoch": 0.09785970052083333, "grad_norm": 11.092909812927246, "learning_rate": 4.884955474663235e-06, "loss": 3.6053, "step": 9620 }, { "epoch": 0.09791056315104167, "grad_norm": 10.567216873168945, "learning_rate": 4.884835534546692e-06, "loss": 3.5717, "step": 9625 }, { "epoch": 0.09796142578125, "grad_norm": 16.9993896484375, "learning_rate": 4.884715533414696e-06, "loss": 3.8297, "step": 9630 }, { "epoch": 0.09801228841145833, "grad_norm": 9.653956413269043, "learning_rate": 4.884595471270319e-06, "loss": 3.2011, "step": 9635 }, { "epoch": 0.09806315104166667, "grad_norm": 7.467728137969971, "learning_rate": 4.884475348116631e-06, "loss": 3.3446, "step": 9640 }, { "epoch": 0.098114013671875, "grad_norm": 16.218908309936523, "learning_rate": 4.884355163956708e-06, "loss": 3.4298, "step": 9645 }, { "epoch": 0.09816487630208333, "grad_norm": 8.244502067565918, "learning_rate": 4.884234918793622e-06, "loss": 3.3024, "step": 9650 }, { "epoch": 0.09821573893229167, "grad_norm": 13.385653495788574, "learning_rate": 4.884114612630451e-06, "loss": 3.2548, "step": 9655 }, { "epoch": 0.0982666015625, "grad_norm": 12.53419303894043, "learning_rate": 4.883994245470274e-06, "loss": 3.0654, "step": 9660 }, { "epoch": 0.09831746419270833, "grad_norm": 7.7406005859375, "learning_rate": 4.883873817316168e-06, "loss": 3.7867, "step": 9665 }, { "epoch": 0.09836832682291667, "grad_norm": 16.32261085510254, "learning_rate": 4.883753328171216e-06, "loss": 3.5244, "step": 9670 }, { "epoch": 0.098419189453125, "grad_norm": 14.094369888305664, "learning_rate": 4.8836327780385e-06, "loss": 3.6854, "step": 9675 }, { "epoch": 0.09847005208333333, "grad_norm": 12.182512283325195, "learning_rate": 4.883512166921104e-06, "loss": 3.5938, "step": 9680 }, { "epoch": 0.09852091471354167, "grad_norm": 11.643973350524902, "learning_rate": 4.883391494822114e-06, "loss": 3.3795, "step": 9685 }, { "epoch": 0.09857177734375, "grad_norm": 13.109829902648926, "learning_rate": 4.883270761744617e-06, "loss": 3.4673, "step": 9690 }, { "epoch": 0.09862263997395833, "grad_norm": 9.588884353637695, "learning_rate": 4.883149967691704e-06, "loss": 3.4358, "step": 9695 }, { "epoch": 0.09867350260416667, "grad_norm": 16.999698638916016, "learning_rate": 4.883029112666463e-06, "loss": 3.4918, "step": 9700 }, { "epoch": 0.098724365234375, "grad_norm": 14.010063171386719, "learning_rate": 4.882908196671987e-06, "loss": 3.5411, "step": 9705 }, { "epoch": 0.09877522786458333, "grad_norm": 14.857710838317871, "learning_rate": 4.88278721971137e-06, "loss": 3.4874, "step": 9710 }, { "epoch": 0.09882609049479167, "grad_norm": 12.407607078552246, "learning_rate": 4.882666181787707e-06, "loss": 3.2071, "step": 9715 }, { "epoch": 0.098876953125, "grad_norm": 12.295507431030273, "learning_rate": 4.882545082904094e-06, "loss": 3.3899, "step": 9720 }, { "epoch": 0.09892781575520833, "grad_norm": 12.28351879119873, "learning_rate": 4.88242392306363e-06, "loss": 3.8177, "step": 9725 }, { "epoch": 0.09897867838541667, "grad_norm": 8.689821243286133, "learning_rate": 4.882302702269415e-06, "loss": 3.5002, "step": 9730 }, { "epoch": 0.099029541015625, "grad_norm": 10.74190616607666, "learning_rate": 4.882181420524548e-06, "loss": 3.1903, "step": 9735 }, { "epoch": 0.09908040364583333, "grad_norm": 11.707921981811523, "learning_rate": 4.882060077832137e-06, "loss": 3.2921, "step": 9740 }, { "epoch": 0.09913126627604167, "grad_norm": 13.249473571777344, "learning_rate": 4.881938674195282e-06, "loss": 3.3386, "step": 9745 }, { "epoch": 0.09918212890625, "grad_norm": 16.1980037689209, "learning_rate": 4.88181720961709e-06, "loss": 3.5874, "step": 9750 }, { "epoch": 0.09923299153645833, "grad_norm": 14.430301666259766, "learning_rate": 4.88169568410067e-06, "loss": 3.2323, "step": 9755 }, { "epoch": 0.09928385416666667, "grad_norm": 8.47429084777832, "learning_rate": 4.881574097649131e-06, "loss": 3.2952, "step": 9760 }, { "epoch": 0.099334716796875, "grad_norm": 8.88611888885498, "learning_rate": 4.881452450265583e-06, "loss": 3.3712, "step": 9765 }, { "epoch": 0.09938557942708333, "grad_norm": 10.999044418334961, "learning_rate": 4.881330741953137e-06, "loss": 3.504, "step": 9770 }, { "epoch": 0.09943644205729167, "grad_norm": 13.003782272338867, "learning_rate": 4.88120897271491e-06, "loss": 3.8129, "step": 9775 }, { "epoch": 0.0994873046875, "grad_norm": 11.573512077331543, "learning_rate": 4.881087142554015e-06, "loss": 3.3195, "step": 9780 }, { "epoch": 0.09953816731770833, "grad_norm": 16.704687118530273, "learning_rate": 4.880965251473571e-06, "loss": 3.0435, "step": 9785 }, { "epoch": 0.09958902994791667, "grad_norm": 13.198221206665039, "learning_rate": 4.8808432994766944e-06, "loss": 3.2013, "step": 9790 }, { "epoch": 0.099639892578125, "grad_norm": 9.851302146911621, "learning_rate": 4.880721286566506e-06, "loss": 3.3261, "step": 9795 }, { "epoch": 0.09969075520833333, "grad_norm": 14.158812522888184, "learning_rate": 4.880599212746128e-06, "loss": 3.2733, "step": 9800 }, { "epoch": 0.09974161783854167, "grad_norm": 15.338472366333008, "learning_rate": 4.880477078018684e-06, "loss": 3.3534, "step": 9805 }, { "epoch": 0.09979248046875, "grad_norm": 14.599812507629395, "learning_rate": 4.8803548823872985e-06, "loss": 3.6244, "step": 9810 }, { "epoch": 0.09984334309895833, "grad_norm": 11.007782936096191, "learning_rate": 4.880232625855096e-06, "loss": 3.0961, "step": 9815 }, { "epoch": 0.09989420572916667, "grad_norm": 13.516772270202637, "learning_rate": 4.880110308425207e-06, "loss": 3.3936, "step": 9820 }, { "epoch": 0.099945068359375, "grad_norm": 7.3785905838012695, "learning_rate": 4.8799879301007596e-06, "loss": 3.7464, "step": 9825 }, { "epoch": 0.09999593098958333, "grad_norm": 38.93828582763672, "learning_rate": 4.879865490884886e-06, "loss": 3.5595, "step": 9830 }, { "epoch": 0.10004679361979167, "grad_norm": 14.068145751953125, "learning_rate": 4.879742990780717e-06, "loss": 3.5197, "step": 9835 }, { "epoch": 0.10009765625, "grad_norm": 11.864350318908691, "learning_rate": 4.879620429791387e-06, "loss": 3.601, "step": 9840 }, { "epoch": 0.10014851888020833, "grad_norm": 15.89621639251709, "learning_rate": 4.879497807920034e-06, "loss": 3.2539, "step": 9845 }, { "epoch": 0.10019938151041667, "grad_norm": 11.07425308227539, "learning_rate": 4.8793751251697925e-06, "loss": 3.5963, "step": 9850 }, { "epoch": 0.100250244140625, "grad_norm": 10.702958106994629, "learning_rate": 4.879252381543803e-06, "loss": 3.6547, "step": 9855 }, { "epoch": 0.10030110677083333, "grad_norm": 11.077383995056152, "learning_rate": 4.879129577045204e-06, "loss": 3.3366, "step": 9860 }, { "epoch": 0.10035196940104167, "grad_norm": 11.844645500183105, "learning_rate": 4.87900671167714e-06, "loss": 3.377, "step": 9865 }, { "epoch": 0.10040283203125, "grad_norm": 12.209272384643555, "learning_rate": 4.8788837854427525e-06, "loss": 4.0542, "step": 9870 }, { "epoch": 0.10045369466145833, "grad_norm": 12.173541069030762, "learning_rate": 4.878760798345188e-06, "loss": 3.5212, "step": 9875 }, { "epoch": 0.10050455729166667, "grad_norm": 12.681471824645996, "learning_rate": 4.878637750387591e-06, "loss": 3.8139, "step": 9880 }, { "epoch": 0.100555419921875, "grad_norm": 8.942806243896484, "learning_rate": 4.878514641573112e-06, "loss": 3.5788, "step": 9885 }, { "epoch": 0.10060628255208333, "grad_norm": 11.298273086547852, "learning_rate": 4.8783914719048995e-06, "loss": 3.3029, "step": 9890 }, { "epoch": 0.10065714518229167, "grad_norm": 9.567761421203613, "learning_rate": 4.8782682413861046e-06, "loss": 3.7846, "step": 9895 }, { "epoch": 0.1007080078125, "grad_norm": 10.052159309387207, "learning_rate": 4.8781449500198804e-06, "loss": 3.3519, "step": 9900 }, { "epoch": 0.10075887044270833, "grad_norm": 16.80272674560547, "learning_rate": 4.878021597809382e-06, "loss": 3.7498, "step": 9905 }, { "epoch": 0.10080973307291667, "grad_norm": 11.9678316116333, "learning_rate": 4.877898184757765e-06, "loss": 3.7687, "step": 9910 }, { "epoch": 0.100860595703125, "grad_norm": 14.82444953918457, "learning_rate": 4.877774710868185e-06, "loss": 3.4121, "step": 9915 }, { "epoch": 0.10091145833333333, "grad_norm": 12.413556098937988, "learning_rate": 4.877651176143804e-06, "loss": 3.5701, "step": 9920 }, { "epoch": 0.10096232096354167, "grad_norm": 16.419466018676758, "learning_rate": 4.877527580587781e-06, "loss": 3.5236, "step": 9925 }, { "epoch": 0.10101318359375, "grad_norm": 15.198001861572266, "learning_rate": 4.877403924203278e-06, "loss": 3.4151, "step": 9930 }, { "epoch": 0.10106404622395833, "grad_norm": 11.96876335144043, "learning_rate": 4.877280206993459e-06, "loss": 3.1713, "step": 9935 }, { "epoch": 0.10111490885416667, "grad_norm": 12.78167724609375, "learning_rate": 4.8771564289614895e-06, "loss": 3.3529, "step": 9940 }, { "epoch": 0.101165771484375, "grad_norm": 14.186490058898926, "learning_rate": 4.877032590110536e-06, "loss": 3.8994, "step": 9945 }, { "epoch": 0.10121663411458333, "grad_norm": 9.95617389678955, "learning_rate": 4.876908690443767e-06, "loss": 3.402, "step": 9950 }, { "epoch": 0.10126749674479167, "grad_norm": 14.145368576049805, "learning_rate": 4.876784729964353e-06, "loss": 3.5655, "step": 9955 }, { "epoch": 0.101318359375, "grad_norm": 10.90357780456543, "learning_rate": 4.876660708675465e-06, "loss": 3.4442, "step": 9960 }, { "epoch": 0.10136922200520833, "grad_norm": 13.902193069458008, "learning_rate": 4.876536626580276e-06, "loss": 3.3617, "step": 9965 }, { "epoch": 0.10142008463541667, "grad_norm": 8.821534156799316, "learning_rate": 4.876412483681961e-06, "loss": 3.445, "step": 9970 }, { "epoch": 0.101470947265625, "grad_norm": 11.418331146240234, "learning_rate": 4.8762882799836955e-06, "loss": 3.4656, "step": 9975 }, { "epoch": 0.10152180989583333, "grad_norm": 11.632497787475586, "learning_rate": 4.876164015488658e-06, "loss": 3.2188, "step": 9980 }, { "epoch": 0.10157267252604167, "grad_norm": 10.307924270629883, "learning_rate": 4.876039690200027e-06, "loss": 3.5271, "step": 9985 }, { "epoch": 0.10162353515625, "grad_norm": 11.517049789428711, "learning_rate": 4.875915304120984e-06, "loss": 3.5669, "step": 9990 }, { "epoch": 0.10167439778645833, "grad_norm": 9.514555931091309, "learning_rate": 4.875790857254711e-06, "loss": 3.3767, "step": 9995 }, { "epoch": 0.10172526041666667, "grad_norm": 13.482763290405273, "learning_rate": 4.875666349604392e-06, "loss": 3.5474, "step": 10000 }, { "epoch": 0.101776123046875, "grad_norm": 8.935997009277344, "learning_rate": 4.875541781173212e-06, "loss": 3.4632, "step": 10005 }, { "epoch": 0.10182698567708333, "grad_norm": 8.468302726745605, "learning_rate": 4.875417151964359e-06, "loss": 3.4322, "step": 10010 }, { "epoch": 0.10187784830729167, "grad_norm": 12.212846755981445, "learning_rate": 4.875292461981022e-06, "loss": 4.2413, "step": 10015 }, { "epoch": 0.1019287109375, "grad_norm": 11.472529411315918, "learning_rate": 4.87516771122639e-06, "loss": 3.2805, "step": 10020 }, { "epoch": 0.10197957356770833, "grad_norm": 13.230598449707031, "learning_rate": 4.875042899703654e-06, "loss": 3.4256, "step": 10025 }, { "epoch": 0.10203043619791667, "grad_norm": 13.139010429382324, "learning_rate": 4.874918027416009e-06, "loss": 3.5081, "step": 10030 }, { "epoch": 0.102081298828125, "grad_norm": 6.4391703605651855, "learning_rate": 4.874793094366649e-06, "loss": 3.3638, "step": 10035 }, { "epoch": 0.10213216145833333, "grad_norm": 15.451169967651367, "learning_rate": 4.8746681005587715e-06, "loss": 3.6243, "step": 10040 }, { "epoch": 0.10218302408854167, "grad_norm": 11.433987617492676, "learning_rate": 4.874543045995572e-06, "loss": 3.6848, "step": 10045 }, { "epoch": 0.10223388671875, "grad_norm": 8.72944450378418, "learning_rate": 4.874417930680253e-06, "loss": 3.3524, "step": 10050 }, { "epoch": 0.10228474934895833, "grad_norm": 15.81502628326416, "learning_rate": 4.874292754616014e-06, "loss": 3.3923, "step": 10055 }, { "epoch": 0.10233561197916667, "grad_norm": 12.636479377746582, "learning_rate": 4.8741675178060565e-06, "loss": 3.2582, "step": 10060 }, { "epoch": 0.102386474609375, "grad_norm": 10.026837348937988, "learning_rate": 4.874042220253586e-06, "loss": 3.5174, "step": 10065 }, { "epoch": 0.10243733723958333, "grad_norm": 14.83649730682373, "learning_rate": 4.8739168619618086e-06, "loss": 3.5073, "step": 10070 }, { "epoch": 0.10248819986979167, "grad_norm": 15.73272705078125, "learning_rate": 4.873791442933931e-06, "loss": 3.3724, "step": 10075 }, { "epoch": 0.1025390625, "grad_norm": 15.510259628295898, "learning_rate": 4.873665963173161e-06, "loss": 3.1763, "step": 10080 }, { "epoch": 0.10258992513020833, "grad_norm": 8.296263694763184, "learning_rate": 4.873540422682711e-06, "loss": 3.4604, "step": 10085 }, { "epoch": 0.10264078776041667, "grad_norm": 11.954290390014648, "learning_rate": 4.873414821465792e-06, "loss": 3.4896, "step": 10090 }, { "epoch": 0.102691650390625, "grad_norm": 10.191630363464355, "learning_rate": 4.873289159525617e-06, "loss": 3.5075, "step": 10095 }, { "epoch": 0.10274251302083333, "grad_norm": 14.362115859985352, "learning_rate": 4.873163436865401e-06, "loss": 3.0294, "step": 10100 }, { "epoch": 0.10279337565104167, "grad_norm": 10.870323181152344, "learning_rate": 4.873037653488361e-06, "loss": 3.5388, "step": 10105 }, { "epoch": 0.10284423828125, "grad_norm": 14.673710823059082, "learning_rate": 4.872911809397715e-06, "loss": 3.2948, "step": 10110 }, { "epoch": 0.10289510091145833, "grad_norm": 10.174817085266113, "learning_rate": 4.872785904596684e-06, "loss": 3.831, "step": 10115 }, { "epoch": 0.10294596354166667, "grad_norm": 13.638348579406738, "learning_rate": 4.8726599390884866e-06, "loss": 3.2765, "step": 10120 }, { "epoch": 0.102996826171875, "grad_norm": 14.297739028930664, "learning_rate": 4.872533912876348e-06, "loss": 3.4857, "step": 10125 }, { "epoch": 0.10304768880208333, "grad_norm": 8.859086036682129, "learning_rate": 4.872407825963491e-06, "loss": 3.5332, "step": 10130 }, { "epoch": 0.10309855143229167, "grad_norm": 17.016996383666992, "learning_rate": 4.872281678353142e-06, "loss": 3.858, "step": 10135 }, { "epoch": 0.1031494140625, "grad_norm": 7.245146751403809, "learning_rate": 4.872155470048529e-06, "loss": 3.3026, "step": 10140 }, { "epoch": 0.10320027669270833, "grad_norm": 13.683245658874512, "learning_rate": 4.87202920105288e-06, "loss": 3.5045, "step": 10145 }, { "epoch": 0.10325113932291667, "grad_norm": 10.41574478149414, "learning_rate": 4.871902871369427e-06, "loss": 3.289, "step": 10150 }, { "epoch": 0.103302001953125, "grad_norm": 13.628034591674805, "learning_rate": 4.871776481001401e-06, "loss": 3.6961, "step": 10155 }, { "epoch": 0.10335286458333333, "grad_norm": 9.731292724609375, "learning_rate": 4.8716500299520356e-06, "loss": 3.4743, "step": 10160 }, { "epoch": 0.10340372721354167, "grad_norm": 15.618063926696777, "learning_rate": 4.871523518224567e-06, "loss": 3.0353, "step": 10165 }, { "epoch": 0.10345458984375, "grad_norm": 12.420486450195312, "learning_rate": 4.87139694582223e-06, "loss": 3.5414, "step": 10170 }, { "epoch": 0.10350545247395833, "grad_norm": 9.918728828430176, "learning_rate": 4.871270312748265e-06, "loss": 3.6799, "step": 10175 }, { "epoch": 0.10355631510416667, "grad_norm": 10.1067533493042, "learning_rate": 4.871143619005911e-06, "loss": 3.4345, "step": 10180 }, { "epoch": 0.103607177734375, "grad_norm": 6.703737735748291, "learning_rate": 4.87101686459841e-06, "loss": 3.2298, "step": 10185 }, { "epoch": 0.10365804036458333, "grad_norm": 15.222867012023926, "learning_rate": 4.8708900495290035e-06, "loss": 3.6949, "step": 10190 }, { "epoch": 0.10370890299479167, "grad_norm": 12.256555557250977, "learning_rate": 4.8707631738009376e-06, "loss": 3.4872, "step": 10195 }, { "epoch": 0.103759765625, "grad_norm": 8.05500602722168, "learning_rate": 4.870636237417458e-06, "loss": 3.5136, "step": 10200 }, { "epoch": 0.10381062825520833, "grad_norm": 9.914165496826172, "learning_rate": 4.870509240381812e-06, "loss": 3.5153, "step": 10205 }, { "epoch": 0.10386149088541667, "grad_norm": 14.44861125946045, "learning_rate": 4.8703821826972495e-06, "loss": 3.4795, "step": 10210 }, { "epoch": 0.103912353515625, "grad_norm": 12.164118766784668, "learning_rate": 4.87025506436702e-06, "loss": 3.4607, "step": 10215 }, { "epoch": 0.10396321614583333, "grad_norm": 15.556924819946289, "learning_rate": 4.8701278853943764e-06, "loss": 3.3538, "step": 10220 }, { "epoch": 0.10401407877604167, "grad_norm": 14.256190299987793, "learning_rate": 4.870000645782573e-06, "loss": 3.3421, "step": 10225 }, { "epoch": 0.10406494140625, "grad_norm": 13.732766151428223, "learning_rate": 4.869873345534865e-06, "loss": 3.0725, "step": 10230 }, { "epoch": 0.10411580403645833, "grad_norm": 16.038616180419922, "learning_rate": 4.869745984654508e-06, "loss": 3.212, "step": 10235 }, { "epoch": 0.10416666666666667, "grad_norm": 14.277772903442383, "learning_rate": 4.8696185631447635e-06, "loss": 3.6345, "step": 10240 }, { "epoch": 0.104217529296875, "grad_norm": 10.199970245361328, "learning_rate": 4.869491081008889e-06, "loss": 3.5137, "step": 10245 }, { "epoch": 0.10426839192708333, "grad_norm": 12.390953063964844, "learning_rate": 4.869363538250146e-06, "loss": 3.3681, "step": 10250 }, { "epoch": 0.10431925455729167, "grad_norm": 8.647974967956543, "learning_rate": 4.869235934871799e-06, "loss": 3.2399, "step": 10255 }, { "epoch": 0.1043701171875, "grad_norm": 11.923579216003418, "learning_rate": 4.869108270877112e-06, "loss": 3.6247, "step": 10260 }, { "epoch": 0.10442097981770833, "grad_norm": 13.356016159057617, "learning_rate": 4.868980546269352e-06, "loss": 3.7096, "step": 10265 }, { "epoch": 0.10447184244791667, "grad_norm": 8.816129684448242, "learning_rate": 4.868852761051787e-06, "loss": 3.3696, "step": 10270 }, { "epoch": 0.104522705078125, "grad_norm": 11.433305740356445, "learning_rate": 4.868724915227684e-06, "loss": 3.3735, "step": 10275 }, { "epoch": 0.10457356770833333, "grad_norm": 10.5938138961792, "learning_rate": 4.868597008800315e-06, "loss": 3.5073, "step": 10280 }, { "epoch": 0.10462443033854167, "grad_norm": 13.988579750061035, "learning_rate": 4.868469041772955e-06, "loss": 3.6398, "step": 10285 }, { "epoch": 0.10467529296875, "grad_norm": 8.426319122314453, "learning_rate": 4.868341014148875e-06, "loss": 3.5602, "step": 10290 }, { "epoch": 0.10472615559895833, "grad_norm": 12.661907196044922, "learning_rate": 4.868212925931351e-06, "loss": 3.3872, "step": 10295 }, { "epoch": 0.10477701822916667, "grad_norm": 10.294717788696289, "learning_rate": 4.868084777123661e-06, "loss": 3.5918, "step": 10300 }, { "epoch": 0.104827880859375, "grad_norm": 16.74044418334961, "learning_rate": 4.867956567729084e-06, "loss": 3.4537, "step": 10305 }, { "epoch": 0.10487874348958333, "grad_norm": 10.488282203674316, "learning_rate": 4.867828297750899e-06, "loss": 3.101, "step": 10310 }, { "epoch": 0.10492960611979167, "grad_norm": 10.39643669128418, "learning_rate": 4.867699967192388e-06, "loss": 3.1673, "step": 10315 }, { "epoch": 0.10498046875, "grad_norm": 13.151554107666016, "learning_rate": 4.867571576056834e-06, "loss": 3.6526, "step": 10320 }, { "epoch": 0.10503133138020833, "grad_norm": 11.318380355834961, "learning_rate": 4.867443124347523e-06, "loss": 3.6569, "step": 10325 }, { "epoch": 0.10508219401041667, "grad_norm": 11.30242919921875, "learning_rate": 4.867314612067741e-06, "loss": 3.2106, "step": 10330 }, { "epoch": 0.105133056640625, "grad_norm": 8.88015079498291, "learning_rate": 4.867186039220775e-06, "loss": 3.3928, "step": 10335 }, { "epoch": 0.10518391927083333, "grad_norm": 9.858390808105469, "learning_rate": 4.867057405809916e-06, "loss": 3.3096, "step": 10340 }, { "epoch": 0.10523478190104167, "grad_norm": 14.723819732666016, "learning_rate": 4.866928711838455e-06, "loss": 3.3486, "step": 10345 }, { "epoch": 0.10528564453125, "grad_norm": 8.097146034240723, "learning_rate": 4.866799957309682e-06, "loss": 4.0018, "step": 10350 }, { "epoch": 0.10533650716145833, "grad_norm": 14.983758926391602, "learning_rate": 4.866671142226895e-06, "loss": 3.5232, "step": 10355 }, { "epoch": 0.10538736979166667, "grad_norm": 10.44787311553955, "learning_rate": 4.866542266593387e-06, "loss": 3.6857, "step": 10360 }, { "epoch": 0.105438232421875, "grad_norm": 13.872020721435547, "learning_rate": 4.8664133304124555e-06, "loss": 3.2874, "step": 10365 }, { "epoch": 0.10548909505208333, "grad_norm": 11.341287612915039, "learning_rate": 4.8662843336874e-06, "loss": 3.202, "step": 10370 }, { "epoch": 0.10553995768229167, "grad_norm": 9.611438751220703, "learning_rate": 4.866155276421522e-06, "loss": 3.4759, "step": 10375 }, { "epoch": 0.1055908203125, "grad_norm": 12.784802436828613, "learning_rate": 4.8660261586181205e-06, "loss": 3.3239, "step": 10380 }, { "epoch": 0.10564168294270833, "grad_norm": 15.74527645111084, "learning_rate": 4.865896980280501e-06, "loss": 3.6173, "step": 10385 }, { "epoch": 0.10569254557291667, "grad_norm": 9.02971076965332, "learning_rate": 4.865767741411969e-06, "loss": 3.7378, "step": 10390 }, { "epoch": 0.105743408203125, "grad_norm": 8.090533256530762, "learning_rate": 4.8656384420158285e-06, "loss": 3.2215, "step": 10395 }, { "epoch": 0.10579427083333333, "grad_norm": 9.529847145080566, "learning_rate": 4.86550908209539e-06, "loss": 3.3138, "step": 10400 }, { "epoch": 0.10584513346354167, "grad_norm": 11.145707130432129, "learning_rate": 4.865379661653963e-06, "loss": 2.8438, "step": 10405 }, { "epoch": 0.10589599609375, "grad_norm": 9.50683879852295, "learning_rate": 4.8652501806948575e-06, "loss": 3.4143, "step": 10410 }, { "epoch": 0.10594685872395833, "grad_norm": 14.874054908752441, "learning_rate": 4.865120639221386e-06, "loss": 3.6896, "step": 10415 }, { "epoch": 0.10599772135416667, "grad_norm": 15.502473831176758, "learning_rate": 4.864991037236864e-06, "loss": 3.7117, "step": 10420 }, { "epoch": 0.106048583984375, "grad_norm": 17.096446990966797, "learning_rate": 4.864861374744607e-06, "loss": 3.4389, "step": 10425 }, { "epoch": 0.10609944661458333, "grad_norm": 12.414908409118652, "learning_rate": 4.8647316517479326e-06, "loss": 3.5158, "step": 10430 }, { "epoch": 0.10615030924479167, "grad_norm": 13.846083641052246, "learning_rate": 4.864601868250159e-06, "loss": 3.4447, "step": 10435 }, { "epoch": 0.106201171875, "grad_norm": 12.67673110961914, "learning_rate": 4.864472024254607e-06, "loss": 3.0979, "step": 10440 }, { "epoch": 0.10625203450520833, "grad_norm": 13.106204986572266, "learning_rate": 4.864342119764599e-06, "loss": 3.4044, "step": 10445 }, { "epoch": 0.10630289713541667, "grad_norm": 10.791247367858887, "learning_rate": 4.864212154783458e-06, "loss": 3.4244, "step": 10450 }, { "epoch": 0.106353759765625, "grad_norm": 15.417497634887695, "learning_rate": 4.86408212931451e-06, "loss": 3.8919, "step": 10455 }, { "epoch": 0.10640462239583333, "grad_norm": 12.463935852050781, "learning_rate": 4.86395204336108e-06, "loss": 3.3458, "step": 10460 }, { "epoch": 0.10645548502604167, "grad_norm": 11.733417510986328, "learning_rate": 4.863821896926498e-06, "loss": 3.5288, "step": 10465 }, { "epoch": 0.10650634765625, "grad_norm": 14.689102172851562, "learning_rate": 4.863691690014093e-06, "loss": 3.2967, "step": 10470 }, { "epoch": 0.10655721028645833, "grad_norm": 10.164743423461914, "learning_rate": 4.863561422627197e-06, "loss": 3.2024, "step": 10475 }, { "epoch": 0.10660807291666667, "grad_norm": 10.591276168823242, "learning_rate": 4.863431094769141e-06, "loss": 3.1854, "step": 10480 }, { "epoch": 0.106658935546875, "grad_norm": 7.265041351318359, "learning_rate": 4.863300706443261e-06, "loss": 3.3769, "step": 10485 }, { "epoch": 0.10670979817708333, "grad_norm": 9.011372566223145, "learning_rate": 4.8631702576528924e-06, "loss": 3.3535, "step": 10490 }, { "epoch": 0.10676066080729167, "grad_norm": 12.629626274108887, "learning_rate": 4.863039748401374e-06, "loss": 3.1713, "step": 10495 }, { "epoch": 0.1068115234375, "grad_norm": 10.04238224029541, "learning_rate": 4.8629091786920425e-06, "loss": 3.2648, "step": 10500 }, { "epoch": 0.10686238606770833, "grad_norm": 8.861063003540039, "learning_rate": 4.862778548528239e-06, "loss": 3.4088, "step": 10505 }, { "epoch": 0.10691324869791667, "grad_norm": 10.639623641967773, "learning_rate": 4.862647857913308e-06, "loss": 3.1807, "step": 10510 }, { "epoch": 0.106964111328125, "grad_norm": 11.753366470336914, "learning_rate": 4.862517106850592e-06, "loss": 4.0414, "step": 10515 }, { "epoch": 0.10701497395833333, "grad_norm": 6.897514343261719, "learning_rate": 4.862386295343435e-06, "loss": 3.1811, "step": 10520 }, { "epoch": 0.10706583658854167, "grad_norm": 17.118993759155273, "learning_rate": 4.862255423395184e-06, "loss": 3.549, "step": 10525 }, { "epoch": 0.10711669921875, "grad_norm": 12.017741203308105, "learning_rate": 4.862124491009188e-06, "loss": 3.4445, "step": 10530 }, { "epoch": 0.10716756184895833, "grad_norm": 18.230016708374023, "learning_rate": 4.861993498188798e-06, "loss": 3.381, "step": 10535 }, { "epoch": 0.10721842447916667, "grad_norm": 16.504117965698242, "learning_rate": 4.861862444937363e-06, "loss": 4.0049, "step": 10540 }, { "epoch": 0.107269287109375, "grad_norm": 13.888938903808594, "learning_rate": 4.861731331258238e-06, "loss": 3.5727, "step": 10545 }, { "epoch": 0.10732014973958333, "grad_norm": 14.863890647888184, "learning_rate": 4.8616001571547764e-06, "loss": 3.4779, "step": 10550 }, { "epoch": 0.10737101236979167, "grad_norm": 13.943183898925781, "learning_rate": 4.8614689226303345e-06, "loss": 3.602, "step": 10555 }, { "epoch": 0.107421875, "grad_norm": 10.1746826171875, "learning_rate": 4.86133762768827e-06, "loss": 3.4561, "step": 10560 }, { "epoch": 0.10747273763020833, "grad_norm": 12.866034507751465, "learning_rate": 4.861206272331941e-06, "loss": 3.2867, "step": 10565 }, { "epoch": 0.10752360026041667, "grad_norm": 13.17371654510498, "learning_rate": 4.86107485656471e-06, "loss": 3.4148, "step": 10570 }, { "epoch": 0.107574462890625, "grad_norm": 8.4532470703125, "learning_rate": 4.860943380389939e-06, "loss": 3.1673, "step": 10575 }, { "epoch": 0.10762532552083333, "grad_norm": 10.179365158081055, "learning_rate": 4.86081184381099e-06, "loss": 3.3304, "step": 10580 }, { "epoch": 0.10767618815104167, "grad_norm": 10.048255920410156, "learning_rate": 4.860680246831231e-06, "loss": 3.3539, "step": 10585 }, { "epoch": 0.10772705078125, "grad_norm": 14.505502700805664, "learning_rate": 4.860548589454026e-06, "loss": 3.5862, "step": 10590 }, { "epoch": 0.10777791341145833, "grad_norm": 11.778282165527344, "learning_rate": 4.860416871682746e-06, "loss": 3.3957, "step": 10595 }, { "epoch": 0.10782877604166667, "grad_norm": 10.966447830200195, "learning_rate": 4.860285093520759e-06, "loss": 3.2105, "step": 10600 }, { "epoch": 0.107879638671875, "grad_norm": 12.947134017944336, "learning_rate": 4.860153254971437e-06, "loss": 3.7268, "step": 10605 }, { "epoch": 0.10793050130208333, "grad_norm": 12.653708457946777, "learning_rate": 4.860021356038155e-06, "loss": 3.4977, "step": 10610 }, { "epoch": 0.10798136393229167, "grad_norm": 15.799676895141602, "learning_rate": 4.859889396724284e-06, "loss": 3.7318, "step": 10615 }, { "epoch": 0.1080322265625, "grad_norm": 10.117684364318848, "learning_rate": 4.859757377033204e-06, "loss": 3.8512, "step": 10620 }, { "epoch": 0.10808308919270833, "grad_norm": 16.469112396240234, "learning_rate": 4.85962529696829e-06, "loss": 3.6864, "step": 10625 }, { "epoch": 0.10813395182291667, "grad_norm": 8.12623119354248, "learning_rate": 4.859493156532922e-06, "loss": 3.162, "step": 10630 }, { "epoch": 0.108184814453125, "grad_norm": 10.442404747009277, "learning_rate": 4.859360955730481e-06, "loss": 3.3013, "step": 10635 }, { "epoch": 0.10823567708333333, "grad_norm": 13.673596382141113, "learning_rate": 4.8592286945643485e-06, "loss": 3.3599, "step": 10640 }, { "epoch": 0.10828653971354167, "grad_norm": 8.366315841674805, "learning_rate": 4.859096373037911e-06, "loss": 3.2563, "step": 10645 }, { "epoch": 0.10833740234375, "grad_norm": 16.115209579467773, "learning_rate": 4.8589639911545495e-06, "loss": 3.6421, "step": 10650 }, { "epoch": 0.10838826497395833, "grad_norm": 12.849785804748535, "learning_rate": 4.858831548917655e-06, "loss": 3.4333, "step": 10655 }, { "epoch": 0.10843912760416667, "grad_norm": 12.12121295928955, "learning_rate": 4.858699046330614e-06, "loss": 3.8339, "step": 10660 }, { "epoch": 0.108489990234375, "grad_norm": 12.373353004455566, "learning_rate": 4.858566483396816e-06, "loss": 3.3636, "step": 10665 }, { "epoch": 0.10854085286458333, "grad_norm": 13.366467475891113, "learning_rate": 4.858433860119655e-06, "loss": 3.1534, "step": 10670 }, { "epoch": 0.10859171549479167, "grad_norm": 11.035595893859863, "learning_rate": 4.858301176502522e-06, "loss": 3.0366, "step": 10675 }, { "epoch": 0.108642578125, "grad_norm": 9.009740829467773, "learning_rate": 4.858168432548813e-06, "loss": 3.1213, "step": 10680 }, { "epoch": 0.10869344075520833, "grad_norm": 13.181190490722656, "learning_rate": 4.858035628261924e-06, "loss": 3.2362, "step": 10685 }, { "epoch": 0.10874430338541667, "grad_norm": 12.755681991577148, "learning_rate": 4.85790276364525e-06, "loss": 3.493, "step": 10690 }, { "epoch": 0.108795166015625, "grad_norm": 16.021320343017578, "learning_rate": 4.857769838702195e-06, "loss": 3.3145, "step": 10695 }, { "epoch": 0.10884602864583333, "grad_norm": 11.281843185424805, "learning_rate": 4.857636853436156e-06, "loss": 3.4385, "step": 10700 }, { "epoch": 0.10889689127604167, "grad_norm": 13.89880657196045, "learning_rate": 4.857503807850538e-06, "loss": 3.5553, "step": 10705 }, { "epoch": 0.10894775390625, "grad_norm": 17.039382934570312, "learning_rate": 4.857370701948744e-06, "loss": 3.7388, "step": 10710 }, { "epoch": 0.10899861653645833, "grad_norm": 8.904913902282715, "learning_rate": 4.857237535734179e-06, "loss": 3.5905, "step": 10715 }, { "epoch": 0.10904947916666667, "grad_norm": 10.64655876159668, "learning_rate": 4.8571043092102496e-06, "loss": 3.174, "step": 10720 }, { "epoch": 0.109100341796875, "grad_norm": 11.749823570251465, "learning_rate": 4.856971022380366e-06, "loss": 3.295, "step": 10725 }, { "epoch": 0.10915120442708333, "grad_norm": 9.72337532043457, "learning_rate": 4.856837675247938e-06, "loss": 3.3855, "step": 10730 }, { "epoch": 0.10920206705729167, "grad_norm": 16.54507827758789, "learning_rate": 4.856704267816375e-06, "loss": 3.5955, "step": 10735 }, { "epoch": 0.1092529296875, "grad_norm": 13.975875854492188, "learning_rate": 4.856570800089093e-06, "loss": 3.3142, "step": 10740 }, { "epoch": 0.10930379231770833, "grad_norm": 15.316740989685059, "learning_rate": 4.856437272069506e-06, "loss": 3.4958, "step": 10745 }, { "epoch": 0.10935465494791667, "grad_norm": 10.500741004943848, "learning_rate": 4.856303683761029e-06, "loss": 3.2991, "step": 10750 }, { "epoch": 0.109405517578125, "grad_norm": 10.17525577545166, "learning_rate": 4.8561700351670815e-06, "loss": 3.7226, "step": 10755 }, { "epoch": 0.10945638020833333, "grad_norm": 12.4932861328125, "learning_rate": 4.856036326291082e-06, "loss": 3.2116, "step": 10760 }, { "epoch": 0.10950724283854167, "grad_norm": 9.614534378051758, "learning_rate": 4.855902557136451e-06, "loss": 3.351, "step": 10765 }, { "epoch": 0.10955810546875, "grad_norm": 14.387618064880371, "learning_rate": 4.855768727706613e-06, "loss": 3.5085, "step": 10770 }, { "epoch": 0.10960896809895833, "grad_norm": 11.664185523986816, "learning_rate": 4.855634838004988e-06, "loss": 3.2219, "step": 10775 }, { "epoch": 0.10965983072916667, "grad_norm": 8.379794120788574, "learning_rate": 4.8555008880350055e-06, "loss": 3.0816, "step": 10780 }, { "epoch": 0.109710693359375, "grad_norm": 14.983808517456055, "learning_rate": 4.8553668778000905e-06, "loss": 3.0428, "step": 10785 }, { "epoch": 0.10976155598958333, "grad_norm": 10.499786376953125, "learning_rate": 4.855232807303673e-06, "loss": 3.1102, "step": 10790 }, { "epoch": 0.10981241861979167, "grad_norm": 15.770118713378906, "learning_rate": 4.8550986765491825e-06, "loss": 4.0818, "step": 10795 }, { "epoch": 0.10986328125, "grad_norm": 13.66915225982666, "learning_rate": 4.85496448554005e-06, "loss": 3.7093, "step": 10800 }, { "epoch": 0.10991414388020833, "grad_norm": 8.571983337402344, "learning_rate": 4.85483023427971e-06, "loss": 3.0368, "step": 10805 }, { "epoch": 0.10996500651041667, "grad_norm": 15.759471893310547, "learning_rate": 4.854695922771595e-06, "loss": 3.5172, "step": 10810 }, { "epoch": 0.110015869140625, "grad_norm": 13.83719539642334, "learning_rate": 4.854561551019145e-06, "loss": 3.5774, "step": 10815 }, { "epoch": 0.11006673177083333, "grad_norm": 11.690563201904297, "learning_rate": 4.854427119025794e-06, "loss": 3.5709, "step": 10820 }, { "epoch": 0.11011759440104167, "grad_norm": 16.460613250732422, "learning_rate": 4.854292626794984e-06, "loss": 3.0922, "step": 10825 }, { "epoch": 0.11016845703125, "grad_norm": 9.609016418457031, "learning_rate": 4.854158074330155e-06, "loss": 3.6565, "step": 10830 }, { "epoch": 0.11021931966145833, "grad_norm": 8.492650985717773, "learning_rate": 4.85402346163475e-06, "loss": 3.0948, "step": 10835 }, { "epoch": 0.11027018229166667, "grad_norm": 8.52239990234375, "learning_rate": 4.853888788712213e-06, "loss": 3.6836, "step": 10840 }, { "epoch": 0.110321044921875, "grad_norm": 14.75137710571289, "learning_rate": 4.853754055565988e-06, "loss": 3.5731, "step": 10845 }, { "epoch": 0.11037190755208333, "grad_norm": 17.106433868408203, "learning_rate": 4.853619262199525e-06, "loss": 3.3211, "step": 10850 }, { "epoch": 0.11042277018229167, "grad_norm": 13.21174430847168, "learning_rate": 4.85348440861627e-06, "loss": 3.1972, "step": 10855 }, { "epoch": 0.1104736328125, "grad_norm": 15.949371337890625, "learning_rate": 4.8533494948196746e-06, "loss": 3.7361, "step": 10860 }, { "epoch": 0.11052449544270833, "grad_norm": 13.830065727233887, "learning_rate": 4.8532145208131894e-06, "loss": 3.4961, "step": 10865 }, { "epoch": 0.11057535807291667, "grad_norm": 14.657466888427734, "learning_rate": 4.85307948660027e-06, "loss": 3.3326, "step": 10870 }, { "epoch": 0.110626220703125, "grad_norm": 18.071182250976562, "learning_rate": 4.852944392184369e-06, "loss": 3.5664, "step": 10875 }, { "epoch": 0.11067708333333333, "grad_norm": 259.91387939453125, "learning_rate": 4.852809237568943e-06, "loss": 3.6806, "step": 10880 }, { "epoch": 0.11072794596354167, "grad_norm": 12.160555839538574, "learning_rate": 4.85267402275745e-06, "loss": 3.8418, "step": 10885 }, { "epoch": 0.11077880859375, "grad_norm": 15.426597595214844, "learning_rate": 4.852538747753351e-06, "loss": 3.5979, "step": 10890 }, { "epoch": 0.11082967122395833, "grad_norm": 13.01453685760498, "learning_rate": 4.852403412560105e-06, "loss": 3.2011, "step": 10895 }, { "epoch": 0.11088053385416667, "grad_norm": 10.577211380004883, "learning_rate": 4.852268017181176e-06, "loss": 3.727, "step": 10900 }, { "epoch": 0.110931396484375, "grad_norm": 14.287973403930664, "learning_rate": 4.852132561620026e-06, "loss": 3.758, "step": 10905 }, { "epoch": 0.11098225911458333, "grad_norm": 11.887785911560059, "learning_rate": 4.851997045880123e-06, "loss": 2.9819, "step": 10910 }, { "epoch": 0.11103312174479167, "grad_norm": 7.388796329498291, "learning_rate": 4.851861469964932e-06, "loss": 3.0688, "step": 10915 }, { "epoch": 0.111083984375, "grad_norm": 11.511301040649414, "learning_rate": 4.851725833877924e-06, "loss": 3.3384, "step": 10920 }, { "epoch": 0.11113484700520833, "grad_norm": 13.27392864227295, "learning_rate": 4.851590137622567e-06, "loss": 3.0957, "step": 10925 }, { "epoch": 0.11118570963541667, "grad_norm": 14.255321502685547, "learning_rate": 4.851454381202334e-06, "loss": 3.2306, "step": 10930 }, { "epoch": 0.111236572265625, "grad_norm": 11.45173168182373, "learning_rate": 4.851318564620699e-06, "loss": 3.4344, "step": 10935 }, { "epoch": 0.11128743489583333, "grad_norm": 12.557133674621582, "learning_rate": 4.851182687881134e-06, "loss": 3.4574, "step": 10940 }, { "epoch": 0.11133829752604167, "grad_norm": 12.226534843444824, "learning_rate": 4.851046750987118e-06, "loss": 3.8019, "step": 10945 }, { "epoch": 0.11138916015625, "grad_norm": 10.195042610168457, "learning_rate": 4.850910753942129e-06, "loss": 3.0214, "step": 10950 }, { "epoch": 0.11144002278645833, "grad_norm": 14.165132522583008, "learning_rate": 4.850774696749645e-06, "loss": 3.5724, "step": 10955 }, { "epoch": 0.11149088541666667, "grad_norm": 13.284117698669434, "learning_rate": 4.850638579413147e-06, "loss": 4.1911, "step": 10960 }, { "epoch": 0.111541748046875, "grad_norm": 16.30782127380371, "learning_rate": 4.850502401936119e-06, "loss": 3.1399, "step": 10965 }, { "epoch": 0.11159261067708333, "grad_norm": 13.27759838104248, "learning_rate": 4.850366164322044e-06, "loss": 3.4511, "step": 10970 }, { "epoch": 0.11164347330729167, "grad_norm": 12.456367492675781, "learning_rate": 4.850229866574407e-06, "loss": 3.5491, "step": 10975 }, { "epoch": 0.1116943359375, "grad_norm": 10.18445873260498, "learning_rate": 4.850093508696697e-06, "loss": 3.5397, "step": 10980 }, { "epoch": 0.11174519856770833, "grad_norm": 14.981690406799316, "learning_rate": 4.849957090692401e-06, "loss": 3.3078, "step": 10985 }, { "epoch": 0.11179606119791667, "grad_norm": 18.036338806152344, "learning_rate": 4.84982061256501e-06, "loss": 3.2716, "step": 10990 }, { "epoch": 0.111846923828125, "grad_norm": 18.29755973815918, "learning_rate": 4.849684074318015e-06, "loss": 3.6321, "step": 10995 }, { "epoch": 0.11189778645833333, "grad_norm": 13.462907791137695, "learning_rate": 4.849547475954911e-06, "loss": 3.3401, "step": 11000 }, { "epoch": 0.11194864908854167, "grad_norm": 9.00394058227539, "learning_rate": 4.849410817479191e-06, "loss": 3.6376, "step": 11005 }, { "epoch": 0.11199951171875, "grad_norm": 7.56473970413208, "learning_rate": 4.849274098894352e-06, "loss": 3.5239, "step": 11010 }, { "epoch": 0.11205037434895833, "grad_norm": 10.673871994018555, "learning_rate": 4.849137320203892e-06, "loss": 3.4489, "step": 11015 }, { "epoch": 0.11210123697916667, "grad_norm": 17.68508529663086, "learning_rate": 4.849000481411312e-06, "loss": 3.3573, "step": 11020 }, { "epoch": 0.112152099609375, "grad_norm": 13.00468921661377, "learning_rate": 4.84886358252011e-06, "loss": 3.3832, "step": 11025 }, { "epoch": 0.11220296223958333, "grad_norm": 8.050721168518066, "learning_rate": 4.8487266235337895e-06, "loss": 3.2107, "step": 11030 }, { "epoch": 0.11225382486979167, "grad_norm": 10.355036735534668, "learning_rate": 4.848589604455856e-06, "loss": 3.304, "step": 11035 }, { "epoch": 0.1123046875, "grad_norm": 19.039920806884766, "learning_rate": 4.848452525289814e-06, "loss": 3.7786, "step": 11040 }, { "epoch": 0.11235555013020833, "grad_norm": 12.501426696777344, "learning_rate": 4.8483153860391705e-06, "loss": 3.5708, "step": 11045 }, { "epoch": 0.11240641276041667, "grad_norm": 15.971506118774414, "learning_rate": 4.848178186707435e-06, "loss": 3.209, "step": 11050 }, { "epoch": 0.112457275390625, "grad_norm": 7.047082424163818, "learning_rate": 4.8480409272981165e-06, "loss": 3.6421, "step": 11055 }, { "epoch": 0.11250813802083333, "grad_norm": 14.781586647033691, "learning_rate": 4.847903607814728e-06, "loss": 3.4269, "step": 11060 }, { "epoch": 0.11255900065104167, "grad_norm": 10.393120765686035, "learning_rate": 4.847766228260781e-06, "loss": 3.5995, "step": 11065 }, { "epoch": 0.11260986328125, "grad_norm": 10.039695739746094, "learning_rate": 4.847628788639793e-06, "loss": 3.3204, "step": 11070 }, { "epoch": 0.11266072591145833, "grad_norm": 13.824012756347656, "learning_rate": 4.847491288955279e-06, "loss": 3.6698, "step": 11075 }, { "epoch": 0.11271158854166667, "grad_norm": 11.334912300109863, "learning_rate": 4.847353729210756e-06, "loss": 3.5687, "step": 11080 }, { "epoch": 0.112762451171875, "grad_norm": 15.515557289123535, "learning_rate": 4.847216109409744e-06, "loss": 3.761, "step": 11085 }, { "epoch": 0.11281331380208333, "grad_norm": 7.701551914215088, "learning_rate": 4.847078429555765e-06, "loss": 3.6052, "step": 11090 }, { "epoch": 0.11286417643229167, "grad_norm": 13.438464164733887, "learning_rate": 4.8469406896523405e-06, "loss": 3.2666, "step": 11095 }, { "epoch": 0.1129150390625, "grad_norm": 11.519120216369629, "learning_rate": 4.846802889702994e-06, "loss": 3.1568, "step": 11100 }, { "epoch": 0.11296590169270833, "grad_norm": 10.409358978271484, "learning_rate": 4.8466650297112525e-06, "loss": 3.5128, "step": 11105 }, { "epoch": 0.11301676432291667, "grad_norm": 10.928455352783203, "learning_rate": 4.846527109680642e-06, "loss": 3.7998, "step": 11110 }, { "epoch": 0.113067626953125, "grad_norm": 13.899455070495605, "learning_rate": 4.846389129614692e-06, "loss": 3.1142, "step": 11115 }, { "epoch": 0.11311848958333333, "grad_norm": 10.756808280944824, "learning_rate": 4.846251089516932e-06, "loss": 3.3781, "step": 11120 }, { "epoch": 0.11316935221354167, "grad_norm": 10.080056190490723, "learning_rate": 4.846112989390894e-06, "loss": 3.1493, "step": 11125 }, { "epoch": 0.11322021484375, "grad_norm": 16.393461227416992, "learning_rate": 4.845974829240112e-06, "loss": 3.6096, "step": 11130 }, { "epoch": 0.11327107747395833, "grad_norm": 13.437868118286133, "learning_rate": 4.845836609068119e-06, "loss": 3.3507, "step": 11135 }, { "epoch": 0.11332194010416667, "grad_norm": 12.06811237335205, "learning_rate": 4.8456983288784535e-06, "loss": 3.197, "step": 11140 }, { "epoch": 0.113372802734375, "grad_norm": 11.709611892700195, "learning_rate": 4.845559988674651e-06, "loss": 3.5839, "step": 11145 }, { "epoch": 0.11342366536458333, "grad_norm": 17.550220489501953, "learning_rate": 4.8454215884602525e-06, "loss": 3.6834, "step": 11150 }, { "epoch": 0.11347452799479167, "grad_norm": 10.27842903137207, "learning_rate": 4.845283128238799e-06, "loss": 3.5057, "step": 11155 }, { "epoch": 0.113525390625, "grad_norm": 16.689462661743164, "learning_rate": 4.845144608013832e-06, "loss": 3.3252, "step": 11160 }, { "epoch": 0.11357625325520833, "grad_norm": 10.314776420593262, "learning_rate": 4.845006027788897e-06, "loss": 4.4668, "step": 11165 }, { "epoch": 0.11362711588541667, "grad_norm": 12.564552307128906, "learning_rate": 4.844867387567538e-06, "loss": 3.3531, "step": 11170 }, { "epoch": 0.113677978515625, "grad_norm": 16.5703182220459, "learning_rate": 4.8447286873533025e-06, "loss": 3.3511, "step": 11175 }, { "epoch": 0.11372884114583333, "grad_norm": 10.776202201843262, "learning_rate": 4.84458992714974e-06, "loss": 3.5057, "step": 11180 }, { "epoch": 0.11377970377604167, "grad_norm": 9.926742553710938, "learning_rate": 4.844451106960399e-06, "loss": 3.5368, "step": 11185 }, { "epoch": 0.11383056640625, "grad_norm": 13.466375350952148, "learning_rate": 4.844312226788833e-06, "loss": 3.4744, "step": 11190 }, { "epoch": 0.11388142903645833, "grad_norm": 13.84046745300293, "learning_rate": 4.844173286638593e-06, "loss": 3.4421, "step": 11195 }, { "epoch": 0.11393229166666667, "grad_norm": 16.326255798339844, "learning_rate": 4.8440342865132365e-06, "loss": 3.5388, "step": 11200 }, { "epoch": 0.113983154296875, "grad_norm": 11.938819885253906, "learning_rate": 4.843895226416317e-06, "loss": 3.171, "step": 11205 }, { "epoch": 0.11403401692708333, "grad_norm": 8.013636589050293, "learning_rate": 4.843756106351396e-06, "loss": 4.0519, "step": 11210 }, { "epoch": 0.11408487955729167, "grad_norm": 12.114385604858398, "learning_rate": 4.843616926322029e-06, "loss": 3.5346, "step": 11215 }, { "epoch": 0.1141357421875, "grad_norm": 14.332465171813965, "learning_rate": 4.843477686331778e-06, "loss": 3.3585, "step": 11220 }, { "epoch": 0.11418660481770833, "grad_norm": 13.207276344299316, "learning_rate": 4.8433383863842065e-06, "loss": 3.2574, "step": 11225 }, { "epoch": 0.11423746744791667, "grad_norm": 13.37678050994873, "learning_rate": 4.8431990264828775e-06, "loss": 3.0765, "step": 11230 }, { "epoch": 0.114288330078125, "grad_norm": 10.288887977600098, "learning_rate": 4.843059606631358e-06, "loss": 3.4786, "step": 11235 }, { "epoch": 0.11433919270833333, "grad_norm": 9.490957260131836, "learning_rate": 4.842920126833212e-06, "loss": 3.0409, "step": 11240 }, { "epoch": 0.11439005533854167, "grad_norm": 12.170294761657715, "learning_rate": 4.842780587092011e-06, "loss": 3.4692, "step": 11245 }, { "epoch": 0.11444091796875, "grad_norm": 8.890890121459961, "learning_rate": 4.842640987411323e-06, "loss": 3.1144, "step": 11250 }, { "epoch": 0.11449178059895833, "grad_norm": 10.649124145507812, "learning_rate": 4.842501327794722e-06, "loss": 3.3058, "step": 11255 }, { "epoch": 0.11454264322916667, "grad_norm": 15.376280784606934, "learning_rate": 4.842361608245779e-06, "loss": 3.2482, "step": 11260 }, { "epoch": 0.114593505859375, "grad_norm": 11.391185760498047, "learning_rate": 4.84222182876807e-06, "loss": 3.2936, "step": 11265 }, { "epoch": 0.11464436848958333, "grad_norm": 8.210448265075684, "learning_rate": 4.84208198936517e-06, "loss": 3.5134, "step": 11270 }, { "epoch": 0.11469523111979167, "grad_norm": 15.108521461486816, "learning_rate": 4.841942090040658e-06, "loss": 3.4495, "step": 11275 }, { "epoch": 0.11474609375, "grad_norm": 10.561015129089355, "learning_rate": 4.841802130798112e-06, "loss": 3.3369, "step": 11280 }, { "epoch": 0.11479695638020833, "grad_norm": 13.979567527770996, "learning_rate": 4.841662111641114e-06, "loss": 3.5918, "step": 11285 }, { "epoch": 0.11484781901041667, "grad_norm": 12.601395606994629, "learning_rate": 4.841522032573246e-06, "loss": 3.3726, "step": 11290 }, { "epoch": 0.114898681640625, "grad_norm": 14.680171966552734, "learning_rate": 4.841381893598092e-06, "loss": 3.3161, "step": 11295 }, { "epoch": 0.11494954427083333, "grad_norm": 11.404961585998535, "learning_rate": 4.841241694719236e-06, "loss": 3.9943, "step": 11300 }, { "epoch": 0.11500040690104167, "grad_norm": 7.569732666015625, "learning_rate": 4.841101435940268e-06, "loss": 3.2494, "step": 11305 }, { "epoch": 0.11505126953125, "grad_norm": 13.507386207580566, "learning_rate": 4.840961117264773e-06, "loss": 3.2946, "step": 11310 }, { "epoch": 0.11510213216145833, "grad_norm": 8.6294584274292, "learning_rate": 4.840820738696343e-06, "loss": 3.6376, "step": 11315 }, { "epoch": 0.11515299479166667, "grad_norm": 12.276251792907715, "learning_rate": 4.8406803002385696e-06, "loss": 3.5475, "step": 11320 }, { "epoch": 0.115203857421875, "grad_norm": 13.95095157623291, "learning_rate": 4.8405398018950465e-06, "loss": 3.4318, "step": 11325 }, { "epoch": 0.11525472005208333, "grad_norm": 8.981563568115234, "learning_rate": 4.840399243669366e-06, "loss": 3.3984, "step": 11330 }, { "epoch": 0.11530558268229167, "grad_norm": 17.332687377929688, "learning_rate": 4.840258625565126e-06, "loss": 3.4236, "step": 11335 }, { "epoch": 0.1153564453125, "grad_norm": 13.317326545715332, "learning_rate": 4.840117947585924e-06, "loss": 3.7795, "step": 11340 }, { "epoch": 0.11540730794270833, "grad_norm": 7.8791656494140625, "learning_rate": 4.8399772097353585e-06, "loss": 3.316, "step": 11345 }, { "epoch": 0.11545817057291667, "grad_norm": 11.29205322265625, "learning_rate": 4.839836412017031e-06, "loss": 3.5903, "step": 11350 }, { "epoch": 0.115509033203125, "grad_norm": 9.628116607666016, "learning_rate": 4.839695554434543e-06, "loss": 3.9415, "step": 11355 }, { "epoch": 0.11555989583333333, "grad_norm": 13.431588172912598, "learning_rate": 4.839554636991499e-06, "loss": 3.3724, "step": 11360 }, { "epoch": 0.11561075846354167, "grad_norm": 16.05211639404297, "learning_rate": 4.8394136596915044e-06, "loss": 3.6998, "step": 11365 }, { "epoch": 0.11566162109375, "grad_norm": 15.686810493469238, "learning_rate": 4.839272622538166e-06, "loss": 3.3292, "step": 11370 }, { "epoch": 0.11571248372395833, "grad_norm": 10.349496841430664, "learning_rate": 4.839131525535093e-06, "loss": 3.7189, "step": 11375 }, { "epoch": 0.11576334635416667, "grad_norm": 12.135592460632324, "learning_rate": 4.838990368685892e-06, "loss": 4.0048, "step": 11380 }, { "epoch": 0.115814208984375, "grad_norm": 16.356788635253906, "learning_rate": 4.838849151994178e-06, "loss": 3.6877, "step": 11385 }, { "epoch": 0.11586507161458333, "grad_norm": 16.019092559814453, "learning_rate": 4.838707875463563e-06, "loss": 3.4608, "step": 11390 }, { "epoch": 0.11591593424479167, "grad_norm": 9.560792922973633, "learning_rate": 4.838566539097661e-06, "loss": 3.3188, "step": 11395 }, { "epoch": 0.115966796875, "grad_norm": 14.858901023864746, "learning_rate": 4.838425142900089e-06, "loss": 3.589, "step": 11400 }, { "epoch": 0.11601765950520833, "grad_norm": 8.294513702392578, "learning_rate": 4.8382836868744635e-06, "loss": 3.3469, "step": 11405 }, { "epoch": 0.11606852213541667, "grad_norm": 10.487101554870605, "learning_rate": 4.838142171024404e-06, "loss": 3.6045, "step": 11410 }, { "epoch": 0.116119384765625, "grad_norm": 12.35394287109375, "learning_rate": 4.838000595353531e-06, "loss": 3.4698, "step": 11415 }, { "epoch": 0.11617024739583333, "grad_norm": 16.841279983520508, "learning_rate": 4.8378589598654675e-06, "loss": 3.3743, "step": 11420 }, { "epoch": 0.11622111002604167, "grad_norm": 8.052149772644043, "learning_rate": 4.837717264563837e-06, "loss": 3.7649, "step": 11425 }, { "epoch": 0.11627197265625, "grad_norm": 8.459037780761719, "learning_rate": 4.837575509452264e-06, "loss": 3.1424, "step": 11430 }, { "epoch": 0.11632283528645833, "grad_norm": 13.29317855834961, "learning_rate": 4.837433694534376e-06, "loss": 3.2511, "step": 11435 }, { "epoch": 0.11637369791666667, "grad_norm": 10.47177791595459, "learning_rate": 4.8372918198138e-06, "loss": 3.1985, "step": 11440 }, { "epoch": 0.116424560546875, "grad_norm": 12.912489891052246, "learning_rate": 4.837149885294167e-06, "loss": 3.414, "step": 11445 }, { "epoch": 0.11647542317708333, "grad_norm": 11.507333755493164, "learning_rate": 4.837007890979108e-06, "loss": 3.362, "step": 11450 }, { "epoch": 0.11652628580729167, "grad_norm": 11.616119384765625, "learning_rate": 4.836865836872257e-06, "loss": 3.4036, "step": 11455 }, { "epoch": 0.1165771484375, "grad_norm": 16.553955078125, "learning_rate": 4.8367237229772466e-06, "loss": 3.5742, "step": 11460 }, { "epoch": 0.11662801106770833, "grad_norm": 10.673224449157715, "learning_rate": 4.836581549297715e-06, "loss": 3.6775, "step": 11465 }, { "epoch": 0.11667887369791667, "grad_norm": 12.565034866333008, "learning_rate": 4.836439315837297e-06, "loss": 3.8003, "step": 11470 }, { "epoch": 0.116729736328125, "grad_norm": 8.820014953613281, "learning_rate": 4.8362970225996334e-06, "loss": 3.1671, "step": 11475 }, { "epoch": 0.11678059895833333, "grad_norm": 13.71123218536377, "learning_rate": 4.836154669588363e-06, "loss": 3.2272, "step": 11480 }, { "epoch": 0.11683146158854167, "grad_norm": 12.12161636352539, "learning_rate": 4.8360122568071304e-06, "loss": 3.6246, "step": 11485 }, { "epoch": 0.11688232421875, "grad_norm": 9.981605529785156, "learning_rate": 4.835869784259578e-06, "loss": 3.2711, "step": 11490 }, { "epoch": 0.11693318684895833, "grad_norm": 8.769081115722656, "learning_rate": 4.83572725194935e-06, "loss": 3.4075, "step": 11495 }, { "epoch": 0.11698404947916667, "grad_norm": 9.689691543579102, "learning_rate": 4.835584659880095e-06, "loss": 3.2837, "step": 11500 }, { "epoch": 0.117034912109375, "grad_norm": 10.834155082702637, "learning_rate": 4.835442008055459e-06, "loss": 3.1802, "step": 11505 }, { "epoch": 0.11708577473958333, "grad_norm": 13.873346328735352, "learning_rate": 4.835299296479093e-06, "loss": 3.8511, "step": 11510 }, { "epoch": 0.11713663736979167, "grad_norm": 10.343167304992676, "learning_rate": 4.835156525154648e-06, "loss": 3.5516, "step": 11515 }, { "epoch": 0.1171875, "grad_norm": 13.721023559570312, "learning_rate": 4.8350136940857775e-06, "loss": 3.4011, "step": 11520 }, { "epoch": 0.11723836263020833, "grad_norm": 15.717689514160156, "learning_rate": 4.834870803276134e-06, "loss": 3.5618, "step": 11525 }, { "epoch": 0.11728922526041667, "grad_norm": 16.386823654174805, "learning_rate": 4.834727852729375e-06, "loss": 3.3112, "step": 11530 }, { "epoch": 0.117340087890625, "grad_norm": 15.418145179748535, "learning_rate": 4.834584842449158e-06, "loss": 4.0253, "step": 11535 }, { "epoch": 0.11739095052083333, "grad_norm": 8.149100303649902, "learning_rate": 4.83444177243914e-06, "loss": 3.5978, "step": 11540 }, { "epoch": 0.11744181315104167, "grad_norm": 8.802492141723633, "learning_rate": 4.834298642702983e-06, "loss": 3.931, "step": 11545 }, { "epoch": 0.11749267578125, "grad_norm": 10.84029483795166, "learning_rate": 4.834155453244348e-06, "loss": 3.4902, "step": 11550 }, { "epoch": 0.11754353841145833, "grad_norm": 13.952017784118652, "learning_rate": 4.8340122040669e-06, "loss": 3.3633, "step": 11555 }, { "epoch": 0.11759440104166667, "grad_norm": 9.742794036865234, "learning_rate": 4.833868895174303e-06, "loss": 3.46, "step": 11560 }, { "epoch": 0.117645263671875, "grad_norm": 9.079483032226562, "learning_rate": 4.833725526570223e-06, "loss": 3.2567, "step": 11565 }, { "epoch": 0.11769612630208333, "grad_norm": 11.4492826461792, "learning_rate": 4.833582098258328e-06, "loss": 3.332, "step": 11570 }, { "epoch": 0.11774698893229167, "grad_norm": 13.898246765136719, "learning_rate": 4.833438610242289e-06, "loss": 3.6246, "step": 11575 }, { "epoch": 0.1177978515625, "grad_norm": 18.6462459564209, "learning_rate": 4.833295062525775e-06, "loss": 4.2665, "step": 11580 }, { "epoch": 0.11784871419270833, "grad_norm": 15.118053436279297, "learning_rate": 4.833151455112462e-06, "loss": 3.4488, "step": 11585 }, { "epoch": 0.11789957682291667, "grad_norm": 12.25283432006836, "learning_rate": 4.833007788006021e-06, "loss": 3.3655, "step": 11590 }, { "epoch": 0.117950439453125, "grad_norm": 12.915300369262695, "learning_rate": 4.832864061210128e-06, "loss": 3.2749, "step": 11595 }, { "epoch": 0.11800130208333333, "grad_norm": 13.077988624572754, "learning_rate": 4.832720274728462e-06, "loss": 3.7166, "step": 11600 }, { "epoch": 0.11805216471354167, "grad_norm": 12.226242065429688, "learning_rate": 4.8325764285647e-06, "loss": 3.4749, "step": 11605 }, { "epoch": 0.11810302734375, "grad_norm": 12.33906078338623, "learning_rate": 4.832432522722523e-06, "loss": 3.8167, "step": 11610 }, { "epoch": 0.11815388997395833, "grad_norm": 18.989513397216797, "learning_rate": 4.832288557205612e-06, "loss": 3.2801, "step": 11615 }, { "epoch": 0.11820475260416667, "grad_norm": 13.292867660522461, "learning_rate": 4.832144532017653e-06, "loss": 3.4304, "step": 11620 }, { "epoch": 0.118255615234375, "grad_norm": 13.21804141998291, "learning_rate": 4.832000447162328e-06, "loss": 3.1607, "step": 11625 }, { "epoch": 0.11830647786458333, "grad_norm": 9.76976203918457, "learning_rate": 4.8318563026433244e-06, "loss": 3.6529, "step": 11630 }, { "epoch": 0.11835734049479167, "grad_norm": 14.536739349365234, "learning_rate": 4.831712098464329e-06, "loss": 3.216, "step": 11635 }, { "epoch": 0.118408203125, "grad_norm": 7.9985880851745605, "learning_rate": 4.831567834629033e-06, "loss": 3.0674, "step": 11640 }, { "epoch": 0.11845906575520833, "grad_norm": 11.221656799316406, "learning_rate": 4.831423511141127e-06, "loss": 3.3163, "step": 11645 }, { "epoch": 0.11850992838541667, "grad_norm": 16.915536880493164, "learning_rate": 4.831279128004303e-06, "loss": 3.6208, "step": 11650 }, { "epoch": 0.118560791015625, "grad_norm": 11.9306058883667, "learning_rate": 4.831134685222255e-06, "loss": 3.6851, "step": 11655 }, { "epoch": 0.11861165364583333, "grad_norm": 10.024066925048828, "learning_rate": 4.8309901827986785e-06, "loss": 3.4386, "step": 11660 }, { "epoch": 0.11866251627604167, "grad_norm": 14.097890853881836, "learning_rate": 4.83084562073727e-06, "loss": 3.3924, "step": 11665 }, { "epoch": 0.11871337890625, "grad_norm": 17.218984603881836, "learning_rate": 4.83070099904173e-06, "loss": 3.3288, "step": 11670 }, { "epoch": 0.11876424153645833, "grad_norm": 9.430810928344727, "learning_rate": 4.830556317715757e-06, "loss": 3.1723, "step": 11675 }, { "epoch": 0.11881510416666667, "grad_norm": 16.22243309020996, "learning_rate": 4.830411576763052e-06, "loss": 3.2928, "step": 11680 }, { "epoch": 0.118865966796875, "grad_norm": 14.999611854553223, "learning_rate": 4.83026677618732e-06, "loss": 3.5795, "step": 11685 }, { "epoch": 0.11891682942708333, "grad_norm": 13.795806884765625, "learning_rate": 4.830121915992265e-06, "loss": 3.6035, "step": 11690 }, { "epoch": 0.11896769205729167, "grad_norm": 11.390329360961914, "learning_rate": 4.829976996181593e-06, "loss": 3.6227, "step": 11695 }, { "epoch": 0.1190185546875, "grad_norm": 13.322566986083984, "learning_rate": 4.829832016759012e-06, "loss": 3.2774, "step": 11700 }, { "epoch": 0.11906941731770833, "grad_norm": 13.909505844116211, "learning_rate": 4.829686977728231e-06, "loss": 3.3357, "step": 11705 }, { "epoch": 0.11912027994791667, "grad_norm": 12.06095027923584, "learning_rate": 4.82954187909296e-06, "loss": 3.9097, "step": 11710 }, { "epoch": 0.119171142578125, "grad_norm": 11.268613815307617, "learning_rate": 4.829396720856913e-06, "loss": 3.2764, "step": 11715 }, { "epoch": 0.11922200520833333, "grad_norm": 8.79672622680664, "learning_rate": 4.829251503023803e-06, "loss": 3.587, "step": 11720 }, { "epoch": 0.11927286783854167, "grad_norm": 15.61196231842041, "learning_rate": 4.8291062255973455e-06, "loss": 3.2073, "step": 11725 }, { "epoch": 0.11932373046875, "grad_norm": 8.96154499053955, "learning_rate": 4.828960888581256e-06, "loss": 3.4395, "step": 11730 }, { "epoch": 0.11937459309895833, "grad_norm": 13.609565734863281, "learning_rate": 4.828815491979256e-06, "loss": 3.3523, "step": 11735 }, { "epoch": 0.11942545572916667, "grad_norm": 8.735452651977539, "learning_rate": 4.828670035795063e-06, "loss": 3.3582, "step": 11740 }, { "epoch": 0.119476318359375, "grad_norm": 13.924631118774414, "learning_rate": 4.828524520032399e-06, "loss": 3.3311, "step": 11745 }, { "epoch": 0.11952718098958333, "grad_norm": 13.989679336547852, "learning_rate": 4.828378944694987e-06, "loss": 3.1395, "step": 11750 }, { "epoch": 0.11957804361979167, "grad_norm": 17.6063289642334, "learning_rate": 4.828233309786552e-06, "loss": 3.2459, "step": 11755 }, { "epoch": 0.11962890625, "grad_norm": 14.077485084533691, "learning_rate": 4.828087615310819e-06, "loss": 3.5539, "step": 11760 }, { "epoch": 0.11967976888020833, "grad_norm": 11.900781631469727, "learning_rate": 4.8279418612715165e-06, "loss": 3.3846, "step": 11765 }, { "epoch": 0.11973063151041667, "grad_norm": 9.877041816711426, "learning_rate": 4.8277960476723726e-06, "loss": 3.7396, "step": 11770 }, { "epoch": 0.119781494140625, "grad_norm": 16.47829818725586, "learning_rate": 4.82765017451712e-06, "loss": 3.1649, "step": 11775 }, { "epoch": 0.11983235677083333, "grad_norm": 12.586395263671875, "learning_rate": 4.827504241809488e-06, "loss": 3.513, "step": 11780 }, { "epoch": 0.11988321940104167, "grad_norm": 12.90040111541748, "learning_rate": 4.827358249553213e-06, "loss": 3.5154, "step": 11785 }, { "epoch": 0.11993408203125, "grad_norm": 7.8959503173828125, "learning_rate": 4.8272121977520266e-06, "loss": 3.5876, "step": 11790 }, { "epoch": 0.11998494466145833, "grad_norm": 14.199788093566895, "learning_rate": 4.82706608640967e-06, "loss": 3.3843, "step": 11795 }, { "epoch": 0.12003580729166667, "grad_norm": 14.042448043823242, "learning_rate": 4.826919915529878e-06, "loss": 2.9448, "step": 11800 }, { "epoch": 0.120086669921875, "grad_norm": 14.99729061126709, "learning_rate": 4.826773685116392e-06, "loss": 3.3641, "step": 11805 }, { "epoch": 0.12013753255208333, "grad_norm": 8.840888023376465, "learning_rate": 4.826627395172952e-06, "loss": 3.1467, "step": 11810 }, { "epoch": 0.12018839518229167, "grad_norm": 8.198177337646484, "learning_rate": 4.8264810457033025e-06, "loss": 3.5442, "step": 11815 }, { "epoch": 0.1202392578125, "grad_norm": 10.527442932128906, "learning_rate": 4.826334636711186e-06, "loss": 3.431, "step": 11820 }, { "epoch": 0.12029012044270833, "grad_norm": 11.18076229095459, "learning_rate": 4.82618816820035e-06, "loss": 3.2887, "step": 11825 }, { "epoch": 0.12034098307291667, "grad_norm": 13.812418937683105, "learning_rate": 4.826041640174542e-06, "loss": 3.5518, "step": 11830 }, { "epoch": 0.120391845703125, "grad_norm": 17.44132423400879, "learning_rate": 4.825895052637508e-06, "loss": 3.629, "step": 11835 }, { "epoch": 0.12044270833333333, "grad_norm": 13.55534553527832, "learning_rate": 4.825748405593001e-06, "loss": 3.4921, "step": 11840 }, { "epoch": 0.12049357096354167, "grad_norm": 10.288969993591309, "learning_rate": 4.825601699044773e-06, "loss": 3.876, "step": 11845 }, { "epoch": 0.12054443359375, "grad_norm": 15.818678855895996, "learning_rate": 4.825454932996576e-06, "loss": 3.2189, "step": 11850 }, { "epoch": 0.12059529622395833, "grad_norm": 8.30019474029541, "learning_rate": 4.825308107452166e-06, "loss": 3.3629, "step": 11855 }, { "epoch": 0.12064615885416667, "grad_norm": 8.714160919189453, "learning_rate": 4.825161222415299e-06, "loss": 3.2909, "step": 11860 }, { "epoch": 0.120697021484375, "grad_norm": 8.198768615722656, "learning_rate": 4.825014277889733e-06, "loss": 3.5741, "step": 11865 }, { "epoch": 0.12074788411458333, "grad_norm": 14.308550834655762, "learning_rate": 4.824867273879229e-06, "loss": 3.5978, "step": 11870 }, { "epoch": 0.12079874674479167, "grad_norm": 11.020129203796387, "learning_rate": 4.8247202103875455e-06, "loss": 3.3011, "step": 11875 }, { "epoch": 0.120849609375, "grad_norm": 12.168553352355957, "learning_rate": 4.824573087418447e-06, "loss": 3.4922, "step": 11880 }, { "epoch": 0.12090047200520833, "grad_norm": 8.048748016357422, "learning_rate": 4.824425904975697e-06, "loss": 3.7659, "step": 11885 }, { "epoch": 0.12095133463541667, "grad_norm": 13.554608345031738, "learning_rate": 4.8242786630630615e-06, "loss": 3.2647, "step": 11890 }, { "epoch": 0.121002197265625, "grad_norm": 14.179122924804688, "learning_rate": 4.824131361684308e-06, "loss": 3.3172, "step": 11895 }, { "epoch": 0.12105305989583333, "grad_norm": 17.352275848388672, "learning_rate": 4.823984000843203e-06, "loss": 3.9405, "step": 11900 }, { "epoch": 0.12110392252604167, "grad_norm": 7.687533378601074, "learning_rate": 4.823836580543519e-06, "loss": 3.6776, "step": 11905 }, { "epoch": 0.12115478515625, "grad_norm": 18.918060302734375, "learning_rate": 4.823689100789026e-06, "loss": 3.6594, "step": 11910 }, { "epoch": 0.12120564778645833, "grad_norm": 16.174633026123047, "learning_rate": 4.823541561583499e-06, "loss": 3.4054, "step": 11915 }, { "epoch": 0.12125651041666667, "grad_norm": 12.678868293762207, "learning_rate": 4.8233939629307115e-06, "loss": 3.4099, "step": 11920 }, { "epoch": 0.121307373046875, "grad_norm": 12.389989852905273, "learning_rate": 4.82324630483444e-06, "loss": 3.6306, "step": 11925 }, { "epoch": 0.12135823567708333, "grad_norm": 8.188562393188477, "learning_rate": 4.823098587298463e-06, "loss": 3.2383, "step": 11930 }, { "epoch": 0.12140909830729167, "grad_norm": 8.033101081848145, "learning_rate": 4.8229508103265595e-06, "loss": 3.322, "step": 11935 }, { "epoch": 0.1214599609375, "grad_norm": 11.021990776062012, "learning_rate": 4.822802973922509e-06, "loss": 3.704, "step": 11940 }, { "epoch": 0.12151082356770833, "grad_norm": 6.139153003692627, "learning_rate": 4.822655078090096e-06, "loss": 3.2763, "step": 11945 }, { "epoch": 0.12156168619791667, "grad_norm": 13.162734985351562, "learning_rate": 4.822507122833104e-06, "loss": 3.8643, "step": 11950 }, { "epoch": 0.121612548828125, "grad_norm": 11.088370323181152, "learning_rate": 4.8223591081553154e-06, "loss": 3.5046, "step": 11955 }, { "epoch": 0.12166341145833333, "grad_norm": 13.00243091583252, "learning_rate": 4.822211034060521e-06, "loss": 4.2186, "step": 11960 }, { "epoch": 0.12171427408854167, "grad_norm": 11.786962509155273, "learning_rate": 4.822062900552507e-06, "loss": 3.4922, "step": 11965 }, { "epoch": 0.12176513671875, "grad_norm": 8.783388137817383, "learning_rate": 4.821914707635065e-06, "loss": 3.3294, "step": 11970 }, { "epoch": 0.12181599934895833, "grad_norm": 9.021117210388184, "learning_rate": 4.821766455311986e-06, "loss": 3.4608, "step": 11975 }, { "epoch": 0.12186686197916667, "grad_norm": 11.563819885253906, "learning_rate": 4.821618143587062e-06, "loss": 3.4183, "step": 11980 }, { "epoch": 0.121917724609375, "grad_norm": 7.746058464050293, "learning_rate": 4.821469772464087e-06, "loss": 3.0433, "step": 11985 }, { "epoch": 0.12196858723958333, "grad_norm": 16.20210838317871, "learning_rate": 4.821321341946859e-06, "loss": 3.6232, "step": 11990 }, { "epoch": 0.12201944986979167, "grad_norm": 14.226766586303711, "learning_rate": 4.821172852039175e-06, "loss": 3.5745, "step": 11995 }, { "epoch": 0.1220703125, "grad_norm": 13.494339942932129, "learning_rate": 4.821024302744834e-06, "loss": 3.5401, "step": 12000 }, { "epoch": 0.12212117513020833, "grad_norm": 9.108319282531738, "learning_rate": 4.820875694067635e-06, "loss": 3.2929, "step": 12005 }, { "epoch": 0.12217203776041667, "grad_norm": 9.119816780090332, "learning_rate": 4.820727026011382e-06, "loss": 2.9584, "step": 12010 }, { "epoch": 0.122222900390625, "grad_norm": 14.945995330810547, "learning_rate": 4.820578298579879e-06, "loss": 3.4583, "step": 12015 }, { "epoch": 0.12227376302083333, "grad_norm": 12.933895111083984, "learning_rate": 4.820429511776929e-06, "loss": 3.5372, "step": 12020 }, { "epoch": 0.12232462565104167, "grad_norm": 14.16519546508789, "learning_rate": 4.820280665606341e-06, "loss": 3.3085, "step": 12025 }, { "epoch": 0.12237548828125, "grad_norm": 14.623350143432617, "learning_rate": 4.820131760071921e-06, "loss": 2.9811, "step": 12030 }, { "epoch": 0.12242635091145833, "grad_norm": 13.574602127075195, "learning_rate": 4.8199827951774805e-06, "loss": 3.5069, "step": 12035 }, { "epoch": 0.12247721354166667, "grad_norm": 15.912371635437012, "learning_rate": 4.8198337709268305e-06, "loss": 3.6601, "step": 12040 }, { "epoch": 0.122528076171875, "grad_norm": 15.12542724609375, "learning_rate": 4.819684687323783e-06, "loss": 3.3761, "step": 12045 }, { "epoch": 0.12257893880208333, "grad_norm": 8.16356372833252, "learning_rate": 4.819535544372153e-06, "loss": 3.474, "step": 12050 }, { "epoch": 0.12262980143229167, "grad_norm": 9.369928359985352, "learning_rate": 4.819386342075755e-06, "loss": 3.548, "step": 12055 }, { "epoch": 0.1226806640625, "grad_norm": 16.48990249633789, "learning_rate": 4.8192370804384075e-06, "loss": 3.5765, "step": 12060 }, { "epoch": 0.12273152669270833, "grad_norm": 14.442497253417969, "learning_rate": 4.819087759463929e-06, "loss": 3.3908, "step": 12065 }, { "epoch": 0.12278238932291667, "grad_norm": 15.705107688903809, "learning_rate": 4.81893837915614e-06, "loss": 3.6492, "step": 12070 }, { "epoch": 0.122833251953125, "grad_norm": 15.82107162475586, "learning_rate": 4.818788939518863e-06, "loss": 3.2617, "step": 12075 }, { "epoch": 0.12288411458333333, "grad_norm": 16.034608840942383, "learning_rate": 4.8186394405559186e-06, "loss": 3.2683, "step": 12080 }, { "epoch": 0.12293497721354167, "grad_norm": 10.579744338989258, "learning_rate": 4.818489882271135e-06, "loss": 3.2758, "step": 12085 }, { "epoch": 0.12298583984375, "grad_norm": 12.461812019348145, "learning_rate": 4.818340264668337e-06, "loss": 3.2529, "step": 12090 }, { "epoch": 0.12303670247395833, "grad_norm": 17.07436752319336, "learning_rate": 4.8181905877513535e-06, "loss": 3.7906, "step": 12095 }, { "epoch": 0.12308756510416667, "grad_norm": 11.529945373535156, "learning_rate": 4.818040851524013e-06, "loss": 3.5339, "step": 12100 }, { "epoch": 0.123138427734375, "grad_norm": 15.79837417602539, "learning_rate": 4.817891055990146e-06, "loss": 3.4221, "step": 12105 }, { "epoch": 0.12318929036458333, "grad_norm": 15.319779396057129, "learning_rate": 4.817741201153587e-06, "loss": 3.4168, "step": 12110 }, { "epoch": 0.12324015299479167, "grad_norm": 13.032570838928223, "learning_rate": 4.817591287018168e-06, "loss": 3.5855, "step": 12115 }, { "epoch": 0.123291015625, "grad_norm": 10.453932762145996, "learning_rate": 4.817441313587725e-06, "loss": 3.3555, "step": 12120 }, { "epoch": 0.12334187825520833, "grad_norm": 15.642955780029297, "learning_rate": 4.817291280866096e-06, "loss": 3.5448, "step": 12125 }, { "epoch": 0.12339274088541667, "grad_norm": 10.044322967529297, "learning_rate": 4.8171411888571185e-06, "loss": 3.3746, "step": 12130 }, { "epoch": 0.123443603515625, "grad_norm": 11.619243621826172, "learning_rate": 4.816991037564632e-06, "loss": 3.6049, "step": 12135 }, { "epoch": 0.12349446614583333, "grad_norm": 9.354294776916504, "learning_rate": 4.81684082699248e-06, "loss": 3.5606, "step": 12140 }, { "epoch": 0.12354532877604167, "grad_norm": 15.51186752319336, "learning_rate": 4.816690557144505e-06, "loss": 3.6708, "step": 12145 }, { "epoch": 0.12359619140625, "grad_norm": 13.874622344970703, "learning_rate": 4.816540228024551e-06, "loss": 3.5003, "step": 12150 }, { "epoch": 0.12364705403645833, "grad_norm": 11.789558410644531, "learning_rate": 4.816389839636463e-06, "loss": 3.2361, "step": 12155 }, { "epoch": 0.12369791666666667, "grad_norm": 14.9029541015625, "learning_rate": 4.816239391984091e-06, "loss": 3.8518, "step": 12160 }, { "epoch": 0.123748779296875, "grad_norm": 16.188743591308594, "learning_rate": 4.8160888850712835e-06, "loss": 3.4669, "step": 12165 }, { "epoch": 0.12379964192708333, "grad_norm": 16.114046096801758, "learning_rate": 4.81593831890189e-06, "loss": 3.329, "step": 12170 }, { "epoch": 0.12385050455729167, "grad_norm": 9.05451774597168, "learning_rate": 4.815787693479764e-06, "loss": 3.1477, "step": 12175 }, { "epoch": 0.1239013671875, "grad_norm": 13.087759971618652, "learning_rate": 4.815637008808759e-06, "loss": 3.4289, "step": 12180 }, { "epoch": 0.12395222981770833, "grad_norm": 11.878509521484375, "learning_rate": 4.81548626489273e-06, "loss": 3.3253, "step": 12185 }, { "epoch": 0.12400309244791667, "grad_norm": 23.86294937133789, "learning_rate": 4.815335461735534e-06, "loss": 3.7135, "step": 12190 }, { "epoch": 0.124053955078125, "grad_norm": 12.333020210266113, "learning_rate": 4.815184599341029e-06, "loss": 3.5888, "step": 12195 }, { "epoch": 0.12410481770833333, "grad_norm": 11.767260551452637, "learning_rate": 4.8150336777130736e-06, "loss": 3.2373, "step": 12200 }, { "epoch": 0.12415568033854167, "grad_norm": 11.746440887451172, "learning_rate": 4.8148826968555306e-06, "loss": 3.3153, "step": 12205 }, { "epoch": 0.12420654296875, "grad_norm": 12.361739158630371, "learning_rate": 4.814731656772263e-06, "loss": 3.4058, "step": 12210 }, { "epoch": 0.12425740559895833, "grad_norm": 10.730027198791504, "learning_rate": 4.8145805574671346e-06, "loss": 3.4855, "step": 12215 }, { "epoch": 0.12430826822916667, "grad_norm": 16.28338623046875, "learning_rate": 4.814429398944011e-06, "loss": 3.5275, "step": 12220 }, { "epoch": 0.124359130859375, "grad_norm": 16.588144302368164, "learning_rate": 4.81427818120676e-06, "loss": 3.6741, "step": 12225 }, { "epoch": 0.12440999348958333, "grad_norm": 15.339471817016602, "learning_rate": 4.81412690425925e-06, "loss": 3.9718, "step": 12230 }, { "epoch": 0.12446085611979167, "grad_norm": 8.622764587402344, "learning_rate": 4.8139755681053526e-06, "loss": 3.5536, "step": 12235 }, { "epoch": 0.12451171875, "grad_norm": 13.108068466186523, "learning_rate": 4.813824172748938e-06, "loss": 3.283, "step": 12240 }, { "epoch": 0.12456258138020833, "grad_norm": 12.999624252319336, "learning_rate": 4.8136727181938804e-06, "loss": 3.7082, "step": 12245 }, { "epoch": 0.12461344401041667, "grad_norm": 9.05246639251709, "learning_rate": 4.813521204444055e-06, "loss": 3.655, "step": 12250 }, { "epoch": 0.124664306640625, "grad_norm": 11.840641021728516, "learning_rate": 4.8133696315033375e-06, "loss": 3.4432, "step": 12255 }, { "epoch": 0.12471516927083333, "grad_norm": 12.203230857849121, "learning_rate": 4.813217999375606e-06, "loss": 3.1917, "step": 12260 }, { "epoch": 0.12476603190104167, "grad_norm": 10.946849822998047, "learning_rate": 4.813066308064741e-06, "loss": 3.2396, "step": 12265 }, { "epoch": 0.12481689453125, "grad_norm": 9.767146110534668, "learning_rate": 4.812914557574622e-06, "loss": 3.337, "step": 12270 }, { "epoch": 0.12486775716145833, "grad_norm": 10.88211727142334, "learning_rate": 4.8127627479091336e-06, "loss": 3.2582, "step": 12275 }, { "epoch": 0.12491861979166667, "grad_norm": 9.982248306274414, "learning_rate": 4.812610879072157e-06, "loss": 3.2663, "step": 12280 }, { "epoch": 0.124969482421875, "grad_norm": 8.785655975341797, "learning_rate": 4.8124589510675805e-06, "loss": 3.1978, "step": 12285 }, { "epoch": 0.12502034505208334, "grad_norm": 13.370884895324707, "learning_rate": 4.812306963899289e-06, "loss": 3.5219, "step": 12290 }, { "epoch": 0.12507120768229166, "grad_norm": 14.19118595123291, "learning_rate": 4.812154917571172e-06, "loss": 3.1948, "step": 12295 }, { "epoch": 0.1251220703125, "grad_norm": 15.058856010437012, "learning_rate": 4.81200281208712e-06, "loss": 3.4093, "step": 12300 }, { "epoch": 0.12517293294270834, "grad_norm": 17.146102905273438, "learning_rate": 4.811850647451024e-06, "loss": 3.3073, "step": 12305 }, { "epoch": 0.12522379557291666, "grad_norm": 10.474042892456055, "learning_rate": 4.811698423666777e-06, "loss": 3.3589, "step": 12310 }, { "epoch": 0.125274658203125, "grad_norm": 12.146330833435059, "learning_rate": 4.811546140738273e-06, "loss": 3.4111, "step": 12315 }, { "epoch": 0.12532552083333334, "grad_norm": 8.893861770629883, "learning_rate": 4.811393798669409e-06, "loss": 3.4912, "step": 12320 }, { "epoch": 0.12537638346354166, "grad_norm": 14.769695281982422, "learning_rate": 4.811241397464083e-06, "loss": 3.459, "step": 12325 }, { "epoch": 0.12542724609375, "grad_norm": 16.10647964477539, "learning_rate": 4.811088937126194e-06, "loss": 3.3882, "step": 12330 }, { "epoch": 0.12547810872395834, "grad_norm": 14.999397277832031, "learning_rate": 4.8109364176596416e-06, "loss": 3.3708, "step": 12335 }, { "epoch": 0.12552897135416666, "grad_norm": 11.39329719543457, "learning_rate": 4.810783839068329e-06, "loss": 3.5732, "step": 12340 }, { "epoch": 0.125579833984375, "grad_norm": 10.63199234008789, "learning_rate": 4.81063120135616e-06, "loss": 3.5412, "step": 12345 }, { "epoch": 0.12563069661458334, "grad_norm": 9.905284881591797, "learning_rate": 4.81047850452704e-06, "loss": 3.1996, "step": 12350 }, { "epoch": 0.12568155924479166, "grad_norm": 15.250255584716797, "learning_rate": 4.810325748584873e-06, "loss": 3.3639, "step": 12355 }, { "epoch": 0.125732421875, "grad_norm": 12.014054298400879, "learning_rate": 4.8101729335335716e-06, "loss": 3.2804, "step": 12360 }, { "epoch": 0.12578328450520834, "grad_norm": 15.268871307373047, "learning_rate": 4.810020059377042e-06, "loss": 3.4151, "step": 12365 }, { "epoch": 0.12583414713541666, "grad_norm": 15.318325996398926, "learning_rate": 4.809867126119197e-06, "loss": 3.6018, "step": 12370 }, { "epoch": 0.125885009765625, "grad_norm": 13.372442245483398, "learning_rate": 4.8097141337639485e-06, "loss": 3.8433, "step": 12375 }, { "epoch": 0.12593587239583334, "grad_norm": 15.433815956115723, "learning_rate": 4.809561082315212e-06, "loss": 3.2409, "step": 12380 }, { "epoch": 0.12598673502604166, "grad_norm": 9.874568939208984, "learning_rate": 4.809407971776902e-06, "loss": 3.5737, "step": 12385 }, { "epoch": 0.12603759765625, "grad_norm": 13.617438316345215, "learning_rate": 4.809254802152937e-06, "loss": 3.598, "step": 12390 }, { "epoch": 0.12608846028645834, "grad_norm": 17.306371688842773, "learning_rate": 4.809101573447236e-06, "loss": 3.9022, "step": 12395 }, { "epoch": 0.12613932291666666, "grad_norm": 12.497845649719238, "learning_rate": 4.808948285663717e-06, "loss": 3.292, "step": 12400 }, { "epoch": 0.126190185546875, "grad_norm": 11.501578330993652, "learning_rate": 4.808794938806305e-06, "loss": 3.4564, "step": 12405 }, { "epoch": 0.12624104817708334, "grad_norm": 15.333441734313965, "learning_rate": 4.808641532878921e-06, "loss": 3.257, "step": 12410 }, { "epoch": 0.12629191080729166, "grad_norm": 9.874608993530273, "learning_rate": 4.80848806788549e-06, "loss": 3.2448, "step": 12415 }, { "epoch": 0.1263427734375, "grad_norm": 9.775317192077637, "learning_rate": 4.808334543829939e-06, "loss": 3.4979, "step": 12420 }, { "epoch": 0.12639363606770834, "grad_norm": 9.998175621032715, "learning_rate": 4.808180960716196e-06, "loss": 3.1983, "step": 12425 }, { "epoch": 0.12644449869791666, "grad_norm": 10.001486778259277, "learning_rate": 4.808027318548191e-06, "loss": 3.2788, "step": 12430 }, { "epoch": 0.126495361328125, "grad_norm": 15.605623245239258, "learning_rate": 4.807873617329854e-06, "loss": 3.7982, "step": 12435 }, { "epoch": 0.12654622395833334, "grad_norm": 16.90433692932129, "learning_rate": 4.807719857065117e-06, "loss": 3.7252, "step": 12440 }, { "epoch": 0.12659708658854166, "grad_norm": 9.86048412322998, "learning_rate": 4.807566037757914e-06, "loss": 3.3411, "step": 12445 }, { "epoch": 0.12664794921875, "grad_norm": 9.026899337768555, "learning_rate": 4.807412159412181e-06, "loss": 3.1362, "step": 12450 }, { "epoch": 0.12669881184895834, "grad_norm": 11.268509864807129, "learning_rate": 4.807258222031855e-06, "loss": 3.2253, "step": 12455 }, { "epoch": 0.12674967447916666, "grad_norm": 14.036666870117188, "learning_rate": 4.807104225620875e-06, "loss": 3.1731, "step": 12460 }, { "epoch": 0.126800537109375, "grad_norm": 9.418083190917969, "learning_rate": 4.8069501701831795e-06, "loss": 3.1714, "step": 12465 }, { "epoch": 0.12685139973958334, "grad_norm": 12.404669761657715, "learning_rate": 4.8067960557227114e-06, "loss": 3.6047, "step": 12470 }, { "epoch": 0.12690226236979166, "grad_norm": 12.391560554504395, "learning_rate": 4.806641882243412e-06, "loss": 3.4368, "step": 12475 }, { "epoch": 0.126953125, "grad_norm": 13.154583930969238, "learning_rate": 4.806487649749228e-06, "loss": 4.0829, "step": 12480 }, { "epoch": 0.12700398763020834, "grad_norm": 13.884321212768555, "learning_rate": 4.806333358244103e-06, "loss": 3.1453, "step": 12485 }, { "epoch": 0.12705485026041666, "grad_norm": 11.85291862487793, "learning_rate": 4.806179007731986e-06, "loss": 3.4844, "step": 12490 }, { "epoch": 0.127105712890625, "grad_norm": 12.22746753692627, "learning_rate": 4.806024598216826e-06, "loss": 3.7238, "step": 12495 }, { "epoch": 0.12715657552083334, "grad_norm": 14.834999084472656, "learning_rate": 4.805870129702573e-06, "loss": 3.7607, "step": 12500 }, { "epoch": 0.12720743815104166, "grad_norm": 18.713239669799805, "learning_rate": 4.8057156021931795e-06, "loss": 3.5314, "step": 12505 }, { "epoch": 0.12725830078125, "grad_norm": 15.553114891052246, "learning_rate": 4.8055610156925984e-06, "loss": 3.9413, "step": 12510 }, { "epoch": 0.12730916341145834, "grad_norm": 12.916542053222656, "learning_rate": 4.805406370204785e-06, "loss": 3.3368, "step": 12515 }, { "epoch": 0.12736002604166666, "grad_norm": 13.694195747375488, "learning_rate": 4.805251665733696e-06, "loss": 3.2709, "step": 12520 }, { "epoch": 0.127410888671875, "grad_norm": 10.602334976196289, "learning_rate": 4.805096902283291e-06, "loss": 3.3952, "step": 12525 }, { "epoch": 0.12746175130208334, "grad_norm": 14.200652122497559, "learning_rate": 4.804942079857527e-06, "loss": 3.5215, "step": 12530 }, { "epoch": 0.12751261393229166, "grad_norm": 7.785111427307129, "learning_rate": 4.804787198460366e-06, "loss": 3.3147, "step": 12535 }, { "epoch": 0.1275634765625, "grad_norm": 12.794201850891113, "learning_rate": 4.804632258095772e-06, "loss": 3.4677, "step": 12540 }, { "epoch": 0.12761433919270834, "grad_norm": 12.663290977478027, "learning_rate": 4.804477258767707e-06, "loss": 3.6271, "step": 12545 }, { "epoch": 0.12766520182291666, "grad_norm": 10.109106063842773, "learning_rate": 4.804322200480138e-06, "loss": 3.5064, "step": 12550 }, { "epoch": 0.127716064453125, "grad_norm": 15.36133861541748, "learning_rate": 4.804167083237031e-06, "loss": 3.4629, "step": 12555 }, { "epoch": 0.12776692708333334, "grad_norm": 12.881776809692383, "learning_rate": 4.804011907042356e-06, "loss": 3.4292, "step": 12560 }, { "epoch": 0.12781778971354166, "grad_norm": 8.816761016845703, "learning_rate": 4.8038566719000825e-06, "loss": 3.0355, "step": 12565 }, { "epoch": 0.12786865234375, "grad_norm": 15.458996772766113, "learning_rate": 4.803701377814181e-06, "loss": 4.0244, "step": 12570 }, { "epoch": 0.12791951497395834, "grad_norm": 9.55286979675293, "learning_rate": 4.803546024788628e-06, "loss": 3.4125, "step": 12575 }, { "epoch": 0.12797037760416666, "grad_norm": 15.313224792480469, "learning_rate": 4.803390612827394e-06, "loss": 3.4685, "step": 12580 }, { "epoch": 0.128021240234375, "grad_norm": 13.189021110534668, "learning_rate": 4.803235141934458e-06, "loss": 3.3217, "step": 12585 }, { "epoch": 0.12807210286458334, "grad_norm": 16.103500366210938, "learning_rate": 4.803079612113796e-06, "loss": 3.1009, "step": 12590 }, { "epoch": 0.12812296549479166, "grad_norm": 16.239477157592773, "learning_rate": 4.802924023369388e-06, "loss": 3.4193, "step": 12595 }, { "epoch": 0.128173828125, "grad_norm": 11.9988374710083, "learning_rate": 4.802768375705216e-06, "loss": 3.6531, "step": 12600 }, { "epoch": 0.12822469075520834, "grad_norm": 11.093536376953125, "learning_rate": 4.802612669125261e-06, "loss": 3.2401, "step": 12605 }, { "epoch": 0.12827555338541666, "grad_norm": 10.944393157958984, "learning_rate": 4.8024569036335055e-06, "loss": 3.2432, "step": 12610 }, { "epoch": 0.128326416015625, "grad_norm": 14.31828498840332, "learning_rate": 4.802301079233936e-06, "loss": 3.6411, "step": 12615 }, { "epoch": 0.12837727864583334, "grad_norm": 9.36100959777832, "learning_rate": 4.802145195930539e-06, "loss": 3.4192, "step": 12620 }, { "epoch": 0.12842814127604166, "grad_norm": 13.963410377502441, "learning_rate": 4.801989253727303e-06, "loss": 3.1725, "step": 12625 }, { "epoch": 0.12847900390625, "grad_norm": 16.763933181762695, "learning_rate": 4.801833252628218e-06, "loss": 3.3481, "step": 12630 }, { "epoch": 0.12852986653645834, "grad_norm": 12.801989555358887, "learning_rate": 4.801677192637275e-06, "loss": 3.5976, "step": 12635 }, { "epoch": 0.12858072916666666, "grad_norm": 81.20991516113281, "learning_rate": 4.801521073758466e-06, "loss": 3.3127, "step": 12640 }, { "epoch": 0.128631591796875, "grad_norm": 13.878232955932617, "learning_rate": 4.801364895995786e-06, "loss": 3.313, "step": 12645 }, { "epoch": 0.12868245442708334, "grad_norm": 10.5735445022583, "learning_rate": 4.8012086593532306e-06, "loss": 3.298, "step": 12650 }, { "epoch": 0.12873331705729166, "grad_norm": 10.607198715209961, "learning_rate": 4.8010523638347965e-06, "loss": 3.4635, "step": 12655 }, { "epoch": 0.1287841796875, "grad_norm": 10.28955078125, "learning_rate": 4.800896009444484e-06, "loss": 3.5608, "step": 12660 }, { "epoch": 0.12883504231770834, "grad_norm": 12.767330169677734, "learning_rate": 4.800739596186293e-06, "loss": 3.222, "step": 12665 }, { "epoch": 0.12888590494791666, "grad_norm": 15.003509521484375, "learning_rate": 4.800583124064223e-06, "loss": 3.2579, "step": 12670 }, { "epoch": 0.128936767578125, "grad_norm": 14.754562377929688, "learning_rate": 4.80042659308228e-06, "loss": 3.2074, "step": 12675 }, { "epoch": 0.12898763020833334, "grad_norm": 15.33542251586914, "learning_rate": 4.800270003244467e-06, "loss": 3.2303, "step": 12680 }, { "epoch": 0.12903849283854166, "grad_norm": 14.25710391998291, "learning_rate": 4.800113354554793e-06, "loss": 3.8232, "step": 12685 }, { "epoch": 0.12908935546875, "grad_norm": 16.291183471679688, "learning_rate": 4.799956647017262e-06, "loss": 3.3102, "step": 12690 }, { "epoch": 0.12914021809895834, "grad_norm": 13.827568054199219, "learning_rate": 4.799799880635887e-06, "loss": 3.8675, "step": 12695 }, { "epoch": 0.12919108072916666, "grad_norm": 13.914209365844727, "learning_rate": 4.799643055414677e-06, "loss": 3.0558, "step": 12700 }, { "epoch": 0.129241943359375, "grad_norm": 10.144859313964844, "learning_rate": 4.799486171357644e-06, "loss": 3.4235, "step": 12705 }, { "epoch": 0.12929280598958334, "grad_norm": 10.29310417175293, "learning_rate": 4.799329228468802e-06, "loss": 3.4347, "step": 12710 }, { "epoch": 0.12934366861979166, "grad_norm": 13.560129165649414, "learning_rate": 4.7991722267521665e-06, "loss": 3.3288, "step": 12715 }, { "epoch": 0.12939453125, "grad_norm": 13.470560073852539, "learning_rate": 4.799015166211756e-06, "loss": 3.2676, "step": 12720 }, { "epoch": 0.12944539388020834, "grad_norm": 12.207547187805176, "learning_rate": 4.798858046851587e-06, "loss": 3.0864, "step": 12725 }, { "epoch": 0.12949625651041666, "grad_norm": 15.827969551086426, "learning_rate": 4.798700868675679e-06, "loss": 3.3807, "step": 12730 }, { "epoch": 0.129547119140625, "grad_norm": 14.415461540222168, "learning_rate": 4.798543631688054e-06, "loss": 3.0016, "step": 12735 }, { "epoch": 0.12959798177083334, "grad_norm": 7.998185157775879, "learning_rate": 4.798386335892735e-06, "loss": 3.2422, "step": 12740 }, { "epoch": 0.12964884440104166, "grad_norm": 7.472536087036133, "learning_rate": 4.798228981293747e-06, "loss": 3.4627, "step": 12745 }, { "epoch": 0.12969970703125, "grad_norm": 7.803865909576416, "learning_rate": 4.798071567895115e-06, "loss": 3.4333, "step": 12750 }, { "epoch": 0.12975056966145834, "grad_norm": 11.206137657165527, "learning_rate": 4.797914095700867e-06, "loss": 3.2908, "step": 12755 }, { "epoch": 0.12980143229166666, "grad_norm": 13.730634689331055, "learning_rate": 4.797756564715031e-06, "loss": 3.4254, "step": 12760 }, { "epoch": 0.129852294921875, "grad_norm": 13.240988731384277, "learning_rate": 4.797598974941638e-06, "loss": 3.8927, "step": 12765 }, { "epoch": 0.12990315755208334, "grad_norm": 14.986083030700684, "learning_rate": 4.79744132638472e-06, "loss": 2.9281, "step": 12770 }, { "epoch": 0.12995402018229166, "grad_norm": 12.508495330810547, "learning_rate": 4.79728361904831e-06, "loss": 3.2039, "step": 12775 }, { "epoch": 0.1300048828125, "grad_norm": 13.931479454040527, "learning_rate": 4.797125852936444e-06, "loss": 3.3995, "step": 12780 }, { "epoch": 0.13005574544270834, "grad_norm": 15.499288558959961, "learning_rate": 4.796968028053156e-06, "loss": 3.5259, "step": 12785 }, { "epoch": 0.13010660807291666, "grad_norm": 15.569916725158691, "learning_rate": 4.796810144402486e-06, "loss": 3.5487, "step": 12790 }, { "epoch": 0.130157470703125, "grad_norm": 11.951336860656738, "learning_rate": 4.796652201988474e-06, "loss": 3.1815, "step": 12795 }, { "epoch": 0.13020833333333334, "grad_norm": 10.30989933013916, "learning_rate": 4.796494200815158e-06, "loss": 3.4612, "step": 12800 }, { "epoch": 0.13025919596354166, "grad_norm": 10.12043571472168, "learning_rate": 4.796336140886584e-06, "loss": 3.9442, "step": 12805 }, { "epoch": 0.13031005859375, "grad_norm": 16.965572357177734, "learning_rate": 4.796178022206793e-06, "loss": 3.603, "step": 12810 }, { "epoch": 0.13036092122395834, "grad_norm": 12.111398696899414, "learning_rate": 4.796019844779831e-06, "loss": 3.6743, "step": 12815 }, { "epoch": 0.13041178385416666, "grad_norm": 14.302387237548828, "learning_rate": 4.795861608609747e-06, "loss": 3.2853, "step": 12820 }, { "epoch": 0.130462646484375, "grad_norm": 12.947281837463379, "learning_rate": 4.795703313700587e-06, "loss": 3.3062, "step": 12825 }, { "epoch": 0.13051350911458334, "grad_norm": 12.033870697021484, "learning_rate": 4.795544960056402e-06, "loss": 3.1838, "step": 12830 }, { "epoch": 0.13056437174479166, "grad_norm": 9.891314506530762, "learning_rate": 4.7953865476812435e-06, "loss": 3.4533, "step": 12835 }, { "epoch": 0.130615234375, "grad_norm": 11.173048973083496, "learning_rate": 4.795228076579164e-06, "loss": 3.3019, "step": 12840 }, { "epoch": 0.13066609700520834, "grad_norm": 58.573089599609375, "learning_rate": 4.795069546754219e-06, "loss": 3.2771, "step": 12845 }, { "epoch": 0.13071695963541666, "grad_norm": 10.534260749816895, "learning_rate": 4.794910958210463e-06, "loss": 3.5155, "step": 12850 }, { "epoch": 0.130767822265625, "grad_norm": 10.294408798217773, "learning_rate": 4.7947523109519535e-06, "loss": 3.1176, "step": 12855 }, { "epoch": 0.13081868489583334, "grad_norm": 8.581644058227539, "learning_rate": 4.79459360498275e-06, "loss": 3.6988, "step": 12860 }, { "epoch": 0.13086954752604166, "grad_norm": 7.670634746551514, "learning_rate": 4.794434840306914e-06, "loss": 3.4395, "step": 12865 }, { "epoch": 0.13092041015625, "grad_norm": 8.824413299560547, "learning_rate": 4.794276016928506e-06, "loss": 3.7669, "step": 12870 }, { "epoch": 0.13097127278645834, "grad_norm": 16.448566436767578, "learning_rate": 4.794117134851589e-06, "loss": 3.6758, "step": 12875 }, { "epoch": 0.13102213541666666, "grad_norm": 17.62189483642578, "learning_rate": 4.79395819408023e-06, "loss": 3.4432, "step": 12880 }, { "epoch": 0.131072998046875, "grad_norm": 13.63729190826416, "learning_rate": 4.793799194618495e-06, "loss": 3.1668, "step": 12885 }, { "epoch": 0.13112386067708334, "grad_norm": 9.173359870910645, "learning_rate": 4.79364013647045e-06, "loss": 3.28, "step": 12890 }, { "epoch": 0.13117472330729166, "grad_norm": 10.53978157043457, "learning_rate": 4.793481019640166e-06, "loss": 3.2492, "step": 12895 }, { "epoch": 0.1312255859375, "grad_norm": 11.081829071044922, "learning_rate": 4.793321844131714e-06, "loss": 3.3987, "step": 12900 }, { "epoch": 0.13127644856770834, "grad_norm": 8.339278221130371, "learning_rate": 4.793162609949166e-06, "loss": 3.1725, "step": 12905 }, { "epoch": 0.13132731119791666, "grad_norm": 9.031716346740723, "learning_rate": 4.793003317096596e-06, "loss": 3.2547, "step": 12910 }, { "epoch": 0.131378173828125, "grad_norm": 12.638387680053711, "learning_rate": 4.79284396557808e-06, "loss": 3.367, "step": 12915 }, { "epoch": 0.13142903645833334, "grad_norm": 12.951153755187988, "learning_rate": 4.7926845553976945e-06, "loss": 3.2392, "step": 12920 }, { "epoch": 0.13147989908854166, "grad_norm": 15.467037200927734, "learning_rate": 4.792525086559518e-06, "loss": 3.5068, "step": 12925 }, { "epoch": 0.13153076171875, "grad_norm": 8.698431015014648, "learning_rate": 4.792365559067631e-06, "loss": 3.3362, "step": 12930 }, { "epoch": 0.13158162434895834, "grad_norm": 14.218523979187012, "learning_rate": 4.792205972926114e-06, "loss": 3.0097, "step": 12935 }, { "epoch": 0.13163248697916666, "grad_norm": 7.461666584014893, "learning_rate": 4.792046328139051e-06, "loss": 3.3224, "step": 12940 }, { "epoch": 0.131683349609375, "grad_norm": 15.739773750305176, "learning_rate": 4.791886624710525e-06, "loss": 3.8441, "step": 12945 }, { "epoch": 0.13173421223958334, "grad_norm": 10.866748809814453, "learning_rate": 4.791726862644623e-06, "loss": 3.861, "step": 12950 }, { "epoch": 0.13178507486979166, "grad_norm": 9.433417320251465, "learning_rate": 4.791567041945433e-06, "loss": 3.0838, "step": 12955 }, { "epoch": 0.1318359375, "grad_norm": 16.90796661376953, "learning_rate": 4.791407162617043e-06, "loss": 3.7086, "step": 12960 }, { "epoch": 0.13188680013020834, "grad_norm": 14.038589477539062, "learning_rate": 4.791247224663545e-06, "loss": 3.4241, "step": 12965 }, { "epoch": 0.13193766276041666, "grad_norm": 8.530431747436523, "learning_rate": 4.791087228089029e-06, "loss": 3.4557, "step": 12970 }, { "epoch": 0.131988525390625, "grad_norm": 16.40488624572754, "learning_rate": 4.790927172897589e-06, "loss": 3.7428, "step": 12975 }, { "epoch": 0.13203938802083334, "grad_norm": 10.48965072631836, "learning_rate": 4.790767059093321e-06, "loss": 3.8246, "step": 12980 }, { "epoch": 0.13209025065104166, "grad_norm": 11.309000968933105, "learning_rate": 4.79060688668032e-06, "loss": 3.3004, "step": 12985 }, { "epoch": 0.13214111328125, "grad_norm": 13.044082641601562, "learning_rate": 4.790446655662686e-06, "loss": 3.4936, "step": 12990 }, { "epoch": 0.13219197591145834, "grad_norm": 14.110028266906738, "learning_rate": 4.790286366044516e-06, "loss": 2.9904, "step": 12995 }, { "epoch": 0.13224283854166666, "grad_norm": 16.38824462890625, "learning_rate": 4.790126017829913e-06, "loss": 3.1172, "step": 13000 }, { "epoch": 0.132293701171875, "grad_norm": 12.276939392089844, "learning_rate": 4.789965611022977e-06, "loss": 3.588, "step": 13005 }, { "epoch": 0.13234456380208334, "grad_norm": 12.605525016784668, "learning_rate": 4.7898051456278155e-06, "loss": 3.1167, "step": 13010 }, { "epoch": 0.13239542643229166, "grad_norm": 10.54214096069336, "learning_rate": 4.7896446216485314e-06, "loss": 3.496, "step": 13015 }, { "epoch": 0.1324462890625, "grad_norm": 11.415157318115234, "learning_rate": 4.789484039089232e-06, "loss": 3.5018, "step": 13020 }, { "epoch": 0.13249715169270834, "grad_norm": 10.655110359191895, "learning_rate": 4.789323397954027e-06, "loss": 3.5284, "step": 13025 }, { "epoch": 0.13254801432291666, "grad_norm": 12.17111873626709, "learning_rate": 4.789162698247024e-06, "loss": 3.0777, "step": 13030 }, { "epoch": 0.132598876953125, "grad_norm": 14.725519180297852, "learning_rate": 4.789001939972338e-06, "loss": 3.358, "step": 13035 }, { "epoch": 0.13264973958333334, "grad_norm": 15.81676959991455, "learning_rate": 4.7888411231340785e-06, "loss": 3.2394, "step": 13040 }, { "epoch": 0.13270060221354166, "grad_norm": 9.43869686126709, "learning_rate": 4.788680247736362e-06, "loss": 3.3947, "step": 13045 }, { "epoch": 0.13275146484375, "grad_norm": 14.853139877319336, "learning_rate": 4.788519313783303e-06, "loss": 3.0722, "step": 13050 }, { "epoch": 0.13280232747395834, "grad_norm": 12.241186141967773, "learning_rate": 4.788358321279021e-06, "loss": 3.8822, "step": 13055 }, { "epoch": 0.13285319010416666, "grad_norm": 8.0377779006958, "learning_rate": 4.788197270227633e-06, "loss": 3.4792, "step": 13060 }, { "epoch": 0.132904052734375, "grad_norm": 13.722543716430664, "learning_rate": 4.78803616063326e-06, "loss": 3.3762, "step": 13065 }, { "epoch": 0.13295491536458334, "grad_norm": 9.478768348693848, "learning_rate": 4.787874992500024e-06, "loss": 3.1809, "step": 13070 }, { "epoch": 0.13300577799479166, "grad_norm": 9.255359649658203, "learning_rate": 4.7877137658320496e-06, "loss": 3.4478, "step": 13075 }, { "epoch": 0.133056640625, "grad_norm": 14.590103149414062, "learning_rate": 4.7875524806334605e-06, "loss": 3.4376, "step": 13080 }, { "epoch": 0.13310750325520834, "grad_norm": 9.905108451843262, "learning_rate": 4.787391136908383e-06, "loss": 3.4683, "step": 13085 }, { "epoch": 0.13315836588541666, "grad_norm": 14.800880432128906, "learning_rate": 4.787229734660945e-06, "loss": 3.3281, "step": 13090 }, { "epoch": 0.133209228515625, "grad_norm": 15.413259506225586, "learning_rate": 4.787068273895278e-06, "loss": 3.222, "step": 13095 }, { "epoch": 0.13326009114583334, "grad_norm": 14.764119148254395, "learning_rate": 4.7869067546155105e-06, "loss": 3.3957, "step": 13100 }, { "epoch": 0.13331095377604166, "grad_norm": 10.476200103759766, "learning_rate": 4.786745176825775e-06, "loss": 3.2024, "step": 13105 }, { "epoch": 0.13336181640625, "grad_norm": 6.172470569610596, "learning_rate": 4.786583540530206e-06, "loss": 3.0385, "step": 13110 }, { "epoch": 0.13341267903645834, "grad_norm": 16.662702560424805, "learning_rate": 4.78642184573294e-06, "loss": 3.3144, "step": 13115 }, { "epoch": 0.13346354166666666, "grad_norm": 14.16555404663086, "learning_rate": 4.786260092438113e-06, "loss": 3.4183, "step": 13120 }, { "epoch": 0.133514404296875, "grad_norm": 13.170214653015137, "learning_rate": 4.7860982806498635e-06, "loss": 3.2948, "step": 13125 }, { "epoch": 0.13356526692708334, "grad_norm": 14.692107200622559, "learning_rate": 4.78593641037233e-06, "loss": 3.583, "step": 13130 }, { "epoch": 0.13361612955729166, "grad_norm": 14.330788612365723, "learning_rate": 4.785774481609657e-06, "loss": 3.5935, "step": 13135 }, { "epoch": 0.1336669921875, "grad_norm": 10.425037384033203, "learning_rate": 4.785612494365985e-06, "loss": 3.4952, "step": 13140 }, { "epoch": 0.13371785481770834, "grad_norm": 9.754368782043457, "learning_rate": 4.785450448645459e-06, "loss": 3.1403, "step": 13145 }, { "epoch": 0.13376871744791666, "grad_norm": 12.570056915283203, "learning_rate": 4.785288344452226e-06, "loss": 4.0699, "step": 13150 }, { "epoch": 0.133819580078125, "grad_norm": 9.117902755737305, "learning_rate": 4.78512618179043e-06, "loss": 3.8218, "step": 13155 }, { "epoch": 0.13387044270833334, "grad_norm": 13.497501373291016, "learning_rate": 4.784963960664224e-06, "loss": 3.5526, "step": 13160 }, { "epoch": 0.13392130533854166, "grad_norm": 12.687576293945312, "learning_rate": 4.784801681077757e-06, "loss": 3.5295, "step": 13165 }, { "epoch": 0.13397216796875, "grad_norm": 9.852676391601562, "learning_rate": 4.78463934303518e-06, "loss": 3.4353, "step": 13170 }, { "epoch": 0.13402303059895834, "grad_norm": 11.296305656433105, "learning_rate": 4.7844769465406464e-06, "loss": 3.3819, "step": 13175 }, { "epoch": 0.13407389322916666, "grad_norm": 9.368551254272461, "learning_rate": 4.784314491598312e-06, "loss": 3.1461, "step": 13180 }, { "epoch": 0.134124755859375, "grad_norm": 9.357016563415527, "learning_rate": 4.784151978212333e-06, "loss": 3.582, "step": 13185 }, { "epoch": 0.13417561848958334, "grad_norm": 11.293736457824707, "learning_rate": 4.783989406386867e-06, "loss": 3.5203, "step": 13190 }, { "epoch": 0.13422648111979166, "grad_norm": 13.790380477905273, "learning_rate": 4.783826776126073e-06, "loss": 3.3007, "step": 13195 }, { "epoch": 0.13427734375, "grad_norm": 14.679352760314941, "learning_rate": 4.783664087434112e-06, "loss": 3.8487, "step": 13200 }, { "epoch": 0.13432820638020834, "grad_norm": 15.090688705444336, "learning_rate": 4.783501340315147e-06, "loss": 2.9251, "step": 13205 }, { "epoch": 0.13437906901041666, "grad_norm": 15.12507152557373, "learning_rate": 4.783338534773343e-06, "loss": 3.2318, "step": 13210 }, { "epoch": 0.134429931640625, "grad_norm": 10.328474044799805, "learning_rate": 4.783175670812862e-06, "loss": 3.3133, "step": 13215 }, { "epoch": 0.13448079427083334, "grad_norm": 9.897954940795898, "learning_rate": 4.783012748437873e-06, "loss": 3.3901, "step": 13220 }, { "epoch": 0.13453165690104166, "grad_norm": 12.634456634521484, "learning_rate": 4.782849767652544e-06, "loss": 3.4324, "step": 13225 }, { "epoch": 0.13458251953125, "grad_norm": 13.496397972106934, "learning_rate": 4.782686728461044e-06, "loss": 3.4852, "step": 13230 }, { "epoch": 0.13463338216145834, "grad_norm": 9.415319442749023, "learning_rate": 4.782523630867546e-06, "loss": 3.8193, "step": 13235 }, { "epoch": 0.13468424479166666, "grad_norm": 14.183247566223145, "learning_rate": 4.782360474876222e-06, "loss": 3.7154, "step": 13240 }, { "epoch": 0.134735107421875, "grad_norm": 9.120186805725098, "learning_rate": 4.7821972604912464e-06, "loss": 3.0765, "step": 13245 }, { "epoch": 0.13478597005208334, "grad_norm": 15.911162376403809, "learning_rate": 4.782033987716794e-06, "loss": 3.2823, "step": 13250 }, { "epoch": 0.13483683268229166, "grad_norm": 12.479145050048828, "learning_rate": 4.781870656557044e-06, "loss": 3.3999, "step": 13255 }, { "epoch": 0.1348876953125, "grad_norm": 10.130742073059082, "learning_rate": 4.781707267016174e-06, "loss": 3.7453, "step": 13260 }, { "epoch": 0.13493855794270834, "grad_norm": 13.885787963867188, "learning_rate": 4.781543819098363e-06, "loss": 3.348, "step": 13265 }, { "epoch": 0.13498942057291666, "grad_norm": 9.897067070007324, "learning_rate": 4.781380312807795e-06, "loss": 3.6482, "step": 13270 }, { "epoch": 0.135040283203125, "grad_norm": 10.115368843078613, "learning_rate": 4.781216748148653e-06, "loss": 3.5157, "step": 13275 }, { "epoch": 0.13509114583333334, "grad_norm": 11.32646369934082, "learning_rate": 4.78105312512512e-06, "loss": 3.3044, "step": 13280 }, { "epoch": 0.13514200846354166, "grad_norm": 12.162534713745117, "learning_rate": 4.780889443741384e-06, "loss": 3.3909, "step": 13285 }, { "epoch": 0.13519287109375, "grad_norm": 14.136061668395996, "learning_rate": 4.780725704001633e-06, "loss": 3.5301, "step": 13290 }, { "epoch": 0.13524373372395834, "grad_norm": 12.348136901855469, "learning_rate": 4.780561905910055e-06, "loss": 3.979, "step": 13295 }, { "epoch": 0.13529459635416666, "grad_norm": 12.501129150390625, "learning_rate": 4.780398049470841e-06, "loss": 3.6263, "step": 13300 }, { "epoch": 0.135345458984375, "grad_norm": 16.70313835144043, "learning_rate": 4.780234134688184e-06, "loss": 3.584, "step": 13305 }, { "epoch": 0.13539632161458334, "grad_norm": 12.98574447631836, "learning_rate": 4.780070161566276e-06, "loss": 2.9994, "step": 13310 }, { "epoch": 0.13544718424479166, "grad_norm": 9.226192474365234, "learning_rate": 4.7799061301093144e-06, "loss": 3.1362, "step": 13315 }, { "epoch": 0.135498046875, "grad_norm": 7.743569850921631, "learning_rate": 4.779742040321494e-06, "loss": 3.789, "step": 13320 }, { "epoch": 0.13554890950520834, "grad_norm": 10.638326644897461, "learning_rate": 4.779577892207015e-06, "loss": 3.5861, "step": 13325 }, { "epoch": 0.13559977213541666, "grad_norm": 10.221707344055176, "learning_rate": 4.779413685770075e-06, "loss": 3.1357, "step": 13330 }, { "epoch": 0.135650634765625, "grad_norm": 12.546625137329102, "learning_rate": 4.779249421014876e-06, "loss": 3.2787, "step": 13335 }, { "epoch": 0.13570149739583334, "grad_norm": 15.528656959533691, "learning_rate": 4.779085097945621e-06, "loss": 3.4499, "step": 13340 }, { "epoch": 0.13575236002604166, "grad_norm": 15.490991592407227, "learning_rate": 4.778920716566514e-06, "loss": 3.7281, "step": 13345 }, { "epoch": 0.13580322265625, "grad_norm": 12.92549991607666, "learning_rate": 4.7787562768817605e-06, "loss": 3.3561, "step": 13350 }, { "epoch": 0.13585408528645834, "grad_norm": 14.30997371673584, "learning_rate": 4.778591778895568e-06, "loss": 3.4102, "step": 13355 }, { "epoch": 0.13590494791666666, "grad_norm": 9.108956336975098, "learning_rate": 4.778427222612145e-06, "loss": 3.2839, "step": 13360 }, { "epoch": 0.135955810546875, "grad_norm": 15.586246490478516, "learning_rate": 4.778262608035702e-06, "loss": 3.6318, "step": 13365 }, { "epoch": 0.13600667317708334, "grad_norm": 12.77320671081543, "learning_rate": 4.778097935170449e-06, "loss": 3.4799, "step": 13370 }, { "epoch": 0.13605753580729166, "grad_norm": 8.657517433166504, "learning_rate": 4.777933204020602e-06, "loss": 3.2848, "step": 13375 }, { "epoch": 0.1361083984375, "grad_norm": 13.57066822052002, "learning_rate": 4.777768414590372e-06, "loss": 3.0883, "step": 13380 }, { "epoch": 0.13615926106770834, "grad_norm": 15.412737846374512, "learning_rate": 4.777603566883978e-06, "loss": 3.3734, "step": 13385 }, { "epoch": 0.13621012369791666, "grad_norm": 7.619217395782471, "learning_rate": 4.777438660905637e-06, "loss": 3.2357, "step": 13390 }, { "epoch": 0.136260986328125, "grad_norm": 8.894469261169434, "learning_rate": 4.777273696659567e-06, "loss": 3.3946, "step": 13395 }, { "epoch": 0.13631184895833334, "grad_norm": 11.534381866455078, "learning_rate": 4.7771086741499895e-06, "loss": 3.3128, "step": 13400 }, { "epoch": 0.13636271158854166, "grad_norm": 9.838509559631348, "learning_rate": 4.776943593381126e-06, "loss": 3.5254, "step": 13405 }, { "epoch": 0.13641357421875, "grad_norm": 12.967144012451172, "learning_rate": 4.7767784543572e-06, "loss": 3.1434, "step": 13410 }, { "epoch": 0.13646443684895834, "grad_norm": 9.366642951965332, "learning_rate": 4.776613257082439e-06, "loss": 2.9998, "step": 13415 }, { "epoch": 0.13651529947916666, "grad_norm": 10.693907737731934, "learning_rate": 4.776448001561065e-06, "loss": 3.9294, "step": 13420 }, { "epoch": 0.136566162109375, "grad_norm": 14.118185997009277, "learning_rate": 4.7762826877973095e-06, "loss": 3.4184, "step": 13425 }, { "epoch": 0.13661702473958334, "grad_norm": 16.79452133178711, "learning_rate": 4.776117315795401e-06, "loss": 3.4651, "step": 13430 }, { "epoch": 0.13666788736979166, "grad_norm": 7.560031890869141, "learning_rate": 4.77595188555957e-06, "loss": 3.3598, "step": 13435 }, { "epoch": 0.13671875, "grad_norm": 8.069879531860352, "learning_rate": 4.77578639709405e-06, "loss": 3.3903, "step": 13440 }, { "epoch": 0.13676961263020834, "grad_norm": 14.968077659606934, "learning_rate": 4.775620850403075e-06, "loss": 3.1777, "step": 13445 }, { "epoch": 0.13682047526041666, "grad_norm": 11.37788200378418, "learning_rate": 4.775455245490879e-06, "loss": 3.1365, "step": 13450 }, { "epoch": 0.136871337890625, "grad_norm": 11.77219295501709, "learning_rate": 4.7752895823616995e-06, "loss": 3.397, "step": 13455 }, { "epoch": 0.13692220052083334, "grad_norm": 11.196371078491211, "learning_rate": 4.775123861019776e-06, "loss": 3.146, "step": 13460 }, { "epoch": 0.13697306315104166, "grad_norm": 19.208072662353516, "learning_rate": 4.774958081469348e-06, "loss": 3.2435, "step": 13465 }, { "epoch": 0.13702392578125, "grad_norm": 13.371184349060059, "learning_rate": 4.774792243714656e-06, "loss": 3.4511, "step": 13470 }, { "epoch": 0.13707478841145834, "grad_norm": 7.495774745941162, "learning_rate": 4.774626347759944e-06, "loss": 3.174, "step": 13475 }, { "epoch": 0.13712565104166666, "grad_norm": 9.855815887451172, "learning_rate": 4.774460393609456e-06, "loss": 3.4519, "step": 13480 }, { "epoch": 0.137176513671875, "grad_norm": 15.752311706542969, "learning_rate": 4.774294381267438e-06, "loss": 3.3032, "step": 13485 }, { "epoch": 0.13722737630208334, "grad_norm": 10.933402061462402, "learning_rate": 4.774128310738137e-06, "loss": 3.5641, "step": 13490 }, { "epoch": 0.13727823893229166, "grad_norm": 10.031787872314453, "learning_rate": 4.773962182025803e-06, "loss": 3.4982, "step": 13495 }, { "epoch": 0.1373291015625, "grad_norm": 10.56356430053711, "learning_rate": 4.773795995134685e-06, "loss": 3.2439, "step": 13500 }, { "epoch": 0.13737996419270834, "grad_norm": 7.9167585372924805, "learning_rate": 4.773629750069036e-06, "loss": 2.9149, "step": 13505 }, { "epoch": 0.13743082682291666, "grad_norm": 11.75800609588623, "learning_rate": 4.773463446833108e-06, "loss": 3.3936, "step": 13510 }, { "epoch": 0.137481689453125, "grad_norm": 10.611553192138672, "learning_rate": 4.773297085431156e-06, "loss": 3.3874, "step": 13515 }, { "epoch": 0.13753255208333334, "grad_norm": 7.607089996337891, "learning_rate": 4.773130665867438e-06, "loss": 3.1678, "step": 13520 }, { "epoch": 0.13758341471354166, "grad_norm": 13.853635787963867, "learning_rate": 4.7729641881462106e-06, "loss": 3.5818, "step": 13525 }, { "epoch": 0.13763427734375, "grad_norm": 14.964798927307129, "learning_rate": 4.772797652271732e-06, "loss": 3.722, "step": 13530 }, { "epoch": 0.13768513997395834, "grad_norm": 13.768731117248535, "learning_rate": 4.772631058248266e-06, "loss": 4.1596, "step": 13535 }, { "epoch": 0.13773600260416666, "grad_norm": 14.737395286560059, "learning_rate": 4.772464406080072e-06, "loss": 3.3029, "step": 13540 }, { "epoch": 0.137786865234375, "grad_norm": 13.914802551269531, "learning_rate": 4.772297695771415e-06, "loss": 3.4859, "step": 13545 }, { "epoch": 0.13783772786458334, "grad_norm": 11.838445663452148, "learning_rate": 4.7721309273265605e-06, "loss": 3.3564, "step": 13550 }, { "epoch": 0.13788859049479166, "grad_norm": 12.323065757751465, "learning_rate": 4.771964100749774e-06, "loss": 3.3043, "step": 13555 }, { "epoch": 0.137939453125, "grad_norm": 13.693683624267578, "learning_rate": 4.771797216045325e-06, "loss": 3.4427, "step": 13560 }, { "epoch": 0.13799031575520834, "grad_norm": 14.236191749572754, "learning_rate": 4.771630273217483e-06, "loss": 3.3801, "step": 13565 }, { "epoch": 0.13804117838541666, "grad_norm": 12.197969436645508, "learning_rate": 4.7714632722705175e-06, "loss": 3.6923, "step": 13570 }, { "epoch": 0.138092041015625, "grad_norm": 12.962698936462402, "learning_rate": 4.771296213208704e-06, "loss": 3.3643, "step": 13575 }, { "epoch": 0.13814290364583334, "grad_norm": 15.476211547851562, "learning_rate": 4.7711290960363145e-06, "loss": 3.3381, "step": 13580 }, { "epoch": 0.13819376627604166, "grad_norm": 12.055511474609375, "learning_rate": 4.770961920757626e-06, "loss": 3.1077, "step": 13585 }, { "epoch": 0.13824462890625, "grad_norm": 11.437824249267578, "learning_rate": 4.7707946873769144e-06, "loss": 3.4872, "step": 13590 }, { "epoch": 0.13829549153645834, "grad_norm": 14.684785842895508, "learning_rate": 4.77062739589846e-06, "loss": 3.1012, "step": 13595 }, { "epoch": 0.13834635416666666, "grad_norm": 11.902538299560547, "learning_rate": 4.77046004632654e-06, "loss": 3.3424, "step": 13600 }, { "epoch": 0.138397216796875, "grad_norm": 13.112920761108398, "learning_rate": 4.770292638665439e-06, "loss": 3.6353, "step": 13605 }, { "epoch": 0.13844807942708334, "grad_norm": 12.018083572387695, "learning_rate": 4.7701251729194396e-06, "loss": 3.7497, "step": 13610 }, { "epoch": 0.13849894205729166, "grad_norm": 12.88236141204834, "learning_rate": 4.769957649092825e-06, "loss": 3.9322, "step": 13615 }, { "epoch": 0.1385498046875, "grad_norm": 16.120954513549805, "learning_rate": 4.769790067189882e-06, "loss": 3.4474, "step": 13620 }, { "epoch": 0.13860066731770834, "grad_norm": 9.990036010742188, "learning_rate": 4.769622427214898e-06, "loss": 3.1569, "step": 13625 }, { "epoch": 0.13865152994791666, "grad_norm": 9.095274925231934, "learning_rate": 4.769454729172163e-06, "loss": 3.3547, "step": 13630 }, { "epoch": 0.138702392578125, "grad_norm": 12.9636812210083, "learning_rate": 4.7692869730659655e-06, "loss": 3.3197, "step": 13635 }, { "epoch": 0.13875325520833334, "grad_norm": 14.65504264831543, "learning_rate": 4.769119158900599e-06, "loss": 3.4425, "step": 13640 }, { "epoch": 0.13880411783854166, "grad_norm": 9.957447052001953, "learning_rate": 4.768951286680357e-06, "loss": 3.4952, "step": 13645 }, { "epoch": 0.13885498046875, "grad_norm": 12.655380249023438, "learning_rate": 4.768783356409535e-06, "loss": 3.4519, "step": 13650 }, { "epoch": 0.13890584309895834, "grad_norm": 14.230171203613281, "learning_rate": 4.768615368092427e-06, "loss": 3.2948, "step": 13655 }, { "epoch": 0.13895670572916666, "grad_norm": 15.258803367614746, "learning_rate": 4.768447321733332e-06, "loss": 3.5278, "step": 13660 }, { "epoch": 0.139007568359375, "grad_norm": 8.109949111938477, "learning_rate": 4.7682792173365525e-06, "loss": 3.2576, "step": 13665 }, { "epoch": 0.13905843098958334, "grad_norm": 12.126956939697266, "learning_rate": 4.768111054906384e-06, "loss": 3.5308, "step": 13670 }, { "epoch": 0.13910929361979166, "grad_norm": 12.557209014892578, "learning_rate": 4.767942834447134e-06, "loss": 3.405, "step": 13675 }, { "epoch": 0.13916015625, "grad_norm": 8.383021354675293, "learning_rate": 4.767774555963103e-06, "loss": 3.3727, "step": 13680 }, { "epoch": 0.13921101888020834, "grad_norm": 9.561806678771973, "learning_rate": 4.767606219458598e-06, "loss": 3.7015, "step": 13685 }, { "epoch": 0.13926188151041666, "grad_norm": 11.462172508239746, "learning_rate": 4.767437824937926e-06, "loss": 3.6486, "step": 13690 }, { "epoch": 0.139312744140625, "grad_norm": 16.29766273498535, "learning_rate": 4.767269372405393e-06, "loss": 3.0515, "step": 13695 }, { "epoch": 0.13936360677083334, "grad_norm": 8.491412162780762, "learning_rate": 4.767100861865311e-06, "loss": 3.3933, "step": 13700 }, { "epoch": 0.13941446940104166, "grad_norm": 8.013467788696289, "learning_rate": 4.766932293321992e-06, "loss": 3.3615, "step": 13705 }, { "epoch": 0.13946533203125, "grad_norm": 8.9276704788208, "learning_rate": 4.766763666779747e-06, "loss": 3.4722, "step": 13710 }, { "epoch": 0.13951619466145834, "grad_norm": 8.328059196472168, "learning_rate": 4.76659498224289e-06, "loss": 3.6743, "step": 13715 }, { "epoch": 0.13956705729166666, "grad_norm": 10.622822761535645, "learning_rate": 4.766426239715739e-06, "loss": 3.3682, "step": 13720 }, { "epoch": 0.139617919921875, "grad_norm": 13.057267189025879, "learning_rate": 4.766257439202609e-06, "loss": 3.41, "step": 13725 }, { "epoch": 0.13966878255208334, "grad_norm": 13.841679573059082, "learning_rate": 4.766088580707819e-06, "loss": 3.2495, "step": 13730 }, { "epoch": 0.13971964518229166, "grad_norm": 16.03135108947754, "learning_rate": 4.765919664235691e-06, "loss": 3.3903, "step": 13735 }, { "epoch": 0.1397705078125, "grad_norm": 8.109480857849121, "learning_rate": 4.765750689790545e-06, "loss": 3.2099, "step": 13740 }, { "epoch": 0.13982137044270834, "grad_norm": 15.432792663574219, "learning_rate": 4.765581657376705e-06, "loss": 3.2156, "step": 13745 }, { "epoch": 0.13987223307291666, "grad_norm": 13.207000732421875, "learning_rate": 4.7654125669984945e-06, "loss": 3.3425, "step": 13750 }, { "epoch": 0.139923095703125, "grad_norm": 10.853780746459961, "learning_rate": 4.765243418660241e-06, "loss": 3.5909, "step": 13755 }, { "epoch": 0.13997395833333334, "grad_norm": 11.465790748596191, "learning_rate": 4.765074212366271e-06, "loss": 3.7235, "step": 13760 }, { "epoch": 0.14002482096354166, "grad_norm": 14.370993614196777, "learning_rate": 4.764904948120915e-06, "loss": 3.6259, "step": 13765 }, { "epoch": 0.14007568359375, "grad_norm": 11.535296440124512, "learning_rate": 4.7647356259285025e-06, "loss": 3.5197, "step": 13770 }, { "epoch": 0.14012654622395834, "grad_norm": 11.556644439697266, "learning_rate": 4.764566245793365e-06, "loss": 3.5663, "step": 13775 }, { "epoch": 0.14017740885416666, "grad_norm": 14.494634628295898, "learning_rate": 4.764396807719838e-06, "loss": 3.4005, "step": 13780 }, { "epoch": 0.140228271484375, "grad_norm": 14.56704330444336, "learning_rate": 4.764227311712255e-06, "loss": 3.335, "step": 13785 }, { "epoch": 0.14027913411458334, "grad_norm": 10.961151123046875, "learning_rate": 4.764057757774953e-06, "loss": 3.7573, "step": 13790 }, { "epoch": 0.14032999674479166, "grad_norm": 8.999013900756836, "learning_rate": 4.76388814591227e-06, "loss": 3.4393, "step": 13795 }, { "epoch": 0.140380859375, "grad_norm": 85.62931823730469, "learning_rate": 4.763718476128545e-06, "loss": 3.9256, "step": 13800 }, { "epoch": 0.14043172200520834, "grad_norm": 11.35169792175293, "learning_rate": 4.763548748428119e-06, "loss": 3.6844, "step": 13805 }, { "epoch": 0.14048258463541666, "grad_norm": 10.133095741271973, "learning_rate": 4.763378962815335e-06, "loss": 3.3458, "step": 13810 }, { "epoch": 0.140533447265625, "grad_norm": 14.392997741699219, "learning_rate": 4.763209119294537e-06, "loss": 3.4801, "step": 13815 }, { "epoch": 0.14058430989583334, "grad_norm": 13.355161666870117, "learning_rate": 4.76303921787007e-06, "loss": 3.98, "step": 13820 }, { "epoch": 0.14063517252604166, "grad_norm": 15.28199291229248, "learning_rate": 4.762869258546281e-06, "loss": 3.565, "step": 13825 }, { "epoch": 0.14068603515625, "grad_norm": 15.826837539672852, "learning_rate": 4.762699241327518e-06, "loss": 3.9634, "step": 13830 }, { "epoch": 0.14073689778645834, "grad_norm": 19.135751724243164, "learning_rate": 4.762529166218133e-06, "loss": 3.298, "step": 13835 }, { "epoch": 0.14078776041666666, "grad_norm": 14.346761703491211, "learning_rate": 4.7623590332224735e-06, "loss": 3.3069, "step": 13840 }, { "epoch": 0.140838623046875, "grad_norm": 18.030282974243164, "learning_rate": 4.762188842344896e-06, "loss": 3.2781, "step": 13845 }, { "epoch": 0.14088948567708334, "grad_norm": 11.988079071044922, "learning_rate": 4.762018593589752e-06, "loss": 3.7506, "step": 13850 }, { "epoch": 0.14094034830729166, "grad_norm": 12.060202598571777, "learning_rate": 4.761848286961398e-06, "loss": 3.3371, "step": 13855 }, { "epoch": 0.1409912109375, "grad_norm": 10.818347930908203, "learning_rate": 4.7616779224641925e-06, "loss": 3.528, "step": 13860 }, { "epoch": 0.14104207356770834, "grad_norm": 14.578571319580078, "learning_rate": 4.761507500102493e-06, "loss": 3.3091, "step": 13865 }, { "epoch": 0.14109293619791666, "grad_norm": 14.782280921936035, "learning_rate": 4.761337019880661e-06, "loss": 3.382, "step": 13870 }, { "epoch": 0.141143798828125, "grad_norm": 16.304611206054688, "learning_rate": 4.761166481803057e-06, "loss": 3.3789, "step": 13875 }, { "epoch": 0.14119466145833334, "grad_norm": 17.154775619506836, "learning_rate": 4.760995885874045e-06, "loss": 4.1372, "step": 13880 }, { "epoch": 0.14124552408854166, "grad_norm": 7.446081161499023, "learning_rate": 4.760825232097988e-06, "loss": 3.44, "step": 13885 }, { "epoch": 0.14129638671875, "grad_norm": 15.864761352539062, "learning_rate": 4.760654520479254e-06, "loss": 3.7214, "step": 13890 }, { "epoch": 0.14134724934895834, "grad_norm": 10.105338096618652, "learning_rate": 4.76048375102221e-06, "loss": 3.0877, "step": 13895 }, { "epoch": 0.14139811197916666, "grad_norm": 11.060812950134277, "learning_rate": 4.760312923731224e-06, "loss": 3.406, "step": 13900 }, { "epoch": 0.141448974609375, "grad_norm": 17.411237716674805, "learning_rate": 4.760142038610669e-06, "loss": 3.4056, "step": 13905 }, { "epoch": 0.14149983723958334, "grad_norm": 12.01961898803711, "learning_rate": 4.759971095664915e-06, "loss": 3.6575, "step": 13910 }, { "epoch": 0.14155069986979166, "grad_norm": 10.522672653198242, "learning_rate": 4.7598000948983355e-06, "loss": 3.462, "step": 13915 }, { "epoch": 0.1416015625, "grad_norm": 14.614590644836426, "learning_rate": 4.759629036315307e-06, "loss": 3.5353, "step": 13920 }, { "epoch": 0.14165242513020834, "grad_norm": 13.122455596923828, "learning_rate": 4.759457919920206e-06, "loss": 3.2238, "step": 13925 }, { "epoch": 0.14170328776041666, "grad_norm": 11.244791984558105, "learning_rate": 4.759286745717409e-06, "loss": 3.8902, "step": 13930 }, { "epoch": 0.141754150390625, "grad_norm": 13.832063674926758, "learning_rate": 4.759115513711296e-06, "loss": 3.2644, "step": 13935 }, { "epoch": 0.14180501302083334, "grad_norm": 18.43947982788086, "learning_rate": 4.758944223906248e-06, "loss": 3.3131, "step": 13940 }, { "epoch": 0.14185587565104166, "grad_norm": 15.581222534179688, "learning_rate": 4.758772876306647e-06, "loss": 3.2354, "step": 13945 }, { "epoch": 0.14190673828125, "grad_norm": 7.942548751831055, "learning_rate": 4.758601470916878e-06, "loss": 3.2157, "step": 13950 }, { "epoch": 0.14195760091145834, "grad_norm": 15.951170921325684, "learning_rate": 4.758430007741325e-06, "loss": 3.3562, "step": 13955 }, { "epoch": 0.14200846354166666, "grad_norm": 14.815132141113281, "learning_rate": 4.7582584867843764e-06, "loss": 2.939, "step": 13960 }, { "epoch": 0.142059326171875, "grad_norm": 12.236970901489258, "learning_rate": 4.7580869080504185e-06, "loss": 3.2991, "step": 13965 }, { "epoch": 0.14211018880208334, "grad_norm": 12.710593223571777, "learning_rate": 4.757915271543844e-06, "loss": 3.2584, "step": 13970 }, { "epoch": 0.14216105143229166, "grad_norm": 12.963603019714355, "learning_rate": 4.757743577269042e-06, "loss": 3.1463, "step": 13975 }, { "epoch": 0.1422119140625, "grad_norm": 10.430787086486816, "learning_rate": 4.7575718252304046e-06, "loss": 4.2165, "step": 13980 }, { "epoch": 0.14226277669270834, "grad_norm": 10.955326080322266, "learning_rate": 4.7574000154323274e-06, "loss": 3.195, "step": 13985 }, { "epoch": 0.14231363932291666, "grad_norm": 8.364349365234375, "learning_rate": 4.757228147879207e-06, "loss": 3.1683, "step": 13990 }, { "epoch": 0.142364501953125, "grad_norm": 14.662646293640137, "learning_rate": 4.757056222575438e-06, "loss": 3.2215, "step": 13995 }, { "epoch": 0.14241536458333334, "grad_norm": 14.448378562927246, "learning_rate": 4.756884239525422e-06, "loss": 3.3889, "step": 14000 }, { "epoch": 0.14246622721354166, "grad_norm": 12.963875770568848, "learning_rate": 4.756712198733557e-06, "loss": 3.2769, "step": 14005 }, { "epoch": 0.14251708984375, "grad_norm": 11.409318923950195, "learning_rate": 4.756540100204245e-06, "loss": 3.3303, "step": 14010 }, { "epoch": 0.14256795247395834, "grad_norm": 9.132806777954102, "learning_rate": 4.75636794394189e-06, "loss": 3.2503, "step": 14015 }, { "epoch": 0.14261881510416666, "grad_norm": 12.652349472045898, "learning_rate": 4.756195729950896e-06, "loss": 3.2854, "step": 14020 }, { "epoch": 0.142669677734375, "grad_norm": 11.568374633789062, "learning_rate": 4.756023458235668e-06, "loss": 3.2795, "step": 14025 }, { "epoch": 0.14272054036458334, "grad_norm": 14.18939208984375, "learning_rate": 4.755851128800616e-06, "loss": 3.2294, "step": 14030 }, { "epoch": 0.14277140299479166, "grad_norm": 12.417285919189453, "learning_rate": 4.755678741650146e-06, "loss": 3.3423, "step": 14035 }, { "epoch": 0.142822265625, "grad_norm": 12.653460502624512, "learning_rate": 4.755506296788671e-06, "loss": 3.4815, "step": 14040 }, { "epoch": 0.14287312825520834, "grad_norm": 10.841574668884277, "learning_rate": 4.7553337942206025e-06, "loss": 3.3619, "step": 14045 }, { "epoch": 0.14292399088541666, "grad_norm": 9.72189712524414, "learning_rate": 4.7551612339503524e-06, "loss": 3.0766, "step": 14050 }, { "epoch": 0.142974853515625, "grad_norm": 10.824090003967285, "learning_rate": 4.754988615982336e-06, "loss": 3.2932, "step": 14055 }, { "epoch": 0.14302571614583334, "grad_norm": 14.650367736816406, "learning_rate": 4.7548159403209725e-06, "loss": 3.3557, "step": 14060 }, { "epoch": 0.14307657877604166, "grad_norm": 13.419211387634277, "learning_rate": 4.7546432069706765e-06, "loss": 3.135, "step": 14065 }, { "epoch": 0.14312744140625, "grad_norm": 10.86098861694336, "learning_rate": 4.754470415935868e-06, "loss": 3.7383, "step": 14070 }, { "epoch": 0.14317830403645834, "grad_norm": 16.1043758392334, "learning_rate": 4.754297567220969e-06, "loss": 3.5326, "step": 14075 }, { "epoch": 0.14322916666666666, "grad_norm": 11.805451393127441, "learning_rate": 4.754124660830401e-06, "loss": 3.2176, "step": 14080 }, { "epoch": 0.143280029296875, "grad_norm": 10.292030334472656, "learning_rate": 4.753951696768587e-06, "loss": 3.5026, "step": 14085 }, { "epoch": 0.14333089192708334, "grad_norm": 9.801261901855469, "learning_rate": 4.753778675039954e-06, "loss": 3.5177, "step": 14090 }, { "epoch": 0.14338175455729166, "grad_norm": 14.867837905883789, "learning_rate": 4.753605595648928e-06, "loss": 3.1356, "step": 14095 }, { "epoch": 0.1434326171875, "grad_norm": 19.2530460357666, "learning_rate": 4.753432458599936e-06, "loss": 3.4818, "step": 14100 }, { "epoch": 0.14348347981770834, "grad_norm": 11.164162635803223, "learning_rate": 4.753259263897409e-06, "loss": 3.65, "step": 14105 }, { "epoch": 0.14353434244791666, "grad_norm": 7.864550590515137, "learning_rate": 4.7530860115457785e-06, "loss": 3.1865, "step": 14110 }, { "epoch": 0.143585205078125, "grad_norm": 7.927250862121582, "learning_rate": 4.7529127015494754e-06, "loss": 3.2852, "step": 14115 }, { "epoch": 0.14363606770833334, "grad_norm": 15.011672973632812, "learning_rate": 4.752739333912936e-06, "loss": 3.2359, "step": 14120 }, { "epoch": 0.14368693033854166, "grad_norm": 10.188263893127441, "learning_rate": 4.752565908640594e-06, "loss": 3.4599, "step": 14125 }, { "epoch": 0.14373779296875, "grad_norm": 13.60891056060791, "learning_rate": 4.752392425736888e-06, "loss": 3.5074, "step": 14130 }, { "epoch": 0.14378865559895834, "grad_norm": 7.990344524383545, "learning_rate": 4.752218885206255e-06, "loss": 3.1402, "step": 14135 }, { "epoch": 0.14383951822916666, "grad_norm": 13.563754081726074, "learning_rate": 4.752045287053135e-06, "loss": 3.6083, "step": 14140 }, { "epoch": 0.143890380859375, "grad_norm": 7.188718795776367, "learning_rate": 4.751871631281971e-06, "loss": 3.6155, "step": 14145 }, { "epoch": 0.14394124348958334, "grad_norm": 11.118795394897461, "learning_rate": 4.751697917897204e-06, "loss": 3.2539, "step": 14150 }, { "epoch": 0.14399210611979166, "grad_norm": 11.786707878112793, "learning_rate": 4.7515241469032805e-06, "loss": 3.4012, "step": 14155 }, { "epoch": 0.14404296875, "grad_norm": 15.172300338745117, "learning_rate": 4.751350318304645e-06, "loss": 3.4998, "step": 14160 }, { "epoch": 0.14409383138020834, "grad_norm": 14.242692947387695, "learning_rate": 4.751176432105746e-06, "loss": 3.374, "step": 14165 }, { "epoch": 0.14414469401041666, "grad_norm": 12.410831451416016, "learning_rate": 4.751002488311031e-06, "loss": 3.1663, "step": 14170 }, { "epoch": 0.144195556640625, "grad_norm": 16.139020919799805, "learning_rate": 4.75082848692495e-06, "loss": 3.6473, "step": 14175 }, { "epoch": 0.14424641927083334, "grad_norm": 9.563576698303223, "learning_rate": 4.750654427951957e-06, "loss": 3.133, "step": 14180 }, { "epoch": 0.14429728190104166, "grad_norm": 9.554606437683105, "learning_rate": 4.750480311396503e-06, "loss": 3.2635, "step": 14185 }, { "epoch": 0.14434814453125, "grad_norm": 16.014537811279297, "learning_rate": 4.750306137263044e-06, "loss": 3.4213, "step": 14190 }, { "epoch": 0.14439900716145834, "grad_norm": 15.562359809875488, "learning_rate": 4.750131905556036e-06, "loss": 3.2832, "step": 14195 }, { "epoch": 0.14444986979166666, "grad_norm": 15.114102363586426, "learning_rate": 4.749957616279937e-06, "loss": 3.3687, "step": 14200 }, { "epoch": 0.144500732421875, "grad_norm": 15.910360336303711, "learning_rate": 4.749783269439205e-06, "loss": 3.1607, "step": 14205 }, { "epoch": 0.14455159505208334, "grad_norm": 8.769811630249023, "learning_rate": 4.749608865038301e-06, "loss": 3.3552, "step": 14210 }, { "epoch": 0.14460245768229166, "grad_norm": 9.91917896270752, "learning_rate": 4.749434403081688e-06, "loss": 3.2902, "step": 14215 }, { "epoch": 0.1446533203125, "grad_norm": 9.88044261932373, "learning_rate": 4.749259883573829e-06, "loss": 3.4576, "step": 14220 }, { "epoch": 0.14470418294270834, "grad_norm": 15.199368476867676, "learning_rate": 4.749085306519189e-06, "loss": 3.2871, "step": 14225 }, { "epoch": 0.14475504557291666, "grad_norm": 15.931928634643555, "learning_rate": 4.748910671922234e-06, "loss": 3.5535, "step": 14230 }, { "epoch": 0.144805908203125, "grad_norm": 11.521714210510254, "learning_rate": 4.748735979787433e-06, "loss": 3.3337, "step": 14235 }, { "epoch": 0.14485677083333334, "grad_norm": 9.132640838623047, "learning_rate": 4.7485612301192545e-06, "loss": 3.599, "step": 14240 }, { "epoch": 0.14490763346354166, "grad_norm": 14.95056438446045, "learning_rate": 4.74838642292217e-06, "loss": 3.2524, "step": 14245 }, { "epoch": 0.14495849609375, "grad_norm": 13.88601303100586, "learning_rate": 4.748211558200653e-06, "loss": 3.3675, "step": 14250 }, { "epoch": 0.14500935872395834, "grad_norm": 11.538806915283203, "learning_rate": 4.748036635959174e-06, "loss": 3.4054, "step": 14255 }, { "epoch": 0.14506022135416666, "grad_norm": 12.001321792602539, "learning_rate": 4.747861656202212e-06, "loss": 3.481, "step": 14260 }, { "epoch": 0.145111083984375, "grad_norm": 13.24527359008789, "learning_rate": 4.747686618934242e-06, "loss": 2.8579, "step": 14265 }, { "epoch": 0.14516194661458334, "grad_norm": 13.960221290588379, "learning_rate": 4.747511524159743e-06, "loss": 3.6163, "step": 14270 }, { "epoch": 0.14521280924479166, "grad_norm": 14.137311935424805, "learning_rate": 4.747336371883194e-06, "loss": 3.2439, "step": 14275 }, { "epoch": 0.145263671875, "grad_norm": 13.414327621459961, "learning_rate": 4.747161162109076e-06, "loss": 3.6854, "step": 14280 }, { "epoch": 0.14531453450520834, "grad_norm": 7.808047294616699, "learning_rate": 4.746985894841873e-06, "loss": 3.4737, "step": 14285 }, { "epoch": 0.14536539713541666, "grad_norm": 7.56891393661499, "learning_rate": 4.746810570086069e-06, "loss": 3.2434, "step": 14290 }, { "epoch": 0.145416259765625, "grad_norm": 15.081562042236328, "learning_rate": 4.746635187846148e-06, "loss": 3.3065, "step": 14295 }, { "epoch": 0.14546712239583334, "grad_norm": 12.434776306152344, "learning_rate": 4.746459748126599e-06, "loss": 3.2885, "step": 14300 }, { "epoch": 0.14551798502604166, "grad_norm": 14.816580772399902, "learning_rate": 4.74628425093191e-06, "loss": 3.3603, "step": 14305 }, { "epoch": 0.14556884765625, "grad_norm": 10.694483757019043, "learning_rate": 4.74610869626657e-06, "loss": 3.4447, "step": 14310 }, { "epoch": 0.14561971028645834, "grad_norm": 11.717911720275879, "learning_rate": 4.745933084135071e-06, "loss": 3.7955, "step": 14315 }, { "epoch": 0.14567057291666666, "grad_norm": 12.842635154724121, "learning_rate": 4.745757414541908e-06, "loss": 3.63, "step": 14320 }, { "epoch": 0.145721435546875, "grad_norm": 13.658503532409668, "learning_rate": 4.745581687491573e-06, "loss": 3.5957, "step": 14325 }, { "epoch": 0.14577229817708334, "grad_norm": 8.604068756103516, "learning_rate": 4.745405902988563e-06, "loss": 3.3326, "step": 14330 }, { "epoch": 0.14582316080729166, "grad_norm": 10.351202964782715, "learning_rate": 4.745230061037375e-06, "loss": 3.2391, "step": 14335 }, { "epoch": 0.1458740234375, "grad_norm": 9.471860885620117, "learning_rate": 4.745054161642508e-06, "loss": 3.3626, "step": 14340 }, { "epoch": 0.14592488606770834, "grad_norm": 10.003438949584961, "learning_rate": 4.744878204808463e-06, "loss": 3.5786, "step": 14345 }, { "epoch": 0.14597574869791666, "grad_norm": 10.829023361206055, "learning_rate": 4.744702190539741e-06, "loss": 3.7278, "step": 14350 }, { "epoch": 0.146026611328125, "grad_norm": 9.213700294494629, "learning_rate": 4.744526118840844e-06, "loss": 2.9384, "step": 14355 }, { "epoch": 0.14607747395833334, "grad_norm": 12.245752334594727, "learning_rate": 4.7443499897162794e-06, "loss": 3.4541, "step": 14360 }, { "epoch": 0.14612833658854166, "grad_norm": 9.579618453979492, "learning_rate": 4.744173803170553e-06, "loss": 3.2422, "step": 14365 }, { "epoch": 0.14617919921875, "grad_norm": 7.907454967498779, "learning_rate": 4.743997559208171e-06, "loss": 3.2129, "step": 14370 }, { "epoch": 0.14623006184895834, "grad_norm": 15.023747444152832, "learning_rate": 4.743821257833644e-06, "loss": 3.3496, "step": 14375 }, { "epoch": 0.14628092447916666, "grad_norm": 8.13438892364502, "learning_rate": 4.743644899051481e-06, "loss": 3.4805, "step": 14380 }, { "epoch": 0.146331787109375, "grad_norm": 10.506806373596191, "learning_rate": 4.743468482866196e-06, "loss": 3.653, "step": 14385 }, { "epoch": 0.14638264973958334, "grad_norm": 13.162545204162598, "learning_rate": 4.743292009282301e-06, "loss": 3.4791, "step": 14390 }, { "epoch": 0.14643351236979166, "grad_norm": 14.953128814697266, "learning_rate": 4.743115478304312e-06, "loss": 3.3943, "step": 14395 }, { "epoch": 0.146484375, "grad_norm": 8.338605880737305, "learning_rate": 4.742938889936745e-06, "loss": 3.484, "step": 14400 }, { "epoch": 0.14653523763020834, "grad_norm": 11.624462127685547, "learning_rate": 4.742762244184117e-06, "loss": 3.1865, "step": 14405 }, { "epoch": 0.14658610026041666, "grad_norm": 9.61160945892334, "learning_rate": 4.74258554105095e-06, "loss": 3.518, "step": 14410 }, { "epoch": 0.146636962890625, "grad_norm": 10.283778190612793, "learning_rate": 4.742408780541763e-06, "loss": 3.4033, "step": 14415 }, { "epoch": 0.14668782552083334, "grad_norm": 14.608953475952148, "learning_rate": 4.742231962661079e-06, "loss": 3.0345, "step": 14420 }, { "epoch": 0.14673868815104166, "grad_norm": 13.46399974822998, "learning_rate": 4.742055087413422e-06, "loss": 3.3489, "step": 14425 }, { "epoch": 0.14678955078125, "grad_norm": 9.469012260437012, "learning_rate": 4.741878154803316e-06, "loss": 2.9819, "step": 14430 }, { "epoch": 0.14684041341145834, "grad_norm": 10.287275314331055, "learning_rate": 4.741701164835291e-06, "loss": 3.191, "step": 14435 }, { "epoch": 0.14689127604166666, "grad_norm": 12.825785636901855, "learning_rate": 4.741524117513871e-06, "loss": 2.9444, "step": 14440 }, { "epoch": 0.146942138671875, "grad_norm": 11.87830924987793, "learning_rate": 4.741347012843588e-06, "loss": 3.3462, "step": 14445 }, { "epoch": 0.14699300130208334, "grad_norm": 9.253486633300781, "learning_rate": 4.7411698508289735e-06, "loss": 3.7622, "step": 14450 }, { "epoch": 0.14704386393229166, "grad_norm": 11.679107666015625, "learning_rate": 4.740992631474559e-06, "loss": 3.2838, "step": 14455 }, { "epoch": 0.1470947265625, "grad_norm": 13.79211711883545, "learning_rate": 4.740815354784879e-06, "loss": 3.2918, "step": 14460 }, { "epoch": 0.14714558919270834, "grad_norm": 14.639359474182129, "learning_rate": 4.74063802076447e-06, "loss": 3.4211, "step": 14465 }, { "epoch": 0.14719645182291666, "grad_norm": 12.899956703186035, "learning_rate": 4.7404606294178684e-06, "loss": 3.6037, "step": 14470 }, { "epoch": 0.147247314453125, "grad_norm": 15.321117401123047, "learning_rate": 4.740283180749613e-06, "loss": 3.5764, "step": 14475 }, { "epoch": 0.14729817708333334, "grad_norm": 12.093023300170898, "learning_rate": 4.740105674764243e-06, "loss": 3.4525, "step": 14480 }, { "epoch": 0.14734903971354166, "grad_norm": 12.851874351501465, "learning_rate": 4.739928111466299e-06, "loss": 3.7187, "step": 14485 }, { "epoch": 0.14739990234375, "grad_norm": 11.185502052307129, "learning_rate": 4.739750490860327e-06, "loss": 3.6987, "step": 14490 }, { "epoch": 0.14745076497395834, "grad_norm": 10.547276496887207, "learning_rate": 4.7395728129508686e-06, "loss": 3.746, "step": 14495 }, { "epoch": 0.14750162760416666, "grad_norm": 14.449246406555176, "learning_rate": 4.739395077742471e-06, "loss": 3.6518, "step": 14500 }, { "epoch": 0.147552490234375, "grad_norm": 14.639830589294434, "learning_rate": 4.739217285239681e-06, "loss": 4.1111, "step": 14505 }, { "epoch": 0.14760335286458334, "grad_norm": 13.559803009033203, "learning_rate": 4.739039435447047e-06, "loss": 3.5169, "step": 14510 }, { "epoch": 0.14765421549479166, "grad_norm": 17.011873245239258, "learning_rate": 4.738861528369121e-06, "loss": 3.3476, "step": 14515 }, { "epoch": 0.147705078125, "grad_norm": 10.405207633972168, "learning_rate": 4.7386835640104525e-06, "loss": 3.1015, "step": 14520 }, { "epoch": 0.14775594075520834, "grad_norm": 15.131099700927734, "learning_rate": 4.738505542375595e-06, "loss": 3.7291, "step": 14525 }, { "epoch": 0.14780680338541666, "grad_norm": 17.359588623046875, "learning_rate": 4.738327463469105e-06, "loss": 3.3012, "step": 14530 }, { "epoch": 0.147857666015625, "grad_norm": 8.4977445602417, "learning_rate": 4.738149327295537e-06, "loss": 4.0538, "step": 14535 }, { "epoch": 0.14790852864583334, "grad_norm": 14.337766647338867, "learning_rate": 4.737971133859449e-06, "loss": 2.9475, "step": 14540 }, { "epoch": 0.14795939127604166, "grad_norm": 6.989287853240967, "learning_rate": 4.7377928831654e-06, "loss": 3.5471, "step": 14545 }, { "epoch": 0.14801025390625, "grad_norm": 8.3816499710083, "learning_rate": 4.73761457521795e-06, "loss": 3.4316, "step": 14550 }, { "epoch": 0.14806111653645834, "grad_norm": 10.17712116241455, "learning_rate": 4.7374362100216625e-06, "loss": 3.5723, "step": 14555 }, { "epoch": 0.14811197916666666, "grad_norm": 13.229191780090332, "learning_rate": 4.737257787581099e-06, "loss": 3.5133, "step": 14560 }, { "epoch": 0.148162841796875, "grad_norm": 13.506834983825684, "learning_rate": 4.737079307900826e-06, "loss": 3.5203, "step": 14565 }, { "epoch": 0.14821370442708334, "grad_norm": 13.098067283630371, "learning_rate": 4.736900770985409e-06, "loss": 3.1275, "step": 14570 }, { "epoch": 0.14826456705729166, "grad_norm": 15.300044059753418, "learning_rate": 4.7367221768394155e-06, "loss": 3.3876, "step": 14575 }, { "epoch": 0.1483154296875, "grad_norm": 11.651641845703125, "learning_rate": 4.736543525467415e-06, "loss": 3.1862, "step": 14580 }, { "epoch": 0.14836629231770834, "grad_norm": 17.858064651489258, "learning_rate": 4.736364816873979e-06, "loss": 3.2058, "step": 14585 }, { "epoch": 0.14841715494791666, "grad_norm": 11.784101486206055, "learning_rate": 4.73618605106368e-06, "loss": 3.3627, "step": 14590 }, { "epoch": 0.148468017578125, "grad_norm": 16.125946044921875, "learning_rate": 4.73600722804109e-06, "loss": 3.2323, "step": 14595 }, { "epoch": 0.14851888020833334, "grad_norm": 17.72896385192871, "learning_rate": 4.735828347810785e-06, "loss": 3.542, "step": 14600 }, { "epoch": 0.14856974283854166, "grad_norm": 13.747530937194824, "learning_rate": 4.735649410377342e-06, "loss": 3.0246, "step": 14605 }, { "epoch": 0.14862060546875, "grad_norm": 10.233755111694336, "learning_rate": 4.735470415745339e-06, "loss": 3.6899, "step": 14610 }, { "epoch": 0.14867146809895834, "grad_norm": 13.699902534484863, "learning_rate": 4.735291363919355e-06, "loss": 3.1592, "step": 14615 }, { "epoch": 0.14872233072916666, "grad_norm": 11.987502098083496, "learning_rate": 4.735112254903971e-06, "loss": 3.3525, "step": 14620 }, { "epoch": 0.148773193359375, "grad_norm": 13.890175819396973, "learning_rate": 4.73493308870377e-06, "loss": 3.3058, "step": 14625 }, { "epoch": 0.14882405598958334, "grad_norm": 14.703634262084961, "learning_rate": 4.734753865323336e-06, "loss": 3.3691, "step": 14630 }, { "epoch": 0.14887491861979166, "grad_norm": 11.793313980102539, "learning_rate": 4.734574584767253e-06, "loss": 3.1809, "step": 14635 }, { "epoch": 0.14892578125, "grad_norm": 13.151676177978516, "learning_rate": 4.73439524704011e-06, "loss": 3.4036, "step": 14640 }, { "epoch": 0.14897664388020834, "grad_norm": 10.763456344604492, "learning_rate": 4.734215852146493e-06, "loss": 3.2028, "step": 14645 }, { "epoch": 0.14902750651041666, "grad_norm": 15.11712646484375, "learning_rate": 4.734036400090994e-06, "loss": 3.5627, "step": 14650 }, { "epoch": 0.149078369140625, "grad_norm": 7.594333171844482, "learning_rate": 4.7338568908782036e-06, "loss": 3.526, "step": 14655 }, { "epoch": 0.14912923177083334, "grad_norm": 16.161596298217773, "learning_rate": 4.733677324512713e-06, "loss": 3.3709, "step": 14660 }, { "epoch": 0.14918009440104166, "grad_norm": 9.044793128967285, "learning_rate": 4.733497700999119e-06, "loss": 3.2466, "step": 14665 }, { "epoch": 0.14923095703125, "grad_norm": 9.249896049499512, "learning_rate": 4.733318020342014e-06, "loss": 3.7076, "step": 14670 }, { "epoch": 0.14928181966145834, "grad_norm": 9.653711318969727, "learning_rate": 4.7331382825459985e-06, "loss": 3.3792, "step": 14675 }, { "epoch": 0.14933268229166666, "grad_norm": 10.778152465820312, "learning_rate": 4.732958487615668e-06, "loss": 3.2468, "step": 14680 }, { "epoch": 0.149383544921875, "grad_norm": 12.605219841003418, "learning_rate": 4.7327786355556235e-06, "loss": 3.717, "step": 14685 }, { "epoch": 0.14943440755208334, "grad_norm": 11.11141300201416, "learning_rate": 4.7325987263704685e-06, "loss": 3.1906, "step": 14690 }, { "epoch": 0.14948527018229166, "grad_norm": 13.047835350036621, "learning_rate": 4.732418760064803e-06, "loss": 3.5693, "step": 14695 }, { "epoch": 0.1495361328125, "grad_norm": 12.675945281982422, "learning_rate": 4.7322387366432335e-06, "loss": 3.2765, "step": 14700 }, { "epoch": 0.14958699544270834, "grad_norm": 11.04984188079834, "learning_rate": 4.732058656110364e-06, "loss": 3.0329, "step": 14705 }, { "epoch": 0.14963785807291666, "grad_norm": 14.509688377380371, "learning_rate": 4.7318785184708035e-06, "loss": 3.5102, "step": 14710 }, { "epoch": 0.149688720703125, "grad_norm": 12.124781608581543, "learning_rate": 4.731698323729161e-06, "loss": 3.2282, "step": 14715 }, { "epoch": 0.14973958333333334, "grad_norm": 10.267120361328125, "learning_rate": 4.731518071890045e-06, "loss": 3.3788, "step": 14720 }, { "epoch": 0.14979044596354166, "grad_norm": 9.034321784973145, "learning_rate": 4.731337762958067e-06, "loss": 3.4097, "step": 14725 }, { "epoch": 0.14984130859375, "grad_norm": 13.264888763427734, "learning_rate": 4.731157396937842e-06, "loss": 3.5671, "step": 14730 }, { "epoch": 0.14989217122395834, "grad_norm": 12.145549774169922, "learning_rate": 4.730976973833984e-06, "loss": 3.1394, "step": 14735 }, { "epoch": 0.14994303385416666, "grad_norm": 10.380547523498535, "learning_rate": 4.7307964936511095e-06, "loss": 3.4537, "step": 14740 }, { "epoch": 0.149993896484375, "grad_norm": 10.687427520751953, "learning_rate": 4.730615956393835e-06, "loss": 3.2803, "step": 14745 }, { "epoch": 0.15004475911458334, "grad_norm": 9.110136985778809, "learning_rate": 4.730435362066779e-06, "loss": 3.1083, "step": 14750 }, { "epoch": 0.15009562174479166, "grad_norm": 27.618864059448242, "learning_rate": 4.730254710674564e-06, "loss": 3.4778, "step": 14755 }, { "epoch": 0.150146484375, "grad_norm": 9.159721374511719, "learning_rate": 4.73007400222181e-06, "loss": 3.5116, "step": 14760 }, { "epoch": 0.15019734700520834, "grad_norm": 13.149566650390625, "learning_rate": 4.729893236713142e-06, "loss": 3.3386, "step": 14765 }, { "epoch": 0.15024820963541666, "grad_norm": 10.829029083251953, "learning_rate": 4.729712414153184e-06, "loss": 3.862, "step": 14770 }, { "epoch": 0.150299072265625, "grad_norm": 11.96367073059082, "learning_rate": 4.729531534546563e-06, "loss": 3.66, "step": 14775 }, { "epoch": 0.15034993489583334, "grad_norm": 14.411954879760742, "learning_rate": 4.729350597897905e-06, "loss": 3.2965, "step": 14780 }, { "epoch": 0.15040079752604166, "grad_norm": 12.512883186340332, "learning_rate": 4.729169604211841e-06, "loss": 3.2615, "step": 14785 }, { "epoch": 0.15045166015625, "grad_norm": 11.264093399047852, "learning_rate": 4.728988553493001e-06, "loss": 3.1227, "step": 14790 }, { "epoch": 0.15050252278645834, "grad_norm": 7.70527458190918, "learning_rate": 4.728807445746018e-06, "loss": 3.3477, "step": 14795 }, { "epoch": 0.15055338541666666, "grad_norm": 14.769386291503906, "learning_rate": 4.728626280975523e-06, "loss": 3.6628, "step": 14800 }, { "epoch": 0.150604248046875, "grad_norm": 13.030516624450684, "learning_rate": 4.7284450591861545e-06, "loss": 3.377, "step": 14805 }, { "epoch": 0.15065511067708334, "grad_norm": 12.844939231872559, "learning_rate": 4.728263780382546e-06, "loss": 3.8196, "step": 14810 }, { "epoch": 0.15070597330729166, "grad_norm": 10.097814559936523, "learning_rate": 4.728082444569337e-06, "loss": 3.8156, "step": 14815 }, { "epoch": 0.1507568359375, "grad_norm": 10.388051986694336, "learning_rate": 4.727901051751167e-06, "loss": 3.4716, "step": 14820 }, { "epoch": 0.15080769856770834, "grad_norm": 13.543767929077148, "learning_rate": 4.727719601932678e-06, "loss": 3.4144, "step": 14825 }, { "epoch": 0.15085856119791666, "grad_norm": 11.783435821533203, "learning_rate": 4.727538095118509e-06, "loss": 3.2369, "step": 14830 }, { "epoch": 0.150909423828125, "grad_norm": 14.941376686096191, "learning_rate": 4.727356531313307e-06, "loss": 3.5681, "step": 14835 }, { "epoch": 0.15096028645833334, "grad_norm": 13.9028959274292, "learning_rate": 4.727174910521716e-06, "loss": 3.2229, "step": 14840 }, { "epoch": 0.15101114908854166, "grad_norm": 13.589739799499512, "learning_rate": 4.726993232748382e-06, "loss": 3.3788, "step": 14845 }, { "epoch": 0.15106201171875, "grad_norm": 11.502520561218262, "learning_rate": 4.7268114979979555e-06, "loss": 3.4142, "step": 14850 }, { "epoch": 0.15111287434895834, "grad_norm": 12.786849975585938, "learning_rate": 4.726629706275083e-06, "loss": 3.5127, "step": 14855 }, { "epoch": 0.15116373697916666, "grad_norm": 7.40986967086792, "learning_rate": 4.7264478575844185e-06, "loss": 3.127, "step": 14860 }, { "epoch": 0.151214599609375, "grad_norm": 10.638337135314941, "learning_rate": 4.726265951930612e-06, "loss": 3.0998, "step": 14865 }, { "epoch": 0.15126546223958334, "grad_norm": 10.695724487304688, "learning_rate": 4.72608398931832e-06, "loss": 3.5446, "step": 14870 }, { "epoch": 0.15131632486979166, "grad_norm": 11.744098663330078, "learning_rate": 4.7259019697521955e-06, "loss": 2.9158, "step": 14875 }, { "epoch": 0.1513671875, "grad_norm": 9.356399536132812, "learning_rate": 4.725719893236898e-06, "loss": 3.2486, "step": 14880 }, { "epoch": 0.15141805013020834, "grad_norm": 12.832469940185547, "learning_rate": 4.725537759777084e-06, "loss": 3.2114, "step": 14885 }, { "epoch": 0.15146891276041666, "grad_norm": 12.14478588104248, "learning_rate": 4.725355569377415e-06, "loss": 2.9888, "step": 14890 }, { "epoch": 0.151519775390625, "grad_norm": 8.157812118530273, "learning_rate": 4.72517332204255e-06, "loss": 3.3431, "step": 14895 }, { "epoch": 0.15157063802083334, "grad_norm": 12.803070068359375, "learning_rate": 4.724991017777153e-06, "loss": 3.5178, "step": 14900 }, { "epoch": 0.15162150065104166, "grad_norm": 8.299994468688965, "learning_rate": 4.7248086565858886e-06, "loss": 3.5268, "step": 14905 }, { "epoch": 0.15167236328125, "grad_norm": 12.789700508117676, "learning_rate": 4.724626238473421e-06, "loss": 3.2294, "step": 14910 }, { "epoch": 0.15172322591145834, "grad_norm": 9.37491226196289, "learning_rate": 4.724443763444419e-06, "loss": 3.22, "step": 14915 }, { "epoch": 0.15177408854166666, "grad_norm": 14.288036346435547, "learning_rate": 4.724261231503552e-06, "loss": 3.3765, "step": 14920 }, { "epoch": 0.151824951171875, "grad_norm": 17.946796417236328, "learning_rate": 4.724078642655487e-06, "loss": 3.1767, "step": 14925 }, { "epoch": 0.15187581380208334, "grad_norm": 9.992767333984375, "learning_rate": 4.723895996904897e-06, "loss": 4.5134, "step": 14930 }, { "epoch": 0.15192667643229166, "grad_norm": 12.649903297424316, "learning_rate": 4.7237132942564565e-06, "loss": 3.2216, "step": 14935 }, { "epoch": 0.1519775390625, "grad_norm": 11.577804565429688, "learning_rate": 4.723530534714837e-06, "loss": 3.3525, "step": 14940 }, { "epoch": 0.15202840169270834, "grad_norm": 11.609172821044922, "learning_rate": 4.723347718284716e-06, "loss": 3.2994, "step": 14945 }, { "epoch": 0.15207926432291666, "grad_norm": 11.450626373291016, "learning_rate": 4.723164844970771e-06, "loss": 3.1224, "step": 14950 }, { "epoch": 0.152130126953125, "grad_norm": 12.139467239379883, "learning_rate": 4.722981914777681e-06, "loss": 2.981, "step": 14955 }, { "epoch": 0.15218098958333334, "grad_norm": 14.975263595581055, "learning_rate": 4.722798927710124e-06, "loss": 3.2868, "step": 14960 }, { "epoch": 0.15223185221354166, "grad_norm": 14.647977828979492, "learning_rate": 4.722615883772785e-06, "loss": 3.3901, "step": 14965 }, { "epoch": 0.15228271484375, "grad_norm": 9.550837516784668, "learning_rate": 4.7224327829703444e-06, "loss": 3.8862, "step": 14970 }, { "epoch": 0.15233357747395834, "grad_norm": 10.410728454589844, "learning_rate": 4.7222496253074876e-06, "loss": 3.3335, "step": 14975 }, { "epoch": 0.15238444010416666, "grad_norm": 9.198328018188477, "learning_rate": 4.722066410788902e-06, "loss": 3.7736, "step": 14980 }, { "epoch": 0.152435302734375, "grad_norm": 15.138815879821777, "learning_rate": 4.721883139419273e-06, "loss": 3.2838, "step": 14985 }, { "epoch": 0.15248616536458334, "grad_norm": 10.338898658752441, "learning_rate": 4.721699811203291e-06, "loss": 3.3141, "step": 14990 }, { "epoch": 0.15253702799479166, "grad_norm": 11.960433959960938, "learning_rate": 4.721516426145646e-06, "loss": 3.3508, "step": 14995 }, { "epoch": 0.152587890625, "grad_norm": 12.279603958129883, "learning_rate": 4.7213329842510295e-06, "loss": 3.3861, "step": 15000 }, { "epoch": 0.15263875325520834, "grad_norm": 11.51431941986084, "learning_rate": 4.721149485524135e-06, "loss": 3.3131, "step": 15005 }, { "epoch": 0.15268961588541666, "grad_norm": 8.839967727661133, "learning_rate": 4.720965929969658e-06, "loss": 3.2533, "step": 15010 }, { "epoch": 0.152740478515625, "grad_norm": 8.14935302734375, "learning_rate": 4.720782317592293e-06, "loss": 3.6221, "step": 15015 }, { "epoch": 0.15279134114583334, "grad_norm": 12.631562232971191, "learning_rate": 4.7205986483967396e-06, "loss": 3.6434, "step": 15020 }, { "epoch": 0.15284220377604166, "grad_norm": 12.91220760345459, "learning_rate": 4.720414922387696e-06, "loss": 3.1931, "step": 15025 }, { "epoch": 0.15289306640625, "grad_norm": 15.801246643066406, "learning_rate": 4.720231139569863e-06, "loss": 3.4381, "step": 15030 }, { "epoch": 0.15294392903645834, "grad_norm": 11.53238296508789, "learning_rate": 4.720047299947943e-06, "loss": 3.7497, "step": 15035 }, { "epoch": 0.15299479166666666, "grad_norm": 12.755319595336914, "learning_rate": 4.7198634035266375e-06, "loss": 3.2225, "step": 15040 }, { "epoch": 0.153045654296875, "grad_norm": 9.745579719543457, "learning_rate": 4.719679450310654e-06, "loss": 3.5779, "step": 15045 }, { "epoch": 0.15309651692708334, "grad_norm": 12.108492851257324, "learning_rate": 4.719495440304698e-06, "loss": 3.9278, "step": 15050 }, { "epoch": 0.15314737955729166, "grad_norm": 13.5195951461792, "learning_rate": 4.719311373513477e-06, "loss": 3.3074, "step": 15055 }, { "epoch": 0.1531982421875, "grad_norm": 14.34796142578125, "learning_rate": 4.719127249941701e-06, "loss": 3.2327, "step": 15060 }, { "epoch": 0.15324910481770834, "grad_norm": 10.067496299743652, "learning_rate": 4.718943069594079e-06, "loss": 3.8594, "step": 15065 }, { "epoch": 0.15329996744791666, "grad_norm": 9.026041984558105, "learning_rate": 4.718758832475326e-06, "loss": 3.4053, "step": 15070 }, { "epoch": 0.153350830078125, "grad_norm": 14.132732391357422, "learning_rate": 4.718574538590154e-06, "loss": 3.3339, "step": 15075 }, { "epoch": 0.15340169270833334, "grad_norm": 13.146659851074219, "learning_rate": 4.718390187943278e-06, "loss": 4.1499, "step": 15080 }, { "epoch": 0.15345255533854166, "grad_norm": 12.891654014587402, "learning_rate": 4.7182057805394145e-06, "loss": 3.1477, "step": 15085 }, { "epoch": 0.15350341796875, "grad_norm": 8.704354286193848, "learning_rate": 4.718021316383282e-06, "loss": 3.4241, "step": 15090 }, { "epoch": 0.15355428059895834, "grad_norm": 8.283037185668945, "learning_rate": 4.7178367954796e-06, "loss": 3.4241, "step": 15095 }, { "epoch": 0.15360514322916666, "grad_norm": 14.645727157592773, "learning_rate": 4.7176522178330895e-06, "loss": 4.3103, "step": 15100 }, { "epoch": 0.153656005859375, "grad_norm": 8.679193496704102, "learning_rate": 4.717467583448472e-06, "loss": 4.3254, "step": 15105 }, { "epoch": 0.15370686848958334, "grad_norm": 22.80760383605957, "learning_rate": 4.717282892330472e-06, "loss": 3.3115, "step": 15110 }, { "epoch": 0.15375773111979166, "grad_norm": 10.638278007507324, "learning_rate": 4.717098144483815e-06, "loss": 3.4555, "step": 15115 }, { "epoch": 0.15380859375, "grad_norm": 16.177385330200195, "learning_rate": 4.7169133399132285e-06, "loss": 3.2353, "step": 15120 }, { "epoch": 0.15385945638020834, "grad_norm": 13.588946342468262, "learning_rate": 4.7167284786234385e-06, "loss": 3.512, "step": 15125 }, { "epoch": 0.15391031901041666, "grad_norm": 14.394001960754395, "learning_rate": 4.716543560619175e-06, "loss": 3.0573, "step": 15130 }, { "epoch": 0.153961181640625, "grad_norm": 9.461201667785645, "learning_rate": 4.716358585905172e-06, "loss": 3.2514, "step": 15135 }, { "epoch": 0.15401204427083334, "grad_norm": 13.264630317687988, "learning_rate": 4.716173554486159e-06, "loss": 3.2544, "step": 15140 }, { "epoch": 0.15406290690104166, "grad_norm": 11.358147621154785, "learning_rate": 4.71598846636687e-06, "loss": 3.5706, "step": 15145 }, { "epoch": 0.15411376953125, "grad_norm": 14.053950309753418, "learning_rate": 4.715803321552043e-06, "loss": 3.4664, "step": 15150 }, { "epoch": 0.15416463216145834, "grad_norm": 14.992704391479492, "learning_rate": 4.715618120046412e-06, "loss": 3.9273, "step": 15155 }, { "epoch": 0.15421549479166666, "grad_norm": 13.31781005859375, "learning_rate": 4.715432861854717e-06, "loss": 3.5456, "step": 15160 }, { "epoch": 0.154266357421875, "grad_norm": 8.325743675231934, "learning_rate": 4.715247546981697e-06, "loss": 3.3918, "step": 15165 }, { "epoch": 0.15431722005208334, "grad_norm": 11.262593269348145, "learning_rate": 4.715062175432093e-06, "loss": 3.5529, "step": 15170 }, { "epoch": 0.15436808268229166, "grad_norm": 10.394039154052734, "learning_rate": 4.71487674721065e-06, "loss": 3.3751, "step": 15175 }, { "epoch": 0.1544189453125, "grad_norm": 16.578327178955078, "learning_rate": 4.7146912623221095e-06, "loss": 3.1776, "step": 15180 }, { "epoch": 0.15446980794270834, "grad_norm": 11.776280403137207, "learning_rate": 4.7145057207712175e-06, "loss": 3.5786, "step": 15185 }, { "epoch": 0.15452067057291666, "grad_norm": 12.497096061706543, "learning_rate": 4.714320122562722e-06, "loss": 3.0066, "step": 15190 }, { "epoch": 0.154571533203125, "grad_norm": 9.880024909973145, "learning_rate": 4.714134467701371e-06, "loss": 3.3478, "step": 15195 }, { "epoch": 0.15462239583333334, "grad_norm": 9.223662376403809, "learning_rate": 4.713948756191915e-06, "loss": 3.3263, "step": 15200 }, { "epoch": 0.15467325846354166, "grad_norm": 7.955392837524414, "learning_rate": 4.713762988039105e-06, "loss": 3.4025, "step": 15205 }, { "epoch": 0.15472412109375, "grad_norm": 10.604644775390625, "learning_rate": 4.713577163247692e-06, "loss": 2.9697, "step": 15210 }, { "epoch": 0.15477498372395834, "grad_norm": 7.452188014984131, "learning_rate": 4.713391281822433e-06, "loss": 3.6369, "step": 15215 }, { "epoch": 0.15482584635416666, "grad_norm": 12.423209190368652, "learning_rate": 4.713205343768082e-06, "loss": 3.3861, "step": 15220 }, { "epoch": 0.154876708984375, "grad_norm": 17.55681610107422, "learning_rate": 4.713019349089399e-06, "loss": 3.6858, "step": 15225 }, { "epoch": 0.15492757161458334, "grad_norm": 14.02679443359375, "learning_rate": 4.712833297791138e-06, "loss": 3.167, "step": 15230 }, { "epoch": 0.15497843424479166, "grad_norm": 11.839001655578613, "learning_rate": 4.712647189878063e-06, "loss": 2.955, "step": 15235 }, { "epoch": 0.155029296875, "grad_norm": 11.20364761352539, "learning_rate": 4.712461025354933e-06, "loss": 3.0909, "step": 15240 }, { "epoch": 0.15508015950520834, "grad_norm": 18.19971466064453, "learning_rate": 4.712274804226513e-06, "loss": 3.2367, "step": 15245 }, { "epoch": 0.15513102213541666, "grad_norm": 7.526338577270508, "learning_rate": 4.712088526497566e-06, "loss": 3.3709, "step": 15250 }, { "epoch": 0.155181884765625, "grad_norm": 15.513593673706055, "learning_rate": 4.711902192172858e-06, "loss": 3.9162, "step": 15255 }, { "epoch": 0.15523274739583334, "grad_norm": 10.80632495880127, "learning_rate": 4.7117158012571585e-06, "loss": 3.1795, "step": 15260 }, { "epoch": 0.15528361002604166, "grad_norm": 13.773353576660156, "learning_rate": 4.711529353755233e-06, "loss": 3.7235, "step": 15265 }, { "epoch": 0.15533447265625, "grad_norm": 9.968823432922363, "learning_rate": 4.711342849671853e-06, "loss": 3.4381, "step": 15270 }, { "epoch": 0.15538533528645834, "grad_norm": 7.348340034484863, "learning_rate": 4.711156289011792e-06, "loss": 2.9837, "step": 15275 }, { "epoch": 0.15543619791666666, "grad_norm": 19.31353187561035, "learning_rate": 4.710969671779819e-06, "loss": 3.2548, "step": 15280 }, { "epoch": 0.155487060546875, "grad_norm": 13.9642915725708, "learning_rate": 4.7107829979807124e-06, "loss": 3.1202, "step": 15285 }, { "epoch": 0.15553792317708334, "grad_norm": 12.23745346069336, "learning_rate": 4.710596267619247e-06, "loss": 3.3532, "step": 15290 }, { "epoch": 0.15558878580729166, "grad_norm": 10.830820083618164, "learning_rate": 4.710409480700199e-06, "loss": 3.5973, "step": 15295 }, { "epoch": 0.1556396484375, "grad_norm": 10.229269981384277, "learning_rate": 4.710222637228349e-06, "loss": 3.5623, "step": 15300 }, { "epoch": 0.15569051106770834, "grad_norm": 11.168558120727539, "learning_rate": 4.710035737208477e-06, "loss": 3.0141, "step": 15305 }, { "epoch": 0.15574137369791666, "grad_norm": 7.080033302307129, "learning_rate": 4.709848780645364e-06, "loss": 3.4462, "step": 15310 }, { "epoch": 0.155792236328125, "grad_norm": 7.330209255218506, "learning_rate": 4.709661767543794e-06, "loss": 3.2099, "step": 15315 }, { "epoch": 0.15584309895833334, "grad_norm": 7.501708030700684, "learning_rate": 4.709474697908552e-06, "loss": 3.3722, "step": 15320 }, { "epoch": 0.15589396158854166, "grad_norm": 9.475503921508789, "learning_rate": 4.709287571744423e-06, "loss": 3.6584, "step": 15325 }, { "epoch": 0.15594482421875, "grad_norm": 14.18191146850586, "learning_rate": 4.709100389056195e-06, "loss": 3.3899, "step": 15330 }, { "epoch": 0.15599568684895834, "grad_norm": 9.33660888671875, "learning_rate": 4.708913149848658e-06, "loss": 3.455, "step": 15335 }, { "epoch": 0.15604654947916666, "grad_norm": 10.5362548828125, "learning_rate": 4.7087258541266e-06, "loss": 3.5219, "step": 15340 }, { "epoch": 0.156097412109375, "grad_norm": 13.641423225402832, "learning_rate": 4.7085385018948155e-06, "loss": 3.8866, "step": 15345 }, { "epoch": 0.15614827473958334, "grad_norm": 14.66014289855957, "learning_rate": 4.708351093158097e-06, "loss": 3.6575, "step": 15350 }, { "epoch": 0.15619913736979166, "grad_norm": 10.71635627746582, "learning_rate": 4.708163627921239e-06, "loss": 3.5451, "step": 15355 }, { "epoch": 0.15625, "grad_norm": 12.844642639160156, "learning_rate": 4.7079761061890374e-06, "loss": 3.3541, "step": 15360 }, { "epoch": 0.15630086263020834, "grad_norm": 9.12993335723877, "learning_rate": 4.707788527966291e-06, "loss": 3.3004, "step": 15365 }, { "epoch": 0.15635172526041666, "grad_norm": 8.735359191894531, "learning_rate": 4.707600893257799e-06, "loss": 3.6416, "step": 15370 }, { "epoch": 0.156402587890625, "grad_norm": 17.718563079833984, "learning_rate": 4.707413202068361e-06, "loss": 3.4838, "step": 15375 }, { "epoch": 0.15645345052083334, "grad_norm": 13.61154556274414, "learning_rate": 4.707225454402779e-06, "loss": 3.3291, "step": 15380 }, { "epoch": 0.15650431315104166, "grad_norm": 10.323596954345703, "learning_rate": 4.707037650265857e-06, "loss": 3.44, "step": 15385 }, { "epoch": 0.15655517578125, "grad_norm": 12.949178695678711, "learning_rate": 4.7068497896624014e-06, "loss": 3.6229, "step": 15390 }, { "epoch": 0.15660603841145834, "grad_norm": 10.059599876403809, "learning_rate": 4.706661872597215e-06, "loss": 3.1794, "step": 15395 }, { "epoch": 0.15665690104166666, "grad_norm": 13.678911209106445, "learning_rate": 4.706473899075108e-06, "loss": 3.9318, "step": 15400 }, { "epoch": 0.156707763671875, "grad_norm": 11.830682754516602, "learning_rate": 4.7062858691008906e-06, "loss": 3.383, "step": 15405 }, { "epoch": 0.15675862630208334, "grad_norm": 17.724088668823242, "learning_rate": 4.706097782679371e-06, "loss": 3.9705, "step": 15410 }, { "epoch": 0.15680948893229166, "grad_norm": 7.41434907913208, "learning_rate": 4.7059096398153624e-06, "loss": 3.3684, "step": 15415 }, { "epoch": 0.1568603515625, "grad_norm": 10.549930572509766, "learning_rate": 4.705721440513679e-06, "loss": 3.5617, "step": 15420 }, { "epoch": 0.15691121419270834, "grad_norm": 13.3803071975708, "learning_rate": 4.705533184779135e-06, "loss": 3.4956, "step": 15425 }, { "epoch": 0.15696207682291666, "grad_norm": 9.431879043579102, "learning_rate": 4.705344872616548e-06, "loss": 3.3123, "step": 15430 }, { "epoch": 0.157012939453125, "grad_norm": 13.48584270477295, "learning_rate": 4.705156504030735e-06, "loss": 3.5462, "step": 15435 }, { "epoch": 0.15706380208333334, "grad_norm": 12.505586624145508, "learning_rate": 4.7049680790265145e-06, "loss": 3.5568, "step": 15440 }, { "epoch": 0.15711466471354166, "grad_norm": 14.382122993469238, "learning_rate": 4.704779597608709e-06, "loss": 3.2458, "step": 15445 }, { "epoch": 0.15716552734375, "grad_norm": 17.052955627441406, "learning_rate": 4.70459105978214e-06, "loss": 3.3895, "step": 15450 }, { "epoch": 0.15721638997395834, "grad_norm": 9.036126136779785, "learning_rate": 4.704402465551632e-06, "loss": 3.3079, "step": 15455 }, { "epoch": 0.15726725260416666, "grad_norm": 10.451210021972656, "learning_rate": 4.704213814922008e-06, "loss": 3.5565, "step": 15460 }, { "epoch": 0.157318115234375, "grad_norm": 9.082598686218262, "learning_rate": 4.704025107898097e-06, "loss": 3.1954, "step": 15465 }, { "epoch": 0.15736897786458334, "grad_norm": 15.115447044372559, "learning_rate": 4.703836344484726e-06, "loss": 3.3543, "step": 15470 }, { "epoch": 0.15741984049479166, "grad_norm": 11.461282730102539, "learning_rate": 4.7036475246867245e-06, "loss": 3.7555, "step": 15475 }, { "epoch": 0.157470703125, "grad_norm": 10.950669288635254, "learning_rate": 4.703458648508923e-06, "loss": 3.5753, "step": 15480 }, { "epoch": 0.15752156575520834, "grad_norm": 10.564921379089355, "learning_rate": 4.703269715956154e-06, "loss": 3.0771, "step": 15485 }, { "epoch": 0.15757242838541666, "grad_norm": 11.57642650604248, "learning_rate": 4.703080727033252e-06, "loss": 3.1755, "step": 15490 }, { "epoch": 0.157623291015625, "grad_norm": 10.133732795715332, "learning_rate": 4.702891681745052e-06, "loss": 3.6182, "step": 15495 }, { "epoch": 0.15767415364583334, "grad_norm": 11.367161750793457, "learning_rate": 4.70270258009639e-06, "loss": 3.2131, "step": 15500 }, { "epoch": 0.15772501627604166, "grad_norm": 10.937525749206543, "learning_rate": 4.702513422092106e-06, "loss": 3.6436, "step": 15505 }, { "epoch": 0.15777587890625, "grad_norm": 7.845228672027588, "learning_rate": 4.7023242077370365e-06, "loss": 3.2949, "step": 15510 }, { "epoch": 0.15782674153645834, "grad_norm": 10.833147048950195, "learning_rate": 4.7021349370360246e-06, "loss": 3.5691, "step": 15515 }, { "epoch": 0.15787760416666666, "grad_norm": 10.04218578338623, "learning_rate": 4.701945609993912e-06, "loss": 3.6244, "step": 15520 }, { "epoch": 0.157928466796875, "grad_norm": 15.010988235473633, "learning_rate": 4.701756226615544e-06, "loss": 3.5053, "step": 15525 }, { "epoch": 0.15797932942708334, "grad_norm": 12.318516731262207, "learning_rate": 4.701566786905763e-06, "loss": 3.2215, "step": 15530 }, { "epoch": 0.15803019205729166, "grad_norm": 12.219952583312988, "learning_rate": 4.701377290869419e-06, "loss": 3.5105, "step": 15535 }, { "epoch": 0.1580810546875, "grad_norm": 15.182580947875977, "learning_rate": 4.701187738511358e-06, "loss": 3.6047, "step": 15540 }, { "epoch": 0.15813191731770834, "grad_norm": 8.09954833984375, "learning_rate": 4.700998129836431e-06, "loss": 3.2873, "step": 15545 }, { "epoch": 0.15818277994791666, "grad_norm": 12.153461456298828, "learning_rate": 4.700808464849489e-06, "loss": 3.2927, "step": 15550 }, { "epoch": 0.158233642578125, "grad_norm": 16.8814754486084, "learning_rate": 4.700618743555384e-06, "loss": 3.6992, "step": 15555 }, { "epoch": 0.15828450520833334, "grad_norm": 8.98434066772461, "learning_rate": 4.700428965958968e-06, "loss": 3.2662, "step": 15560 }, { "epoch": 0.15833536783854166, "grad_norm": 14.116239547729492, "learning_rate": 4.700239132065101e-06, "loss": 3.6816, "step": 15565 }, { "epoch": 0.15838623046875, "grad_norm": 14.018820762634277, "learning_rate": 4.700049241878637e-06, "loss": 3.4, "step": 15570 }, { "epoch": 0.15843709309895834, "grad_norm": 9.490670204162598, "learning_rate": 4.699859295404433e-06, "loss": 3.3853, "step": 15575 }, { "epoch": 0.15848795572916666, "grad_norm": 12.291707992553711, "learning_rate": 4.699669292647352e-06, "loss": 3.4993, "step": 15580 }, { "epoch": 0.158538818359375, "grad_norm": 11.097095489501953, "learning_rate": 4.699479233612252e-06, "loss": 3.4258, "step": 15585 }, { "epoch": 0.15858968098958334, "grad_norm": 16.145360946655273, "learning_rate": 4.699289118303998e-06, "loss": 3.6462, "step": 15590 }, { "epoch": 0.15864054361979166, "grad_norm": 13.295560836791992, "learning_rate": 4.699098946727454e-06, "loss": 3.7865, "step": 15595 }, { "epoch": 0.15869140625, "grad_norm": 7.7286152839660645, "learning_rate": 4.6989087188874835e-06, "loss": 3.2336, "step": 15600 }, { "epoch": 0.15874226888020834, "grad_norm": 8.864896774291992, "learning_rate": 4.698718434788955e-06, "loss": 3.5222, "step": 15605 }, { "epoch": 0.15879313151041666, "grad_norm": 16.60683822631836, "learning_rate": 4.698528094436737e-06, "loss": 3.5871, "step": 15610 }, { "epoch": 0.158843994140625, "grad_norm": 13.247583389282227, "learning_rate": 4.698337697835697e-06, "loss": 3.3722, "step": 15615 }, { "epoch": 0.15889485677083334, "grad_norm": 10.77502155303955, "learning_rate": 4.69814724499071e-06, "loss": 3.3955, "step": 15620 }, { "epoch": 0.15894571940104166, "grad_norm": 12.344950675964355, "learning_rate": 4.697956735906646e-06, "loss": 3.197, "step": 15625 }, { "epoch": 0.15899658203125, "grad_norm": 14.187060356140137, "learning_rate": 4.6977661705883805e-06, "loss": 3.3216, "step": 15630 }, { "epoch": 0.15904744466145834, "grad_norm": 14.362591743469238, "learning_rate": 4.697575549040788e-06, "loss": 3.4256, "step": 15635 }, { "epoch": 0.15909830729166666, "grad_norm": 10.191631317138672, "learning_rate": 4.697384871268745e-06, "loss": 3.4145, "step": 15640 }, { "epoch": 0.159149169921875, "grad_norm": 15.049766540527344, "learning_rate": 4.697194137277132e-06, "loss": 3.4378, "step": 15645 }, { "epoch": 0.15920003255208334, "grad_norm": 16.25983428955078, "learning_rate": 4.697003347070828e-06, "loss": 3.3361, "step": 15650 }, { "epoch": 0.15925089518229166, "grad_norm": 17.122467041015625, "learning_rate": 4.696812500654714e-06, "loss": 3.2949, "step": 15655 }, { "epoch": 0.1593017578125, "grad_norm": 11.814437866210938, "learning_rate": 4.696621598033673e-06, "loss": 3.1715, "step": 15660 }, { "epoch": 0.15935262044270834, "grad_norm": 9.952800750732422, "learning_rate": 4.696430639212588e-06, "loss": 3.3326, "step": 15665 }, { "epoch": 0.15940348307291666, "grad_norm": 13.430285453796387, "learning_rate": 4.696239624196346e-06, "loss": 3.542, "step": 15670 }, { "epoch": 0.159454345703125, "grad_norm": 8.788776397705078, "learning_rate": 4.696048552989835e-06, "loss": 3.3412, "step": 15675 }, { "epoch": 0.15950520833333334, "grad_norm": 11.158987045288086, "learning_rate": 4.69585742559794e-06, "loss": 3.1511, "step": 15680 }, { "epoch": 0.15955607096354166, "grad_norm": 8.167466163635254, "learning_rate": 4.695666242025556e-06, "loss": 3.7305, "step": 15685 }, { "epoch": 0.15960693359375, "grad_norm": 13.274105072021484, "learning_rate": 4.69547500227757e-06, "loss": 3.3312, "step": 15690 }, { "epoch": 0.15965779622395834, "grad_norm": 10.515645027160645, "learning_rate": 4.6952837063588766e-06, "loss": 3.07, "step": 15695 }, { "epoch": 0.15970865885416666, "grad_norm": 12.83730411529541, "learning_rate": 4.69509235427437e-06, "loss": 3.5047, "step": 15700 }, { "epoch": 0.159759521484375, "grad_norm": 12.311369895935059, "learning_rate": 4.694900946028946e-06, "loss": 3.6497, "step": 15705 }, { "epoch": 0.15981038411458334, "grad_norm": 12.914471626281738, "learning_rate": 4.694709481627502e-06, "loss": 3.2395, "step": 15710 }, { "epoch": 0.15986124674479166, "grad_norm": 14.76524543762207, "learning_rate": 4.694517961074934e-06, "loss": 3.2397, "step": 15715 }, { "epoch": 0.159912109375, "grad_norm": 18.405723571777344, "learning_rate": 4.694326384376146e-06, "loss": 3.3388, "step": 15720 }, { "epoch": 0.15996297200520834, "grad_norm": 9.726785659790039, "learning_rate": 4.694134751536038e-06, "loss": 3.287, "step": 15725 }, { "epoch": 0.16001383463541666, "grad_norm": 12.435685157775879, "learning_rate": 4.693943062559512e-06, "loss": 3.2259, "step": 15730 }, { "epoch": 0.160064697265625, "grad_norm": 11.35924243927002, "learning_rate": 4.693751317451472e-06, "loss": 3.1574, "step": 15735 }, { "epoch": 0.16011555989583334, "grad_norm": 10.276074409484863, "learning_rate": 4.693559516216825e-06, "loss": 3.3365, "step": 15740 }, { "epoch": 0.16016642252604166, "grad_norm": 8.467253684997559, "learning_rate": 4.693367658860478e-06, "loss": 3.1872, "step": 15745 }, { "epoch": 0.16021728515625, "grad_norm": 11.348249435424805, "learning_rate": 4.693175745387339e-06, "loss": 3.3335, "step": 15750 }, { "epoch": 0.16026814778645834, "grad_norm": 12.088251113891602, "learning_rate": 4.692983775802318e-06, "loss": 3.4162, "step": 15755 }, { "epoch": 0.16031901041666666, "grad_norm": 10.288962364196777, "learning_rate": 4.692791750110327e-06, "loss": 3.4439, "step": 15760 }, { "epoch": 0.160369873046875, "grad_norm": 15.410057067871094, "learning_rate": 4.692599668316279e-06, "loss": 3.4407, "step": 15765 }, { "epoch": 0.16042073567708334, "grad_norm": 13.116768836975098, "learning_rate": 4.692407530425089e-06, "loss": 3.4639, "step": 15770 }, { "epoch": 0.16047159830729166, "grad_norm": 9.727956771850586, "learning_rate": 4.692215336441671e-06, "loss": 3.3015, "step": 15775 }, { "epoch": 0.1605224609375, "grad_norm": 11.078639030456543, "learning_rate": 4.692023086370944e-06, "loss": 3.265, "step": 15780 }, { "epoch": 0.16057332356770834, "grad_norm": 7.883986473083496, "learning_rate": 4.6918307802178255e-06, "loss": 3.4122, "step": 15785 }, { "epoch": 0.16062418619791666, "grad_norm": 12.443114280700684, "learning_rate": 4.6916384179872356e-06, "loss": 3.0951, "step": 15790 }, { "epoch": 0.160675048828125, "grad_norm": 9.635088920593262, "learning_rate": 4.691445999684097e-06, "loss": 3.8282, "step": 15795 }, { "epoch": 0.16072591145833334, "grad_norm": 8.927238464355469, "learning_rate": 4.69125352531333e-06, "loss": 3.4864, "step": 15800 }, { "epoch": 0.16077677408854166, "grad_norm": 10.879573822021484, "learning_rate": 4.6910609948798636e-06, "loss": 3.7555, "step": 15805 }, { "epoch": 0.16082763671875, "grad_norm": 8.361101150512695, "learning_rate": 4.69086840838862e-06, "loss": 3.5791, "step": 15810 }, { "epoch": 0.16087849934895834, "grad_norm": 10.21220588684082, "learning_rate": 4.6906757658445265e-06, "loss": 3.2996, "step": 15815 }, { "epoch": 0.16092936197916666, "grad_norm": 12.77525520324707, "learning_rate": 4.690483067252514e-06, "loss": 3.4817, "step": 15820 }, { "epoch": 0.160980224609375, "grad_norm": 12.158894538879395, "learning_rate": 4.690290312617512e-06, "loss": 3.3044, "step": 15825 }, { "epoch": 0.16103108723958334, "grad_norm": 15.93702507019043, "learning_rate": 4.69009750194445e-06, "loss": 3.3049, "step": 15830 }, { "epoch": 0.16108194986979166, "grad_norm": 14.75804615020752, "learning_rate": 4.6899046352382625e-06, "loss": 3.2458, "step": 15835 }, { "epoch": 0.1611328125, "grad_norm": 7.818066596984863, "learning_rate": 4.689711712503885e-06, "loss": 3.3629, "step": 15840 }, { "epoch": 0.16118367513020834, "grad_norm": 11.566434860229492, "learning_rate": 4.689518733746251e-06, "loss": 3.3139, "step": 15845 }, { "epoch": 0.16123453776041666, "grad_norm": 11.620870590209961, "learning_rate": 4.689325698970301e-06, "loss": 3.3736, "step": 15850 }, { "epoch": 0.161285400390625, "grad_norm": 9.550261497497559, "learning_rate": 4.6891326081809705e-06, "loss": 3.1503, "step": 15855 }, { "epoch": 0.16133626302083334, "grad_norm": 14.424015045166016, "learning_rate": 4.688939461383202e-06, "loss": 3.3043, "step": 15860 }, { "epoch": 0.16138712565104166, "grad_norm": 14.749124526977539, "learning_rate": 4.688746258581936e-06, "loss": 3.2469, "step": 15865 }, { "epoch": 0.16143798828125, "grad_norm": 14.39970588684082, "learning_rate": 4.688552999782114e-06, "loss": 3.4534, "step": 15870 }, { "epoch": 0.16148885091145834, "grad_norm": 11.76413631439209, "learning_rate": 4.6883596849886845e-06, "loss": 3.1851, "step": 15875 }, { "epoch": 0.16153971354166666, "grad_norm": 14.800198554992676, "learning_rate": 4.68816631420659e-06, "loss": 3.7561, "step": 15880 }, { "epoch": 0.161590576171875, "grad_norm": 13.527456283569336, "learning_rate": 4.68797288744078e-06, "loss": 3.4993, "step": 15885 }, { "epoch": 0.16164143880208334, "grad_norm": 13.89983081817627, "learning_rate": 4.6877794046962014e-06, "loss": 3.5607, "step": 15890 }, { "epoch": 0.16169230143229166, "grad_norm": 12.58095932006836, "learning_rate": 4.687585865977806e-06, "loss": 3.2699, "step": 15895 }, { "epoch": 0.1617431640625, "grad_norm": 14.336568832397461, "learning_rate": 4.687392271290544e-06, "loss": 3.5323, "step": 15900 }, { "epoch": 0.16179402669270834, "grad_norm": 12.0953950881958, "learning_rate": 4.6871986206393695e-06, "loss": 3.549, "step": 15905 }, { "epoch": 0.16184488932291666, "grad_norm": 10.101968765258789, "learning_rate": 4.687004914029237e-06, "loss": 3.3465, "step": 15910 }, { "epoch": 0.161895751953125, "grad_norm": 14.160582542419434, "learning_rate": 4.6868111514651025e-06, "loss": 3.3592, "step": 15915 }, { "epoch": 0.16194661458333334, "grad_norm": 8.53564453125, "learning_rate": 4.686617332951922e-06, "loss": 3.3656, "step": 15920 }, { "epoch": 0.16199747721354166, "grad_norm": 13.714225769042969, "learning_rate": 4.686423458494655e-06, "loss": 2.912, "step": 15925 }, { "epoch": 0.16204833984375, "grad_norm": 13.133865356445312, "learning_rate": 4.686229528098263e-06, "loss": 3.5427, "step": 15930 }, { "epoch": 0.16209920247395834, "grad_norm": 10.839924812316895, "learning_rate": 4.686035541767707e-06, "loss": 3.1262, "step": 15935 }, { "epoch": 0.16215006510416666, "grad_norm": 16.54444694519043, "learning_rate": 4.6858414995079495e-06, "loss": 3.5354, "step": 15940 }, { "epoch": 0.162200927734375, "grad_norm": 14.630023002624512, "learning_rate": 4.685647401323955e-06, "loss": 3.9413, "step": 15945 }, { "epoch": 0.16225179036458334, "grad_norm": 9.191962242126465, "learning_rate": 4.68545324722069e-06, "loss": 3.011, "step": 15950 }, { "epoch": 0.16230265299479166, "grad_norm": 9.845999717712402, "learning_rate": 4.685259037203121e-06, "loss": 3.433, "step": 15955 }, { "epoch": 0.162353515625, "grad_norm": 10.282391548156738, "learning_rate": 4.685064771276219e-06, "loss": 4.0526, "step": 15960 }, { "epoch": 0.16240437825520834, "grad_norm": 11.311758995056152, "learning_rate": 4.684870449444951e-06, "loss": 3.3342, "step": 15965 }, { "epoch": 0.16245524088541666, "grad_norm": 10.48366641998291, "learning_rate": 4.684676071714292e-06, "loss": 3.3553, "step": 15970 }, { "epoch": 0.162506103515625, "grad_norm": 14.828400611877441, "learning_rate": 4.684481638089212e-06, "loss": 3.3099, "step": 15975 }, { "epoch": 0.16255696614583334, "grad_norm": 9.731256484985352, "learning_rate": 4.684287148574689e-06, "loss": 3.4463, "step": 15980 }, { "epoch": 0.16260782877604166, "grad_norm": 11.136251449584961, "learning_rate": 4.684092603175696e-06, "loss": 3.5473, "step": 15985 }, { "epoch": 0.16265869140625, "grad_norm": 13.161229133605957, "learning_rate": 4.683898001897211e-06, "loss": 3.3273, "step": 15990 }, { "epoch": 0.16270955403645834, "grad_norm": 9.826051712036133, "learning_rate": 4.683703344744213e-06, "loss": 3.3483, "step": 15995 }, { "epoch": 0.16276041666666666, "grad_norm": 7.0690460205078125, "learning_rate": 4.683508631721684e-06, "loss": 3.0126, "step": 16000 }, { "epoch": 0.162811279296875, "grad_norm": 13.985572814941406, "learning_rate": 4.683313862834603e-06, "loss": 3.6359, "step": 16005 }, { "epoch": 0.16286214192708334, "grad_norm": 10.578701972961426, "learning_rate": 4.683119038087955e-06, "loss": 3.2651, "step": 16010 }, { "epoch": 0.16291300455729166, "grad_norm": 11.11215591430664, "learning_rate": 4.682924157486724e-06, "loss": 3.1562, "step": 16015 }, { "epoch": 0.1629638671875, "grad_norm": 11.88314151763916, "learning_rate": 4.682729221035895e-06, "loss": 3.677, "step": 16020 }, { "epoch": 0.16301472981770834, "grad_norm": 8.311817169189453, "learning_rate": 4.6825342287404564e-06, "loss": 3.6961, "step": 16025 }, { "epoch": 0.16306559244791666, "grad_norm": 13.549260139465332, "learning_rate": 4.682339180605397e-06, "loss": 3.7595, "step": 16030 }, { "epoch": 0.163116455078125, "grad_norm": 9.020939826965332, "learning_rate": 4.682144076635707e-06, "loss": 3.2208, "step": 16035 }, { "epoch": 0.16316731770833334, "grad_norm": 16.133804321289062, "learning_rate": 4.681948916836378e-06, "loss": 3.4582, "step": 16040 }, { "epoch": 0.16321818033854166, "grad_norm": 9.808793067932129, "learning_rate": 4.681753701212404e-06, "loss": 3.3957, "step": 16045 }, { "epoch": 0.16326904296875, "grad_norm": 13.307278633117676, "learning_rate": 4.681558429768777e-06, "loss": 3.2849, "step": 16050 }, { "epoch": 0.16331990559895834, "grad_norm": 17.242029190063477, "learning_rate": 4.681363102510496e-06, "loss": 3.2799, "step": 16055 }, { "epoch": 0.16337076822916666, "grad_norm": 9.080634117126465, "learning_rate": 4.6811677194425566e-06, "loss": 2.9969, "step": 16060 }, { "epoch": 0.163421630859375, "grad_norm": 8.223971366882324, "learning_rate": 4.680972280569958e-06, "loss": 3.3398, "step": 16065 }, { "epoch": 0.16347249348958334, "grad_norm": 13.515527725219727, "learning_rate": 4.680776785897701e-06, "loss": 3.1698, "step": 16070 }, { "epoch": 0.16352335611979166, "grad_norm": 10.615397453308105, "learning_rate": 4.680581235430786e-06, "loss": 3.644, "step": 16075 }, { "epoch": 0.16357421875, "grad_norm": 13.023798942565918, "learning_rate": 4.680385629174218e-06, "loss": 3.8601, "step": 16080 }, { "epoch": 0.16362508138020834, "grad_norm": 9.510238647460938, "learning_rate": 4.680189967133e-06, "loss": 3.2448, "step": 16085 }, { "epoch": 0.16367594401041666, "grad_norm": 10.635534286499023, "learning_rate": 4.6799942493121374e-06, "loss": 3.7123, "step": 16090 }, { "epoch": 0.163726806640625, "grad_norm": 8.131168365478516, "learning_rate": 4.67979847571664e-06, "loss": 3.4378, "step": 16095 }, { "epoch": 0.16377766927083334, "grad_norm": 9.373008728027344, "learning_rate": 4.679602646351515e-06, "loss": 3.5492, "step": 16100 }, { "epoch": 0.16382853190104166, "grad_norm": 15.981472969055176, "learning_rate": 4.6794067612217734e-06, "loss": 3.7413, "step": 16105 }, { "epoch": 0.16387939453125, "grad_norm": 9.636930465698242, "learning_rate": 4.679210820332425e-06, "loss": 3.5977, "step": 16110 }, { "epoch": 0.16393025716145834, "grad_norm": 13.382928848266602, "learning_rate": 4.679014823688485e-06, "loss": 3.3873, "step": 16115 }, { "epoch": 0.16398111979166666, "grad_norm": 11.43319320678711, "learning_rate": 4.678818771294967e-06, "loss": 3.4517, "step": 16120 }, { "epoch": 0.164031982421875, "grad_norm": 10.521183013916016, "learning_rate": 4.678622663156888e-06, "loss": 3.4823, "step": 16125 }, { "epoch": 0.16408284505208334, "grad_norm": 10.852574348449707, "learning_rate": 4.678426499279264e-06, "loss": 3.4299, "step": 16130 }, { "epoch": 0.16413370768229166, "grad_norm": 13.135746955871582, "learning_rate": 4.6782302796671145e-06, "loss": 3.4938, "step": 16135 }, { "epoch": 0.1641845703125, "grad_norm": 16.104429244995117, "learning_rate": 4.678034004325459e-06, "loss": 3.4273, "step": 16140 }, { "epoch": 0.16423543294270834, "grad_norm": 14.600593566894531, "learning_rate": 4.677837673259321e-06, "loss": 3.4873, "step": 16145 }, { "epoch": 0.16428629557291666, "grad_norm": 10.380234718322754, "learning_rate": 4.677641286473722e-06, "loss": 3.4697, "step": 16150 }, { "epoch": 0.164337158203125, "grad_norm": 13.653000831604004, "learning_rate": 4.677444843973685e-06, "loss": 3.2568, "step": 16155 }, { "epoch": 0.16438802083333334, "grad_norm": 10.491547584533691, "learning_rate": 4.67724834576424e-06, "loss": 3.7773, "step": 16160 }, { "epoch": 0.16443888346354166, "grad_norm": 18.804861068725586, "learning_rate": 4.677051791850411e-06, "loss": 3.1534, "step": 16165 }, { "epoch": 0.16448974609375, "grad_norm": 10.031415939331055, "learning_rate": 4.676855182237229e-06, "loss": 3.1366, "step": 16170 }, { "epoch": 0.16454060872395834, "grad_norm": 11.652718544006348, "learning_rate": 4.6766585169297215e-06, "loss": 3.302, "step": 16175 }, { "epoch": 0.16459147135416666, "grad_norm": 12.849843978881836, "learning_rate": 4.6764617959329226e-06, "loss": 3.7761, "step": 16180 }, { "epoch": 0.164642333984375, "grad_norm": 12.61086368560791, "learning_rate": 4.676265019251865e-06, "loss": 3.3563, "step": 16185 }, { "epoch": 0.16469319661458334, "grad_norm": 10.435957908630371, "learning_rate": 4.676068186891582e-06, "loss": 3.4891, "step": 16190 }, { "epoch": 0.16474405924479166, "grad_norm": 13.85044002532959, "learning_rate": 4.675871298857111e-06, "loss": 3.0696, "step": 16195 }, { "epoch": 0.164794921875, "grad_norm": 9.022653579711914, "learning_rate": 4.675674355153488e-06, "loss": 3.4813, "step": 16200 }, { "epoch": 0.16484578450520834, "grad_norm": 11.870932579040527, "learning_rate": 4.675477355785752e-06, "loss": 3.7666, "step": 16205 }, { "epoch": 0.16489664713541666, "grad_norm": 12.24802303314209, "learning_rate": 4.675280300758944e-06, "loss": 3.3968, "step": 16210 }, { "epoch": 0.164947509765625, "grad_norm": 12.894149780273438, "learning_rate": 4.6750831900781055e-06, "loss": 3.4708, "step": 16215 }, { "epoch": 0.16499837239583334, "grad_norm": 9.178655624389648, "learning_rate": 4.674886023748279e-06, "loss": 3.3961, "step": 16220 }, { "epoch": 0.16504923502604166, "grad_norm": 15.221061706542969, "learning_rate": 4.674688801774508e-06, "loss": 3.5445, "step": 16225 }, { "epoch": 0.16510009765625, "grad_norm": 12.730554580688477, "learning_rate": 4.67449152416184e-06, "loss": 3.3576, "step": 16230 }, { "epoch": 0.16515096028645834, "grad_norm": 10.138298988342285, "learning_rate": 4.674294190915321e-06, "loss": 3.3352, "step": 16235 }, { "epoch": 0.16520182291666666, "grad_norm": 9.869894981384277, "learning_rate": 4.674096802040003e-06, "loss": 3.3375, "step": 16240 }, { "epoch": 0.165252685546875, "grad_norm": 9.15701961517334, "learning_rate": 4.673899357540932e-06, "loss": 3.2079, "step": 16245 }, { "epoch": 0.16530354817708334, "grad_norm": 13.204168319702148, "learning_rate": 4.673701857423161e-06, "loss": 3.2676, "step": 16250 }, { "epoch": 0.16535441080729166, "grad_norm": 15.898265838623047, "learning_rate": 4.6735043016917435e-06, "loss": 3.3132, "step": 16255 }, { "epoch": 0.1654052734375, "grad_norm": 10.086653709411621, "learning_rate": 4.673306690351733e-06, "loss": 3.2754, "step": 16260 }, { "epoch": 0.16545613606770834, "grad_norm": 15.631150245666504, "learning_rate": 4.6731090234081865e-06, "loss": 3.6231, "step": 16265 }, { "epoch": 0.16550699869791666, "grad_norm": 10.62215518951416, "learning_rate": 4.672911300866161e-06, "loss": 3.2062, "step": 16270 }, { "epoch": 0.165557861328125, "grad_norm": 8.654640197753906, "learning_rate": 4.672713522730715e-06, "loss": 3.2674, "step": 16275 }, { "epoch": 0.16560872395833334, "grad_norm": 11.249765396118164, "learning_rate": 4.672515689006908e-06, "loss": 3.0822, "step": 16280 }, { "epoch": 0.16565958658854166, "grad_norm": 14.134060859680176, "learning_rate": 4.6723177996998025e-06, "loss": 3.58, "step": 16285 }, { "epoch": 0.16571044921875, "grad_norm": 16.133581161499023, "learning_rate": 4.672119854814461e-06, "loss": 3.6814, "step": 16290 }, { "epoch": 0.16576131184895834, "grad_norm": 13.389992713928223, "learning_rate": 4.671921854355947e-06, "loss": 3.4922, "step": 16295 }, { "epoch": 0.16581217447916666, "grad_norm": 13.663773536682129, "learning_rate": 4.671723798329328e-06, "loss": 3.1055, "step": 16300 }, { "epoch": 0.165863037109375, "grad_norm": 13.669440269470215, "learning_rate": 4.671525686739669e-06, "loss": 3.1514, "step": 16305 }, { "epoch": 0.16591389973958334, "grad_norm": 12.217931747436523, "learning_rate": 4.671327519592042e-06, "loss": 3.6507, "step": 16310 }, { "epoch": 0.16596476236979166, "grad_norm": 15.413629531860352, "learning_rate": 4.6711292968915145e-06, "loss": 3.4689, "step": 16315 }, { "epoch": 0.166015625, "grad_norm": 11.545214653015137, "learning_rate": 4.670931018643158e-06, "loss": 3.2486, "step": 16320 }, { "epoch": 0.16606648763020834, "grad_norm": 12.391217231750488, "learning_rate": 4.670732684852046e-06, "loss": 3.3009, "step": 16325 }, { "epoch": 0.16611735026041666, "grad_norm": 10.62708854675293, "learning_rate": 4.670534295523253e-06, "loss": 3.3732, "step": 16330 }, { "epoch": 0.166168212890625, "grad_norm": 8.105783462524414, "learning_rate": 4.670335850661855e-06, "loss": 3.3016, "step": 16335 }, { "epoch": 0.16621907552083334, "grad_norm": 7.761030197143555, "learning_rate": 4.670137350272927e-06, "loss": 3.2436, "step": 16340 }, { "epoch": 0.16626993815104166, "grad_norm": 12.776176452636719, "learning_rate": 4.669938794361552e-06, "loss": 3.3074, "step": 16345 }, { "epoch": 0.16632080078125, "grad_norm": 7.695553302764893, "learning_rate": 4.669740182932805e-06, "loss": 3.0618, "step": 16350 }, { "epoch": 0.16637166341145834, "grad_norm": 14.618805885314941, "learning_rate": 4.66954151599177e-06, "loss": 3.4561, "step": 16355 }, { "epoch": 0.16642252604166666, "grad_norm": 14.894763946533203, "learning_rate": 4.66934279354353e-06, "loss": 3.492, "step": 16360 }, { "epoch": 0.166473388671875, "grad_norm": 9.516297340393066, "learning_rate": 4.669144015593169e-06, "loss": 3.3012, "step": 16365 }, { "epoch": 0.16652425130208334, "grad_norm": 15.990934371948242, "learning_rate": 4.668945182145773e-06, "loss": 3.4032, "step": 16370 }, { "epoch": 0.16657511393229166, "grad_norm": 11.829225540161133, "learning_rate": 4.668746293206428e-06, "loss": 3.57, "step": 16375 }, { "epoch": 0.1666259765625, "grad_norm": 9.242964744567871, "learning_rate": 4.668547348780222e-06, "loss": 3.43, "step": 16380 }, { "epoch": 0.16667683919270834, "grad_norm": 15.611289024353027, "learning_rate": 4.668348348872248e-06, "loss": 3.2432, "step": 16385 }, { "epoch": 0.16672770182291666, "grad_norm": 12.005697250366211, "learning_rate": 4.668149293487595e-06, "loss": 2.9898, "step": 16390 }, { "epoch": 0.166778564453125, "grad_norm": 14.01111888885498, "learning_rate": 4.6679501826313554e-06, "loss": 3.5077, "step": 16395 }, { "epoch": 0.16682942708333334, "grad_norm": 17.95404052734375, "learning_rate": 4.667751016308624e-06, "loss": 3.3168, "step": 16400 }, { "epoch": 0.16688028971354166, "grad_norm": 14.67130184173584, "learning_rate": 4.6675517945244975e-06, "loss": 3.0814, "step": 16405 }, { "epoch": 0.16693115234375, "grad_norm": 9.70960521697998, "learning_rate": 4.667352517284072e-06, "loss": 3.3854, "step": 16410 }, { "epoch": 0.16698201497395834, "grad_norm": 10.68616008758545, "learning_rate": 4.667153184592446e-06, "loss": 3.375, "step": 16415 }, { "epoch": 0.16703287760416666, "grad_norm": 12.836880683898926, "learning_rate": 4.6669537964547195e-06, "loss": 3.2576, "step": 16420 }, { "epoch": 0.167083740234375, "grad_norm": 10.322628021240234, "learning_rate": 4.666754352875994e-06, "loss": 3.5553, "step": 16425 }, { "epoch": 0.16713460286458334, "grad_norm": 13.36998176574707, "learning_rate": 4.6665548538613715e-06, "loss": 3.5801, "step": 16430 }, { "epoch": 0.16718546549479166, "grad_norm": 11.6786527633667, "learning_rate": 4.666355299415956e-06, "loss": 3.1604, "step": 16435 }, { "epoch": 0.167236328125, "grad_norm": 15.699369430541992, "learning_rate": 4.666155689544855e-06, "loss": 3.2281, "step": 16440 }, { "epoch": 0.16728719075520834, "grad_norm": 23.027801513671875, "learning_rate": 4.6659560242531735e-06, "loss": 3.0434, "step": 16445 }, { "epoch": 0.16733805338541666, "grad_norm": 10.911322593688965, "learning_rate": 4.665756303546021e-06, "loss": 3.5173, "step": 16450 }, { "epoch": 0.167388916015625, "grad_norm": 9.32667064666748, "learning_rate": 4.665556527428506e-06, "loss": 3.5175, "step": 16455 }, { "epoch": 0.16743977864583334, "grad_norm": 13.066845893859863, "learning_rate": 4.66535669590574e-06, "loss": 3.7572, "step": 16460 }, { "epoch": 0.16749064127604166, "grad_norm": 14.599390029907227, "learning_rate": 4.6651568089828384e-06, "loss": 3.1729, "step": 16465 }, { "epoch": 0.16754150390625, "grad_norm": 10.783397674560547, "learning_rate": 4.664956866664912e-06, "loss": 3.1904, "step": 16470 }, { "epoch": 0.16759236653645834, "grad_norm": 15.606278419494629, "learning_rate": 4.664756868957076e-06, "loss": 3.3966, "step": 16475 }, { "epoch": 0.16764322916666666, "grad_norm": 11.634224891662598, "learning_rate": 4.6645568158644496e-06, "loss": 3.4924, "step": 16480 }, { "epoch": 0.167694091796875, "grad_norm": 8.687528610229492, "learning_rate": 4.66435670739215e-06, "loss": 3.4183, "step": 16485 }, { "epoch": 0.16774495442708334, "grad_norm": 13.002685546875, "learning_rate": 4.6641565435452975e-06, "loss": 3.3721, "step": 16490 }, { "epoch": 0.16779581705729166, "grad_norm": 13.268308639526367, "learning_rate": 4.663956324329012e-06, "loss": 3.7302, "step": 16495 }, { "epoch": 0.1678466796875, "grad_norm": 9.55381965637207, "learning_rate": 4.663756049748418e-06, "loss": 3.2204, "step": 16500 }, { "epoch": 0.16789754231770834, "grad_norm": 13.7435302734375, "learning_rate": 4.6635557198086375e-06, "loss": 3.6933, "step": 16505 }, { "epoch": 0.16794840494791666, "grad_norm": 13.537063598632812, "learning_rate": 4.663355334514796e-06, "loss": 3.0507, "step": 16510 }, { "epoch": 0.167999267578125, "grad_norm": 13.002664566040039, "learning_rate": 4.663154893872023e-06, "loss": 3.1599, "step": 16515 }, { "epoch": 0.16805013020833334, "grad_norm": 8.084352493286133, "learning_rate": 4.662954397885443e-06, "loss": 3.5544, "step": 16520 }, { "epoch": 0.16810099283854166, "grad_norm": 13.704971313476562, "learning_rate": 4.662753846560189e-06, "loss": 3.5446, "step": 16525 }, { "epoch": 0.16815185546875, "grad_norm": 13.047155380249023, "learning_rate": 4.662553239901389e-06, "loss": 3.6407, "step": 16530 }, { "epoch": 0.16820271809895834, "grad_norm": 10.460413932800293, "learning_rate": 4.662352577914178e-06, "loss": 3.5399, "step": 16535 }, { "epoch": 0.16825358072916666, "grad_norm": 12.304898262023926, "learning_rate": 4.6621518606036875e-06, "loss": 3.3801, "step": 16540 }, { "epoch": 0.168304443359375, "grad_norm": 10.110926628112793, "learning_rate": 4.661951087975055e-06, "loss": 3.2155, "step": 16545 }, { "epoch": 0.16835530598958334, "grad_norm": 7.640111446380615, "learning_rate": 4.661750260033417e-06, "loss": 3.1164, "step": 16550 }, { "epoch": 0.16840616861979166, "grad_norm": 8.465399742126465, "learning_rate": 4.66154937678391e-06, "loss": 3.7351, "step": 16555 }, { "epoch": 0.16845703125, "grad_norm": 11.797218322753906, "learning_rate": 4.661348438231675e-06, "loss": 3.6523, "step": 16560 }, { "epoch": 0.16850789388020834, "grad_norm": 9.346522331237793, "learning_rate": 4.6611474443818525e-06, "loss": 3.34, "step": 16565 }, { "epoch": 0.16855875651041666, "grad_norm": 14.899089813232422, "learning_rate": 4.660946395239584e-06, "loss": 3.5477, "step": 16570 }, { "epoch": 0.168609619140625, "grad_norm": 11.063288688659668, "learning_rate": 4.660745290810015e-06, "loss": 3.333, "step": 16575 }, { "epoch": 0.16866048177083334, "grad_norm": 9.561201095581055, "learning_rate": 4.66054413109829e-06, "loss": 3.8984, "step": 16580 }, { "epoch": 0.16871134440104166, "grad_norm": 13.455229759216309, "learning_rate": 4.6603429161095556e-06, "loss": 3.2126, "step": 16585 }, { "epoch": 0.16876220703125, "grad_norm": 10.709653854370117, "learning_rate": 4.660141645848959e-06, "loss": 3.2361, "step": 16590 }, { "epoch": 0.16881306966145834, "grad_norm": 12.909401893615723, "learning_rate": 4.659940320321651e-06, "loss": 3.3619, "step": 16595 }, { "epoch": 0.16886393229166666, "grad_norm": 15.584907531738281, "learning_rate": 4.6597389395327816e-06, "loss": 3.1329, "step": 16600 }, { "epoch": 0.168914794921875, "grad_norm": 14.770988464355469, "learning_rate": 4.659537503487503e-06, "loss": 3.9151, "step": 16605 }, { "epoch": 0.16896565755208334, "grad_norm": 7.884305953979492, "learning_rate": 4.6593360121909706e-06, "loss": 3.2376, "step": 16610 }, { "epoch": 0.16901652018229166, "grad_norm": 14.378878593444824, "learning_rate": 4.659134465648338e-06, "loss": 3.4527, "step": 16615 }, { "epoch": 0.1690673828125, "grad_norm": 9.519623756408691, "learning_rate": 4.65893286386476e-06, "loss": 3.445, "step": 16620 }, { "epoch": 0.16911824544270834, "grad_norm": 9.221073150634766, "learning_rate": 4.658731206845398e-06, "loss": 3.3477, "step": 16625 }, { "epoch": 0.16916910807291666, "grad_norm": 10.170306205749512, "learning_rate": 4.658529494595408e-06, "loss": 3.3417, "step": 16630 }, { "epoch": 0.169219970703125, "grad_norm": 15.229034423828125, "learning_rate": 4.6583277271199545e-06, "loss": 3.2686, "step": 16635 }, { "epoch": 0.16927083333333334, "grad_norm": 12.785039901733398, "learning_rate": 4.658125904424197e-06, "loss": 3.4329, "step": 16640 }, { "epoch": 0.16932169596354166, "grad_norm": 9.296720504760742, "learning_rate": 4.6579240265133e-06, "loss": 3.3711, "step": 16645 }, { "epoch": 0.16937255859375, "grad_norm": 10.980588912963867, "learning_rate": 4.657722093392428e-06, "loss": 3.3667, "step": 16650 }, { "epoch": 0.16942342122395834, "grad_norm": 11.73270034790039, "learning_rate": 4.657520105066747e-06, "loss": 3.4216, "step": 16655 }, { "epoch": 0.16947428385416666, "grad_norm": 18.23244857788086, "learning_rate": 4.6573180615414265e-06, "loss": 3.6996, "step": 16660 }, { "epoch": 0.169525146484375, "grad_norm": 13.225893020629883, "learning_rate": 4.657115962821635e-06, "loss": 3.5311, "step": 16665 }, { "epoch": 0.16957600911458334, "grad_norm": 12.947046279907227, "learning_rate": 4.656913808912542e-06, "loss": 3.2338, "step": 16670 }, { "epoch": 0.16962687174479166, "grad_norm": 16.09568977355957, "learning_rate": 4.65671159981932e-06, "loss": 3.3158, "step": 16675 }, { "epoch": 0.169677734375, "grad_norm": 9.442344665527344, "learning_rate": 4.656509335547144e-06, "loss": 3.5821, "step": 16680 }, { "epoch": 0.16972859700520834, "grad_norm": 8.947610855102539, "learning_rate": 4.656307016101187e-06, "loss": 3.2935, "step": 16685 }, { "epoch": 0.16977945963541666, "grad_norm": 15.548479080200195, "learning_rate": 4.656104641486628e-06, "loss": 3.6593, "step": 16690 }, { "epoch": 0.169830322265625, "grad_norm": 9.9256591796875, "learning_rate": 4.655902211708641e-06, "loss": 3.4071, "step": 16695 }, { "epoch": 0.16988118489583334, "grad_norm": 11.299330711364746, "learning_rate": 4.655699726772407e-06, "loss": 3.4039, "step": 16700 }, { "epoch": 0.16993204752604166, "grad_norm": 10.457588195800781, "learning_rate": 4.655497186683107e-06, "loss": 3.4151, "step": 16705 }, { "epoch": 0.16998291015625, "grad_norm": 9.101759910583496, "learning_rate": 4.655294591445921e-06, "loss": 3.0531, "step": 16710 }, { "epoch": 0.17003377278645834, "grad_norm": 9.755556106567383, "learning_rate": 4.6550919410660355e-06, "loss": 3.7921, "step": 16715 }, { "epoch": 0.17008463541666666, "grad_norm": 6.866489410400391, "learning_rate": 4.654889235548633e-06, "loss": 3.3934, "step": 16720 }, { "epoch": 0.170135498046875, "grad_norm": 11.931754112243652, "learning_rate": 4.6546864748989e-06, "loss": 3.2464, "step": 16725 }, { "epoch": 0.17018636067708334, "grad_norm": 12.620525360107422, "learning_rate": 4.654483659122025e-06, "loss": 3.1906, "step": 16730 }, { "epoch": 0.17023722330729166, "grad_norm": 9.603962898254395, "learning_rate": 4.654280788223195e-06, "loss": 3.1315, "step": 16735 }, { "epoch": 0.1702880859375, "grad_norm": 10.157326698303223, "learning_rate": 4.654077862207601e-06, "loss": 3.4375, "step": 16740 }, { "epoch": 0.17033894856770834, "grad_norm": 6.509634971618652, "learning_rate": 4.653874881080437e-06, "loss": 3.2071, "step": 16745 }, { "epoch": 0.17038981119791666, "grad_norm": 11.346307754516602, "learning_rate": 4.653671844846895e-06, "loss": 3.4308, "step": 16750 }, { "epoch": 0.170440673828125, "grad_norm": 10.316970825195312, "learning_rate": 4.653468753512168e-06, "loss": 3.5133, "step": 16755 }, { "epoch": 0.17049153645833334, "grad_norm": 12.79603099822998, "learning_rate": 4.653265607081454e-06, "loss": 3.2712, "step": 16760 }, { "epoch": 0.17054239908854166, "grad_norm": 12.55677604675293, "learning_rate": 4.653062405559951e-06, "loss": 3.2636, "step": 16765 }, { "epoch": 0.17059326171875, "grad_norm": 15.563427925109863, "learning_rate": 4.652859148952855e-06, "loss": 3.2278, "step": 16770 }, { "epoch": 0.17064412434895834, "grad_norm": 13.07026481628418, "learning_rate": 4.652655837265369e-06, "loss": 3.3884, "step": 16775 }, { "epoch": 0.17069498697916666, "grad_norm": 13.581730842590332, "learning_rate": 4.6524524705026925e-06, "loss": 3.6175, "step": 16780 }, { "epoch": 0.170745849609375, "grad_norm": 8.78980827331543, "learning_rate": 4.65224904867003e-06, "loss": 3.4371, "step": 16785 }, { "epoch": 0.17079671223958334, "grad_norm": 11.52972412109375, "learning_rate": 4.652045571772586e-06, "loss": 3.2436, "step": 16790 }, { "epoch": 0.17084757486979166, "grad_norm": 17.321304321289062, "learning_rate": 4.651842039815566e-06, "loss": 3.393, "step": 16795 }, { "epoch": 0.1708984375, "grad_norm": 12.183976173400879, "learning_rate": 4.651638452804178e-06, "loss": 3.3443, "step": 16800 }, { "epoch": 0.17094930013020834, "grad_norm": 11.435518264770508, "learning_rate": 4.6514348107436305e-06, "loss": 3.4001, "step": 16805 }, { "epoch": 0.17100016276041666, "grad_norm": 15.479721069335938, "learning_rate": 4.651231113639132e-06, "loss": 3.0751, "step": 16810 }, { "epoch": 0.171051025390625, "grad_norm": 12.146183013916016, "learning_rate": 4.651027361495896e-06, "loss": 3.238, "step": 16815 }, { "epoch": 0.17110188802083334, "grad_norm": 12.965581893920898, "learning_rate": 4.650823554319135e-06, "loss": 3.8735, "step": 16820 }, { "epoch": 0.17115275065104166, "grad_norm": 9.760518074035645, "learning_rate": 4.650619692114063e-06, "loss": 3.2039, "step": 16825 }, { "epoch": 0.17120361328125, "grad_norm": 12.903510093688965, "learning_rate": 4.650415774885896e-06, "loss": 3.5834, "step": 16830 }, { "epoch": 0.17125447591145834, "grad_norm": 13.00151538848877, "learning_rate": 4.650211802639851e-06, "loss": 3.243, "step": 16835 }, { "epoch": 0.17130533854166666, "grad_norm": 15.11430549621582, "learning_rate": 4.6500077753811465e-06, "loss": 3.4178, "step": 16840 }, { "epoch": 0.171356201171875, "grad_norm": 10.01813793182373, "learning_rate": 4.649803693115003e-06, "loss": 3.2601, "step": 16845 }, { "epoch": 0.17140706380208334, "grad_norm": 11.622364044189453, "learning_rate": 4.649599555846641e-06, "loss": 3.6725, "step": 16850 }, { "epoch": 0.17145792643229166, "grad_norm": 15.42918872833252, "learning_rate": 4.649395363581285e-06, "loss": 3.3048, "step": 16855 }, { "epoch": 0.1715087890625, "grad_norm": 11.032341957092285, "learning_rate": 4.649191116324158e-06, "loss": 3.4853, "step": 16860 }, { "epoch": 0.17155965169270834, "grad_norm": 12.54845142364502, "learning_rate": 4.648986814080485e-06, "loss": 3.2918, "step": 16865 }, { "epoch": 0.17161051432291666, "grad_norm": 8.605419158935547, "learning_rate": 4.648782456855493e-06, "loss": 3.4214, "step": 16870 }, { "epoch": 0.171661376953125, "grad_norm": 9.82323169708252, "learning_rate": 4.648578044654412e-06, "loss": 3.3439, "step": 16875 }, { "epoch": 0.17171223958333334, "grad_norm": 15.435731887817383, "learning_rate": 4.648373577482471e-06, "loss": 3.2196, "step": 16880 }, { "epoch": 0.17176310221354166, "grad_norm": 9.784239768981934, "learning_rate": 4.6481690553449015e-06, "loss": 4.0164, "step": 16885 }, { "epoch": 0.17181396484375, "grad_norm": 11.91480541229248, "learning_rate": 4.647964478246936e-06, "loss": 3.2925, "step": 16890 }, { "epoch": 0.17186482747395834, "grad_norm": 7.702728271484375, "learning_rate": 4.647759846193808e-06, "loss": 3.2802, "step": 16895 }, { "epoch": 0.17191569010416666, "grad_norm": 7.844374656677246, "learning_rate": 4.647555159190753e-06, "loss": 3.3919, "step": 16900 }, { "epoch": 0.171966552734375, "grad_norm": 9.673126220703125, "learning_rate": 4.647350417243009e-06, "loss": 3.7028, "step": 16905 }, { "epoch": 0.17201741536458334, "grad_norm": 13.436027526855469, "learning_rate": 4.647145620355813e-06, "loss": 3.0947, "step": 16910 }, { "epoch": 0.17206827799479166, "grad_norm": 9.890853881835938, "learning_rate": 4.646940768534406e-06, "loss": 3.3502, "step": 16915 }, { "epoch": 0.172119140625, "grad_norm": 12.191718101501465, "learning_rate": 4.6467358617840275e-06, "loss": 3.1166, "step": 16920 }, { "epoch": 0.17217000325520834, "grad_norm": 10.807548522949219, "learning_rate": 4.646530900109921e-06, "loss": 3.3743, "step": 16925 }, { "epoch": 0.17222086588541666, "grad_norm": 15.573385238647461, "learning_rate": 4.646325883517331e-06, "loss": 3.4527, "step": 16930 }, { "epoch": 0.172271728515625, "grad_norm": 12.284563064575195, "learning_rate": 4.646120812011501e-06, "loss": 3.1113, "step": 16935 }, { "epoch": 0.17232259114583334, "grad_norm": 15.29260540008545, "learning_rate": 4.645915685597679e-06, "loss": 3.4474, "step": 16940 }, { "epoch": 0.17237345377604166, "grad_norm": 12.571720123291016, "learning_rate": 4.645710504281113e-06, "loss": 3.1863, "step": 16945 }, { "epoch": 0.17242431640625, "grad_norm": 14.381819725036621, "learning_rate": 4.645505268067052e-06, "loss": 3.3417, "step": 16950 }, { "epoch": 0.17247517903645834, "grad_norm": 9.561775207519531, "learning_rate": 4.645299976960747e-06, "loss": 3.1955, "step": 16955 }, { "epoch": 0.17252604166666666, "grad_norm": 12.148945808410645, "learning_rate": 4.645094630967451e-06, "loss": 3.7219, "step": 16960 }, { "epoch": 0.172576904296875, "grad_norm": 10.793240547180176, "learning_rate": 4.644889230092418e-06, "loss": 3.5109, "step": 16965 }, { "epoch": 0.17262776692708334, "grad_norm": 13.821535110473633, "learning_rate": 4.644683774340902e-06, "loss": 3.3895, "step": 16970 }, { "epoch": 0.17267862955729166, "grad_norm": 10.016497611999512, "learning_rate": 4.64447826371816e-06, "loss": 3.317, "step": 16975 }, { "epoch": 0.1727294921875, "grad_norm": 12.0796480178833, "learning_rate": 4.64427269822945e-06, "loss": 3.1043, "step": 16980 }, { "epoch": 0.17278035481770834, "grad_norm": 11.921280860900879, "learning_rate": 4.644067077880031e-06, "loss": 3.4567, "step": 16985 }, { "epoch": 0.17283121744791666, "grad_norm": 10.839923858642578, "learning_rate": 4.643861402675164e-06, "loss": 3.4736, "step": 16990 }, { "epoch": 0.172882080078125, "grad_norm": 13.740301132202148, "learning_rate": 4.643655672620111e-06, "loss": 3.6096, "step": 16995 }, { "epoch": 0.17293294270833334, "grad_norm": 14.093925476074219, "learning_rate": 4.643449887720136e-06, "loss": 3.2615, "step": 17000 }, { "epoch": 0.17298380533854166, "grad_norm": 10.022769927978516, "learning_rate": 4.643244047980503e-06, "loss": 3.0516, "step": 17005 }, { "epoch": 0.17303466796875, "grad_norm": 9.807087898254395, "learning_rate": 4.64303815340648e-06, "loss": 3.6837, "step": 17010 }, { "epoch": 0.17308553059895834, "grad_norm": 10.774398803710938, "learning_rate": 4.642832204003333e-06, "loss": 3.046, "step": 17015 }, { "epoch": 0.17313639322916666, "grad_norm": 8.116972923278809, "learning_rate": 4.642626199776333e-06, "loss": 3.071, "step": 17020 }, { "epoch": 0.173187255859375, "grad_norm": 13.305663108825684, "learning_rate": 4.642420140730749e-06, "loss": 3.8665, "step": 17025 }, { "epoch": 0.17323811848958334, "grad_norm": 16.738006591796875, "learning_rate": 4.642214026871853e-06, "loss": 3.6563, "step": 17030 }, { "epoch": 0.17328898111979166, "grad_norm": 10.710490226745605, "learning_rate": 4.642007858204919e-06, "loss": 3.3579, "step": 17035 }, { "epoch": 0.17333984375, "grad_norm": 17.46071434020996, "learning_rate": 4.641801634735222e-06, "loss": 3.2495, "step": 17040 }, { "epoch": 0.17339070638020834, "grad_norm": 8.741815567016602, "learning_rate": 4.6415953564680385e-06, "loss": 3.0387, "step": 17045 }, { "epoch": 0.17344156901041666, "grad_norm": 14.374468803405762, "learning_rate": 4.641389023408644e-06, "loss": 3.3034, "step": 17050 }, { "epoch": 0.173492431640625, "grad_norm": 10.53064250946045, "learning_rate": 4.64118263556232e-06, "loss": 3.3958, "step": 17055 }, { "epoch": 0.17354329427083334, "grad_norm": 16.45563316345215, "learning_rate": 4.640976192934345e-06, "loss": 3.6537, "step": 17060 }, { "epoch": 0.17359415690104166, "grad_norm": 13.669546127319336, "learning_rate": 4.6407696955300025e-06, "loss": 3.136, "step": 17065 }, { "epoch": 0.17364501953125, "grad_norm": 15.326542854309082, "learning_rate": 4.640563143354574e-06, "loss": 3.4866, "step": 17070 }, { "epoch": 0.17369588216145834, "grad_norm": 7.852635860443115, "learning_rate": 4.640356536413345e-06, "loss": 3.4082, "step": 17075 }, { "epoch": 0.17374674479166666, "grad_norm": 15.086888313293457, "learning_rate": 4.640149874711601e-06, "loss": 3.3045, "step": 17080 }, { "epoch": 0.173797607421875, "grad_norm": 13.305411338806152, "learning_rate": 4.63994315825463e-06, "loss": 2.9298, "step": 17085 }, { "epoch": 0.17384847005208334, "grad_norm": 6.963002681732178, "learning_rate": 4.639736387047722e-06, "loss": 3.5608, "step": 17090 }, { "epoch": 0.17389933268229166, "grad_norm": 13.062210083007812, "learning_rate": 4.639529561096164e-06, "loss": 3.1996, "step": 17095 }, { "epoch": 0.1739501953125, "grad_norm": 9.515623092651367, "learning_rate": 4.639322680405249e-06, "loss": 3.3422, "step": 17100 }, { "epoch": 0.17400105794270834, "grad_norm": 11.610623359680176, "learning_rate": 4.639115744980272e-06, "loss": 3.3988, "step": 17105 }, { "epoch": 0.17405192057291666, "grad_norm": 11.479599952697754, "learning_rate": 4.6389087548265245e-06, "loss": 3.2056, "step": 17110 }, { "epoch": 0.174102783203125, "grad_norm": 11.929533004760742, "learning_rate": 4.638701709949303e-06, "loss": 3.0456, "step": 17115 }, { "epoch": 0.17415364583333334, "grad_norm": 12.549202919006348, "learning_rate": 4.638494610353907e-06, "loss": 3.3503, "step": 17120 }, { "epoch": 0.17420450846354166, "grad_norm": 14.711870193481445, "learning_rate": 4.638287456045632e-06, "loss": 3.4214, "step": 17125 }, { "epoch": 0.17425537109375, "grad_norm": 14.64809513092041, "learning_rate": 4.638080247029779e-06, "loss": 3.4133, "step": 17130 }, { "epoch": 0.17430623372395834, "grad_norm": 16.768775939941406, "learning_rate": 4.63787298331165e-06, "loss": 3.3163, "step": 17135 }, { "epoch": 0.17435709635416666, "grad_norm": 13.071782112121582, "learning_rate": 4.637665664896547e-06, "loss": 3.8572, "step": 17140 }, { "epoch": 0.174407958984375, "grad_norm": 11.809013366699219, "learning_rate": 4.637458291789776e-06, "loss": 3.4033, "step": 17145 }, { "epoch": 0.17445882161458334, "grad_norm": 11.942599296569824, "learning_rate": 4.63725086399664e-06, "loss": 3.3937, "step": 17150 }, { "epoch": 0.17450968424479166, "grad_norm": 11.510283470153809, "learning_rate": 4.637043381522447e-06, "loss": 3.2646, "step": 17155 }, { "epoch": 0.174560546875, "grad_norm": 15.842909812927246, "learning_rate": 4.636835844372507e-06, "loss": 3.2227, "step": 17160 }, { "epoch": 0.17461140950520834, "grad_norm": 9.673988342285156, "learning_rate": 4.636628252552128e-06, "loss": 3.5092, "step": 17165 }, { "epoch": 0.17466227213541666, "grad_norm": 12.802469253540039, "learning_rate": 4.636420606066621e-06, "loss": 3.3495, "step": 17170 }, { "epoch": 0.174713134765625, "grad_norm": 9.603645324707031, "learning_rate": 4.636212904921299e-06, "loss": 3.404, "step": 17175 }, { "epoch": 0.17476399739583334, "grad_norm": 11.273427963256836, "learning_rate": 4.6360051491214765e-06, "loss": 3.2583, "step": 17180 }, { "epoch": 0.17481486002604166, "grad_norm": 9.479177474975586, "learning_rate": 4.635797338672469e-06, "loss": 3.086, "step": 17185 }, { "epoch": 0.17486572265625, "grad_norm": 15.164192199707031, "learning_rate": 4.635589473579592e-06, "loss": 3.7945, "step": 17190 }, { "epoch": 0.17491658528645834, "grad_norm": 13.359272003173828, "learning_rate": 4.635381553848165e-06, "loss": 3.3775, "step": 17195 }, { "epoch": 0.17496744791666666, "grad_norm": 7.546788692474365, "learning_rate": 4.635173579483507e-06, "loss": 3.2542, "step": 17200 }, { "epoch": 0.175018310546875, "grad_norm": 13.577391624450684, "learning_rate": 4.634965550490939e-06, "loss": 3.4331, "step": 17205 }, { "epoch": 0.17506917317708334, "grad_norm": 14.945005416870117, "learning_rate": 4.6347574668757835e-06, "loss": 3.2556, "step": 17210 }, { "epoch": 0.17512003580729166, "grad_norm": 13.663776397705078, "learning_rate": 4.634549328643364e-06, "loss": 3.4574, "step": 17215 }, { "epoch": 0.1751708984375, "grad_norm": 8.942784309387207, "learning_rate": 4.634341135799007e-06, "loss": 3.4193, "step": 17220 }, { "epoch": 0.17522176106770834, "grad_norm": 15.326900482177734, "learning_rate": 4.634132888348037e-06, "loss": 3.6722, "step": 17225 }, { "epoch": 0.17527262369791666, "grad_norm": 13.63649845123291, "learning_rate": 4.633924586295782e-06, "loss": 3.4126, "step": 17230 }, { "epoch": 0.175323486328125, "grad_norm": 12.05754280090332, "learning_rate": 4.633716229647573e-06, "loss": 3.5295, "step": 17235 }, { "epoch": 0.17537434895833334, "grad_norm": 14.691134452819824, "learning_rate": 4.633507818408741e-06, "loss": 3.2017, "step": 17240 }, { "epoch": 0.17542521158854166, "grad_norm": 12.102783203125, "learning_rate": 4.633299352584616e-06, "loss": 3.3013, "step": 17245 }, { "epoch": 0.17547607421875, "grad_norm": 8.052931785583496, "learning_rate": 4.6330908321805336e-06, "loss": 3.4158, "step": 17250 }, { "epoch": 0.17552693684895834, "grad_norm": 11.040821075439453, "learning_rate": 4.632882257201826e-06, "loss": 3.1246, "step": 17255 }, { "epoch": 0.17557779947916666, "grad_norm": 11.392463684082031, "learning_rate": 4.632673627653833e-06, "loss": 3.4227, "step": 17260 }, { "epoch": 0.175628662109375, "grad_norm": 14.34813404083252, "learning_rate": 4.6324649435418916e-06, "loss": 3.2329, "step": 17265 }, { "epoch": 0.17567952473958334, "grad_norm": 9.003851890563965, "learning_rate": 4.632256204871338e-06, "loss": 3.5315, "step": 17270 }, { "epoch": 0.17573038736979166, "grad_norm": 11.924999237060547, "learning_rate": 4.632047411647516e-06, "loss": 3.4919, "step": 17275 }, { "epoch": 0.17578125, "grad_norm": 11.398469924926758, "learning_rate": 4.6318385638757665e-06, "loss": 3.9053, "step": 17280 }, { "epoch": 0.17583211263020834, "grad_norm": 10.507608413696289, "learning_rate": 4.631629661561432e-06, "loss": 3.8949, "step": 17285 }, { "epoch": 0.17588297526041666, "grad_norm": 6.944844722747803, "learning_rate": 4.6314207047098585e-06, "loss": 3.6582, "step": 17290 }, { "epoch": 0.175933837890625, "grad_norm": 15.985193252563477, "learning_rate": 4.63121169332639e-06, "loss": 3.1175, "step": 17295 }, { "epoch": 0.17598470052083334, "grad_norm": 14.670384407043457, "learning_rate": 4.6310026274163765e-06, "loss": 3.547, "step": 17300 }, { "epoch": 0.17603556315104166, "grad_norm": 12.856901168823242, "learning_rate": 4.630793506985166e-06, "loss": 3.3646, "step": 17305 }, { "epoch": 0.17608642578125, "grad_norm": 14.06156063079834, "learning_rate": 4.6305843320381085e-06, "loss": 3.4701, "step": 17310 }, { "epoch": 0.17613728841145834, "grad_norm": 18.088773727416992, "learning_rate": 4.630375102580557e-06, "loss": 3.5199, "step": 17315 }, { "epoch": 0.17618815104166666, "grad_norm": 16.56032943725586, "learning_rate": 4.630165818617862e-06, "loss": 3.3376, "step": 17320 }, { "epoch": 0.176239013671875, "grad_norm": 13.883744239807129, "learning_rate": 4.62995648015538e-06, "loss": 3.3735, "step": 17325 }, { "epoch": 0.17628987630208334, "grad_norm": 9.11486530303955, "learning_rate": 4.629747087198466e-06, "loss": 3.2933, "step": 17330 }, { "epoch": 0.17634073893229166, "grad_norm": 12.755553245544434, "learning_rate": 4.629537639752477e-06, "loss": 3.5813, "step": 17335 }, { "epoch": 0.1763916015625, "grad_norm": 9.951680183410645, "learning_rate": 4.629328137822774e-06, "loss": 3.4514, "step": 17340 }, { "epoch": 0.17644246419270834, "grad_norm": 14.455367088317871, "learning_rate": 4.629118581414713e-06, "loss": 2.6122, "step": 17345 }, { "epoch": 0.17649332682291666, "grad_norm": 8.975110054016113, "learning_rate": 4.6289089705336595e-06, "loss": 3.1559, "step": 17350 }, { "epoch": 0.176544189453125, "grad_norm": 17.275592803955078, "learning_rate": 4.628699305184974e-06, "loss": 3.3324, "step": 17355 }, { "epoch": 0.17659505208333334, "grad_norm": 14.314291954040527, "learning_rate": 4.628489585374022e-06, "loss": 3.4651, "step": 17360 }, { "epoch": 0.17664591471354166, "grad_norm": 15.956419944763184, "learning_rate": 4.628279811106168e-06, "loss": 3.6316, "step": 17365 }, { "epoch": 0.17669677734375, "grad_norm": 13.444526672363281, "learning_rate": 4.628069982386779e-06, "loss": 3.4095, "step": 17370 }, { "epoch": 0.17674763997395834, "grad_norm": 14.760951042175293, "learning_rate": 4.627860099221224e-06, "loss": 3.2458, "step": 17375 }, { "epoch": 0.17679850260416666, "grad_norm": 9.031475067138672, "learning_rate": 4.627650161614873e-06, "loss": 3.1318, "step": 17380 }, { "epoch": 0.176849365234375, "grad_norm": 9.814698219299316, "learning_rate": 4.627440169573098e-06, "loss": 3.8271, "step": 17385 }, { "epoch": 0.17690022786458334, "grad_norm": 12.851936340332031, "learning_rate": 4.627230123101268e-06, "loss": 3.5598, "step": 17390 }, { "epoch": 0.17695109049479166, "grad_norm": 44.61574935913086, "learning_rate": 4.627020022204761e-06, "loss": 3.6756, "step": 17395 }, { "epoch": 0.177001953125, "grad_norm": 11.95059871673584, "learning_rate": 4.626809866888951e-06, "loss": 3.3316, "step": 17400 }, { "epoch": 0.17705281575520834, "grad_norm": 16.417972564697266, "learning_rate": 4.626599657159216e-06, "loss": 3.3807, "step": 17405 }, { "epoch": 0.17710367838541666, "grad_norm": 13.880332946777344, "learning_rate": 4.6263893930209304e-06, "loss": 3.3911, "step": 17410 }, { "epoch": 0.177154541015625, "grad_norm": 15.084425926208496, "learning_rate": 4.6261790744794765e-06, "loss": 3.4722, "step": 17415 }, { "epoch": 0.17720540364583334, "grad_norm": 15.803872108459473, "learning_rate": 4.625968701540236e-06, "loss": 3.1724, "step": 17420 }, { "epoch": 0.17725626627604166, "grad_norm": 7.609067440032959, "learning_rate": 4.62575827420859e-06, "loss": 3.2543, "step": 17425 }, { "epoch": 0.17730712890625, "grad_norm": 11.47111701965332, "learning_rate": 4.625547792489922e-06, "loss": 3.3642, "step": 17430 }, { "epoch": 0.17735799153645834, "grad_norm": 9.357483863830566, "learning_rate": 4.625337256389618e-06, "loss": 3.5617, "step": 17435 }, { "epoch": 0.17740885416666666, "grad_norm": 12.891478538513184, "learning_rate": 4.625126665913063e-06, "loss": 3.2682, "step": 17440 }, { "epoch": 0.177459716796875, "grad_norm": 14.151500701904297, "learning_rate": 4.6249160210656476e-06, "loss": 3.3943, "step": 17445 }, { "epoch": 0.17751057942708334, "grad_norm": 11.056961059570312, "learning_rate": 4.624705321852758e-06, "loss": 3.2693, "step": 17450 }, { "epoch": 0.17756144205729166, "grad_norm": 16.99146270751953, "learning_rate": 4.624494568279787e-06, "loss": 3.5405, "step": 17455 }, { "epoch": 0.1776123046875, "grad_norm": 9.272522926330566, "learning_rate": 4.624283760352126e-06, "loss": 3.0549, "step": 17460 }, { "epoch": 0.17766316731770834, "grad_norm": 10.904102325439453, "learning_rate": 4.624072898075168e-06, "loss": 3.0915, "step": 17465 }, { "epoch": 0.17771402994791666, "grad_norm": 15.731610298156738, "learning_rate": 4.6238619814543094e-06, "loss": 3.3617, "step": 17470 }, { "epoch": 0.177764892578125, "grad_norm": 15.29787540435791, "learning_rate": 4.623651010494945e-06, "loss": 3.4183, "step": 17475 }, { "epoch": 0.17781575520833334, "grad_norm": 13.272435188293457, "learning_rate": 4.623439985202472e-06, "loss": 3.3338, "step": 17480 }, { "epoch": 0.17786661783854166, "grad_norm": 10.852397918701172, "learning_rate": 4.623228905582292e-06, "loss": 3.6102, "step": 17485 }, { "epoch": 0.17791748046875, "grad_norm": 10.077887535095215, "learning_rate": 4.623017771639803e-06, "loss": 3.4454, "step": 17490 }, { "epoch": 0.17796834309895834, "grad_norm": 13.571096420288086, "learning_rate": 4.622806583380407e-06, "loss": 3.3273, "step": 17495 }, { "epoch": 0.17801920572916666, "grad_norm": 9.822858810424805, "learning_rate": 4.622595340809508e-06, "loss": 3.4383, "step": 17500 }, { "epoch": 0.178070068359375, "grad_norm": 16.22844123840332, "learning_rate": 4.622384043932509e-06, "loss": 3.0372, "step": 17505 }, { "epoch": 0.17812093098958334, "grad_norm": 7.383034706115723, "learning_rate": 4.622172692754819e-06, "loss": 3.1181, "step": 17510 }, { "epoch": 0.17817179361979166, "grad_norm": 9.609219551086426, "learning_rate": 4.621961287281843e-06, "loss": 3.3651, "step": 17515 }, { "epoch": 0.17822265625, "grad_norm": 9.730359077453613, "learning_rate": 4.621749827518991e-06, "loss": 3.4125, "step": 17520 }, { "epoch": 0.17827351888020834, "grad_norm": 7.237141132354736, "learning_rate": 4.621538313471673e-06, "loss": 3.2425, "step": 17525 }, { "epoch": 0.17832438151041666, "grad_norm": 10.888049125671387, "learning_rate": 4.621326745145299e-06, "loss": 3.5376, "step": 17530 }, { "epoch": 0.178375244140625, "grad_norm": 12.970224380493164, "learning_rate": 4.6211151225452835e-06, "loss": 3.5308, "step": 17535 }, { "epoch": 0.17842610677083334, "grad_norm": 12.810026168823242, "learning_rate": 4.62090344567704e-06, "loss": 3.1474, "step": 17540 }, { "epoch": 0.17847696940104166, "grad_norm": 8.826539993286133, "learning_rate": 4.6206917145459855e-06, "loss": 3.2885, "step": 17545 }, { "epoch": 0.17852783203125, "grad_norm": 10.239234924316406, "learning_rate": 4.620479929157535e-06, "loss": 3.1573, "step": 17550 }, { "epoch": 0.17857869466145834, "grad_norm": 14.187760353088379, "learning_rate": 4.620268089517108e-06, "loss": 3.2755, "step": 17555 }, { "epoch": 0.17862955729166666, "grad_norm": 11.916472434997559, "learning_rate": 4.620056195630125e-06, "loss": 3.2658, "step": 17560 }, { "epoch": 0.178680419921875, "grad_norm": 18.4185791015625, "learning_rate": 4.619844247502007e-06, "loss": 3.3846, "step": 17565 }, { "epoch": 0.17873128255208334, "grad_norm": 10.282975196838379, "learning_rate": 4.619632245138176e-06, "loss": 3.497, "step": 17570 }, { "epoch": 0.17878214518229166, "grad_norm": 15.2913179397583, "learning_rate": 4.619420188544057e-06, "loss": 3.5962, "step": 17575 }, { "epoch": 0.1788330078125, "grad_norm": 8.401836395263672, "learning_rate": 4.619208077725075e-06, "loss": 3.2448, "step": 17580 }, { "epoch": 0.17888387044270834, "grad_norm": 11.083022117614746, "learning_rate": 4.6189959126866555e-06, "loss": 3.0683, "step": 17585 }, { "epoch": 0.17893473307291666, "grad_norm": 11.012330055236816, "learning_rate": 4.618783693434229e-06, "loss": 3.3922, "step": 17590 }, { "epoch": 0.178985595703125, "grad_norm": 10.7522611618042, "learning_rate": 4.618571419973222e-06, "loss": 3.1457, "step": 17595 }, { "epoch": 0.17903645833333334, "grad_norm": 9.602813720703125, "learning_rate": 4.6183590923090696e-06, "loss": 3.2299, "step": 17600 }, { "epoch": 0.17908732096354166, "grad_norm": 8.291756629943848, "learning_rate": 4.6181467104472005e-06, "loss": 3.0146, "step": 17605 }, { "epoch": 0.17913818359375, "grad_norm": 13.860746383666992, "learning_rate": 4.61793427439305e-06, "loss": 3.1955, "step": 17610 }, { "epoch": 0.17918904622395834, "grad_norm": 13.45958423614502, "learning_rate": 4.6177217841520535e-06, "loss": 2.8876, "step": 17615 }, { "epoch": 0.17923990885416666, "grad_norm": 12.669150352478027, "learning_rate": 4.617509239729647e-06, "loss": 3.6825, "step": 17620 }, { "epoch": 0.179290771484375, "grad_norm": 11.674629211425781, "learning_rate": 4.61729664113127e-06, "loss": 3.4646, "step": 17625 }, { "epoch": 0.17934163411458334, "grad_norm": 14.575140953063965, "learning_rate": 4.617083988362358e-06, "loss": 2.9819, "step": 17630 }, { "epoch": 0.17939249674479166, "grad_norm": 13.144431114196777, "learning_rate": 4.616871281428355e-06, "loss": 3.5436, "step": 17635 }, { "epoch": 0.179443359375, "grad_norm": 14.929411888122559, "learning_rate": 4.616658520334701e-06, "loss": 3.3415, "step": 17640 }, { "epoch": 0.17949422200520834, "grad_norm": 13.124006271362305, "learning_rate": 4.616445705086842e-06, "loss": 3.3565, "step": 17645 }, { "epoch": 0.17954508463541666, "grad_norm": 8.569463729858398, "learning_rate": 4.616232835690221e-06, "loss": 4.0458, "step": 17650 }, { "epoch": 0.179595947265625, "grad_norm": 11.597271919250488, "learning_rate": 4.616019912150284e-06, "loss": 3.5618, "step": 17655 }, { "epoch": 0.17964680989583334, "grad_norm": 12.50623607635498, "learning_rate": 4.615806934472479e-06, "loss": 3.4437, "step": 17660 }, { "epoch": 0.17969767252604166, "grad_norm": 13.184673309326172, "learning_rate": 4.615593902662256e-06, "loss": 3.4312, "step": 17665 }, { "epoch": 0.17974853515625, "grad_norm": 10.000298500061035, "learning_rate": 4.615380816725063e-06, "loss": 3.693, "step": 17670 }, { "epoch": 0.17979939778645834, "grad_norm": 15.589776039123535, "learning_rate": 4.6151676766663536e-06, "loss": 3.4443, "step": 17675 }, { "epoch": 0.17985026041666666, "grad_norm": 12.44705867767334, "learning_rate": 4.614954482491581e-06, "loss": 3.2575, "step": 17680 }, { "epoch": 0.179901123046875, "grad_norm": 16.3400821685791, "learning_rate": 4.6147412342061995e-06, "loss": 3.1107, "step": 17685 }, { "epoch": 0.17995198567708334, "grad_norm": 9.2102689743042, "learning_rate": 4.614527931815664e-06, "loss": 3.2749, "step": 17690 }, { "epoch": 0.18000284830729166, "grad_norm": 9.545188903808594, "learning_rate": 4.6143145753254335e-06, "loss": 3.5115, "step": 17695 }, { "epoch": 0.1800537109375, "grad_norm": 13.04957389831543, "learning_rate": 4.614101164740965e-06, "loss": 3.5562, "step": 17700 }, { "epoch": 0.18010457356770834, "grad_norm": 10.874368667602539, "learning_rate": 4.613887700067719e-06, "loss": 3.3832, "step": 17705 }, { "epoch": 0.18015543619791666, "grad_norm": 11.728689193725586, "learning_rate": 4.613674181311158e-06, "loss": 3.2963, "step": 17710 }, { "epoch": 0.180206298828125, "grad_norm": 10.86733341217041, "learning_rate": 4.613460608476744e-06, "loss": 3.2448, "step": 17715 }, { "epoch": 0.18025716145833334, "grad_norm": 13.16111946105957, "learning_rate": 4.613246981569941e-06, "loss": 3.5583, "step": 17720 }, { "epoch": 0.18030802408854166, "grad_norm": 15.626137733459473, "learning_rate": 4.6130333005962144e-06, "loss": 3.4084, "step": 17725 }, { "epoch": 0.18035888671875, "grad_norm": 10.255508422851562, "learning_rate": 4.612819565561033e-06, "loss": 3.2747, "step": 17730 }, { "epoch": 0.18040974934895834, "grad_norm": 10.121077537536621, "learning_rate": 4.612605776469863e-06, "loss": 3.3058, "step": 17735 }, { "epoch": 0.18046061197916666, "grad_norm": 12.807663917541504, "learning_rate": 4.612391933328175e-06, "loss": 3.1888, "step": 17740 }, { "epoch": 0.180511474609375, "grad_norm": 7.487052917480469, "learning_rate": 4.61217803614144e-06, "loss": 3.2892, "step": 17745 }, { "epoch": 0.18056233723958334, "grad_norm": 13.956525802612305, "learning_rate": 4.61196408491513e-06, "loss": 3.7043, "step": 17750 }, { "epoch": 0.18061319986979166, "grad_norm": 9.125615119934082, "learning_rate": 4.611750079654721e-06, "loss": 3.4562, "step": 17755 }, { "epoch": 0.1806640625, "grad_norm": 14.65468692779541, "learning_rate": 4.611536020365686e-06, "loss": 3.3539, "step": 17760 }, { "epoch": 0.18071492513020834, "grad_norm": 10.214896202087402, "learning_rate": 4.611321907053502e-06, "loss": 3.2788, "step": 17765 }, { "epoch": 0.18076578776041666, "grad_norm": 13.62902545928955, "learning_rate": 4.611107739723647e-06, "loss": 3.3931, "step": 17770 }, { "epoch": 0.180816650390625, "grad_norm": 12.189156532287598, "learning_rate": 4.610893518381602e-06, "loss": 3.4467, "step": 17775 }, { "epoch": 0.18086751302083334, "grad_norm": 17.752410888671875, "learning_rate": 4.610679243032846e-06, "loss": 3.1061, "step": 17780 }, { "epoch": 0.18091837565104166, "grad_norm": 9.7039794921875, "learning_rate": 4.610464913682863e-06, "loss": 3.3899, "step": 17785 }, { "epoch": 0.18096923828125, "grad_norm": 18.238666534423828, "learning_rate": 4.610250530337134e-06, "loss": 3.4644, "step": 17790 }, { "epoch": 0.18102010091145834, "grad_norm": 8.860824584960938, "learning_rate": 4.6100360930011455e-06, "loss": 3.4091, "step": 17795 }, { "epoch": 0.18107096354166666, "grad_norm": 10.903741836547852, "learning_rate": 4.6098216016803845e-06, "loss": 3.276, "step": 17800 }, { "epoch": 0.181121826171875, "grad_norm": 13.30379581451416, "learning_rate": 4.609607056380337e-06, "loss": 3.2626, "step": 17805 }, { "epoch": 0.18117268880208334, "grad_norm": 12.02137565612793, "learning_rate": 4.609392457106494e-06, "loss": 3.6866, "step": 17810 }, { "epoch": 0.18122355143229166, "grad_norm": 10.083224296569824, "learning_rate": 4.6091778038643445e-06, "loss": 3.2943, "step": 17815 }, { "epoch": 0.1812744140625, "grad_norm": 13.689177513122559, "learning_rate": 4.608963096659381e-06, "loss": 3.4036, "step": 17820 }, { "epoch": 0.18132527669270834, "grad_norm": 10.178418159484863, "learning_rate": 4.608748335497096e-06, "loss": 3.3081, "step": 17825 }, { "epoch": 0.18137613932291666, "grad_norm": 13.94970417022705, "learning_rate": 4.608533520382985e-06, "loss": 3.3372, "step": 17830 }, { "epoch": 0.181427001953125, "grad_norm": 10.877111434936523, "learning_rate": 4.608318651322543e-06, "loss": 3.6664, "step": 17835 }, { "epoch": 0.18147786458333334, "grad_norm": 11.025904655456543, "learning_rate": 4.608103728321269e-06, "loss": 3.7866, "step": 17840 }, { "epoch": 0.18152872721354166, "grad_norm": 14.711570739746094, "learning_rate": 4.6078887513846605e-06, "loss": 3.4579, "step": 17845 }, { "epoch": 0.18157958984375, "grad_norm": 9.070608139038086, "learning_rate": 4.607673720518218e-06, "loss": 3.3378, "step": 17850 }, { "epoch": 0.18163045247395834, "grad_norm": 10.735955238342285, "learning_rate": 4.607458635727443e-06, "loss": 3.4461, "step": 17855 }, { "epoch": 0.18168131510416666, "grad_norm": 16.661575317382812, "learning_rate": 4.607243497017838e-06, "loss": 3.3425, "step": 17860 }, { "epoch": 0.181732177734375, "grad_norm": 12.659605979919434, "learning_rate": 4.607028304394907e-06, "loss": 3.9381, "step": 17865 }, { "epoch": 0.18178304036458334, "grad_norm": 16.774187088012695, "learning_rate": 4.606813057864158e-06, "loss": 3.5132, "step": 17870 }, { "epoch": 0.18183390299479166, "grad_norm": 6.721248149871826, "learning_rate": 4.606597757431095e-06, "loss": 3.3477, "step": 17875 }, { "epoch": 0.181884765625, "grad_norm": 11.525874137878418, "learning_rate": 4.606382403101228e-06, "loss": 3.5922, "step": 17880 }, { "epoch": 0.18193562825520834, "grad_norm": 13.829561233520508, "learning_rate": 4.606166994880067e-06, "loss": 3.2248, "step": 17885 }, { "epoch": 0.18198649088541666, "grad_norm": 8.705430030822754, "learning_rate": 4.605951532773122e-06, "loss": 3.2562, "step": 17890 }, { "epoch": 0.182037353515625, "grad_norm": 8.117154121398926, "learning_rate": 4.605736016785905e-06, "loss": 3.2657, "step": 17895 }, { "epoch": 0.18208821614583334, "grad_norm": 12.32487964630127, "learning_rate": 4.605520446923933e-06, "loss": 3.1516, "step": 17900 }, { "epoch": 0.18213907877604166, "grad_norm": 14.840004920959473, "learning_rate": 4.605304823192719e-06, "loss": 3.4167, "step": 17905 }, { "epoch": 0.18218994140625, "grad_norm": 14.861167907714844, "learning_rate": 4.60508914559778e-06, "loss": 3.729, "step": 17910 }, { "epoch": 0.18224080403645834, "grad_norm": 12.168790817260742, "learning_rate": 4.6048734141446335e-06, "loss": 3.2407, "step": 17915 }, { "epoch": 0.18229166666666666, "grad_norm": 7.106325149536133, "learning_rate": 4.604657628838801e-06, "loss": 3.2299, "step": 17920 }, { "epoch": 0.182342529296875, "grad_norm": 7.858881950378418, "learning_rate": 4.604441789685801e-06, "loss": 3.477, "step": 17925 }, { "epoch": 0.18239339192708334, "grad_norm": 11.167740821838379, "learning_rate": 4.604225896691157e-06, "loss": 3.673, "step": 17930 }, { "epoch": 0.18244425455729166, "grad_norm": 14.516616821289062, "learning_rate": 4.604009949860392e-06, "loss": 3.0951, "step": 17935 }, { "epoch": 0.1824951171875, "grad_norm": 11.77841567993164, "learning_rate": 4.603793949199031e-06, "loss": 3.4499, "step": 17940 }, { "epoch": 0.18254597981770834, "grad_norm": 12.027185440063477, "learning_rate": 4.603577894712601e-06, "loss": 3.8324, "step": 17945 }, { "epoch": 0.18259684244791666, "grad_norm": 13.794437408447266, "learning_rate": 4.603361786406628e-06, "loss": 3.6594, "step": 17950 }, { "epoch": 0.182647705078125, "grad_norm": 13.343639373779297, "learning_rate": 4.603145624286643e-06, "loss": 3.3471, "step": 17955 }, { "epoch": 0.18269856770833334, "grad_norm": 10.639410018920898, "learning_rate": 4.602929408358176e-06, "loss": 3.3093, "step": 17960 }, { "epoch": 0.18274943033854166, "grad_norm": 15.507131576538086, "learning_rate": 4.602713138626758e-06, "loss": 3.3536, "step": 17965 }, { "epoch": 0.18280029296875, "grad_norm": 13.404929161071777, "learning_rate": 4.602496815097923e-06, "loss": 3.244, "step": 17970 }, { "epoch": 0.18285115559895834, "grad_norm": 13.34079647064209, "learning_rate": 4.602280437777205e-06, "loss": 3.4758, "step": 17975 }, { "epoch": 0.18290201822916666, "grad_norm": 10.848614692687988, "learning_rate": 4.602064006670141e-06, "loss": 3.2688, "step": 17980 }, { "epoch": 0.182952880859375, "grad_norm": 11.669103622436523, "learning_rate": 4.601847521782268e-06, "loss": 3.51, "step": 17985 }, { "epoch": 0.18300374348958334, "grad_norm": 9.459100723266602, "learning_rate": 4.601630983119123e-06, "loss": 3.3047, "step": 17990 }, { "epoch": 0.18305460611979166, "grad_norm": 10.783151626586914, "learning_rate": 4.601414390686248e-06, "loss": 3.1554, "step": 17995 }, { "epoch": 0.18310546875, "grad_norm": 7.823272705078125, "learning_rate": 4.601197744489184e-06, "loss": 3.2805, "step": 18000 }, { "epoch": 0.18315633138020834, "grad_norm": 10.413420677185059, "learning_rate": 4.600981044533473e-06, "loss": 3.1569, "step": 18005 }, { "epoch": 0.18320719401041666, "grad_norm": 18.30168342590332, "learning_rate": 4.600764290824661e-06, "loss": 3.2039, "step": 18010 }, { "epoch": 0.183258056640625, "grad_norm": 13.457409858703613, "learning_rate": 4.600547483368292e-06, "loss": 3.5678, "step": 18015 }, { "epoch": 0.18330891927083334, "grad_norm": 12.043107032775879, "learning_rate": 4.600330622169914e-06, "loss": 3.2696, "step": 18020 }, { "epoch": 0.18335978190104166, "grad_norm": 10.646010398864746, "learning_rate": 4.600113707235075e-06, "loss": 3.2272, "step": 18025 }, { "epoch": 0.18341064453125, "grad_norm": 12.23196792602539, "learning_rate": 4.5998967385693235e-06, "loss": 3.4313, "step": 18030 }, { "epoch": 0.18346150716145834, "grad_norm": 9.132007598876953, "learning_rate": 4.599679716178212e-06, "loss": 3.5767, "step": 18035 }, { "epoch": 0.18351236979166666, "grad_norm": 15.052911758422852, "learning_rate": 4.599462640067294e-06, "loss": 3.5414, "step": 18040 }, { "epoch": 0.183563232421875, "grad_norm": 12.69869327545166, "learning_rate": 4.599245510242121e-06, "loss": 3.2965, "step": 18045 }, { "epoch": 0.18361409505208334, "grad_norm": 10.353547096252441, "learning_rate": 4.599028326708248e-06, "loss": 2.9671, "step": 18050 }, { "epoch": 0.18366495768229166, "grad_norm": 8.513712882995605, "learning_rate": 4.598811089471235e-06, "loss": 3.4682, "step": 18055 }, { "epoch": 0.1837158203125, "grad_norm": 10.49561882019043, "learning_rate": 4.598593798536636e-06, "loss": 3.1158, "step": 18060 }, { "epoch": 0.18376668294270834, "grad_norm": 7.227719306945801, "learning_rate": 4.598376453910013e-06, "loss": 3.4783, "step": 18065 }, { "epoch": 0.18381754557291666, "grad_norm": 8.991704940795898, "learning_rate": 4.598159055596926e-06, "loss": 3.258, "step": 18070 }, { "epoch": 0.183868408203125, "grad_norm": 11.315898895263672, "learning_rate": 4.5979416036029366e-06, "loss": 3.3756, "step": 18075 }, { "epoch": 0.18391927083333334, "grad_norm": 10.222885131835938, "learning_rate": 4.597724097933608e-06, "loss": 3.6465, "step": 18080 }, { "epoch": 0.18397013346354166, "grad_norm": 15.640301704406738, "learning_rate": 4.597506538594506e-06, "loss": 3.247, "step": 18085 }, { "epoch": 0.18402099609375, "grad_norm": 14.560544967651367, "learning_rate": 4.597288925591196e-06, "loss": 3.6574, "step": 18090 }, { "epoch": 0.18407185872395834, "grad_norm": 11.333292007446289, "learning_rate": 4.597071258929247e-06, "loss": 3.6407, "step": 18095 }, { "epoch": 0.18412272135416666, "grad_norm": 16.446786880493164, "learning_rate": 4.596853538614226e-06, "loss": 3.505, "step": 18100 }, { "epoch": 0.184173583984375, "grad_norm": 8.610262870788574, "learning_rate": 4.596635764651704e-06, "loss": 3.867, "step": 18105 }, { "epoch": 0.18422444661458334, "grad_norm": 11.611706733703613, "learning_rate": 4.596417937047253e-06, "loss": 3.4687, "step": 18110 }, { "epoch": 0.18427530924479166, "grad_norm": 9.320916175842285, "learning_rate": 4.5962000558064465e-06, "loss": 3.6369, "step": 18115 }, { "epoch": 0.184326171875, "grad_norm": 10.020218849182129, "learning_rate": 4.5959821209348585e-06, "loss": 3.2122, "step": 18120 }, { "epoch": 0.18437703450520834, "grad_norm": 15.03809642791748, "learning_rate": 4.595764132438064e-06, "loss": 3.3018, "step": 18125 }, { "epoch": 0.18442789713541666, "grad_norm": 13.933128356933594, "learning_rate": 4.595546090321642e-06, "loss": 3.4161, "step": 18130 }, { "epoch": 0.184478759765625, "grad_norm": 15.626864433288574, "learning_rate": 4.595327994591169e-06, "loss": 3.1919, "step": 18135 }, { "epoch": 0.18452962239583334, "grad_norm": 15.305878639221191, "learning_rate": 4.595109845252226e-06, "loss": 3.7762, "step": 18140 }, { "epoch": 0.18458048502604166, "grad_norm": 12.688610076904297, "learning_rate": 4.594891642310395e-06, "loss": 3.5695, "step": 18145 }, { "epoch": 0.18463134765625, "grad_norm": 12.089020729064941, "learning_rate": 4.594673385771257e-06, "loss": 3.651, "step": 18150 }, { "epoch": 0.18468221028645834, "grad_norm": 14.751028060913086, "learning_rate": 4.594455075640397e-06, "loss": 3.3362, "step": 18155 }, { "epoch": 0.18473307291666666, "grad_norm": 14.168632507324219, "learning_rate": 4.594236711923401e-06, "loss": 3.5103, "step": 18160 }, { "epoch": 0.184783935546875, "grad_norm": 11.68898868560791, "learning_rate": 4.594018294625855e-06, "loss": 3.0502, "step": 18165 }, { "epoch": 0.18483479817708334, "grad_norm": 11.675402641296387, "learning_rate": 4.593799823753347e-06, "loss": 3.2437, "step": 18170 }, { "epoch": 0.18488566080729166, "grad_norm": 14.426863670349121, "learning_rate": 4.593581299311467e-06, "loss": 3.5249, "step": 18175 }, { "epoch": 0.1849365234375, "grad_norm": 14.298029899597168, "learning_rate": 4.593362721305805e-06, "loss": 3.3278, "step": 18180 }, { "epoch": 0.18498738606770834, "grad_norm": 13.843632698059082, "learning_rate": 4.5931440897419546e-06, "loss": 3.3659, "step": 18185 }, { "epoch": 0.18503824869791666, "grad_norm": 15.963536262512207, "learning_rate": 4.592925404625509e-06, "loss": 3.2535, "step": 18190 }, { "epoch": 0.185089111328125, "grad_norm": 13.099442481994629, "learning_rate": 4.592706665962063e-06, "loss": 3.4257, "step": 18195 }, { "epoch": 0.18513997395833334, "grad_norm": 10.886616706848145, "learning_rate": 4.592487873757212e-06, "loss": 3.3174, "step": 18200 }, { "epoch": 0.18519083658854166, "grad_norm": 14.132744789123535, "learning_rate": 4.592269028016555e-06, "loss": 3.5966, "step": 18205 }, { "epoch": 0.18524169921875, "grad_norm": 12.172066688537598, "learning_rate": 4.5920501287456905e-06, "loss": 3.25, "step": 18210 }, { "epoch": 0.18529256184895834, "grad_norm": 9.599930763244629, "learning_rate": 4.591831175950221e-06, "loss": 3.3482, "step": 18215 }, { "epoch": 0.18534342447916666, "grad_norm": 14.642792701721191, "learning_rate": 4.5916121696357454e-06, "loss": 3.195, "step": 18220 }, { "epoch": 0.185394287109375, "grad_norm": 17.15027618408203, "learning_rate": 4.591393109807868e-06, "loss": 3.6213, "step": 18225 }, { "epoch": 0.18544514973958334, "grad_norm": 10.881146430969238, "learning_rate": 4.591173996472195e-06, "loss": 3.054, "step": 18230 }, { "epoch": 0.18549601236979166, "grad_norm": 16.243993759155273, "learning_rate": 4.5909548296343295e-06, "loss": 3.2491, "step": 18235 }, { "epoch": 0.185546875, "grad_norm": 12.273772239685059, "learning_rate": 4.590735609299881e-06, "loss": 3.208, "step": 18240 }, { "epoch": 0.18559773763020834, "grad_norm": 12.737349510192871, "learning_rate": 4.590516335474458e-06, "loss": 3.0281, "step": 18245 }, { "epoch": 0.18564860026041666, "grad_norm": 10.880949020385742, "learning_rate": 4.590297008163669e-06, "loss": 3.3972, "step": 18250 }, { "epoch": 0.185699462890625, "grad_norm": 10.571752548217773, "learning_rate": 4.590077627373126e-06, "loss": 3.5009, "step": 18255 }, { "epoch": 0.18575032552083334, "grad_norm": 17.717178344726562, "learning_rate": 4.589858193108444e-06, "loss": 3.5101, "step": 18260 }, { "epoch": 0.18580118815104166, "grad_norm": 12.134099006652832, "learning_rate": 4.589638705375234e-06, "loss": 3.3066, "step": 18265 }, { "epoch": 0.18585205078125, "grad_norm": 15.478253364562988, "learning_rate": 4.5894191641791145e-06, "loss": 3.329, "step": 18270 }, { "epoch": 0.18590291341145834, "grad_norm": 8.206337928771973, "learning_rate": 4.5891995695257e-06, "loss": 3.2733, "step": 18275 }, { "epoch": 0.18595377604166666, "grad_norm": 10.964665412902832, "learning_rate": 4.58897992142061e-06, "loss": 3.5635, "step": 18280 }, { "epoch": 0.186004638671875, "grad_norm": 12.019591331481934, "learning_rate": 4.588760219869463e-06, "loss": 3.3073, "step": 18285 }, { "epoch": 0.18605550130208334, "grad_norm": 12.096267700195312, "learning_rate": 4.588540464877882e-06, "loss": 3.4862, "step": 18290 }, { "epoch": 0.18610636393229166, "grad_norm": 10.249032020568848, "learning_rate": 4.588320656451487e-06, "loss": 3.6171, "step": 18295 }, { "epoch": 0.1861572265625, "grad_norm": 16.42626190185547, "learning_rate": 4.588100794595904e-06, "loss": 3.5845, "step": 18300 }, { "epoch": 0.18620808919270834, "grad_norm": 16.055395126342773, "learning_rate": 4.587880879316758e-06, "loss": 3.5212, "step": 18305 }, { "epoch": 0.18625895182291666, "grad_norm": 14.237022399902344, "learning_rate": 4.587660910619672e-06, "loss": 3.1991, "step": 18310 }, { "epoch": 0.186309814453125, "grad_norm": 7.7949652671813965, "learning_rate": 4.5874408885102785e-06, "loss": 3.4076, "step": 18315 }, { "epoch": 0.18636067708333334, "grad_norm": 8.902517318725586, "learning_rate": 4.5872208129942045e-06, "loss": 3.2999, "step": 18320 }, { "epoch": 0.18641153971354166, "grad_norm": 11.778844833374023, "learning_rate": 4.58700068407708e-06, "loss": 3.2865, "step": 18325 }, { "epoch": 0.18646240234375, "grad_norm": 8.537171363830566, "learning_rate": 4.586780501764538e-06, "loss": 3.2332, "step": 18330 }, { "epoch": 0.18651326497395834, "grad_norm": 15.2643461227417, "learning_rate": 4.586560266062211e-06, "loss": 3.3204, "step": 18335 }, { "epoch": 0.18656412760416666, "grad_norm": 8.688283920288086, "learning_rate": 4.586339976975735e-06, "loss": 3.3692, "step": 18340 }, { "epoch": 0.186614990234375, "grad_norm": 12.671655654907227, "learning_rate": 4.586119634510745e-06, "loss": 3.3349, "step": 18345 }, { "epoch": 0.18666585286458334, "grad_norm": 12.594867706298828, "learning_rate": 4.585899238672878e-06, "loss": 3.4412, "step": 18350 }, { "epoch": 0.18671671549479166, "grad_norm": 16.03032875061035, "learning_rate": 4.585678789467774e-06, "loss": 3.474, "step": 18355 }, { "epoch": 0.186767578125, "grad_norm": 13.385184288024902, "learning_rate": 4.585458286901072e-06, "loss": 3.2519, "step": 18360 }, { "epoch": 0.18681844075520834, "grad_norm": 9.913110733032227, "learning_rate": 4.5852377309784146e-06, "loss": 3.3587, "step": 18365 }, { "epoch": 0.18686930338541666, "grad_norm": 10.111922264099121, "learning_rate": 4.585017121705444e-06, "loss": 3.1416, "step": 18370 }, { "epoch": 0.186920166015625, "grad_norm": 9.840933799743652, "learning_rate": 4.584796459087805e-06, "loss": 3.5775, "step": 18375 }, { "epoch": 0.18697102864583334, "grad_norm": 12.728557586669922, "learning_rate": 4.584575743131142e-06, "loss": 3.6974, "step": 18380 }, { "epoch": 0.18702189127604166, "grad_norm": 12.671290397644043, "learning_rate": 4.584354973841103e-06, "loss": 3.343, "step": 18385 }, { "epoch": 0.18707275390625, "grad_norm": 14.869614601135254, "learning_rate": 4.584134151223335e-06, "loss": 3.4718, "step": 18390 }, { "epoch": 0.18712361653645834, "grad_norm": 11.287585258483887, "learning_rate": 4.58391327528349e-06, "loss": 3.4128, "step": 18395 }, { "epoch": 0.18717447916666666, "grad_norm": 16.449617385864258, "learning_rate": 4.5836923460272175e-06, "loss": 3.7161, "step": 18400 }, { "epoch": 0.187225341796875, "grad_norm": 16.260473251342773, "learning_rate": 4.58347136346017e-06, "loss": 3.3264, "step": 18405 }, { "epoch": 0.18727620442708334, "grad_norm": 13.514466285705566, "learning_rate": 4.5832503275880015e-06, "loss": 3.294, "step": 18410 }, { "epoch": 0.18732706705729166, "grad_norm": 13.021100044250488, "learning_rate": 4.583029238416368e-06, "loss": 3.3275, "step": 18415 }, { "epoch": 0.1873779296875, "grad_norm": 10.005925178527832, "learning_rate": 4.582808095950924e-06, "loss": 3.308, "step": 18420 }, { "epoch": 0.18742879231770834, "grad_norm": 11.48652172088623, "learning_rate": 4.5825869001973285e-06, "loss": 3.5553, "step": 18425 }, { "epoch": 0.18747965494791666, "grad_norm": 15.618109703063965, "learning_rate": 4.582365651161242e-06, "loss": 3.3904, "step": 18430 }, { "epoch": 0.187530517578125, "grad_norm": 14.050233840942383, "learning_rate": 4.582144348848323e-06, "loss": 3.7672, "step": 18435 }, { "epoch": 0.18758138020833334, "grad_norm": 10.15687370300293, "learning_rate": 4.581922993264235e-06, "loss": 3.2861, "step": 18440 }, { "epoch": 0.18763224283854166, "grad_norm": 10.453628540039062, "learning_rate": 4.58170158441464e-06, "loss": 3.1252, "step": 18445 }, { "epoch": 0.18768310546875, "grad_norm": 16.22777557373047, "learning_rate": 4.5814801223052035e-06, "loss": 3.43, "step": 18450 }, { "epoch": 0.18773396809895834, "grad_norm": 9.1096830368042, "learning_rate": 4.581258606941592e-06, "loss": 3.3274, "step": 18455 }, { "epoch": 0.18778483072916666, "grad_norm": 16.2208251953125, "learning_rate": 4.581037038329472e-06, "loss": 3.8525, "step": 18460 }, { "epoch": 0.187835693359375, "grad_norm": 16.626689910888672, "learning_rate": 4.580815416474512e-06, "loss": 3.2766, "step": 18465 }, { "epoch": 0.18788655598958334, "grad_norm": 16.821229934692383, "learning_rate": 4.580593741382384e-06, "loss": 3.6567, "step": 18470 }, { "epoch": 0.18793741861979166, "grad_norm": 11.31299877166748, "learning_rate": 4.580372013058757e-06, "loss": 3.4805, "step": 18475 }, { "epoch": 0.18798828125, "grad_norm": 16.890230178833008, "learning_rate": 4.580150231509306e-06, "loss": 3.2733, "step": 18480 }, { "epoch": 0.18803914388020834, "grad_norm": 8.345052719116211, "learning_rate": 4.579928396739704e-06, "loss": 3.7072, "step": 18485 }, { "epoch": 0.18809000651041666, "grad_norm": 7.271616458892822, "learning_rate": 4.579706508755627e-06, "loss": 3.4539, "step": 18490 }, { "epoch": 0.188140869140625, "grad_norm": 14.4603910446167, "learning_rate": 4.579484567562752e-06, "loss": 3.8369, "step": 18495 }, { "epoch": 0.18819173177083334, "grad_norm": 8.192587852478027, "learning_rate": 4.579262573166757e-06, "loss": 2.9722, "step": 18500 }, { "epoch": 0.18824259440104166, "grad_norm": 8.657429695129395, "learning_rate": 4.579040525573323e-06, "loss": 3.7435, "step": 18505 }, { "epoch": 0.18829345703125, "grad_norm": 11.86208724975586, "learning_rate": 4.578818424788129e-06, "loss": 3.3249, "step": 18510 }, { "epoch": 0.18834431966145834, "grad_norm": 11.226592063903809, "learning_rate": 4.578596270816858e-06, "loss": 3.4405, "step": 18515 }, { "epoch": 0.18839518229166666, "grad_norm": 7.9588727951049805, "learning_rate": 4.578374063665195e-06, "loss": 3.378, "step": 18520 }, { "epoch": 0.188446044921875, "grad_norm": 10.29086971282959, "learning_rate": 4.578151803338824e-06, "loss": 3.6522, "step": 18525 }, { "epoch": 0.18849690755208334, "grad_norm": 12.614090919494629, "learning_rate": 4.577929489843431e-06, "loss": 3.0822, "step": 18530 }, { "epoch": 0.18854777018229166, "grad_norm": 14.052781105041504, "learning_rate": 4.577707123184705e-06, "loss": 3.3621, "step": 18535 }, { "epoch": 0.1885986328125, "grad_norm": 13.460591316223145, "learning_rate": 4.577484703368335e-06, "loss": 3.5421, "step": 18540 }, { "epoch": 0.18864949544270834, "grad_norm": 10.110848426818848, "learning_rate": 4.57726223040001e-06, "loss": 3.5516, "step": 18545 }, { "epoch": 0.18870035807291666, "grad_norm": 11.632022857666016, "learning_rate": 4.577039704285424e-06, "loss": 3.5707, "step": 18550 }, { "epoch": 0.188751220703125, "grad_norm": 8.971404075622559, "learning_rate": 4.5768171250302706e-06, "loss": 3.4157, "step": 18555 }, { "epoch": 0.18880208333333334, "grad_norm": 14.906126976013184, "learning_rate": 4.576594492640242e-06, "loss": 3.7568, "step": 18560 }, { "epoch": 0.18885294596354166, "grad_norm": 12.4305419921875, "learning_rate": 4.576371807121036e-06, "loss": 3.5414, "step": 18565 }, { "epoch": 0.18890380859375, "grad_norm": 13.76634407043457, "learning_rate": 4.57614906847835e-06, "loss": 3.4253, "step": 18570 }, { "epoch": 0.18895467122395834, "grad_norm": 14.682574272155762, "learning_rate": 4.5759262767178805e-06, "loss": 3.4316, "step": 18575 }, { "epoch": 0.18900553385416666, "grad_norm": 12.575339317321777, "learning_rate": 4.575703431845331e-06, "loss": 3.6921, "step": 18580 }, { "epoch": 0.189056396484375, "grad_norm": 7.278709411621094, "learning_rate": 4.5754805338664e-06, "loss": 3.1521, "step": 18585 }, { "epoch": 0.18910725911458334, "grad_norm": 12.746679306030273, "learning_rate": 4.575257582786792e-06, "loss": 3.2062, "step": 18590 }, { "epoch": 0.18915812174479166, "grad_norm": 15.597989082336426, "learning_rate": 4.575034578612211e-06, "loss": 3.5262, "step": 18595 }, { "epoch": 0.189208984375, "grad_norm": 7.950192928314209, "learning_rate": 4.574811521348361e-06, "loss": 3.6785, "step": 18600 }, { "epoch": 0.18925984700520834, "grad_norm": 10.346031188964844, "learning_rate": 4.574588411000951e-06, "loss": 3.3503, "step": 18605 }, { "epoch": 0.18931070963541666, "grad_norm": 11.723124504089355, "learning_rate": 4.574365247575688e-06, "loss": 3.3964, "step": 18610 }, { "epoch": 0.189361572265625, "grad_norm": 8.930464744567871, "learning_rate": 4.574142031078282e-06, "loss": 3.2352, "step": 18615 }, { "epoch": 0.18941243489583334, "grad_norm": 13.8888521194458, "learning_rate": 4.573918761514443e-06, "loss": 3.315, "step": 18620 }, { "epoch": 0.18946329752604166, "grad_norm": 7.327556133270264, "learning_rate": 4.573695438889885e-06, "loss": 3.1141, "step": 18625 }, { "epoch": 0.18951416015625, "grad_norm": 8.570147514343262, "learning_rate": 4.57347206321032e-06, "loss": 3.1851, "step": 18630 }, { "epoch": 0.18956502278645834, "grad_norm": 9.34282112121582, "learning_rate": 4.573248634481464e-06, "loss": 3.5862, "step": 18635 }, { "epoch": 0.18961588541666666, "grad_norm": 12.512747764587402, "learning_rate": 4.573025152709033e-06, "loss": 3.5737, "step": 18640 }, { "epoch": 0.189666748046875, "grad_norm": 12.564814567565918, "learning_rate": 4.572801617898746e-06, "loss": 3.4582, "step": 18645 }, { "epoch": 0.18971761067708334, "grad_norm": 14.594040870666504, "learning_rate": 4.572578030056319e-06, "loss": 3.0915, "step": 18650 }, { "epoch": 0.18976847330729166, "grad_norm": 11.229549407958984, "learning_rate": 4.572354389187476e-06, "loss": 3.6208, "step": 18655 }, { "epoch": 0.1898193359375, "grad_norm": 13.392086029052734, "learning_rate": 4.572130695297936e-06, "loss": 3.5334, "step": 18660 }, { "epoch": 0.18987019856770834, "grad_norm": 9.601693153381348, "learning_rate": 4.571906948393424e-06, "loss": 3.3134, "step": 18665 }, { "epoch": 0.18992106119791666, "grad_norm": 13.067540168762207, "learning_rate": 4.571683148479663e-06, "loss": 3.0544, "step": 18670 }, { "epoch": 0.189971923828125, "grad_norm": 10.653081893920898, "learning_rate": 4.57145929556238e-06, "loss": 3.7998, "step": 18675 }, { "epoch": 0.19002278645833334, "grad_norm": 12.351947784423828, "learning_rate": 4.5712353896473025e-06, "loss": 3.3205, "step": 18680 }, { "epoch": 0.19007364908854166, "grad_norm": 9.99809455871582, "learning_rate": 4.571011430740158e-06, "loss": 3.577, "step": 18685 }, { "epoch": 0.19012451171875, "grad_norm": 11.64851188659668, "learning_rate": 4.5707874188466774e-06, "loss": 3.3207, "step": 18690 }, { "epoch": 0.19017537434895834, "grad_norm": 10.789388656616211, "learning_rate": 4.5705633539725915e-06, "loss": 3.5073, "step": 18695 }, { "epoch": 0.19022623697916666, "grad_norm": 11.41800308227539, "learning_rate": 4.570339236123632e-06, "loss": 3.4615, "step": 18700 }, { "epoch": 0.190277099609375, "grad_norm": 12.321640968322754, "learning_rate": 4.5701150653055345e-06, "loss": 3.3763, "step": 18705 }, { "epoch": 0.19032796223958334, "grad_norm": 14.531020164489746, "learning_rate": 4.569890841524034e-06, "loss": 3.5657, "step": 18710 }, { "epoch": 0.19037882486979166, "grad_norm": 10.604043960571289, "learning_rate": 4.569666564784867e-06, "loss": 3.4327, "step": 18715 }, { "epoch": 0.1904296875, "grad_norm": 9.537259101867676, "learning_rate": 4.569442235093771e-06, "loss": 3.3424, "step": 18720 }, { "epoch": 0.19048055013020834, "grad_norm": 10.841087341308594, "learning_rate": 4.569217852456486e-06, "loss": 3.4501, "step": 18725 }, { "epoch": 0.19053141276041666, "grad_norm": 10.725885391235352, "learning_rate": 4.568993416878753e-06, "loss": 3.6736, "step": 18730 }, { "epoch": 0.190582275390625, "grad_norm": 13.705768585205078, "learning_rate": 4.568768928366313e-06, "loss": 3.7702, "step": 18735 }, { "epoch": 0.19063313802083334, "grad_norm": 8.565932273864746, "learning_rate": 4.568544386924911e-06, "loss": 3.6213, "step": 18740 }, { "epoch": 0.19068400065104166, "grad_norm": 14.157207489013672, "learning_rate": 4.568319792560292e-06, "loss": 3.4415, "step": 18745 }, { "epoch": 0.19073486328125, "grad_norm": 12.56330680847168, "learning_rate": 4.5680951452782e-06, "loss": 3.1235, "step": 18750 }, { "epoch": 0.19078572591145834, "grad_norm": 7.816888809204102, "learning_rate": 4.567870445084385e-06, "loss": 3.1856, "step": 18755 }, { "epoch": 0.19083658854166666, "grad_norm": 13.05587387084961, "learning_rate": 4.567645691984594e-06, "loss": 3.1723, "step": 18760 }, { "epoch": 0.190887451171875, "grad_norm": 13.687058448791504, "learning_rate": 4.567420885984578e-06, "loss": 3.8625, "step": 18765 }, { "epoch": 0.19093831380208334, "grad_norm": 12.832854270935059, "learning_rate": 4.567196027090088e-06, "loss": 3.1376, "step": 18770 }, { "epoch": 0.19098917643229166, "grad_norm": 13.068547248840332, "learning_rate": 4.56697111530688e-06, "loss": 3.3375, "step": 18775 }, { "epoch": 0.1910400390625, "grad_norm": 10.62330150604248, "learning_rate": 4.566746150640704e-06, "loss": 3.4257, "step": 18780 }, { "epoch": 0.19109090169270834, "grad_norm": 13.381546020507812, "learning_rate": 4.566521133097318e-06, "loss": 3.2436, "step": 18785 }, { "epoch": 0.19114176432291666, "grad_norm": 9.422579765319824, "learning_rate": 4.566296062682478e-06, "loss": 3.4788, "step": 18790 }, { "epoch": 0.191192626953125, "grad_norm": 16.37440299987793, "learning_rate": 4.566070939401944e-06, "loss": 3.5952, "step": 18795 }, { "epoch": 0.19124348958333334, "grad_norm": 11.154594421386719, "learning_rate": 4.565845763261475e-06, "loss": 3.1788, "step": 18800 }, { "epoch": 0.19129435221354166, "grad_norm": 13.181290626525879, "learning_rate": 4.565620534266831e-06, "loss": 3.2884, "step": 18805 }, { "epoch": 0.19134521484375, "grad_norm": 10.758809089660645, "learning_rate": 4.565395252423775e-06, "loss": 3.2639, "step": 18810 }, { "epoch": 0.19139607747395834, "grad_norm": 11.981339454650879, "learning_rate": 4.5651699177380714e-06, "loss": 3.2688, "step": 18815 }, { "epoch": 0.19144694010416666, "grad_norm": 9.572751998901367, "learning_rate": 4.564944530215486e-06, "loss": 3.5481, "step": 18820 }, { "epoch": 0.191497802734375, "grad_norm": 9.87020492553711, "learning_rate": 4.564719089861783e-06, "loss": 3.4662, "step": 18825 }, { "epoch": 0.19154866536458334, "grad_norm": 9.3651123046875, "learning_rate": 4.564493596682732e-06, "loss": 3.2547, "step": 18830 }, { "epoch": 0.19159952799479166, "grad_norm": 9.596661567687988, "learning_rate": 4.564268050684101e-06, "loss": 3.3904, "step": 18835 }, { "epoch": 0.191650390625, "grad_norm": 15.922765731811523, "learning_rate": 4.564042451871662e-06, "loss": 3.2157, "step": 18840 }, { "epoch": 0.19170125325520834, "grad_norm": 11.004732131958008, "learning_rate": 4.563816800251185e-06, "loss": 3.3153, "step": 18845 }, { "epoch": 0.19175211588541666, "grad_norm": 8.292093276977539, "learning_rate": 4.563591095828446e-06, "loss": 3.2079, "step": 18850 }, { "epoch": 0.191802978515625, "grad_norm": 8.698326110839844, "learning_rate": 4.563365338609216e-06, "loss": 3.404, "step": 18855 }, { "epoch": 0.19185384114583334, "grad_norm": 11.080509185791016, "learning_rate": 4.563139528599274e-06, "loss": 3.2749, "step": 18860 }, { "epoch": 0.19190470377604166, "grad_norm": 14.920134544372559, "learning_rate": 4.562913665804397e-06, "loss": 3.455, "step": 18865 }, { "epoch": 0.19195556640625, "grad_norm": 10.069123268127441, "learning_rate": 4.562687750230361e-06, "loss": 3.6829, "step": 18870 }, { "epoch": 0.19200642903645834, "grad_norm": 8.710797309875488, "learning_rate": 4.562461781882949e-06, "loss": 3.3883, "step": 18875 }, { "epoch": 0.19205729166666666, "grad_norm": 13.156225204467773, "learning_rate": 4.5622357607679415e-06, "loss": 3.1545, "step": 18880 }, { "epoch": 0.192108154296875, "grad_norm": 12.595132827758789, "learning_rate": 4.5620096868911205e-06, "loss": 3.4499, "step": 18885 }, { "epoch": 0.19215901692708334, "grad_norm": 12.80722713470459, "learning_rate": 4.561783560258269e-06, "loss": 3.6053, "step": 18890 }, { "epoch": 0.19220987955729166, "grad_norm": 10.552962303161621, "learning_rate": 4.561557380875175e-06, "loss": 3.4, "step": 18895 }, { "epoch": 0.1922607421875, "grad_norm": 12.450384140014648, "learning_rate": 4.561331148747624e-06, "loss": 3.4466, "step": 18900 }, { "epoch": 0.19231160481770834, "grad_norm": 9.195076942443848, "learning_rate": 4.5611048638814045e-06, "loss": 3.0326, "step": 18905 }, { "epoch": 0.19236246744791666, "grad_norm": 10.586387634277344, "learning_rate": 4.560878526282305e-06, "loss": 3.1759, "step": 18910 }, { "epoch": 0.192413330078125, "grad_norm": 17.3674259185791, "learning_rate": 4.560652135956117e-06, "loss": 3.8675, "step": 18915 }, { "epoch": 0.19246419270833334, "grad_norm": 8.781184196472168, "learning_rate": 4.5604256929086334e-06, "loss": 3.292, "step": 18920 }, { "epoch": 0.19251505533854166, "grad_norm": 9.68058967590332, "learning_rate": 4.560199197145646e-06, "loss": 3.3997, "step": 18925 }, { "epoch": 0.19256591796875, "grad_norm": 8.510169982910156, "learning_rate": 4.559972648672951e-06, "loss": 3.4601, "step": 18930 }, { "epoch": 0.19261678059895834, "grad_norm": 13.213333129882812, "learning_rate": 4.559746047496343e-06, "loss": 3.2534, "step": 18935 }, { "epoch": 0.19266764322916666, "grad_norm": 13.007706642150879, "learning_rate": 4.559519393621621e-06, "loss": 3.3954, "step": 18940 }, { "epoch": 0.192718505859375, "grad_norm": 14.338438987731934, "learning_rate": 4.559292687054584e-06, "loss": 3.1865, "step": 18945 }, { "epoch": 0.19276936848958334, "grad_norm": 12.187652587890625, "learning_rate": 4.559065927801032e-06, "loss": 3.03, "step": 18950 }, { "epoch": 0.19282023111979166, "grad_norm": 12.161337852478027, "learning_rate": 4.558839115866765e-06, "loss": 3.426, "step": 18955 }, { "epoch": 0.19287109375, "grad_norm": 10.178563117980957, "learning_rate": 4.558612251257588e-06, "loss": 3.2395, "step": 18960 }, { "epoch": 0.19292195638020834, "grad_norm": 9.272007942199707, "learning_rate": 4.558385333979304e-06, "loss": 3.7845, "step": 18965 }, { "epoch": 0.19297281901041666, "grad_norm": 13.764449119567871, "learning_rate": 4.55815836403772e-06, "loss": 3.1891, "step": 18970 }, { "epoch": 0.193023681640625, "grad_norm": 13.366809844970703, "learning_rate": 4.557931341438642e-06, "loss": 3.2882, "step": 18975 }, { "epoch": 0.19307454427083334, "grad_norm": 11.97940731048584, "learning_rate": 4.557704266187878e-06, "loss": 3.0493, "step": 18980 }, { "epoch": 0.19312540690104166, "grad_norm": 15.40910816192627, "learning_rate": 4.557477138291238e-06, "loss": 3.4534, "step": 18985 }, { "epoch": 0.19317626953125, "grad_norm": 9.693882942199707, "learning_rate": 4.557249957754533e-06, "loss": 3.628, "step": 18990 }, { "epoch": 0.19322713216145834, "grad_norm": 9.635483741760254, "learning_rate": 4.557022724583575e-06, "loss": 3.3948, "step": 18995 }, { "epoch": 0.19327799479166666, "grad_norm": 8.52334213256836, "learning_rate": 4.55679543878418e-06, "loss": 3.5678, "step": 19000 }, { "epoch": 0.193328857421875, "grad_norm": 10.489480018615723, "learning_rate": 4.5565681003621596e-06, "loss": 3.2659, "step": 19005 }, { "epoch": 0.19337972005208334, "grad_norm": 10.904191970825195, "learning_rate": 4.556340709323333e-06, "loss": 3.7266, "step": 19010 }, { "epoch": 0.19343058268229166, "grad_norm": 9.743817329406738, "learning_rate": 4.556113265673515e-06, "loss": 3.8189, "step": 19015 }, { "epoch": 0.1934814453125, "grad_norm": 16.735994338989258, "learning_rate": 4.555885769418528e-06, "loss": 3.6075, "step": 19020 }, { "epoch": 0.19353230794270834, "grad_norm": 13.078174591064453, "learning_rate": 4.55565822056419e-06, "loss": 3.3227, "step": 19025 }, { "epoch": 0.19358317057291666, "grad_norm": 6.742351055145264, "learning_rate": 4.555430619116324e-06, "loss": 3.2009, "step": 19030 }, { "epoch": 0.193634033203125, "grad_norm": 11.052045822143555, "learning_rate": 4.555202965080752e-06, "loss": 3.5503, "step": 19035 }, { "epoch": 0.19368489583333334, "grad_norm": 12.830464363098145, "learning_rate": 4.554975258463301e-06, "loss": 3.4425, "step": 19040 }, { "epoch": 0.19373575846354166, "grad_norm": 14.631771087646484, "learning_rate": 4.554747499269794e-06, "loss": 2.9493, "step": 19045 }, { "epoch": 0.19378662109375, "grad_norm": 12.542807579040527, "learning_rate": 4.554519687506059e-06, "loss": 3.3026, "step": 19050 }, { "epoch": 0.19383748372395834, "grad_norm": 12.711750030517578, "learning_rate": 4.5542918231779255e-06, "loss": 3.3107, "step": 19055 }, { "epoch": 0.19388834635416666, "grad_norm": 15.20833969116211, "learning_rate": 4.554063906291222e-06, "loss": 3.2423, "step": 19060 }, { "epoch": 0.193939208984375, "grad_norm": 11.543058395385742, "learning_rate": 4.553835936851781e-06, "loss": 3.4903, "step": 19065 }, { "epoch": 0.19399007161458334, "grad_norm": 12.416997909545898, "learning_rate": 4.553607914865434e-06, "loss": 3.3484, "step": 19070 }, { "epoch": 0.19404093424479166, "grad_norm": 15.233044624328613, "learning_rate": 4.553379840338015e-06, "loss": 3.187, "step": 19075 }, { "epoch": 0.194091796875, "grad_norm": 16.927047729492188, "learning_rate": 4.55315171327536e-06, "loss": 3.4679, "step": 19080 }, { "epoch": 0.19414265950520834, "grad_norm": 13.578765869140625, "learning_rate": 4.552923533683305e-06, "loss": 2.9851, "step": 19085 }, { "epoch": 0.19419352213541666, "grad_norm": 12.253621101379395, "learning_rate": 4.552695301567688e-06, "loss": 3.3136, "step": 19090 }, { "epoch": 0.194244384765625, "grad_norm": 9.944585800170898, "learning_rate": 4.5524670169343475e-06, "loss": 3.7707, "step": 19095 }, { "epoch": 0.19429524739583334, "grad_norm": 13.689692497253418, "learning_rate": 4.552238679789126e-06, "loss": 3.6948, "step": 19100 }, { "epoch": 0.19434611002604166, "grad_norm": 11.918596267700195, "learning_rate": 4.5520102901378636e-06, "loss": 3.4049, "step": 19105 }, { "epoch": 0.19439697265625, "grad_norm": 14.738883972167969, "learning_rate": 4.551781847986404e-06, "loss": 3.6657, "step": 19110 }, { "epoch": 0.19444783528645834, "grad_norm": 8.853859901428223, "learning_rate": 4.551553353340593e-06, "loss": 3.5498, "step": 19115 }, { "epoch": 0.19449869791666666, "grad_norm": 11.349398612976074, "learning_rate": 4.551324806206276e-06, "loss": 3.5923, "step": 19120 }, { "epoch": 0.194549560546875, "grad_norm": 11.168492317199707, "learning_rate": 4.551096206589298e-06, "loss": 3.4872, "step": 19125 }, { "epoch": 0.19460042317708334, "grad_norm": 6.9756574630737305, "learning_rate": 4.5508675544955115e-06, "loss": 3.0787, "step": 19130 }, { "epoch": 0.19465128580729166, "grad_norm": 9.951805114746094, "learning_rate": 4.550638849930764e-06, "loss": 3.2865, "step": 19135 }, { "epoch": 0.1947021484375, "grad_norm": 9.90751838684082, "learning_rate": 4.550410092900908e-06, "loss": 3.3865, "step": 19140 }, { "epoch": 0.19475301106770834, "grad_norm": 12.605509757995605, "learning_rate": 4.550181283411796e-06, "loss": 3.3004, "step": 19145 }, { "epoch": 0.19480387369791666, "grad_norm": 10.202880859375, "learning_rate": 4.549952421469281e-06, "loss": 3.3373, "step": 19150 }, { "epoch": 0.194854736328125, "grad_norm": 9.19309139251709, "learning_rate": 4.54972350707922e-06, "loss": 2.9459, "step": 19155 }, { "epoch": 0.19490559895833334, "grad_norm": 12.980036735534668, "learning_rate": 4.549494540247469e-06, "loss": 3.4799, "step": 19160 }, { "epoch": 0.19495646158854166, "grad_norm": 12.7277250289917, "learning_rate": 4.549265520979885e-06, "loss": 3.0953, "step": 19165 }, { "epoch": 0.19500732421875, "grad_norm": 13.657605171203613, "learning_rate": 4.549036449282329e-06, "loss": 3.1377, "step": 19170 }, { "epoch": 0.19505818684895834, "grad_norm": 15.17473316192627, "learning_rate": 4.5488073251606616e-06, "loss": 3.1005, "step": 19175 }, { "epoch": 0.19510904947916666, "grad_norm": 13.394558906555176, "learning_rate": 4.548578148620744e-06, "loss": 3.162, "step": 19180 }, { "epoch": 0.195159912109375, "grad_norm": 11.801448822021484, "learning_rate": 4.54834891966844e-06, "loss": 3.1549, "step": 19185 }, { "epoch": 0.19521077473958334, "grad_norm": 13.888011932373047, "learning_rate": 4.548119638309613e-06, "loss": 3.4705, "step": 19190 }, { "epoch": 0.19526163736979166, "grad_norm": 15.35958194732666, "learning_rate": 4.547890304550132e-06, "loss": 3.7615, "step": 19195 }, { "epoch": 0.1953125, "grad_norm": 14.944520950317383, "learning_rate": 4.547660918395863e-06, "loss": 3.6172, "step": 19200 }, { "epoch": 0.19536336263020834, "grad_norm": 9.878191947937012, "learning_rate": 4.547431479852674e-06, "loss": 3.1867, "step": 19205 }, { "epoch": 0.19541422526041666, "grad_norm": 11.971900939941406, "learning_rate": 4.5472019889264375e-06, "loss": 3.1045, "step": 19210 }, { "epoch": 0.195465087890625, "grad_norm": 12.584593772888184, "learning_rate": 4.546972445623021e-06, "loss": 3.3828, "step": 19215 }, { "epoch": 0.19551595052083334, "grad_norm": 11.079330444335938, "learning_rate": 4.546742849948302e-06, "loss": 3.579, "step": 19220 }, { "epoch": 0.19556681315104166, "grad_norm": 11.899502754211426, "learning_rate": 4.54651320190815e-06, "loss": 3.4399, "step": 19225 }, { "epoch": 0.19561767578125, "grad_norm": 10.531402587890625, "learning_rate": 4.546283501508445e-06, "loss": 3.4826, "step": 19230 }, { "epoch": 0.19566853841145834, "grad_norm": 13.053157806396484, "learning_rate": 4.54605374875506e-06, "loss": 3.4088, "step": 19235 }, { "epoch": 0.19571940104166666, "grad_norm": 8.246091842651367, "learning_rate": 4.545823943653876e-06, "loss": 3.4349, "step": 19240 }, { "epoch": 0.195770263671875, "grad_norm": 8.850236892700195, "learning_rate": 4.545594086210771e-06, "loss": 3.022, "step": 19245 }, { "epoch": 0.19582112630208334, "grad_norm": 14.258888244628906, "learning_rate": 4.545364176431626e-06, "loss": 3.1912, "step": 19250 }, { "epoch": 0.19587198893229166, "grad_norm": 9.115057945251465, "learning_rate": 4.545134214322323e-06, "loss": 3.1928, "step": 19255 }, { "epoch": 0.1959228515625, "grad_norm": 12.057110786437988, "learning_rate": 4.544904199888746e-06, "loss": 3.3195, "step": 19260 }, { "epoch": 0.19597371419270834, "grad_norm": 8.72459888458252, "learning_rate": 4.54467413313678e-06, "loss": 3.5811, "step": 19265 }, { "epoch": 0.19602457682291666, "grad_norm": 7.331991195678711, "learning_rate": 4.544444014072311e-06, "loss": 3.191, "step": 19270 }, { "epoch": 0.196075439453125, "grad_norm": 13.493756294250488, "learning_rate": 4.544213842701226e-06, "loss": 2.8969, "step": 19275 }, { "epoch": 0.19612630208333334, "grad_norm": 13.320565223693848, "learning_rate": 4.543983619029415e-06, "loss": 3.3989, "step": 19280 }, { "epoch": 0.19617716471354166, "grad_norm": 12.885554313659668, "learning_rate": 4.543753343062767e-06, "loss": 3.4129, "step": 19285 }, { "epoch": 0.19622802734375, "grad_norm": 7.874749183654785, "learning_rate": 4.543523014807175e-06, "loss": 3.277, "step": 19290 }, { "epoch": 0.19627888997395834, "grad_norm": 11.990129470825195, "learning_rate": 4.54329263426853e-06, "loss": 3.3223, "step": 19295 }, { "epoch": 0.19632975260416666, "grad_norm": 14.290430068969727, "learning_rate": 4.543062201452728e-06, "loss": 3.366, "step": 19300 }, { "epoch": 0.196380615234375, "grad_norm": 12.931253433227539, "learning_rate": 4.5428317163656635e-06, "loss": 3.4963, "step": 19305 }, { "epoch": 0.19643147786458334, "grad_norm": 10.713802337646484, "learning_rate": 4.542601179013234e-06, "loss": 3.2372, "step": 19310 }, { "epoch": 0.19648234049479166, "grad_norm": 10.50666332244873, "learning_rate": 4.542370589401337e-06, "loss": 3.3301, "step": 19315 }, { "epoch": 0.196533203125, "grad_norm": 7.86500883102417, "learning_rate": 4.542139947535873e-06, "loss": 3.2692, "step": 19320 }, { "epoch": 0.19658406575520834, "grad_norm": 15.505828857421875, "learning_rate": 4.541909253422742e-06, "loss": 3.5111, "step": 19325 }, { "epoch": 0.19663492838541666, "grad_norm": 15.200455665588379, "learning_rate": 4.5416785070678475e-06, "loss": 3.5127, "step": 19330 }, { "epoch": 0.196685791015625, "grad_norm": 8.061166763305664, "learning_rate": 4.541447708477091e-06, "loss": 3.558, "step": 19335 }, { "epoch": 0.19673665364583334, "grad_norm": 15.958837509155273, "learning_rate": 4.5412168576563805e-06, "loss": 3.1756, "step": 19340 }, { "epoch": 0.19678751627604166, "grad_norm": 7.027966022491455, "learning_rate": 4.5409859546116195e-06, "loss": 3.2773, "step": 19345 }, { "epoch": 0.19683837890625, "grad_norm": 15.966991424560547, "learning_rate": 4.540754999348716e-06, "loss": 3.443, "step": 19350 }, { "epoch": 0.19688924153645834, "grad_norm": 10.5971040725708, "learning_rate": 4.540523991873581e-06, "loss": 3.3096, "step": 19355 }, { "epoch": 0.19694010416666666, "grad_norm": 15.873735427856445, "learning_rate": 4.540292932192123e-06, "loss": 3.1159, "step": 19360 }, { "epoch": 0.196990966796875, "grad_norm": 11.828910827636719, "learning_rate": 4.540061820310253e-06, "loss": 3.63, "step": 19365 }, { "epoch": 0.19704182942708334, "grad_norm": 13.37854290008545, "learning_rate": 4.539830656233886e-06, "loss": 2.9163, "step": 19370 }, { "epoch": 0.19709269205729166, "grad_norm": 12.53745174407959, "learning_rate": 4.5395994399689344e-06, "loss": 3.7792, "step": 19375 }, { "epoch": 0.1971435546875, "grad_norm": 14.388495445251465, "learning_rate": 4.539368171521315e-06, "loss": 3.4103, "step": 19380 }, { "epoch": 0.19719441731770834, "grad_norm": 13.567573547363281, "learning_rate": 4.539136850896944e-06, "loss": 3.9385, "step": 19385 }, { "epoch": 0.19724527994791666, "grad_norm": 11.386924743652344, "learning_rate": 4.53890547810174e-06, "loss": 3.1541, "step": 19390 }, { "epoch": 0.197296142578125, "grad_norm": 16.922727584838867, "learning_rate": 4.538674053141623e-06, "loss": 3.1228, "step": 19395 }, { "epoch": 0.19734700520833334, "grad_norm": 8.739219665527344, "learning_rate": 4.538442576022513e-06, "loss": 3.4801, "step": 19400 }, { "epoch": 0.19739786783854166, "grad_norm": 8.573755264282227, "learning_rate": 4.538211046750334e-06, "loss": 3.7405, "step": 19405 }, { "epoch": 0.19744873046875, "grad_norm": 10.428905487060547, "learning_rate": 4.537979465331007e-06, "loss": 3.4514, "step": 19410 }, { "epoch": 0.19749959309895834, "grad_norm": 10.055191993713379, "learning_rate": 4.53774783177046e-06, "loss": 3.3846, "step": 19415 }, { "epoch": 0.19755045572916666, "grad_norm": 7.410876750946045, "learning_rate": 4.537516146074616e-06, "loss": 3.3679, "step": 19420 }, { "epoch": 0.197601318359375, "grad_norm": 12.150521278381348, "learning_rate": 4.5372844082494045e-06, "loss": 3.8932, "step": 19425 }, { "epoch": 0.19765218098958334, "grad_norm": 13.015496253967285, "learning_rate": 4.537052618300756e-06, "loss": 3.3727, "step": 19430 }, { "epoch": 0.19770304361979166, "grad_norm": 12.705521583557129, "learning_rate": 4.536820776234597e-06, "loss": 3.5446, "step": 19435 }, { "epoch": 0.19775390625, "grad_norm": 13.614885330200195, "learning_rate": 4.536588882056863e-06, "loss": 3.3355, "step": 19440 }, { "epoch": 0.19780476888020834, "grad_norm": 10.389793395996094, "learning_rate": 4.536356935773485e-06, "loss": 3.4202, "step": 19445 }, { "epoch": 0.19785563151041666, "grad_norm": 11.007695198059082, "learning_rate": 4.536124937390396e-06, "loss": 3.7334, "step": 19450 }, { "epoch": 0.197906494140625, "grad_norm": 16.408811569213867, "learning_rate": 4.535892886913534e-06, "loss": 3.6896, "step": 19455 }, { "epoch": 0.19795735677083334, "grad_norm": 11.945623397827148, "learning_rate": 4.535660784348836e-06, "loss": 3.3185, "step": 19460 }, { "epoch": 0.19800821940104166, "grad_norm": 10.001398086547852, "learning_rate": 4.535428629702238e-06, "loss": 3.0122, "step": 19465 }, { "epoch": 0.19805908203125, "grad_norm": 18.04474449157715, "learning_rate": 4.535196422979681e-06, "loss": 3.6597, "step": 19470 }, { "epoch": 0.19810994466145834, "grad_norm": 14.083473205566406, "learning_rate": 4.534964164187106e-06, "loss": 3.4169, "step": 19475 }, { "epoch": 0.19816080729166666, "grad_norm": 14.401663780212402, "learning_rate": 4.534731853330455e-06, "loss": 3.5108, "step": 19480 }, { "epoch": 0.198211669921875, "grad_norm": 12.377889633178711, "learning_rate": 4.534499490415672e-06, "loss": 3.9204, "step": 19485 }, { "epoch": 0.19826253255208334, "grad_norm": 13.024721145629883, "learning_rate": 4.534267075448703e-06, "loss": 3.6746, "step": 19490 }, { "epoch": 0.19831339518229166, "grad_norm": 12.98880386352539, "learning_rate": 4.534034608435491e-06, "loss": 3.4475, "step": 19495 }, { "epoch": 0.1983642578125, "grad_norm": 12.89251708984375, "learning_rate": 4.533802089381988e-06, "loss": 3.5353, "step": 19500 }, { "epoch": 0.19841512044270834, "grad_norm": 11.248384475708008, "learning_rate": 4.533569518294139e-06, "loss": 3.211, "step": 19505 }, { "epoch": 0.19846598307291666, "grad_norm": 11.449090003967285, "learning_rate": 4.533336895177896e-06, "loss": 3.7413, "step": 19510 }, { "epoch": 0.198516845703125, "grad_norm": 9.919585227966309, "learning_rate": 4.53310422003921e-06, "loss": 3.7445, "step": 19515 }, { "epoch": 0.19856770833333334, "grad_norm": 9.99984359741211, "learning_rate": 4.532871492884035e-06, "loss": 3.2623, "step": 19520 }, { "epoch": 0.19861857096354166, "grad_norm": 16.127626419067383, "learning_rate": 4.532638713718326e-06, "loss": 3.4915, "step": 19525 }, { "epoch": 0.19866943359375, "grad_norm": 13.185535430908203, "learning_rate": 4.532405882548035e-06, "loss": 3.6796, "step": 19530 }, { "epoch": 0.19872029622395834, "grad_norm": 9.342668533325195, "learning_rate": 4.532172999379123e-06, "loss": 3.3614, "step": 19535 }, { "epoch": 0.19877115885416666, "grad_norm": 9.016987800598145, "learning_rate": 4.531940064217546e-06, "loss": 3.218, "step": 19540 }, { "epoch": 0.198822021484375, "grad_norm": 14.6638765335083, "learning_rate": 4.531707077069264e-06, "loss": 3.7645, "step": 19545 }, { "epoch": 0.19887288411458334, "grad_norm": 14.099635124206543, "learning_rate": 4.531474037940237e-06, "loss": 3.2719, "step": 19550 }, { "epoch": 0.19892374674479166, "grad_norm": 10.298885345458984, "learning_rate": 4.531240946836431e-06, "loss": 3.5016, "step": 19555 }, { "epoch": 0.198974609375, "grad_norm": 8.187545776367188, "learning_rate": 4.531007803763804e-06, "loss": 3.3259, "step": 19560 }, { "epoch": 0.19902547200520834, "grad_norm": 13.553715705871582, "learning_rate": 4.530774608728325e-06, "loss": 3.6581, "step": 19565 }, { "epoch": 0.19907633463541666, "grad_norm": 11.441377639770508, "learning_rate": 4.530541361735959e-06, "loss": 3.3082, "step": 19570 }, { "epoch": 0.199127197265625, "grad_norm": 13.178167343139648, "learning_rate": 4.530308062792673e-06, "loss": 3.2573, "step": 19575 }, { "epoch": 0.19917805989583334, "grad_norm": 7.674156188964844, "learning_rate": 4.530074711904437e-06, "loss": 3.5968, "step": 19580 }, { "epoch": 0.19922892252604166, "grad_norm": 13.66543197631836, "learning_rate": 4.52984130907722e-06, "loss": 3.1891, "step": 19585 }, { "epoch": 0.19927978515625, "grad_norm": 9.917035102844238, "learning_rate": 4.529607854316995e-06, "loss": 3.1431, "step": 19590 }, { "epoch": 0.19933064778645834, "grad_norm": 7.638389587402344, "learning_rate": 4.529374347629734e-06, "loss": 3.2769, "step": 19595 }, { "epoch": 0.19938151041666666, "grad_norm": 13.556234359741211, "learning_rate": 4.5291407890214105e-06, "loss": 3.3984, "step": 19600 }, { "epoch": 0.199432373046875, "grad_norm": 14.63976764678955, "learning_rate": 4.528907178498002e-06, "loss": 3.5608, "step": 19605 }, { "epoch": 0.19948323567708334, "grad_norm": 9.650500297546387, "learning_rate": 4.528673516065484e-06, "loss": 3.5258, "step": 19610 }, { "epoch": 0.19953409830729166, "grad_norm": 12.210488319396973, "learning_rate": 4.528439801729834e-06, "loss": 3.3766, "step": 19615 }, { "epoch": 0.1995849609375, "grad_norm": 10.5303373336792, "learning_rate": 4.528206035497034e-06, "loss": 3.4387, "step": 19620 }, { "epoch": 0.19963582356770834, "grad_norm": 11.196531295776367, "learning_rate": 4.527972217373062e-06, "loss": 3.3538, "step": 19625 }, { "epoch": 0.19968668619791666, "grad_norm": 10.746190071105957, "learning_rate": 4.527738347363902e-06, "loss": 3.5098, "step": 19630 }, { "epoch": 0.199737548828125, "grad_norm": 11.233555793762207, "learning_rate": 4.527504425475536e-06, "loss": 3.5044, "step": 19635 }, { "epoch": 0.19978841145833334, "grad_norm": 11.152935981750488, "learning_rate": 4.527270451713951e-06, "loss": 3.2676, "step": 19640 }, { "epoch": 0.19983927408854166, "grad_norm": 12.642974853515625, "learning_rate": 4.527036426085132e-06, "loss": 3.4784, "step": 19645 }, { "epoch": 0.19989013671875, "grad_norm": 11.634695053100586, "learning_rate": 4.526802348595066e-06, "loss": 3.3569, "step": 19650 }, { "epoch": 0.19994099934895834, "grad_norm": 16.355712890625, "learning_rate": 4.526568219249743e-06, "loss": 3.3902, "step": 19655 }, { "epoch": 0.19999186197916666, "grad_norm": 13.97595500946045, "learning_rate": 4.526334038055152e-06, "loss": 3.3811, "step": 19660 }, { "epoch": 0.200042724609375, "grad_norm": 13.499397277832031, "learning_rate": 4.526099805017285e-06, "loss": 3.4869, "step": 19665 }, { "epoch": 0.20009358723958334, "grad_norm": 14.013762474060059, "learning_rate": 4.525865520142135e-06, "loss": 3.0368, "step": 19670 }, { "epoch": 0.20014444986979166, "grad_norm": 15.110490798950195, "learning_rate": 4.525631183435696e-06, "loss": 3.3731, "step": 19675 }, { "epoch": 0.2001953125, "grad_norm": 11.767560005187988, "learning_rate": 4.525396794903962e-06, "loss": 3.3685, "step": 19680 }, { "epoch": 0.20024617513020834, "grad_norm": 11.866461753845215, "learning_rate": 4.525162354552932e-06, "loss": 3.1614, "step": 19685 }, { "epoch": 0.20029703776041666, "grad_norm": 11.312857627868652, "learning_rate": 4.524927862388604e-06, "loss": 3.203, "step": 19690 }, { "epoch": 0.200347900390625, "grad_norm": 13.30744457244873, "learning_rate": 4.524693318416975e-06, "loss": 3.5835, "step": 19695 }, { "epoch": 0.20039876302083334, "grad_norm": 12.074060440063477, "learning_rate": 4.524458722644049e-06, "loss": 3.5643, "step": 19700 }, { "epoch": 0.20044962565104166, "grad_norm": 15.581877708435059, "learning_rate": 4.524224075075825e-06, "loss": 3.3831, "step": 19705 }, { "epoch": 0.20050048828125, "grad_norm": 16.677257537841797, "learning_rate": 4.523989375718309e-06, "loss": 3.0894, "step": 19710 }, { "epoch": 0.20055135091145834, "grad_norm": 11.157782554626465, "learning_rate": 4.523754624577503e-06, "loss": 3.5676, "step": 19715 }, { "epoch": 0.20060221354166666, "grad_norm": 14.951972961425781, "learning_rate": 4.523519821659416e-06, "loss": 3.0777, "step": 19720 }, { "epoch": 0.200653076171875, "grad_norm": 12.17221450805664, "learning_rate": 4.523284966970053e-06, "loss": 3.3629, "step": 19725 }, { "epoch": 0.20070393880208334, "grad_norm": 12.007098197937012, "learning_rate": 4.523050060515424e-06, "loss": 3.7851, "step": 19730 }, { "epoch": 0.20075480143229166, "grad_norm": 11.723403930664062, "learning_rate": 4.5228151023015396e-06, "loss": 3.1963, "step": 19735 }, { "epoch": 0.2008056640625, "grad_norm": 10.277305603027344, "learning_rate": 4.522580092334409e-06, "loss": 3.6475, "step": 19740 }, { "epoch": 0.20085652669270834, "grad_norm": 12.673345565795898, "learning_rate": 4.522345030620047e-06, "loss": 3.1923, "step": 19745 }, { "epoch": 0.20090738932291666, "grad_norm": 10.174751281738281, "learning_rate": 4.5221099171644664e-06, "loss": 3.2029, "step": 19750 }, { "epoch": 0.200958251953125, "grad_norm": 13.342915534973145, "learning_rate": 4.5218747519736824e-06, "loss": 3.1688, "step": 19755 }, { "epoch": 0.20100911458333334, "grad_norm": 13.501158714294434, "learning_rate": 4.521639535053714e-06, "loss": 3.6031, "step": 19760 }, { "epoch": 0.20105997721354166, "grad_norm": 13.277938842773438, "learning_rate": 4.521404266410575e-06, "loss": 3.2503, "step": 19765 }, { "epoch": 0.20111083984375, "grad_norm": 14.475388526916504, "learning_rate": 4.521168946050288e-06, "loss": 3.169, "step": 19770 }, { "epoch": 0.20116170247395834, "grad_norm": 11.820828437805176, "learning_rate": 4.520933573978873e-06, "loss": 3.2875, "step": 19775 }, { "epoch": 0.20121256510416666, "grad_norm": 13.294929504394531, "learning_rate": 4.52069815020235e-06, "loss": 3.4509, "step": 19780 }, { "epoch": 0.201263427734375, "grad_norm": 11.186004638671875, "learning_rate": 4.520462674726746e-06, "loss": 3.3601, "step": 19785 }, { "epoch": 0.20131429036458334, "grad_norm": 9.814837455749512, "learning_rate": 4.520227147558081e-06, "loss": 3.1333, "step": 19790 }, { "epoch": 0.20136515299479166, "grad_norm": 14.82311725616455, "learning_rate": 4.519991568702385e-06, "loss": 3.4463, "step": 19795 }, { "epoch": 0.201416015625, "grad_norm": 12.3350248336792, "learning_rate": 4.519755938165683e-06, "loss": 2.8538, "step": 19800 }, { "epoch": 0.20146687825520834, "grad_norm": 9.856627464294434, "learning_rate": 4.519520255954004e-06, "loss": 3.3451, "step": 19805 }, { "epoch": 0.20151774088541666, "grad_norm": 12.37125015258789, "learning_rate": 4.519284522073377e-06, "loss": 3.5301, "step": 19810 }, { "epoch": 0.201568603515625, "grad_norm": 14.167351722717285, "learning_rate": 4.519048736529836e-06, "loss": 3.2116, "step": 19815 }, { "epoch": 0.20161946614583334, "grad_norm": 14.822011947631836, "learning_rate": 4.51881289932941e-06, "loss": 3.6803, "step": 19820 }, { "epoch": 0.20167032877604166, "grad_norm": 8.40614128112793, "learning_rate": 4.518577010478135e-06, "loss": 3.3569, "step": 19825 }, { "epoch": 0.20172119140625, "grad_norm": 7.354465007781982, "learning_rate": 4.518341069982046e-06, "loss": 3.3537, "step": 19830 }, { "epoch": 0.20177205403645834, "grad_norm": 14.864770889282227, "learning_rate": 4.518105077847179e-06, "loss": 3.6265, "step": 19835 }, { "epoch": 0.20182291666666666, "grad_norm": 11.00089168548584, "learning_rate": 4.5178690340795725e-06, "loss": 3.1597, "step": 19840 }, { "epoch": 0.201873779296875, "grad_norm": 10.744074821472168, "learning_rate": 4.5176329386852645e-06, "loss": 3.4948, "step": 19845 }, { "epoch": 0.20192464192708334, "grad_norm": 11.751157760620117, "learning_rate": 4.517396791670295e-06, "loss": 3.9409, "step": 19850 }, { "epoch": 0.20197550455729166, "grad_norm": 9.079497337341309, "learning_rate": 4.517160593040708e-06, "loss": 3.3642, "step": 19855 }, { "epoch": 0.2020263671875, "grad_norm": 13.547245979309082, "learning_rate": 4.516924342802545e-06, "loss": 3.5323, "step": 19860 }, { "epoch": 0.20207722981770834, "grad_norm": 11.432042121887207, "learning_rate": 4.516688040961851e-06, "loss": 3.0839, "step": 19865 }, { "epoch": 0.20212809244791666, "grad_norm": 8.827293395996094, "learning_rate": 4.516451687524671e-06, "loss": 3.1659, "step": 19870 }, { "epoch": 0.202178955078125, "grad_norm": 12.12353229522705, "learning_rate": 4.516215282497053e-06, "loss": 3.2329, "step": 19875 }, { "epoch": 0.20222981770833334, "grad_norm": 14.670421600341797, "learning_rate": 4.515978825885045e-06, "loss": 3.2416, "step": 19880 }, { "epoch": 0.20228068033854166, "grad_norm": 12.11870288848877, "learning_rate": 4.515742317694696e-06, "loss": 3.5721, "step": 19885 }, { "epoch": 0.20233154296875, "grad_norm": 7.884826183319092, "learning_rate": 4.515505757932057e-06, "loss": 3.4912, "step": 19890 }, { "epoch": 0.20238240559895834, "grad_norm": 15.665653228759766, "learning_rate": 4.515269146603183e-06, "loss": 3.2936, "step": 19895 }, { "epoch": 0.20243326822916666, "grad_norm": 13.599945068359375, "learning_rate": 4.5150324837141235e-06, "loss": 3.1648, "step": 19900 }, { "epoch": 0.202484130859375, "grad_norm": 13.613029479980469, "learning_rate": 4.514795769270937e-06, "loss": 3.1707, "step": 19905 }, { "epoch": 0.20253499348958334, "grad_norm": 10.294753074645996, "learning_rate": 4.514559003279677e-06, "loss": 3.6093, "step": 19910 }, { "epoch": 0.20258585611979166, "grad_norm": 9.080965995788574, "learning_rate": 4.5143221857464025e-06, "loss": 3.4589, "step": 19915 }, { "epoch": 0.20263671875, "grad_norm": 11.949151992797852, "learning_rate": 4.514085316677173e-06, "loss": 3.2098, "step": 19920 }, { "epoch": 0.20268758138020834, "grad_norm": 14.945830345153809, "learning_rate": 4.513848396078048e-06, "loss": 3.5203, "step": 19925 }, { "epoch": 0.20273844401041666, "grad_norm": 9.910847663879395, "learning_rate": 4.51361142395509e-06, "loss": 3.1446, "step": 19930 }, { "epoch": 0.202789306640625, "grad_norm": 13.371243476867676, "learning_rate": 4.5133744003143595e-06, "loss": 3.395, "step": 19935 }, { "epoch": 0.20284016927083334, "grad_norm": 16.05323600769043, "learning_rate": 4.513137325161923e-06, "loss": 3.5278, "step": 19940 }, { "epoch": 0.20289103190104166, "grad_norm": 15.017134666442871, "learning_rate": 4.5129001985038445e-06, "loss": 3.2377, "step": 19945 }, { "epoch": 0.20294189453125, "grad_norm": 13.222308158874512, "learning_rate": 4.512663020346192e-06, "loss": 3.3901, "step": 19950 }, { "epoch": 0.20299275716145834, "grad_norm": 9.478425025939941, "learning_rate": 4.512425790695033e-06, "loss": 3.4694, "step": 19955 }, { "epoch": 0.20304361979166666, "grad_norm": 12.377535820007324, "learning_rate": 4.5121885095564374e-06, "loss": 3.9845, "step": 19960 }, { "epoch": 0.203094482421875, "grad_norm": 11.293395042419434, "learning_rate": 4.5119511769364755e-06, "loss": 3.4415, "step": 19965 }, { "epoch": 0.20314534505208334, "grad_norm": 10.15709114074707, "learning_rate": 4.511713792841219e-06, "loss": 3.5203, "step": 19970 }, { "epoch": 0.20319620768229166, "grad_norm": 14.42606258392334, "learning_rate": 4.511476357276743e-06, "loss": 3.207, "step": 19975 }, { "epoch": 0.2032470703125, "grad_norm": 9.816762924194336, "learning_rate": 4.511238870249121e-06, "loss": 2.8994, "step": 19980 }, { "epoch": 0.20329793294270834, "grad_norm": 11.36377239227295, "learning_rate": 4.511001331764429e-06, "loss": 3.6271, "step": 19985 }, { "epoch": 0.20334879557291666, "grad_norm": 12.325090408325195, "learning_rate": 4.510763741828744e-06, "loss": 3.1638, "step": 19990 }, { "epoch": 0.203399658203125, "grad_norm": 9.825456619262695, "learning_rate": 4.510526100448146e-06, "loss": 3.251, "step": 19995 }, { "epoch": 0.20345052083333334, "grad_norm": 8.139120101928711, "learning_rate": 4.510288407628714e-06, "loss": 3.5742, "step": 20000 }, { "epoch": 0.20350138346354166, "grad_norm": 15.358031272888184, "learning_rate": 4.51005066337653e-06, "loss": 3.4079, "step": 20005 }, { "epoch": 0.20355224609375, "grad_norm": 14.130391120910645, "learning_rate": 4.509812867697676e-06, "loss": 3.3927, "step": 20010 }, { "epoch": 0.20360310872395834, "grad_norm": 8.38779067993164, "learning_rate": 4.509575020598237e-06, "loss": 3.4258, "step": 20015 }, { "epoch": 0.20365397135416666, "grad_norm": 16.604267120361328, "learning_rate": 4.509337122084297e-06, "loss": 3.3513, "step": 20020 }, { "epoch": 0.203704833984375, "grad_norm": 11.351149559020996, "learning_rate": 4.509099172161941e-06, "loss": 3.5092, "step": 20025 }, { "epoch": 0.20375569661458334, "grad_norm": 14.427471160888672, "learning_rate": 4.508861170837261e-06, "loss": 3.1501, "step": 20030 }, { "epoch": 0.20380655924479166, "grad_norm": 8.998710632324219, "learning_rate": 4.508623118116344e-06, "loss": 3.6365, "step": 20035 }, { "epoch": 0.203857421875, "grad_norm": 16.93038558959961, "learning_rate": 4.5083850140052795e-06, "loss": 3.538, "step": 20040 }, { "epoch": 0.20390828450520834, "grad_norm": 10.420814514160156, "learning_rate": 4.508146858510162e-06, "loss": 3.6521, "step": 20045 }, { "epoch": 0.20395914713541666, "grad_norm": 13.199938774108887, "learning_rate": 4.507908651637082e-06, "loss": 3.1762, "step": 20050 }, { "epoch": 0.204010009765625, "grad_norm": 8.664349555969238, "learning_rate": 4.507670393392135e-06, "loss": 2.9778, "step": 20055 }, { "epoch": 0.20406087239583334, "grad_norm": 16.507963180541992, "learning_rate": 4.5074320837814165e-06, "loss": 3.602, "step": 20060 }, { "epoch": 0.20411173502604166, "grad_norm": 11.548971176147461, "learning_rate": 4.507193722811025e-06, "loss": 3.3214, "step": 20065 }, { "epoch": 0.20416259765625, "grad_norm": 8.971203804016113, "learning_rate": 4.506955310487058e-06, "loss": 3.5269, "step": 20070 }, { "epoch": 0.20421346028645834, "grad_norm": 15.892366409301758, "learning_rate": 4.506716846815614e-06, "loss": 3.6735, "step": 20075 }, { "epoch": 0.20426432291666666, "grad_norm": 12.524632453918457, "learning_rate": 4.506478331802795e-06, "loss": 3.0179, "step": 20080 }, { "epoch": 0.204315185546875, "grad_norm": 13.37697982788086, "learning_rate": 4.506239765454704e-06, "loss": 3.1897, "step": 20085 }, { "epoch": 0.20436604817708334, "grad_norm": 11.929197311401367, "learning_rate": 4.506001147777445e-06, "loss": 3.2953, "step": 20090 }, { "epoch": 0.20441691080729166, "grad_norm": 11.28937816619873, "learning_rate": 4.505762478777121e-06, "loss": 3.4106, "step": 20095 }, { "epoch": 0.2044677734375, "grad_norm": 15.234105110168457, "learning_rate": 4.50552375845984e-06, "loss": 3.3616, "step": 20100 }, { "epoch": 0.20451863606770834, "grad_norm": 8.608086585998535, "learning_rate": 4.505284986831709e-06, "loss": 2.7565, "step": 20105 }, { "epoch": 0.20456949869791666, "grad_norm": 14.365691184997559, "learning_rate": 4.5050461638988366e-06, "loss": 3.4174, "step": 20110 }, { "epoch": 0.204620361328125, "grad_norm": 14.005748748779297, "learning_rate": 4.504807289667333e-06, "loss": 3.429, "step": 20115 }, { "epoch": 0.20467122395833334, "grad_norm": 14.597360610961914, "learning_rate": 4.50456836414331e-06, "loss": 3.188, "step": 20120 }, { "epoch": 0.20472208658854166, "grad_norm": 10.109228134155273, "learning_rate": 4.504329387332882e-06, "loss": 3.204, "step": 20125 }, { "epoch": 0.20477294921875, "grad_norm": 10.633742332458496, "learning_rate": 4.50409035924216e-06, "loss": 3.4177, "step": 20130 }, { "epoch": 0.20482381184895834, "grad_norm": 14.340601921081543, "learning_rate": 4.503851279877262e-06, "loss": 3.4076, "step": 20135 }, { "epoch": 0.20487467447916666, "grad_norm": 12.750170707702637, "learning_rate": 4.503612149244304e-06, "loss": 3.9644, "step": 20140 }, { "epoch": 0.204925537109375, "grad_norm": 9.519548416137695, "learning_rate": 4.503372967349404e-06, "loss": 3.4598, "step": 20145 }, { "epoch": 0.20497639973958334, "grad_norm": 11.770230293273926, "learning_rate": 4.503133734198682e-06, "loss": 3.7069, "step": 20150 }, { "epoch": 0.20502726236979166, "grad_norm": 7.587390422821045, "learning_rate": 4.502894449798258e-06, "loss": 4.2052, "step": 20155 }, { "epoch": 0.205078125, "grad_norm": 13.156473159790039, "learning_rate": 4.502655114154253e-06, "loss": 3.4261, "step": 20160 }, { "epoch": 0.20512898763020834, "grad_norm": 11.953205108642578, "learning_rate": 4.502415727272792e-06, "loss": 3.5789, "step": 20165 }, { "epoch": 0.20517985026041666, "grad_norm": 12.134018898010254, "learning_rate": 4.50217628916e-06, "loss": 3.4589, "step": 20170 }, { "epoch": 0.205230712890625, "grad_norm": 13.219483375549316, "learning_rate": 4.501936799822002e-06, "loss": 3.4232, "step": 20175 }, { "epoch": 0.20528157552083334, "grad_norm": 14.755402565002441, "learning_rate": 4.501697259264926e-06, "loss": 3.378, "step": 20180 }, { "epoch": 0.20533243815104166, "grad_norm": 14.997588157653809, "learning_rate": 4.501457667494899e-06, "loss": 3.4037, "step": 20185 }, { "epoch": 0.20538330078125, "grad_norm": 10.769617080688477, "learning_rate": 4.5012180245180525e-06, "loss": 3.4164, "step": 20190 }, { "epoch": 0.20543416341145834, "grad_norm": 13.86328125, "learning_rate": 4.500978330340517e-06, "loss": 3.5354, "step": 20195 }, { "epoch": 0.20548502604166666, "grad_norm": 9.748161315917969, "learning_rate": 4.500738584968425e-06, "loss": 3.3028, "step": 20200 }, { "epoch": 0.205535888671875, "grad_norm": 12.368988990783691, "learning_rate": 4.500498788407911e-06, "loss": 3.7509, "step": 20205 }, { "epoch": 0.20558675130208334, "grad_norm": 7.2794318199157715, "learning_rate": 4.50025894066511e-06, "loss": 3.5151, "step": 20210 }, { "epoch": 0.20563761393229166, "grad_norm": 8.730498313903809, "learning_rate": 4.500019041746157e-06, "loss": 3.2818, "step": 20215 }, { "epoch": 0.2056884765625, "grad_norm": 14.868056297302246, "learning_rate": 4.499779091657192e-06, "loss": 3.0804, "step": 20220 }, { "epoch": 0.20573933919270834, "grad_norm": 14.980323791503906, "learning_rate": 4.499539090404352e-06, "loss": 3.0393, "step": 20225 }, { "epoch": 0.20579020182291666, "grad_norm": 13.772435188293457, "learning_rate": 4.49929903799378e-06, "loss": 3.1725, "step": 20230 }, { "epoch": 0.205841064453125, "grad_norm": 16.341049194335938, "learning_rate": 4.499058934431613e-06, "loss": 3.5846, "step": 20235 }, { "epoch": 0.20589192708333334, "grad_norm": 12.222286224365234, "learning_rate": 4.498818779723999e-06, "loss": 3.5112, "step": 20240 }, { "epoch": 0.20594278971354166, "grad_norm": 14.283944129943848, "learning_rate": 4.49857857387708e-06, "loss": 3.5837, "step": 20245 }, { "epoch": 0.20599365234375, "grad_norm": 9.407193183898926, "learning_rate": 4.498338316897003e-06, "loss": 3.1534, "step": 20250 }, { "epoch": 0.20604451497395834, "grad_norm": 10.292035102844238, "learning_rate": 4.498098008789913e-06, "loss": 2.8781, "step": 20255 }, { "epoch": 0.20609537760416666, "grad_norm": 15.310603141784668, "learning_rate": 4.497857649561959e-06, "loss": 3.4637, "step": 20260 }, { "epoch": 0.206146240234375, "grad_norm": 12.086821556091309, "learning_rate": 4.4976172392192895e-06, "loss": 3.7627, "step": 20265 }, { "epoch": 0.20619710286458334, "grad_norm": 9.511617660522461, "learning_rate": 4.4973767777680576e-06, "loss": 3.2272, "step": 20270 }, { "epoch": 0.20624796549479166, "grad_norm": 14.292364120483398, "learning_rate": 4.497136265214413e-06, "loss": 3.4433, "step": 20275 }, { "epoch": 0.206298828125, "grad_norm": 9.073116302490234, "learning_rate": 4.496895701564512e-06, "loss": 3.5995, "step": 20280 }, { "epoch": 0.20634969075520834, "grad_norm": 12.548660278320312, "learning_rate": 4.496655086824507e-06, "loss": 3.4855, "step": 20285 }, { "epoch": 0.20640055338541666, "grad_norm": 12.357610702514648, "learning_rate": 4.496414421000555e-06, "loss": 3.4397, "step": 20290 }, { "epoch": 0.206451416015625, "grad_norm": 9.429367065429688, "learning_rate": 4.496173704098814e-06, "loss": 2.7644, "step": 20295 }, { "epoch": 0.20650227864583334, "grad_norm": 9.462889671325684, "learning_rate": 4.495932936125441e-06, "loss": 3.4458, "step": 20300 }, { "epoch": 0.20655314127604166, "grad_norm": 13.479082107543945, "learning_rate": 4.495692117086597e-06, "loss": 3.3089, "step": 20305 }, { "epoch": 0.20660400390625, "grad_norm": 15.136412620544434, "learning_rate": 4.495451246988443e-06, "loss": 3.4846, "step": 20310 }, { "epoch": 0.20665486653645834, "grad_norm": 15.00229263305664, "learning_rate": 4.495210325837143e-06, "loss": 3.159, "step": 20315 }, { "epoch": 0.20670572916666666, "grad_norm": 14.153552055358887, "learning_rate": 4.494969353638859e-06, "loss": 3.2891, "step": 20320 }, { "epoch": 0.206756591796875, "grad_norm": 10.715276718139648, "learning_rate": 4.494728330399757e-06, "loss": 3.3983, "step": 20325 }, { "epoch": 0.20680745442708334, "grad_norm": 14.318894386291504, "learning_rate": 4.494487256126003e-06, "loss": 3.3678, "step": 20330 }, { "epoch": 0.20685831705729166, "grad_norm": 10.274447441101074, "learning_rate": 4.494246130823765e-06, "loss": 3.0082, "step": 20335 }, { "epoch": 0.2069091796875, "grad_norm": 9.914955139160156, "learning_rate": 4.494004954499214e-06, "loss": 3.4777, "step": 20340 }, { "epoch": 0.20696004231770834, "grad_norm": 9.05743408203125, "learning_rate": 4.4937637271585174e-06, "loss": 3.186, "step": 20345 }, { "epoch": 0.20701090494791666, "grad_norm": 13.139704704284668, "learning_rate": 4.49352244880785e-06, "loss": 3.2584, "step": 20350 }, { "epoch": 0.207061767578125, "grad_norm": 11.612173080444336, "learning_rate": 4.493281119453382e-06, "loss": 3.6306, "step": 20355 }, { "epoch": 0.20711263020833334, "grad_norm": 8.827756881713867, "learning_rate": 4.493039739101289e-06, "loss": 3.5112, "step": 20360 }, { "epoch": 0.20716349283854166, "grad_norm": 13.69195556640625, "learning_rate": 4.492798307757747e-06, "loss": 3.6277, "step": 20365 }, { "epoch": 0.20721435546875, "grad_norm": 7.879222393035889, "learning_rate": 4.492556825428932e-06, "loss": 3.4635, "step": 20370 }, { "epoch": 0.20726521809895834, "grad_norm": 10.974207878112793, "learning_rate": 4.492315292121023e-06, "loss": 3.5284, "step": 20375 }, { "epoch": 0.20731608072916666, "grad_norm": 13.299656867980957, "learning_rate": 4.492073707840201e-06, "loss": 3.5775, "step": 20380 }, { "epoch": 0.207366943359375, "grad_norm": 10.87471866607666, "learning_rate": 4.491832072592644e-06, "loss": 3.3934, "step": 20385 }, { "epoch": 0.20741780598958334, "grad_norm": 10.13965129852295, "learning_rate": 4.491590386384536e-06, "loss": 3.431, "step": 20390 }, { "epoch": 0.20746866861979166, "grad_norm": 16.28556251525879, "learning_rate": 4.491348649222059e-06, "loss": 3.453, "step": 20395 }, { "epoch": 0.20751953125, "grad_norm": 15.164170265197754, "learning_rate": 4.4911068611114e-06, "loss": 3.3485, "step": 20400 }, { "epoch": 0.20757039388020834, "grad_norm": 13.961816787719727, "learning_rate": 4.4908650220587435e-06, "loss": 3.3264, "step": 20405 }, { "epoch": 0.20762125651041666, "grad_norm": 18.67232322692871, "learning_rate": 4.490623132070278e-06, "loss": 3.4781, "step": 20410 }, { "epoch": 0.207672119140625, "grad_norm": 10.25964069366455, "learning_rate": 4.49038119115219e-06, "loss": 3.2967, "step": 20415 }, { "epoch": 0.20772298177083334, "grad_norm": 14.518511772155762, "learning_rate": 4.490139199310672e-06, "loss": 3.6741, "step": 20420 }, { "epoch": 0.20777384440104166, "grad_norm": 12.733075141906738, "learning_rate": 4.489897156551914e-06, "loss": 3.4169, "step": 20425 }, { "epoch": 0.20782470703125, "grad_norm": 8.101183891296387, "learning_rate": 4.48965506288211e-06, "loss": 3.0928, "step": 20430 }, { "epoch": 0.20787556966145834, "grad_norm": 13.758424758911133, "learning_rate": 4.489412918307452e-06, "loss": 2.9868, "step": 20435 }, { "epoch": 0.20792643229166666, "grad_norm": 13.24248218536377, "learning_rate": 4.489170722834136e-06, "loss": 3.5981, "step": 20440 }, { "epoch": 0.207977294921875, "grad_norm": 12.845187187194824, "learning_rate": 4.488928476468358e-06, "loss": 3.4161, "step": 20445 }, { "epoch": 0.20802815755208334, "grad_norm": 16.167795181274414, "learning_rate": 4.488686179216317e-06, "loss": 3.45, "step": 20450 }, { "epoch": 0.20807902018229166, "grad_norm": 10.462541580200195, "learning_rate": 4.4884438310842115e-06, "loss": 3.3036, "step": 20455 }, { "epoch": 0.2081298828125, "grad_norm": 53.6070556640625, "learning_rate": 4.488201432078241e-06, "loss": 3.3821, "step": 20460 }, { "epoch": 0.20818074544270834, "grad_norm": 9.783699035644531, "learning_rate": 4.487958982204609e-06, "loss": 3.1686, "step": 20465 }, { "epoch": 0.20823160807291666, "grad_norm": 10.521998405456543, "learning_rate": 4.487716481469517e-06, "loss": 3.6972, "step": 20470 }, { "epoch": 0.208282470703125, "grad_norm": 15.251840591430664, "learning_rate": 4.487473929879171e-06, "loss": 3.729, "step": 20475 }, { "epoch": 0.20833333333333334, "grad_norm": 7.9105987548828125, "learning_rate": 4.487231327439775e-06, "loss": 3.7705, "step": 20480 }, { "epoch": 0.20838419596354166, "grad_norm": 12.525910377502441, "learning_rate": 4.486988674157536e-06, "loss": 3.5227, "step": 20485 }, { "epoch": 0.20843505859375, "grad_norm": 12.639177322387695, "learning_rate": 4.486745970038664e-06, "loss": 3.3517, "step": 20490 }, { "epoch": 0.20848592122395834, "grad_norm": 16.748825073242188, "learning_rate": 4.486503215089366e-06, "loss": 3.1295, "step": 20495 }, { "epoch": 0.20853678385416666, "grad_norm": 14.206311225891113, "learning_rate": 4.486260409315855e-06, "loss": 3.3888, "step": 20500 }, { "epoch": 0.208587646484375, "grad_norm": 17.936237335205078, "learning_rate": 4.486017552724342e-06, "loss": 3.6911, "step": 20505 }, { "epoch": 0.20863850911458334, "grad_norm": 14.42271900177002, "learning_rate": 4.4857746453210404e-06, "loss": 3.443, "step": 20510 }, { "epoch": 0.20868937174479166, "grad_norm": 14.182038307189941, "learning_rate": 4.485531687112165e-06, "loss": 3.4927, "step": 20515 }, { "epoch": 0.208740234375, "grad_norm": 19.564302444458008, "learning_rate": 4.4852886781039325e-06, "loss": 3.5512, "step": 20520 }, { "epoch": 0.20879109700520834, "grad_norm": 14.08537483215332, "learning_rate": 4.485045618302559e-06, "loss": 3.6225, "step": 20525 }, { "epoch": 0.20884195963541666, "grad_norm": 9.4270658493042, "learning_rate": 4.4848025077142645e-06, "loss": 3.2626, "step": 20530 }, { "epoch": 0.208892822265625, "grad_norm": 16.107717514038086, "learning_rate": 4.484559346345268e-06, "loss": 3.4001, "step": 20535 }, { "epoch": 0.20894368489583334, "grad_norm": 8.756304740905762, "learning_rate": 4.484316134201792e-06, "loss": 3.0488, "step": 20540 }, { "epoch": 0.20899454752604166, "grad_norm": 10.211101531982422, "learning_rate": 4.484072871290057e-06, "loss": 4.1292, "step": 20545 }, { "epoch": 0.20904541015625, "grad_norm": 7.968142986297607, "learning_rate": 4.483829557616287e-06, "loss": 3.3119, "step": 20550 }, { "epoch": 0.20909627278645834, "grad_norm": 9.335982322692871, "learning_rate": 4.48358619318671e-06, "loss": 3.2166, "step": 20555 }, { "epoch": 0.20914713541666666, "grad_norm": 12.940924644470215, "learning_rate": 4.483342778007549e-06, "loss": 3.1356, "step": 20560 }, { "epoch": 0.209197998046875, "grad_norm": 13.300333976745605, "learning_rate": 4.483099312085034e-06, "loss": 3.3874, "step": 20565 }, { "epoch": 0.20924886067708334, "grad_norm": 14.966835021972656, "learning_rate": 4.482855795425392e-06, "loss": 3.2123, "step": 20570 }, { "epoch": 0.20929972330729166, "grad_norm": 13.067811965942383, "learning_rate": 4.482612228034856e-06, "loss": 3.2944, "step": 20575 }, { "epoch": 0.2093505859375, "grad_norm": 15.019890785217285, "learning_rate": 4.4823686099196554e-06, "loss": 3.38, "step": 20580 }, { "epoch": 0.20940144856770834, "grad_norm": 14.11163330078125, "learning_rate": 4.482124941086023e-06, "loss": 3.553, "step": 20585 }, { "epoch": 0.20945231119791666, "grad_norm": 16.385244369506836, "learning_rate": 4.481881221540195e-06, "loss": 3.4293, "step": 20590 }, { "epoch": 0.209503173828125, "grad_norm": 10.880959510803223, "learning_rate": 4.481637451288405e-06, "loss": 3.1211, "step": 20595 }, { "epoch": 0.20955403645833334, "grad_norm": 14.787332534790039, "learning_rate": 4.481393630336891e-06, "loss": 3.1561, "step": 20600 }, { "epoch": 0.20960489908854166, "grad_norm": 11.408947944641113, "learning_rate": 4.48114975869189e-06, "loss": 3.5343, "step": 20605 }, { "epoch": 0.20965576171875, "grad_norm": 15.02966022491455, "learning_rate": 4.480905836359643e-06, "loss": 3.2872, "step": 20610 }, { "epoch": 0.20970662434895834, "grad_norm": 10.301656723022461, "learning_rate": 4.480661863346389e-06, "loss": 3.6296, "step": 20615 }, { "epoch": 0.20975748697916666, "grad_norm": 14.3646879196167, "learning_rate": 4.480417839658369e-06, "loss": 3.0304, "step": 20620 }, { "epoch": 0.209808349609375, "grad_norm": 12.71937084197998, "learning_rate": 4.48017376530183e-06, "loss": 3.2242, "step": 20625 }, { "epoch": 0.20985921223958334, "grad_norm": 12.150714874267578, "learning_rate": 4.479929640283015e-06, "loss": 3.3238, "step": 20630 }, { "epoch": 0.20991007486979166, "grad_norm": 12.3690185546875, "learning_rate": 4.479685464608169e-06, "loss": 3.4552, "step": 20635 }, { "epoch": 0.2099609375, "grad_norm": 8.464226722717285, "learning_rate": 4.479441238283539e-06, "loss": 3.7959, "step": 20640 }, { "epoch": 0.21001180013020834, "grad_norm": 11.325082778930664, "learning_rate": 4.479196961315374e-06, "loss": 3.5601, "step": 20645 }, { "epoch": 0.21006266276041666, "grad_norm": 13.100303649902344, "learning_rate": 4.478952633709925e-06, "loss": 3.3486, "step": 20650 }, { "epoch": 0.210113525390625, "grad_norm": 9.751178741455078, "learning_rate": 4.4787082554734406e-06, "loss": 3.401, "step": 20655 }, { "epoch": 0.21016438802083334, "grad_norm": 8.583620071411133, "learning_rate": 4.4784638266121755e-06, "loss": 3.4622, "step": 20660 }, { "epoch": 0.21021525065104166, "grad_norm": 8.873421669006348, "learning_rate": 4.478219347132382e-06, "loss": 3.4424, "step": 20665 }, { "epoch": 0.21026611328125, "grad_norm": 8.892828941345215, "learning_rate": 4.477974817040315e-06, "loss": 3.3382, "step": 20670 }, { "epoch": 0.21031697591145834, "grad_norm": 8.137938499450684, "learning_rate": 4.477730236342231e-06, "loss": 3.3886, "step": 20675 }, { "epoch": 0.21036783854166666, "grad_norm": 9.50516414642334, "learning_rate": 4.477485605044389e-06, "loss": 3.4875, "step": 20680 }, { "epoch": 0.210418701171875, "grad_norm": 17.410694122314453, "learning_rate": 4.477240923153046e-06, "loss": 3.3188, "step": 20685 }, { "epoch": 0.21046956380208334, "grad_norm": 8.748286247253418, "learning_rate": 4.476996190674462e-06, "loss": 3.8879, "step": 20690 }, { "epoch": 0.21052042643229166, "grad_norm": 12.05586051940918, "learning_rate": 4.476751407614899e-06, "loss": 3.3138, "step": 20695 }, { "epoch": 0.2105712890625, "grad_norm": 9.442726135253906, "learning_rate": 4.4765065739806205e-06, "loss": 3.0828, "step": 20700 }, { "epoch": 0.21062215169270834, "grad_norm": 13.491192817687988, "learning_rate": 4.47626168977789e-06, "loss": 3.1086, "step": 20705 }, { "epoch": 0.21067301432291666, "grad_norm": 11.93213176727295, "learning_rate": 4.476016755012971e-06, "loss": 3.5522, "step": 20710 }, { "epoch": 0.210723876953125, "grad_norm": 14.229875564575195, "learning_rate": 4.4757717696921335e-06, "loss": 3.5604, "step": 20715 }, { "epoch": 0.21077473958333334, "grad_norm": 11.76516056060791, "learning_rate": 4.475526733821642e-06, "loss": 3.4999, "step": 20720 }, { "epoch": 0.21082560221354166, "grad_norm": 10.882475852966309, "learning_rate": 4.475281647407768e-06, "loss": 3.859, "step": 20725 }, { "epoch": 0.21087646484375, "grad_norm": 17.010963439941406, "learning_rate": 4.475036510456782e-06, "loss": 4.1897, "step": 20730 }, { "epoch": 0.21092732747395834, "grad_norm": 11.392085075378418, "learning_rate": 4.474791322974954e-06, "loss": 3.4268, "step": 20735 }, { "epoch": 0.21097819010416666, "grad_norm": 8.899847984313965, "learning_rate": 4.4745460849685584e-06, "loss": 3.8221, "step": 20740 }, { "epoch": 0.211029052734375, "grad_norm": 8.42432975769043, "learning_rate": 4.4743007964438686e-06, "loss": 3.6357, "step": 20745 }, { "epoch": 0.21107991536458334, "grad_norm": 16.582895278930664, "learning_rate": 4.474055457407162e-06, "loss": 3.4706, "step": 20750 }, { "epoch": 0.21113077799479166, "grad_norm": 13.940359115600586, "learning_rate": 4.4738100678647135e-06, "loss": 3.3978, "step": 20755 }, { "epoch": 0.211181640625, "grad_norm": 13.06972599029541, "learning_rate": 4.473564627822803e-06, "loss": 3.2053, "step": 20760 }, { "epoch": 0.21123250325520834, "grad_norm": 11.801219940185547, "learning_rate": 4.4733191372877094e-06, "loss": 3.0616, "step": 20765 }, { "epoch": 0.21128336588541666, "grad_norm": 10.158659934997559, "learning_rate": 4.4730735962657125e-06, "loss": 3.3077, "step": 20770 }, { "epoch": 0.211334228515625, "grad_norm": 15.788928031921387, "learning_rate": 4.4728280047630955e-06, "loss": 3.3533, "step": 20775 }, { "epoch": 0.21138509114583334, "grad_norm": 14.644887924194336, "learning_rate": 4.472582362786142e-06, "loss": 3.7334, "step": 20780 }, { "epoch": 0.21143595377604166, "grad_norm": 17.853012084960938, "learning_rate": 4.472336670341136e-06, "loss": 3.4043, "step": 20785 }, { "epoch": 0.21148681640625, "grad_norm": 14.728127479553223, "learning_rate": 4.472090927434364e-06, "loss": 3.3049, "step": 20790 }, { "epoch": 0.21153767903645834, "grad_norm": 69.46292114257812, "learning_rate": 4.4718451340721136e-06, "loss": 3.7458, "step": 20795 }, { "epoch": 0.21158854166666666, "grad_norm": 16.712196350097656, "learning_rate": 4.471599290260672e-06, "loss": 3.9221, "step": 20800 }, { "epoch": 0.211639404296875, "grad_norm": 15.2913236618042, "learning_rate": 4.471353396006331e-06, "loss": 3.5172, "step": 20805 }, { "epoch": 0.21169026692708334, "grad_norm": 16.197208404541016, "learning_rate": 4.4711074513153795e-06, "loss": 3.344, "step": 20810 }, { "epoch": 0.21174112955729166, "grad_norm": 10.586454391479492, "learning_rate": 4.470861456194111e-06, "loss": 3.3205, "step": 20815 }, { "epoch": 0.2117919921875, "grad_norm": 13.682154655456543, "learning_rate": 4.470615410648819e-06, "loss": 3.408, "step": 20820 }, { "epoch": 0.21184285481770834, "grad_norm": 8.82666015625, "learning_rate": 4.4703693146858e-06, "loss": 2.9437, "step": 20825 }, { "epoch": 0.21189371744791666, "grad_norm": 12.18643569946289, "learning_rate": 4.470123168311349e-06, "loss": 3.5294, "step": 20830 }, { "epoch": 0.211944580078125, "grad_norm": 13.015913009643555, "learning_rate": 4.469876971531763e-06, "loss": 2.9248, "step": 20835 }, { "epoch": 0.21199544270833334, "grad_norm": 13.3177490234375, "learning_rate": 4.469630724353342e-06, "loss": 3.1084, "step": 20840 }, { "epoch": 0.21204630533854166, "grad_norm": 9.608677864074707, "learning_rate": 4.469384426782385e-06, "loss": 3.246, "step": 20845 }, { "epoch": 0.21209716796875, "grad_norm": 13.394899368286133, "learning_rate": 4.469138078825195e-06, "loss": 3.2107, "step": 20850 }, { "epoch": 0.21214803059895834, "grad_norm": 10.666743278503418, "learning_rate": 4.468891680488074e-06, "loss": 3.6222, "step": 20855 }, { "epoch": 0.21219889322916666, "grad_norm": 11.721149444580078, "learning_rate": 4.468645231777326e-06, "loss": 3.6107, "step": 20860 }, { "epoch": 0.212249755859375, "grad_norm": 12.614690780639648, "learning_rate": 4.4683987326992565e-06, "loss": 3.3962, "step": 20865 }, { "epoch": 0.21230061848958334, "grad_norm": 10.944849967956543, "learning_rate": 4.468152183260172e-06, "loss": 3.2563, "step": 20870 }, { "epoch": 0.21235148111979166, "grad_norm": 11.871996879577637, "learning_rate": 4.467905583466379e-06, "loss": 3.0758, "step": 20875 }, { "epoch": 0.21240234375, "grad_norm": 13.066951751708984, "learning_rate": 4.46765893332419e-06, "loss": 3.5028, "step": 20880 }, { "epoch": 0.21245320638020834, "grad_norm": 15.884061813354492, "learning_rate": 4.467412232839913e-06, "loss": 3.4618, "step": 20885 }, { "epoch": 0.21250406901041666, "grad_norm": 9.060402870178223, "learning_rate": 4.46716548201986e-06, "loss": 3.4159, "step": 20890 }, { "epoch": 0.212554931640625, "grad_norm": 9.965991973876953, "learning_rate": 4.466918680870344e-06, "loss": 3.4609, "step": 20895 }, { "epoch": 0.21260579427083334, "grad_norm": 12.760985374450684, "learning_rate": 4.46667182939768e-06, "loss": 3.7191, "step": 20900 }, { "epoch": 0.21265665690104166, "grad_norm": 12.720123291015625, "learning_rate": 4.466424927608184e-06, "loss": 3.3874, "step": 20905 }, { "epoch": 0.21270751953125, "grad_norm": 10.351378440856934, "learning_rate": 4.466177975508172e-06, "loss": 3.3034, "step": 20910 }, { "epoch": 0.21275838216145834, "grad_norm": 9.52064037322998, "learning_rate": 4.465930973103963e-06, "loss": 3.4826, "step": 20915 }, { "epoch": 0.21280924479166666, "grad_norm": 13.159667015075684, "learning_rate": 4.465683920401875e-06, "loss": 3.4415, "step": 20920 }, { "epoch": 0.212860107421875, "grad_norm": 14.236352920532227, "learning_rate": 4.465436817408231e-06, "loss": 3.261, "step": 20925 }, { "epoch": 0.21291097005208334, "grad_norm": 10.393044471740723, "learning_rate": 4.465189664129351e-06, "loss": 3.2247, "step": 20930 }, { "epoch": 0.21296183268229166, "grad_norm": 8.096722602844238, "learning_rate": 4.46494246057156e-06, "loss": 3.2013, "step": 20935 }, { "epoch": 0.2130126953125, "grad_norm": 15.64661693572998, "learning_rate": 4.4646952067411805e-06, "loss": 3.1279, "step": 20940 }, { "epoch": 0.21306355794270834, "grad_norm": 11.443922996520996, "learning_rate": 4.4644479026445416e-06, "loss": 3.0867, "step": 20945 }, { "epoch": 0.21311442057291666, "grad_norm": 8.727100372314453, "learning_rate": 4.464200548287968e-06, "loss": 3.3102, "step": 20950 }, { "epoch": 0.213165283203125, "grad_norm": 16.262468338012695, "learning_rate": 4.463953143677788e-06, "loss": 3.2918, "step": 20955 }, { "epoch": 0.21321614583333334, "grad_norm": 13.321330070495605, "learning_rate": 4.463705688820333e-06, "loss": 3.2454, "step": 20960 }, { "epoch": 0.21326700846354166, "grad_norm": 11.59324836730957, "learning_rate": 4.463458183721934e-06, "loss": 3.6288, "step": 20965 }, { "epoch": 0.21331787109375, "grad_norm": 14.173730850219727, "learning_rate": 4.463210628388922e-06, "loss": 3.0699, "step": 20970 }, { "epoch": 0.21336873372395834, "grad_norm": 15.06574821472168, "learning_rate": 4.462963022827632e-06, "loss": 3.6585, "step": 20975 }, { "epoch": 0.21341959635416666, "grad_norm": 9.580982208251953, "learning_rate": 4.462715367044398e-06, "loss": 3.5402, "step": 20980 }, { "epoch": 0.213470458984375, "grad_norm": 6.7173027992248535, "learning_rate": 4.462467661045556e-06, "loss": 3.4419, "step": 20985 }, { "epoch": 0.21352132161458334, "grad_norm": 9.555313110351562, "learning_rate": 4.462219904837445e-06, "loss": 3.2101, "step": 20990 }, { "epoch": 0.21357218424479166, "grad_norm": 14.696534156799316, "learning_rate": 4.461972098426402e-06, "loss": 3.6329, "step": 20995 }, { "epoch": 0.213623046875, "grad_norm": 13.641729354858398, "learning_rate": 4.461724241818769e-06, "loss": 3.6916, "step": 21000 }, { "epoch": 0.21367390950520834, "grad_norm": 13.780049324035645, "learning_rate": 4.461476335020886e-06, "loss": 3.3034, "step": 21005 }, { "epoch": 0.21372477213541666, "grad_norm": 13.071630477905273, "learning_rate": 4.461228378039096e-06, "loss": 3.2033, "step": 21010 }, { "epoch": 0.213775634765625, "grad_norm": 10.242560386657715, "learning_rate": 4.460980370879742e-06, "loss": 3.6485, "step": 21015 }, { "epoch": 0.21382649739583334, "grad_norm": 13.603046417236328, "learning_rate": 4.46073231354917e-06, "loss": 3.4695, "step": 21020 }, { "epoch": 0.21387736002604166, "grad_norm": 14.24183464050293, "learning_rate": 4.460484206053727e-06, "loss": 3.5122, "step": 21025 }, { "epoch": 0.21392822265625, "grad_norm": 12.189628601074219, "learning_rate": 4.46023604839976e-06, "loss": 3.4621, "step": 21030 }, { "epoch": 0.21397908528645834, "grad_norm": 15.0325288772583, "learning_rate": 4.459987840593618e-06, "loss": 3.6319, "step": 21035 }, { "epoch": 0.21402994791666666, "grad_norm": 15.047171592712402, "learning_rate": 4.4597395826416525e-06, "loss": 3.1841, "step": 21040 }, { "epoch": 0.214080810546875, "grad_norm": 11.110899925231934, "learning_rate": 4.459491274550214e-06, "loss": 3.261, "step": 21045 }, { "epoch": 0.21413167317708334, "grad_norm": 10.35274887084961, "learning_rate": 4.459242916325656e-06, "loss": 3.469, "step": 21050 }, { "epoch": 0.21418253580729166, "grad_norm": 8.398360252380371, "learning_rate": 4.4589945079743315e-06, "loss": 3.7006, "step": 21055 }, { "epoch": 0.2142333984375, "grad_norm": 13.217538833618164, "learning_rate": 4.458746049502597e-06, "loss": 3.3198, "step": 21060 }, { "epoch": 0.21428426106770834, "grad_norm": 12.722177505493164, "learning_rate": 4.4584975409168095e-06, "loss": 3.3009, "step": 21065 }, { "epoch": 0.21433512369791666, "grad_norm": 11.289009094238281, "learning_rate": 4.458248982223327e-06, "loss": 3.4066, "step": 21070 }, { "epoch": 0.214385986328125, "grad_norm": 13.187050819396973, "learning_rate": 4.458000373428507e-06, "loss": 3.5669, "step": 21075 }, { "epoch": 0.21443684895833334, "grad_norm": 13.233553886413574, "learning_rate": 4.457751714538713e-06, "loss": 3.1861, "step": 21080 }, { "epoch": 0.21448771158854166, "grad_norm": 11.534300804138184, "learning_rate": 4.457503005560305e-06, "loss": 3.4397, "step": 21085 }, { "epoch": 0.21453857421875, "grad_norm": 17.411758422851562, "learning_rate": 4.457254246499646e-06, "loss": 3.6133, "step": 21090 }, { "epoch": 0.21458943684895834, "grad_norm": 13.31394100189209, "learning_rate": 4.457005437363102e-06, "loss": 2.982, "step": 21095 }, { "epoch": 0.21464029947916666, "grad_norm": 8.906242370605469, "learning_rate": 4.4567565781570374e-06, "loss": 4.1482, "step": 21100 }, { "epoch": 0.214691162109375, "grad_norm": 13.039563179016113, "learning_rate": 4.456507668887819e-06, "loss": 3.5613, "step": 21105 }, { "epoch": 0.21474202473958334, "grad_norm": 8.964936256408691, "learning_rate": 4.456258709561817e-06, "loss": 3.5627, "step": 21110 }, { "epoch": 0.21479288736979166, "grad_norm": 10.239699363708496, "learning_rate": 4.456009700185398e-06, "loss": 3.1105, "step": 21115 }, { "epoch": 0.21484375, "grad_norm": 12.297407150268555, "learning_rate": 4.455760640764935e-06, "loss": 3.0417, "step": 21120 }, { "epoch": 0.21489461263020834, "grad_norm": 11.456560134887695, "learning_rate": 4.4555115313068e-06, "loss": 3.0432, "step": 21125 }, { "epoch": 0.21494547526041666, "grad_norm": 15.854055404663086, "learning_rate": 4.455262371817366e-06, "loss": 3.6049, "step": 21130 }, { "epoch": 0.214996337890625, "grad_norm": 13.468208312988281, "learning_rate": 4.455013162303007e-06, "loss": 3.3275, "step": 21135 }, { "epoch": 0.21504720052083334, "grad_norm": 15.826327323913574, "learning_rate": 4.454763902770099e-06, "loss": 3.4567, "step": 21140 }, { "epoch": 0.21509806315104166, "grad_norm": 14.552145957946777, "learning_rate": 4.454514593225022e-06, "loss": 3.3701, "step": 21145 }, { "epoch": 0.21514892578125, "grad_norm": 14.960928916931152, "learning_rate": 4.454265233674151e-06, "loss": 3.3224, "step": 21150 }, { "epoch": 0.21519978841145834, "grad_norm": 14.692644119262695, "learning_rate": 4.454015824123867e-06, "loss": 3.4464, "step": 21155 }, { "epoch": 0.21525065104166666, "grad_norm": 14.767197608947754, "learning_rate": 4.453766364580552e-06, "loss": 3.2771, "step": 21160 }, { "epoch": 0.215301513671875, "grad_norm": 12.008683204650879, "learning_rate": 4.453516855050588e-06, "loss": 3.2921, "step": 21165 }, { "epoch": 0.21535237630208334, "grad_norm": 469.43402099609375, "learning_rate": 4.453267295540357e-06, "loss": 3.4084, "step": 21170 }, { "epoch": 0.21540323893229166, "grad_norm": 9.679253578186035, "learning_rate": 4.453017686056246e-06, "loss": 3.5274, "step": 21175 }, { "epoch": 0.2154541015625, "grad_norm": 11.721029281616211, "learning_rate": 4.45276802660464e-06, "loss": 3.6015, "step": 21180 }, { "epoch": 0.21550496419270834, "grad_norm": 14.105132102966309, "learning_rate": 4.452518317191928e-06, "loss": 3.498, "step": 21185 }, { "epoch": 0.21555582682291666, "grad_norm": 14.37909984588623, "learning_rate": 4.452268557824497e-06, "loss": 3.1592, "step": 21190 }, { "epoch": 0.215606689453125, "grad_norm": 15.932445526123047, "learning_rate": 4.452018748508737e-06, "loss": 3.2462, "step": 21195 }, { "epoch": 0.21565755208333334, "grad_norm": 14.666515350341797, "learning_rate": 4.451768889251041e-06, "loss": 3.7043, "step": 21200 }, { "epoch": 0.21570841471354166, "grad_norm": 6.903579235076904, "learning_rate": 4.4515189800578e-06, "loss": 3.0669, "step": 21205 }, { "epoch": 0.21575927734375, "grad_norm": 13.199335098266602, "learning_rate": 4.451269020935409e-06, "loss": 3.1371, "step": 21210 }, { "epoch": 0.21581013997395834, "grad_norm": 11.0657320022583, "learning_rate": 4.451019011890262e-06, "loss": 2.911, "step": 21215 }, { "epoch": 0.21586100260416666, "grad_norm": 10.579645156860352, "learning_rate": 4.450768952928756e-06, "loss": 3.5178, "step": 21220 }, { "epoch": 0.215911865234375, "grad_norm": 8.820984840393066, "learning_rate": 4.450518844057289e-06, "loss": 3.3613, "step": 21225 }, { "epoch": 0.21596272786458334, "grad_norm": 16.864948272705078, "learning_rate": 4.4502686852822595e-06, "loss": 3.5213, "step": 21230 }, { "epoch": 0.21601359049479166, "grad_norm": 8.104302406311035, "learning_rate": 4.450018476610068e-06, "loss": 3.2945, "step": 21235 }, { "epoch": 0.216064453125, "grad_norm": 7.796335220336914, "learning_rate": 4.449768218047117e-06, "loss": 3.93, "step": 21240 }, { "epoch": 0.21611531575520834, "grad_norm": 12.010110855102539, "learning_rate": 4.449517909599807e-06, "loss": 3.0726, "step": 21245 }, { "epoch": 0.21616617838541666, "grad_norm": 7.897410869598389, "learning_rate": 4.449267551274543e-06, "loss": 3.2575, "step": 21250 }, { "epoch": 0.216217041015625, "grad_norm": 14.556855201721191, "learning_rate": 4.4490171430777315e-06, "loss": 3.2561, "step": 21255 }, { "epoch": 0.21626790364583334, "grad_norm": 15.189546585083008, "learning_rate": 4.448766685015778e-06, "loss": 3.2168, "step": 21260 }, { "epoch": 0.21631876627604166, "grad_norm": 9.562654495239258, "learning_rate": 4.448516177095092e-06, "loss": 3.527, "step": 21265 }, { "epoch": 0.21636962890625, "grad_norm": 14.520811080932617, "learning_rate": 4.448265619322081e-06, "loss": 3.447, "step": 21270 }, { "epoch": 0.21642049153645834, "grad_norm": 8.628990173339844, "learning_rate": 4.448015011703155e-06, "loss": 3.8215, "step": 21275 }, { "epoch": 0.21647135416666666, "grad_norm": 13.623302459716797, "learning_rate": 4.4477643542447265e-06, "loss": 3.2572, "step": 21280 }, { "epoch": 0.216522216796875, "grad_norm": 14.402774810791016, "learning_rate": 4.44751364695321e-06, "loss": 3.4455, "step": 21285 }, { "epoch": 0.21657307942708334, "grad_norm": 9.292447090148926, "learning_rate": 4.447262889835018e-06, "loss": 3.407, "step": 21290 }, { "epoch": 0.21662394205729166, "grad_norm": 13.544245719909668, "learning_rate": 4.447012082896566e-06, "loss": 3.1637, "step": 21295 }, { "epoch": 0.2166748046875, "grad_norm": 10.58362865447998, "learning_rate": 4.446761226144272e-06, "loss": 3.0985, "step": 21300 }, { "epoch": 0.21672566731770834, "grad_norm": 13.475932121276855, "learning_rate": 4.446510319584553e-06, "loss": 4.2408, "step": 21305 }, { "epoch": 0.21677652994791666, "grad_norm": 13.142467498779297, "learning_rate": 4.446259363223829e-06, "loss": 3.1587, "step": 21310 }, { "epoch": 0.216827392578125, "grad_norm": 16.452293395996094, "learning_rate": 4.44600835706852e-06, "loss": 3.46, "step": 21315 }, { "epoch": 0.21687825520833334, "grad_norm": 13.97862434387207, "learning_rate": 4.445757301125049e-06, "loss": 3.6838, "step": 21320 }, { "epoch": 0.21692911783854166, "grad_norm": 14.464639663696289, "learning_rate": 4.445506195399839e-06, "loss": 3.5322, "step": 21325 }, { "epoch": 0.21697998046875, "grad_norm": 9.015607833862305, "learning_rate": 4.445255039899313e-06, "loss": 3.4834, "step": 21330 }, { "epoch": 0.21703084309895834, "grad_norm": 9.246883392333984, "learning_rate": 4.4450038346298985e-06, "loss": 3.1686, "step": 21335 }, { "epoch": 0.21708170572916666, "grad_norm": 9.191157341003418, "learning_rate": 4.4447525795980224e-06, "loss": 3.4368, "step": 21340 }, { "epoch": 0.217132568359375, "grad_norm": 13.155099868774414, "learning_rate": 4.444501274810112e-06, "loss": 3.3268, "step": 21345 }, { "epoch": 0.21718343098958334, "grad_norm": 11.375064849853516, "learning_rate": 4.444249920272598e-06, "loss": 3.153, "step": 21350 }, { "epoch": 0.21723429361979166, "grad_norm": 10.894318580627441, "learning_rate": 4.443998515991909e-06, "loss": 3.6735, "step": 21355 }, { "epoch": 0.21728515625, "grad_norm": 11.658163070678711, "learning_rate": 4.443747061974481e-06, "loss": 3.1592, "step": 21360 }, { "epoch": 0.21733601888020834, "grad_norm": 15.226777076721191, "learning_rate": 4.443495558226744e-06, "loss": 3.4328, "step": 21365 }, { "epoch": 0.21738688151041666, "grad_norm": 12.534459114074707, "learning_rate": 4.4432440047551325e-06, "loss": 3.5584, "step": 21370 }, { "epoch": 0.217437744140625, "grad_norm": 16.684511184692383, "learning_rate": 4.442992401566085e-06, "loss": 3.4321, "step": 21375 }, { "epoch": 0.21748860677083334, "grad_norm": 10.544679641723633, "learning_rate": 4.442740748666038e-06, "loss": 3.3937, "step": 21380 }, { "epoch": 0.21753946940104166, "grad_norm": 14.9170560836792, "learning_rate": 4.442489046061429e-06, "loss": 3.4367, "step": 21385 }, { "epoch": 0.21759033203125, "grad_norm": 14.46596622467041, "learning_rate": 4.442237293758698e-06, "loss": 3.539, "step": 21390 }, { "epoch": 0.21764119466145834, "grad_norm": 11.867507934570312, "learning_rate": 4.4419854917642854e-06, "loss": 3.107, "step": 21395 }, { "epoch": 0.21769205729166666, "grad_norm": 17.437856674194336, "learning_rate": 4.441733640084636e-06, "loss": 3.3885, "step": 21400 }, { "epoch": 0.217742919921875, "grad_norm": 13.814154624938965, "learning_rate": 4.44148173872619e-06, "loss": 3.4518, "step": 21405 }, { "epoch": 0.21779378255208334, "grad_norm": 14.542771339416504, "learning_rate": 4.441229787695395e-06, "loss": 3.4028, "step": 21410 }, { "epoch": 0.21784464518229166, "grad_norm": 8.311295509338379, "learning_rate": 4.440977786998696e-06, "loss": 3.2889, "step": 21415 }, { "epoch": 0.2178955078125, "grad_norm": 7.098788261413574, "learning_rate": 4.44072573664254e-06, "loss": 3.1133, "step": 21420 }, { "epoch": 0.21794637044270834, "grad_norm": 12.52330493927002, "learning_rate": 4.440473636633376e-06, "loss": 3.0992, "step": 21425 }, { "epoch": 0.21799723307291666, "grad_norm": 11.01602554321289, "learning_rate": 4.4402214869776536e-06, "loss": 3.4034, "step": 21430 }, { "epoch": 0.218048095703125, "grad_norm": 13.289205551147461, "learning_rate": 4.439969287681826e-06, "loss": 3.6383, "step": 21435 }, { "epoch": 0.21809895833333334, "grad_norm": 11.71587085723877, "learning_rate": 4.4397170387523425e-06, "loss": 3.7132, "step": 21440 }, { "epoch": 0.21814982096354166, "grad_norm": 13.405722618103027, "learning_rate": 4.439464740195658e-06, "loss": 3.3589, "step": 21445 }, { "epoch": 0.21820068359375, "grad_norm": 8.185508728027344, "learning_rate": 4.439212392018228e-06, "loss": 3.276, "step": 21450 }, { "epoch": 0.21825154622395834, "grad_norm": 14.30631160736084, "learning_rate": 4.438959994226509e-06, "loss": 3.8529, "step": 21455 }, { "epoch": 0.21830240885416666, "grad_norm": 11.583393096923828, "learning_rate": 4.4387075468269595e-06, "loss": 3.1893, "step": 21460 }, { "epoch": 0.218353271484375, "grad_norm": 11.868514060974121, "learning_rate": 4.438455049826035e-06, "loss": 3.4185, "step": 21465 }, { "epoch": 0.21840413411458334, "grad_norm": 11.683053970336914, "learning_rate": 4.438202503230198e-06, "loss": 3.144, "step": 21470 }, { "epoch": 0.21845499674479166, "grad_norm": 12.934630393981934, "learning_rate": 4.437949907045909e-06, "loss": 3.142, "step": 21475 }, { "epoch": 0.218505859375, "grad_norm": 12.29751968383789, "learning_rate": 4.437697261279632e-06, "loss": 3.4487, "step": 21480 }, { "epoch": 0.21855672200520834, "grad_norm": 15.596671104431152, "learning_rate": 4.43744456593783e-06, "loss": 3.1818, "step": 21485 }, { "epoch": 0.21860758463541666, "grad_norm": 12.191415786743164, "learning_rate": 4.4371918210269665e-06, "loss": 3.4811, "step": 21490 }, { "epoch": 0.218658447265625, "grad_norm": 15.3618803024292, "learning_rate": 4.43693902655351e-06, "loss": 3.2084, "step": 21495 }, { "epoch": 0.21870930989583334, "grad_norm": 15.887072563171387, "learning_rate": 4.436686182523928e-06, "loss": 3.3075, "step": 21500 }, { "epoch": 0.21876017252604166, "grad_norm": 13.241227149963379, "learning_rate": 4.436433288944689e-06, "loss": 2.9199, "step": 21505 }, { "epoch": 0.21881103515625, "grad_norm": 9.028556823730469, "learning_rate": 4.436180345822263e-06, "loss": 3.3118, "step": 21510 }, { "epoch": 0.21886189778645834, "grad_norm": 7.4255452156066895, "learning_rate": 4.435927353163122e-06, "loss": 3.438, "step": 21515 }, { "epoch": 0.21891276041666666, "grad_norm": 8.402618408203125, "learning_rate": 4.435674310973739e-06, "loss": 3.6758, "step": 21520 }, { "epoch": 0.218963623046875, "grad_norm": 11.299454689025879, "learning_rate": 4.435421219260587e-06, "loss": 3.4711, "step": 21525 }, { "epoch": 0.21901448567708334, "grad_norm": 15.088027000427246, "learning_rate": 4.4351680780301425e-06, "loss": 3.558, "step": 21530 }, { "epoch": 0.21906534830729166, "grad_norm": 15.521208763122559, "learning_rate": 4.434914887288881e-06, "loss": 3.3314, "step": 21535 }, { "epoch": 0.2191162109375, "grad_norm": 14.964542388916016, "learning_rate": 4.4346616470432795e-06, "loss": 3.2707, "step": 21540 }, { "epoch": 0.21916707356770834, "grad_norm": 8.358473777770996, "learning_rate": 4.43440835729982e-06, "loss": 3.2384, "step": 21545 }, { "epoch": 0.21921793619791666, "grad_norm": 12.672173500061035, "learning_rate": 4.43415501806498e-06, "loss": 3.2498, "step": 21550 }, { "epoch": 0.219268798828125, "grad_norm": 13.793452262878418, "learning_rate": 4.433901629345243e-06, "loss": 3.5783, "step": 21555 }, { "epoch": 0.21931966145833334, "grad_norm": 14.273289680480957, "learning_rate": 4.433648191147091e-06, "loss": 3.5216, "step": 21560 }, { "epoch": 0.21937052408854166, "grad_norm": 15.174352645874023, "learning_rate": 4.433394703477009e-06, "loss": 3.5776, "step": 21565 }, { "epoch": 0.21942138671875, "grad_norm": 12.471453666687012, "learning_rate": 4.43314116634148e-06, "loss": 3.6904, "step": 21570 }, { "epoch": 0.21947224934895834, "grad_norm": 9.688103675842285, "learning_rate": 4.4328875797469944e-06, "loss": 3.4065, "step": 21575 }, { "epoch": 0.21952311197916666, "grad_norm": 14.465635299682617, "learning_rate": 4.432633943700038e-06, "loss": 3.5407, "step": 21580 }, { "epoch": 0.219573974609375, "grad_norm": 14.079458236694336, "learning_rate": 4.432380258207099e-06, "loss": 3.0966, "step": 21585 }, { "epoch": 0.21962483723958334, "grad_norm": 10.63652229309082, "learning_rate": 4.43212652327467e-06, "loss": 3.5664, "step": 21590 }, { "epoch": 0.21967569986979166, "grad_norm": 13.68020248413086, "learning_rate": 4.431872738909242e-06, "loss": 3.1991, "step": 21595 }, { "epoch": 0.2197265625, "grad_norm": 13.262728691101074, "learning_rate": 4.431618905117308e-06, "loss": 3.369, "step": 21600 }, { "epoch": 0.21977742513020834, "grad_norm": 10.705604553222656, "learning_rate": 4.431365021905361e-06, "loss": 3.725, "step": 21605 }, { "epoch": 0.21982828776041666, "grad_norm": 15.042895317077637, "learning_rate": 4.431111089279898e-06, "loss": 3.414, "step": 21610 }, { "epoch": 0.219879150390625, "grad_norm": 15.064384460449219, "learning_rate": 4.430857107247416e-06, "loss": 3.2864, "step": 21615 }, { "epoch": 0.21993001302083334, "grad_norm": 8.881093978881836, "learning_rate": 4.430603075814413e-06, "loss": 3.4096, "step": 21620 }, { "epoch": 0.21998087565104166, "grad_norm": 14.863802909851074, "learning_rate": 4.430348994987387e-06, "loss": 3.3993, "step": 21625 }, { "epoch": 0.22003173828125, "grad_norm": 12.123024940490723, "learning_rate": 4.430094864772841e-06, "loss": 3.7743, "step": 21630 }, { "epoch": 0.22008260091145834, "grad_norm": 10.595017433166504, "learning_rate": 4.4298406851772734e-06, "loss": 3.6968, "step": 21635 }, { "epoch": 0.22013346354166666, "grad_norm": 10.866302490234375, "learning_rate": 4.42958645620719e-06, "loss": 3.0981, "step": 21640 }, { "epoch": 0.220184326171875, "grad_norm": 13.092309951782227, "learning_rate": 4.429332177869094e-06, "loss": 3.1676, "step": 21645 }, { "epoch": 0.22023518880208334, "grad_norm": 12.232417106628418, "learning_rate": 4.429077850169493e-06, "loss": 3.4387, "step": 21650 }, { "epoch": 0.22028605143229166, "grad_norm": 14.34591293334961, "learning_rate": 4.428823473114891e-06, "loss": 3.4943, "step": 21655 }, { "epoch": 0.2203369140625, "grad_norm": 16.25722312927246, "learning_rate": 4.428569046711799e-06, "loss": 3.2681, "step": 21660 }, { "epoch": 0.22038777669270834, "grad_norm": 8.571758270263672, "learning_rate": 4.428314570966724e-06, "loss": 3.0704, "step": 21665 }, { "epoch": 0.22043863932291666, "grad_norm": 16.985572814941406, "learning_rate": 4.428060045886178e-06, "loss": 3.6197, "step": 21670 }, { "epoch": 0.220489501953125, "grad_norm": 9.714963912963867, "learning_rate": 4.427805471476673e-06, "loss": 3.2242, "step": 21675 }, { "epoch": 0.22054036458333334, "grad_norm": 8.596430778503418, "learning_rate": 4.427550847744721e-06, "loss": 3.3244, "step": 21680 }, { "epoch": 0.22059122721354166, "grad_norm": 12.92138957977295, "learning_rate": 4.4272961746968394e-06, "loss": 3.5212, "step": 21685 }, { "epoch": 0.22064208984375, "grad_norm": 11.397533416748047, "learning_rate": 4.427041452339541e-06, "loss": 3.32, "step": 21690 }, { "epoch": 0.22069295247395834, "grad_norm": 14.433199882507324, "learning_rate": 4.426786680679344e-06, "loss": 3.5267, "step": 21695 }, { "epoch": 0.22074381510416666, "grad_norm": 13.278440475463867, "learning_rate": 4.426531859722765e-06, "loss": 3.1954, "step": 21700 }, { "epoch": 0.220794677734375, "grad_norm": 13.766365051269531, "learning_rate": 4.426276989476327e-06, "loss": 3.2458, "step": 21705 }, { "epoch": 0.22084554036458334, "grad_norm": 12.732465744018555, "learning_rate": 4.4260220699465474e-06, "loss": 3.4943, "step": 21710 }, { "epoch": 0.22089640299479166, "grad_norm": 9.512740135192871, "learning_rate": 4.42576710113995e-06, "loss": 3.458, "step": 21715 }, { "epoch": 0.220947265625, "grad_norm": 9.880742073059082, "learning_rate": 4.425512083063058e-06, "loss": 3.0285, "step": 21720 }, { "epoch": 0.22099812825520834, "grad_norm": 11.119718551635742, "learning_rate": 4.425257015722396e-06, "loss": 2.956, "step": 21725 }, { "epoch": 0.22104899088541666, "grad_norm": 10.98831844329834, "learning_rate": 4.4250018991244895e-06, "loss": 3.2721, "step": 21730 }, { "epoch": 0.221099853515625, "grad_norm": 8.650774002075195, "learning_rate": 4.424746733275864e-06, "loss": 3.2025, "step": 21735 }, { "epoch": 0.22115071614583334, "grad_norm": 14.937943458557129, "learning_rate": 4.424491518183052e-06, "loss": 3.3219, "step": 21740 }, { "epoch": 0.22120157877604166, "grad_norm": 13.311105728149414, "learning_rate": 4.424236253852579e-06, "loss": 3.4122, "step": 21745 }, { "epoch": 0.22125244140625, "grad_norm": 13.529938697814941, "learning_rate": 4.423980940290978e-06, "loss": 3.0956, "step": 21750 }, { "epoch": 0.22130330403645834, "grad_norm": 8.813322067260742, "learning_rate": 4.423725577504781e-06, "loss": 3.3761, "step": 21755 }, { "epoch": 0.22135416666666666, "grad_norm": 12.712224006652832, "learning_rate": 4.423470165500521e-06, "loss": 2.9878, "step": 21760 }, { "epoch": 0.221405029296875, "grad_norm": 23.10104751586914, "learning_rate": 4.423214704284733e-06, "loss": 3.9616, "step": 21765 }, { "epoch": 0.22145589192708334, "grad_norm": 12.10202693939209, "learning_rate": 4.422959193863953e-06, "loss": 3.2412, "step": 21770 }, { "epoch": 0.22150675455729166, "grad_norm": 13.642451286315918, "learning_rate": 4.422703634244716e-06, "loss": 3.6663, "step": 21775 }, { "epoch": 0.2215576171875, "grad_norm": 13.663246154785156, "learning_rate": 4.422448025433564e-06, "loss": 3.497, "step": 21780 }, { "epoch": 0.22160847981770834, "grad_norm": 12.732048988342285, "learning_rate": 4.422192367437034e-06, "loss": 3.459, "step": 21785 }, { "epoch": 0.22165934244791666, "grad_norm": 10.020483016967773, "learning_rate": 4.421936660261668e-06, "loss": 3.1006, "step": 21790 }, { "epoch": 0.221710205078125, "grad_norm": 9.460532188415527, "learning_rate": 4.421680903914009e-06, "loss": 3.3341, "step": 21795 }, { "epoch": 0.22176106770833334, "grad_norm": 7.5237860679626465, "learning_rate": 4.421425098400598e-06, "loss": 3.2786, "step": 21800 }, { "epoch": 0.22181193033854166, "grad_norm": 13.649006843566895, "learning_rate": 4.421169243727983e-06, "loss": 3.2036, "step": 21805 }, { "epoch": 0.22186279296875, "grad_norm": 15.066064834594727, "learning_rate": 4.420913339902707e-06, "loss": 3.2731, "step": 21810 }, { "epoch": 0.22191365559895834, "grad_norm": 9.609551429748535, "learning_rate": 4.420657386931319e-06, "loss": 3.3248, "step": 21815 }, { "epoch": 0.22196451822916666, "grad_norm": 9.863035202026367, "learning_rate": 4.420401384820367e-06, "loss": 3.4538, "step": 21820 }, { "epoch": 0.222015380859375, "grad_norm": 7.887870788574219, "learning_rate": 4.4201453335764e-06, "loss": 3.3845, "step": 21825 }, { "epoch": 0.22206624348958334, "grad_norm": 13.143853187561035, "learning_rate": 4.4198892332059705e-06, "loss": 3.3623, "step": 21830 }, { "epoch": 0.22211710611979166, "grad_norm": 15.148838996887207, "learning_rate": 4.41963308371563e-06, "loss": 4.2309, "step": 21835 }, { "epoch": 0.22216796875, "grad_norm": 15.139830589294434, "learning_rate": 4.419376885111932e-06, "loss": 3.1964, "step": 21840 }, { "epoch": 0.22221883138020834, "grad_norm": 15.096305847167969, "learning_rate": 4.419120637401432e-06, "loss": 3.2714, "step": 21845 }, { "epoch": 0.22226969401041666, "grad_norm": 15.630721092224121, "learning_rate": 4.418864340590684e-06, "loss": 3.4326, "step": 21850 }, { "epoch": 0.222320556640625, "grad_norm": 12.31386661529541, "learning_rate": 4.418607994686248e-06, "loss": 3.7328, "step": 21855 }, { "epoch": 0.22237141927083334, "grad_norm": 7.927239894866943, "learning_rate": 4.41835159969468e-06, "loss": 3.5338, "step": 21860 }, { "epoch": 0.22242228190104166, "grad_norm": 9.581936836242676, "learning_rate": 4.418095155622542e-06, "loss": 3.3615, "step": 21865 }, { "epoch": 0.22247314453125, "grad_norm": 14.914663314819336, "learning_rate": 4.4178386624763935e-06, "loss": 3.5794, "step": 21870 }, { "epoch": 0.22252400716145834, "grad_norm": 9.037361145019531, "learning_rate": 4.417582120262798e-06, "loss": 3.6717, "step": 21875 }, { "epoch": 0.22257486979166666, "grad_norm": 13.858770370483398, "learning_rate": 4.4173255289883175e-06, "loss": 3.3966, "step": 21880 }, { "epoch": 0.222625732421875, "grad_norm": 13.544123649597168, "learning_rate": 4.41706888865952e-06, "loss": 3.4346, "step": 21885 }, { "epoch": 0.22267659505208334, "grad_norm": 14.441933631896973, "learning_rate": 4.416812199282966e-06, "loss": 3.564, "step": 21890 }, { "epoch": 0.22272745768229166, "grad_norm": 8.17879581451416, "learning_rate": 4.416555460865228e-06, "loss": 3.2117, "step": 21895 }, { "epoch": 0.2227783203125, "grad_norm": 16.045257568359375, "learning_rate": 4.416298673412874e-06, "loss": 3.6617, "step": 21900 }, { "epoch": 0.22282918294270834, "grad_norm": 8.84123706817627, "learning_rate": 4.416041836932471e-06, "loss": 3.3166, "step": 21905 }, { "epoch": 0.22288004557291666, "grad_norm": 8.175050735473633, "learning_rate": 4.415784951430592e-06, "loss": 3.8243, "step": 21910 }, { "epoch": 0.222930908203125, "grad_norm": 12.569129943847656, "learning_rate": 4.415528016913809e-06, "loss": 3.275, "step": 21915 }, { "epoch": 0.22298177083333334, "grad_norm": 9.564921379089355, "learning_rate": 4.415271033388696e-06, "loss": 2.9735, "step": 21920 }, { "epoch": 0.22303263346354166, "grad_norm": 15.479812622070312, "learning_rate": 4.415014000861828e-06, "loss": 3.6172, "step": 21925 }, { "epoch": 0.22308349609375, "grad_norm": 8.315549850463867, "learning_rate": 4.41475691933978e-06, "loss": 3.412, "step": 21930 }, { "epoch": 0.22313435872395834, "grad_norm": 9.115899085998535, "learning_rate": 4.41449978882913e-06, "loss": 3.4299, "step": 21935 }, { "epoch": 0.22318522135416666, "grad_norm": 13.794336318969727, "learning_rate": 4.4142426093364575e-06, "loss": 3.1094, "step": 21940 }, { "epoch": 0.223236083984375, "grad_norm": 12.609228134155273, "learning_rate": 4.413985380868341e-06, "loss": 3.367, "step": 21945 }, { "epoch": 0.22328694661458334, "grad_norm": 14.703821182250977, "learning_rate": 4.413728103431362e-06, "loss": 3.9406, "step": 21950 }, { "epoch": 0.22333780924479166, "grad_norm": 15.378171920776367, "learning_rate": 4.413470777032104e-06, "loss": 2.9513, "step": 21955 }, { "epoch": 0.223388671875, "grad_norm": 11.910841941833496, "learning_rate": 4.413213401677149e-06, "loss": 3.1509, "step": 21960 }, { "epoch": 0.22343953450520834, "grad_norm": 14.728949546813965, "learning_rate": 4.412955977373082e-06, "loss": 3.3036, "step": 21965 }, { "epoch": 0.22349039713541666, "grad_norm": 16.039880752563477, "learning_rate": 4.412698504126491e-06, "loss": 3.3996, "step": 21970 }, { "epoch": 0.223541259765625, "grad_norm": 9.042581558227539, "learning_rate": 4.412440981943962e-06, "loss": 3.3724, "step": 21975 }, { "epoch": 0.22359212239583334, "grad_norm": 11.22207260131836, "learning_rate": 4.4121834108320824e-06, "loss": 3.3767, "step": 21980 }, { "epoch": 0.22364298502604166, "grad_norm": 9.639937400817871, "learning_rate": 4.411925790797444e-06, "loss": 3.3927, "step": 21985 }, { "epoch": 0.22369384765625, "grad_norm": 11.378259658813477, "learning_rate": 4.4116681218466386e-06, "loss": 3.2632, "step": 21990 }, { "epoch": 0.22374471028645834, "grad_norm": 12.211450576782227, "learning_rate": 4.411410403986257e-06, "loss": 3.1877, "step": 21995 }, { "epoch": 0.22379557291666666, "grad_norm": 13.47460651397705, "learning_rate": 4.411152637222893e-06, "loss": 3.4715, "step": 22000 }, { "epoch": 0.223846435546875, "grad_norm": 9.644412994384766, "learning_rate": 4.410894821563141e-06, "loss": 3.2384, "step": 22005 }, { "epoch": 0.22389729817708334, "grad_norm": 12.741238594055176, "learning_rate": 4.410636957013599e-06, "loss": 3.4233, "step": 22010 }, { "epoch": 0.22394816080729166, "grad_norm": 18.5694580078125, "learning_rate": 4.410379043580863e-06, "loss": 3.4207, "step": 22015 }, { "epoch": 0.2239990234375, "grad_norm": 13.11888313293457, "learning_rate": 4.410121081271532e-06, "loss": 3.4742, "step": 22020 }, { "epoch": 0.22404988606770834, "grad_norm": 8.950638771057129, "learning_rate": 4.4098630700922055e-06, "loss": 3.2443, "step": 22025 }, { "epoch": 0.22410074869791666, "grad_norm": 9.21252727508545, "learning_rate": 4.409605010049486e-06, "loss": 3.2816, "step": 22030 }, { "epoch": 0.224151611328125, "grad_norm": 11.447957992553711, "learning_rate": 4.409346901149973e-06, "loss": 3.6363, "step": 22035 }, { "epoch": 0.22420247395833334, "grad_norm": 14.11512565612793, "learning_rate": 4.409088743400274e-06, "loss": 3.0668, "step": 22040 }, { "epoch": 0.22425333658854166, "grad_norm": 11.56049919128418, "learning_rate": 4.408830536806991e-06, "loss": 3.6871, "step": 22045 }, { "epoch": 0.22430419921875, "grad_norm": 16.231426239013672, "learning_rate": 4.408572281376732e-06, "loss": 3.8049, "step": 22050 }, { "epoch": 0.22435506184895834, "grad_norm": 8.128877639770508, "learning_rate": 4.408313977116103e-06, "loss": 3.3654, "step": 22055 }, { "epoch": 0.22440592447916666, "grad_norm": 7.407121658325195, "learning_rate": 4.408055624031713e-06, "loss": 3.2684, "step": 22060 }, { "epoch": 0.224456787109375, "grad_norm": 17.4244384765625, "learning_rate": 4.407797222130173e-06, "loss": 3.0736, "step": 22065 }, { "epoch": 0.22450764973958334, "grad_norm": 15.093779563903809, "learning_rate": 4.407538771418092e-06, "loss": 3.4779, "step": 22070 }, { "epoch": 0.22455851236979166, "grad_norm": 10.524264335632324, "learning_rate": 4.407280271902084e-06, "loss": 3.2996, "step": 22075 }, { "epoch": 0.224609375, "grad_norm": 9.79942512512207, "learning_rate": 4.407021723588764e-06, "loss": 3.4059, "step": 22080 }, { "epoch": 0.22466023763020834, "grad_norm": 15.806706428527832, "learning_rate": 4.4067631264847436e-06, "loss": 3.5924, "step": 22085 }, { "epoch": 0.22471110026041666, "grad_norm": 17.772584915161133, "learning_rate": 4.406504480596641e-06, "loss": 3.6493, "step": 22090 }, { "epoch": 0.224761962890625, "grad_norm": 9.457268714904785, "learning_rate": 4.406245785931073e-06, "loss": 3.3044, "step": 22095 }, { "epoch": 0.22481282552083334, "grad_norm": 12.059775352478027, "learning_rate": 4.405987042494658e-06, "loss": 3.8077, "step": 22100 }, { "epoch": 0.22486368815104166, "grad_norm": 11.504251480102539, "learning_rate": 4.405728250294017e-06, "loss": 3.5037, "step": 22105 }, { "epoch": 0.22491455078125, "grad_norm": 9.41455078125, "learning_rate": 4.40546940933577e-06, "loss": 3.6421, "step": 22110 }, { "epoch": 0.22496541341145834, "grad_norm": 10.715293884277344, "learning_rate": 4.40521051962654e-06, "loss": 3.6148, "step": 22115 }, { "epoch": 0.22501627604166666, "grad_norm": 13.229304313659668, "learning_rate": 4.40495158117295e-06, "loss": 3.4736, "step": 22120 }, { "epoch": 0.225067138671875, "grad_norm": 15.428013801574707, "learning_rate": 4.404692593981626e-06, "loss": 3.6475, "step": 22125 }, { "epoch": 0.22511800130208334, "grad_norm": 16.025081634521484, "learning_rate": 4.404433558059193e-06, "loss": 3.1062, "step": 22130 }, { "epoch": 0.22516886393229166, "grad_norm": 16.080984115600586, "learning_rate": 4.404174473412279e-06, "loss": 3.6567, "step": 22135 }, { "epoch": 0.2252197265625, "grad_norm": 8.873684883117676, "learning_rate": 4.403915340047512e-06, "loss": 3.2109, "step": 22140 }, { "epoch": 0.22527058919270834, "grad_norm": 11.24513053894043, "learning_rate": 4.403656157971523e-06, "loss": 3.7436, "step": 22145 }, { "epoch": 0.22532145182291666, "grad_norm": 13.644927024841309, "learning_rate": 4.403396927190942e-06, "loss": 3.2305, "step": 22150 }, { "epoch": 0.225372314453125, "grad_norm": 6.955374717712402, "learning_rate": 4.403137647712401e-06, "loss": 3.3256, "step": 22155 }, { "epoch": 0.22542317708333334, "grad_norm": 12.21574878692627, "learning_rate": 4.402878319542536e-06, "loss": 3.2371, "step": 22160 }, { "epoch": 0.22547403971354166, "grad_norm": 13.770991325378418, "learning_rate": 4.402618942687979e-06, "loss": 3.147, "step": 22165 }, { "epoch": 0.22552490234375, "grad_norm": 11.066407203674316, "learning_rate": 4.402359517155368e-06, "loss": 3.194, "step": 22170 }, { "epoch": 0.22557576497395834, "grad_norm": 9.962047576904297, "learning_rate": 4.40210004295134e-06, "loss": 3.1988, "step": 22175 }, { "epoch": 0.22562662760416666, "grad_norm": 14.844084739685059, "learning_rate": 4.401840520082532e-06, "loss": 3.2153, "step": 22180 }, { "epoch": 0.225677490234375, "grad_norm": 13.172402381896973, "learning_rate": 4.401580948555586e-06, "loss": 3.3641, "step": 22185 }, { "epoch": 0.22572835286458334, "grad_norm": 7.525303363800049, "learning_rate": 4.401321328377142e-06, "loss": 3.2718, "step": 22190 }, { "epoch": 0.22577921549479166, "grad_norm": 11.506841659545898, "learning_rate": 4.401061659553843e-06, "loss": 2.9344, "step": 22195 }, { "epoch": 0.225830078125, "grad_norm": 13.197406768798828, "learning_rate": 4.400801942092331e-06, "loss": 3.0663, "step": 22200 }, { "epoch": 0.22588094075520834, "grad_norm": 8.670125961303711, "learning_rate": 4.400542175999253e-06, "loss": 3.2565, "step": 22205 }, { "epoch": 0.22593180338541666, "grad_norm": 11.305577278137207, "learning_rate": 4.400282361281253e-06, "loss": 3.3172, "step": 22210 }, { "epoch": 0.225982666015625, "grad_norm": 13.321239471435547, "learning_rate": 4.400022497944979e-06, "loss": 3.5229, "step": 22215 }, { "epoch": 0.22603352864583334, "grad_norm": 10.876173973083496, "learning_rate": 4.3997625859970805e-06, "loss": 3.6165, "step": 22220 }, { "epoch": 0.22608439127604166, "grad_norm": 13.838961601257324, "learning_rate": 4.399502625444206e-06, "loss": 3.4273, "step": 22225 }, { "epoch": 0.22613525390625, "grad_norm": 11.505608558654785, "learning_rate": 4.399242616293007e-06, "loss": 3.6678, "step": 22230 }, { "epoch": 0.22618611653645834, "grad_norm": 12.19419002532959, "learning_rate": 4.398982558550137e-06, "loss": 3.0976, "step": 22235 }, { "epoch": 0.22623697916666666, "grad_norm": 9.771162033081055, "learning_rate": 4.398722452222247e-06, "loss": 3.7784, "step": 22240 }, { "epoch": 0.226287841796875, "grad_norm": 11.823652267456055, "learning_rate": 4.398462297315993e-06, "loss": 3.7503, "step": 22245 }, { "epoch": 0.22633870442708334, "grad_norm": 12.599007606506348, "learning_rate": 4.398202093838032e-06, "loss": 3.747, "step": 22250 }, { "epoch": 0.22638956705729166, "grad_norm": 8.704680442810059, "learning_rate": 4.3979418417950195e-06, "loss": 2.9508, "step": 22255 }, { "epoch": 0.2264404296875, "grad_norm": 8.176292419433594, "learning_rate": 4.397681541193616e-06, "loss": 3.0066, "step": 22260 }, { "epoch": 0.22649129231770834, "grad_norm": 12.29770278930664, "learning_rate": 4.397421192040478e-06, "loss": 3.3324, "step": 22265 }, { "epoch": 0.22654215494791666, "grad_norm": 14.355680465698242, "learning_rate": 4.3971607943422715e-06, "loss": 3.0472, "step": 22270 }, { "epoch": 0.226593017578125, "grad_norm": 8.760498046875, "learning_rate": 4.396900348105654e-06, "loss": 3.1807, "step": 22275 }, { "epoch": 0.22664388020833334, "grad_norm": 13.894689559936523, "learning_rate": 4.39663985333729e-06, "loss": 3.1082, "step": 22280 }, { "epoch": 0.22669474283854166, "grad_norm": 8.41318130493164, "learning_rate": 4.396379310043847e-06, "loss": 3.2702, "step": 22285 }, { "epoch": 0.22674560546875, "grad_norm": 10.254307746887207, "learning_rate": 4.3961187182319876e-06, "loss": 3.4133, "step": 22290 }, { "epoch": 0.22679646809895834, "grad_norm": 11.506667137145996, "learning_rate": 4.39585807790838e-06, "loss": 3.3754, "step": 22295 }, { "epoch": 0.22684733072916666, "grad_norm": 14.916718482971191, "learning_rate": 4.3955973890796936e-06, "loss": 3.4474, "step": 22300 }, { "epoch": 0.226898193359375, "grad_norm": 13.130877494812012, "learning_rate": 4.3953366517525966e-06, "loss": 3.1012, "step": 22305 }, { "epoch": 0.22694905598958334, "grad_norm": 16.143596649169922, "learning_rate": 4.39507586593376e-06, "loss": 3.3571, "step": 22310 }, { "epoch": 0.22699991861979166, "grad_norm": 13.136238098144531, "learning_rate": 4.394815031629858e-06, "loss": 3.8295, "step": 22315 }, { "epoch": 0.22705078125, "grad_norm": 10.161462783813477, "learning_rate": 4.394554148847562e-06, "loss": 3.5558, "step": 22320 }, { "epoch": 0.22710164388020834, "grad_norm": 13.627985000610352, "learning_rate": 4.394293217593547e-06, "loss": 3.3345, "step": 22325 }, { "epoch": 0.22715250651041666, "grad_norm": 13.85934829711914, "learning_rate": 4.394032237874488e-06, "loss": 3.3807, "step": 22330 }, { "epoch": 0.227203369140625, "grad_norm": 7.913649559020996, "learning_rate": 4.393771209697065e-06, "loss": 3.1791, "step": 22335 }, { "epoch": 0.22725423177083334, "grad_norm": 12.486056327819824, "learning_rate": 4.393510133067954e-06, "loss": 3.2871, "step": 22340 }, { "epoch": 0.22730509440104166, "grad_norm": 9.422446250915527, "learning_rate": 4.393249007993834e-06, "loss": 3.2929, "step": 22345 }, { "epoch": 0.22735595703125, "grad_norm": 13.53770923614502, "learning_rate": 4.392987834481388e-06, "loss": 3.753, "step": 22350 }, { "epoch": 0.22740681966145834, "grad_norm": 14.302570343017578, "learning_rate": 4.3927266125372966e-06, "loss": 3.7092, "step": 22355 }, { "epoch": 0.22745768229166666, "grad_norm": 9.646662712097168, "learning_rate": 4.392465342168244e-06, "loss": 3.1584, "step": 22360 }, { "epoch": 0.227508544921875, "grad_norm": 12.217989921569824, "learning_rate": 4.392204023380913e-06, "loss": 3.2846, "step": 22365 }, { "epoch": 0.22755940755208334, "grad_norm": 15.172819137573242, "learning_rate": 4.391942656181991e-06, "loss": 3.232, "step": 22370 }, { "epoch": 0.22761027018229166, "grad_norm": 12.54372787475586, "learning_rate": 4.391681240578165e-06, "loss": 3.7518, "step": 22375 }, { "epoch": 0.2276611328125, "grad_norm": 14.048870086669922, "learning_rate": 4.391419776576123e-06, "loss": 3.4796, "step": 22380 }, { "epoch": 0.22771199544270834, "grad_norm": 15.415027618408203, "learning_rate": 4.391158264182553e-06, "loss": 3.2247, "step": 22385 }, { "epoch": 0.22776285807291666, "grad_norm": 8.710091590881348, "learning_rate": 4.390896703404148e-06, "loss": 3.7823, "step": 22390 }, { "epoch": 0.227813720703125, "grad_norm": 8.83072566986084, "learning_rate": 4.390635094247599e-06, "loss": 3.5134, "step": 22395 }, { "epoch": 0.22786458333333334, "grad_norm": 13.684968948364258, "learning_rate": 4.3903734367196e-06, "loss": 3.1468, "step": 22400 }, { "epoch": 0.22791544596354166, "grad_norm": 10.560220718383789, "learning_rate": 4.390111730826844e-06, "loss": 3.2929, "step": 22405 }, { "epoch": 0.22796630859375, "grad_norm": 9.37875747680664, "learning_rate": 4.389849976576027e-06, "loss": 3.3213, "step": 22410 }, { "epoch": 0.22801717122395834, "grad_norm": 12.373004913330078, "learning_rate": 4.389588173973847e-06, "loss": 3.6029, "step": 22415 }, { "epoch": 0.22806803385416666, "grad_norm": 11.633017539978027, "learning_rate": 4.389326323027001e-06, "loss": 3.4954, "step": 22420 }, { "epoch": 0.228118896484375, "grad_norm": 9.542677879333496, "learning_rate": 4.389064423742188e-06, "loss": 3.1426, "step": 22425 }, { "epoch": 0.22816975911458334, "grad_norm": 11.222747802734375, "learning_rate": 4.388802476126112e-06, "loss": 3.5985, "step": 22430 }, { "epoch": 0.22822062174479166, "grad_norm": 11.572148323059082, "learning_rate": 4.3885404801854704e-06, "loss": 3.5769, "step": 22435 }, { "epoch": 0.228271484375, "grad_norm": 9.974691390991211, "learning_rate": 4.388278435926968e-06, "loss": 4.3666, "step": 22440 }, { "epoch": 0.22832234700520834, "grad_norm": 15.239340782165527, "learning_rate": 4.3880163433573105e-06, "loss": 3.0226, "step": 22445 }, { "epoch": 0.22837320963541666, "grad_norm": 14.125506401062012, "learning_rate": 4.387754202483201e-06, "loss": 3.4135, "step": 22450 }, { "epoch": 0.228424072265625, "grad_norm": 12.766975402832031, "learning_rate": 4.3874920133113495e-06, "loss": 3.422, "step": 22455 }, { "epoch": 0.22847493489583334, "grad_norm": 7.391766548156738, "learning_rate": 4.3872297758484604e-06, "loss": 3.0828, "step": 22460 }, { "epoch": 0.22852579752604166, "grad_norm": 13.441563606262207, "learning_rate": 4.386967490101246e-06, "loss": 3.1743, "step": 22465 }, { "epoch": 0.22857666015625, "grad_norm": 13.292790412902832, "learning_rate": 4.3867051560764154e-06, "loss": 3.1812, "step": 22470 }, { "epoch": 0.22862752278645834, "grad_norm": 13.288102149963379, "learning_rate": 4.38644277378068e-06, "loss": 3.6401, "step": 22475 }, { "epoch": 0.22867838541666666, "grad_norm": 13.77042293548584, "learning_rate": 4.386180343220754e-06, "loss": 3.0652, "step": 22480 }, { "epoch": 0.228729248046875, "grad_norm": 12.693780899047852, "learning_rate": 4.3859178644033505e-06, "loss": 3.6637, "step": 22485 }, { "epoch": 0.22878011067708334, "grad_norm": 11.478263854980469, "learning_rate": 4.3856553373351865e-06, "loss": 3.0771, "step": 22490 }, { "epoch": 0.22883097330729166, "grad_norm": 13.068379402160645, "learning_rate": 4.385392762022976e-06, "loss": 3.3229, "step": 22495 }, { "epoch": 0.2288818359375, "grad_norm": 13.19984245300293, "learning_rate": 4.38513013847344e-06, "loss": 3.1831, "step": 22500 }, { "epoch": 0.22893269856770834, "grad_norm": 13.603717803955078, "learning_rate": 4.384867466693295e-06, "loss": 3.3175, "step": 22505 }, { "epoch": 0.22898356119791666, "grad_norm": 11.724016189575195, "learning_rate": 4.384604746689264e-06, "loss": 3.3074, "step": 22510 }, { "epoch": 0.229034423828125, "grad_norm": 12.843661308288574, "learning_rate": 4.384341978468067e-06, "loss": 3.1878, "step": 22515 }, { "epoch": 0.22908528645833334, "grad_norm": 16.34494400024414, "learning_rate": 4.384079162036426e-06, "loss": 4.0106, "step": 22520 }, { "epoch": 0.22913614908854166, "grad_norm": 13.975119590759277, "learning_rate": 4.383816297401068e-06, "loss": 3.3851, "step": 22525 }, { "epoch": 0.22918701171875, "grad_norm": 13.665790557861328, "learning_rate": 4.383553384568715e-06, "loss": 3.5789, "step": 22530 }, { "epoch": 0.22923787434895834, "grad_norm": 11.161511421203613, "learning_rate": 4.383290423546095e-06, "loss": 3.299, "step": 22535 }, { "epoch": 0.22928873697916666, "grad_norm": 15.252664566040039, "learning_rate": 4.383027414339936e-06, "loss": 3.2013, "step": 22540 }, { "epoch": 0.229339599609375, "grad_norm": 13.857137680053711, "learning_rate": 4.382764356956968e-06, "loss": 3.5467, "step": 22545 }, { "epoch": 0.22939046223958334, "grad_norm": 10.059674263000488, "learning_rate": 4.38250125140392e-06, "loss": 3.4946, "step": 22550 }, { "epoch": 0.22944132486979166, "grad_norm": 10.169170379638672, "learning_rate": 4.382238097687524e-06, "loss": 3.5028, "step": 22555 }, { "epoch": 0.2294921875, "grad_norm": 14.963750839233398, "learning_rate": 4.381974895814511e-06, "loss": 3.2772, "step": 22560 }, { "epoch": 0.22954305013020834, "grad_norm": 22.403776168823242, "learning_rate": 4.381711645791617e-06, "loss": 3.4373, "step": 22565 }, { "epoch": 0.22959391276041666, "grad_norm": 14.73482894897461, "learning_rate": 4.3814483476255764e-06, "loss": 3.1332, "step": 22570 }, { "epoch": 0.229644775390625, "grad_norm": 9.600224494934082, "learning_rate": 4.381185001323126e-06, "loss": 2.9423, "step": 22575 }, { "epoch": 0.22969563802083334, "grad_norm": 12.039507865905762, "learning_rate": 4.380921606891003e-06, "loss": 3.3055, "step": 22580 }, { "epoch": 0.22974650065104166, "grad_norm": 13.997042655944824, "learning_rate": 4.3806581643359465e-06, "loss": 3.2969, "step": 22585 }, { "epoch": 0.22979736328125, "grad_norm": 6.661002159118652, "learning_rate": 4.380394673664697e-06, "loss": 3.3523, "step": 22590 }, { "epoch": 0.22984822591145834, "grad_norm": 13.084622383117676, "learning_rate": 4.380131134883996e-06, "loss": 3.6514, "step": 22595 }, { "epoch": 0.22989908854166666, "grad_norm": 8.07689094543457, "learning_rate": 4.379867548000585e-06, "loss": 4.1248, "step": 22600 }, { "epoch": 0.229949951171875, "grad_norm": 12.517134666442871, "learning_rate": 4.3796039130212085e-06, "loss": 3.6931, "step": 22605 }, { "epoch": 0.23000081380208334, "grad_norm": 7.653154373168945, "learning_rate": 4.379340229952611e-06, "loss": 3.2468, "step": 22610 }, { "epoch": 0.23005167643229166, "grad_norm": 12.616921424865723, "learning_rate": 4.379076498801539e-06, "loss": 3.6261, "step": 22615 }, { "epoch": 0.2301025390625, "grad_norm": 11.284850120544434, "learning_rate": 4.378812719574741e-06, "loss": 3.9268, "step": 22620 }, { "epoch": 0.23015340169270834, "grad_norm": 12.80991268157959, "learning_rate": 4.378548892278964e-06, "loss": 3.0249, "step": 22625 }, { "epoch": 0.23020426432291666, "grad_norm": 12.30255126953125, "learning_rate": 4.378285016920959e-06, "loss": 3.6609, "step": 22630 }, { "epoch": 0.230255126953125, "grad_norm": 9.481846809387207, "learning_rate": 4.378021093507477e-06, "loss": 3.1856, "step": 22635 }, { "epoch": 0.23030598958333334, "grad_norm": 8.41667366027832, "learning_rate": 4.377757122045271e-06, "loss": 3.4197, "step": 22640 }, { "epoch": 0.23035685221354166, "grad_norm": 11.499900817871094, "learning_rate": 4.377493102541094e-06, "loss": 3.1093, "step": 22645 }, { "epoch": 0.23040771484375, "grad_norm": 15.506875991821289, "learning_rate": 4.3772290350017014e-06, "loss": 3.4855, "step": 22650 }, { "epoch": 0.23045857747395834, "grad_norm": 7.720246315002441, "learning_rate": 4.376964919433848e-06, "loss": 3.4042, "step": 22655 }, { "epoch": 0.23050944010416666, "grad_norm": 7.877285480499268, "learning_rate": 4.376700755844292e-06, "loss": 3.2406, "step": 22660 }, { "epoch": 0.230560302734375, "grad_norm": 14.253623008728027, "learning_rate": 4.376436544239793e-06, "loss": 3.3702, "step": 22665 }, { "epoch": 0.23061116536458334, "grad_norm": 13.754986763000488, "learning_rate": 4.37617228462711e-06, "loss": 3.6621, "step": 22670 }, { "epoch": 0.23066202799479166, "grad_norm": 13.590864181518555, "learning_rate": 4.375907977013002e-06, "loss": 3.2626, "step": 22675 }, { "epoch": 0.230712890625, "grad_norm": 9.83669376373291, "learning_rate": 4.3756436214042344e-06, "loss": 3.1272, "step": 22680 }, { "epoch": 0.23076375325520834, "grad_norm": 11.492262840270996, "learning_rate": 4.375379217807568e-06, "loss": 3.3619, "step": 22685 }, { "epoch": 0.23081461588541666, "grad_norm": 13.837381362915039, "learning_rate": 4.37511476622977e-06, "loss": 3.3483, "step": 22690 }, { "epoch": 0.230865478515625, "grad_norm": 12.848306655883789, "learning_rate": 4.374850266677605e-06, "loss": 3.6923, "step": 22695 }, { "epoch": 0.23091634114583334, "grad_norm": 10.598101615905762, "learning_rate": 4.37458571915784e-06, "loss": 3.4643, "step": 22700 }, { "epoch": 0.23096720377604166, "grad_norm": 13.353347778320312, "learning_rate": 4.374321123677245e-06, "loss": 3.3367, "step": 22705 }, { "epoch": 0.23101806640625, "grad_norm": 11.848420143127441, "learning_rate": 4.374056480242587e-06, "loss": 3.8908, "step": 22710 }, { "epoch": 0.23106892903645834, "grad_norm": 9.748141288757324, "learning_rate": 4.373791788860638e-06, "loss": 3.0885, "step": 22715 }, { "epoch": 0.23111979166666666, "grad_norm": 11.857436180114746, "learning_rate": 4.373527049538171e-06, "loss": 3.2744, "step": 22720 }, { "epoch": 0.231170654296875, "grad_norm": 12.81821060180664, "learning_rate": 4.373262262281958e-06, "loss": 3.2552, "step": 22725 }, { "epoch": 0.23122151692708334, "grad_norm": 12.025280952453613, "learning_rate": 4.372997427098774e-06, "loss": 3.5045, "step": 22730 }, { "epoch": 0.23127237955729166, "grad_norm": 12.568089485168457, "learning_rate": 4.372732543995395e-06, "loss": 3.4126, "step": 22735 }, { "epoch": 0.2313232421875, "grad_norm": 9.853676795959473, "learning_rate": 4.372467612978597e-06, "loss": 3.697, "step": 22740 }, { "epoch": 0.23137410481770834, "grad_norm": 12.528533935546875, "learning_rate": 4.37220263405516e-06, "loss": 3.0022, "step": 22745 }, { "epoch": 0.23142496744791666, "grad_norm": 9.40113639831543, "learning_rate": 4.371937607231862e-06, "loss": 3.3832, "step": 22750 }, { "epoch": 0.231475830078125, "grad_norm": 16.36553382873535, "learning_rate": 4.371672532515484e-06, "loss": 3.1619, "step": 22755 }, { "epoch": 0.23152669270833334, "grad_norm": 10.763917922973633, "learning_rate": 4.371407409912808e-06, "loss": 3.4408, "step": 22760 }, { "epoch": 0.23157755533854166, "grad_norm": 9.289487838745117, "learning_rate": 4.371142239430616e-06, "loss": 3.2544, "step": 22765 }, { "epoch": 0.23162841796875, "grad_norm": 10.083857536315918, "learning_rate": 4.370877021075695e-06, "loss": 3.215, "step": 22770 }, { "epoch": 0.23167928059895834, "grad_norm": 8.067363739013672, "learning_rate": 4.3706117548548276e-06, "loss": 3.3603, "step": 22775 }, { "epoch": 0.23173014322916666, "grad_norm": 18.238494873046875, "learning_rate": 4.3703464407748024e-06, "loss": 3.5479, "step": 22780 }, { "epoch": 0.231781005859375, "grad_norm": 9.413612365722656, "learning_rate": 4.3700810788424065e-06, "loss": 3.5223, "step": 22785 }, { "epoch": 0.23183186848958334, "grad_norm": 12.918985366821289, "learning_rate": 4.36981566906443e-06, "loss": 3.4497, "step": 22790 }, { "epoch": 0.23188273111979166, "grad_norm": 14.47326374053955, "learning_rate": 4.369550211447663e-06, "loss": 3.5497, "step": 22795 }, { "epoch": 0.23193359375, "grad_norm": 14.251363754272461, "learning_rate": 4.369284705998896e-06, "loss": 3.8692, "step": 22800 }, { "epoch": 0.23198445638020834, "grad_norm": 9.761405944824219, "learning_rate": 4.369019152724923e-06, "loss": 3.4169, "step": 22805 }, { "epoch": 0.23203531901041666, "grad_norm": 16.306804656982422, "learning_rate": 4.368753551632539e-06, "loss": 3.333, "step": 22810 }, { "epoch": 0.232086181640625, "grad_norm": 10.218423843383789, "learning_rate": 4.368487902728537e-06, "loss": 3.2954, "step": 22815 }, { "epoch": 0.23213704427083334, "grad_norm": 14.56457233428955, "learning_rate": 4.368222206019716e-06, "loss": 3.4159, "step": 22820 }, { "epoch": 0.23218790690104166, "grad_norm": 13.245458602905273, "learning_rate": 4.367956461512872e-06, "loss": 3.6755, "step": 22825 }, { "epoch": 0.23223876953125, "grad_norm": 11.180668830871582, "learning_rate": 4.367690669214805e-06, "loss": 3.4576, "step": 22830 }, { "epoch": 0.23228963216145834, "grad_norm": 12.140656471252441, "learning_rate": 4.367424829132314e-06, "loss": 3.4696, "step": 22835 }, { "epoch": 0.23234049479166666, "grad_norm": 15.034218788146973, "learning_rate": 4.367158941272203e-06, "loss": 3.5265, "step": 22840 }, { "epoch": 0.232391357421875, "grad_norm": 10.023531913757324, "learning_rate": 4.366893005641272e-06, "loss": 3.3705, "step": 22845 }, { "epoch": 0.23244222005208334, "grad_norm": 9.87674331665039, "learning_rate": 4.3666270222463255e-06, "loss": 3.3428, "step": 22850 }, { "epoch": 0.23249308268229166, "grad_norm": 8.11019229888916, "learning_rate": 4.3663609910941705e-06, "loss": 3.0177, "step": 22855 }, { "epoch": 0.2325439453125, "grad_norm": 9.974431037902832, "learning_rate": 4.3660949121916105e-06, "loss": 3.2103, "step": 22860 }, { "epoch": 0.23259480794270834, "grad_norm": 8.820601463317871, "learning_rate": 4.365828785545456e-06, "loss": 3.5385, "step": 22865 }, { "epoch": 0.23264567057291666, "grad_norm": 12.899969100952148, "learning_rate": 4.365562611162513e-06, "loss": 3.2611, "step": 22870 }, { "epoch": 0.232696533203125, "grad_norm": 13.530678749084473, "learning_rate": 4.365296389049593e-06, "loss": 3.4912, "step": 22875 }, { "epoch": 0.23274739583333334, "grad_norm": 17.59902000427246, "learning_rate": 4.365030119213508e-06, "loss": 3.5208, "step": 22880 }, { "epoch": 0.23279825846354166, "grad_norm": 10.701493263244629, "learning_rate": 4.364763801661068e-06, "loss": 3.3438, "step": 22885 }, { "epoch": 0.23284912109375, "grad_norm": 13.408337593078613, "learning_rate": 4.364497436399089e-06, "loss": 2.9106, "step": 22890 }, { "epoch": 0.23289998372395834, "grad_norm": 11.21833610534668, "learning_rate": 4.364231023434385e-06, "loss": 3.7474, "step": 22895 }, { "epoch": 0.23295084635416666, "grad_norm": 11.439018249511719, "learning_rate": 4.363964562773772e-06, "loss": 3.2894, "step": 22900 }, { "epoch": 0.233001708984375, "grad_norm": 11.574640274047852, "learning_rate": 4.363698054424067e-06, "loss": 3.5409, "step": 22905 }, { "epoch": 0.23305257161458334, "grad_norm": 11.974193572998047, "learning_rate": 4.36343149839209e-06, "loss": 3.4101, "step": 22910 }, { "epoch": 0.23310343424479166, "grad_norm": 9.374265670776367, "learning_rate": 4.363164894684659e-06, "loss": 3.3679, "step": 22915 }, { "epoch": 0.233154296875, "grad_norm": 14.392888069152832, "learning_rate": 4.362898243308596e-06, "loss": 3.5128, "step": 22920 }, { "epoch": 0.23320515950520834, "grad_norm": 15.125787734985352, "learning_rate": 4.362631544270723e-06, "loss": 3.3931, "step": 22925 }, { "epoch": 0.23325602213541666, "grad_norm": 12.04422378540039, "learning_rate": 4.362364797577864e-06, "loss": 3.27, "step": 22930 }, { "epoch": 0.233306884765625, "grad_norm": 9.42465877532959, "learning_rate": 4.362098003236842e-06, "loss": 3.1457, "step": 22935 }, { "epoch": 0.23335774739583334, "grad_norm": 10.79931354522705, "learning_rate": 4.361831161254485e-06, "loss": 3.3657, "step": 22940 }, { "epoch": 0.23340861002604166, "grad_norm": 7.212630748748779, "learning_rate": 4.361564271637618e-06, "loss": 3.6657, "step": 22945 }, { "epoch": 0.23345947265625, "grad_norm": 10.60206127166748, "learning_rate": 4.361297334393071e-06, "loss": 3.5798, "step": 22950 }, { "epoch": 0.23351033528645834, "grad_norm": 8.320850372314453, "learning_rate": 4.361030349527673e-06, "loss": 3.4497, "step": 22955 }, { "epoch": 0.23356119791666666, "grad_norm": 11.13176441192627, "learning_rate": 4.360763317048254e-06, "loss": 3.4702, "step": 22960 }, { "epoch": 0.233612060546875, "grad_norm": 10.365711212158203, "learning_rate": 4.360496236961647e-06, "loss": 3.2991, "step": 22965 }, { "epoch": 0.23366292317708334, "grad_norm": 14.777729988098145, "learning_rate": 4.360229109274685e-06, "loss": 3.3027, "step": 22970 }, { "epoch": 0.23371378580729166, "grad_norm": 7.491941928863525, "learning_rate": 4.359961933994202e-06, "loss": 3.2291, "step": 22975 }, { "epoch": 0.2337646484375, "grad_norm": 9.205909729003906, "learning_rate": 4.359694711127034e-06, "loss": 3.5741, "step": 22980 }, { "epoch": 0.23381551106770834, "grad_norm": 15.118029594421387, "learning_rate": 4.359427440680016e-06, "loss": 3.3006, "step": 22985 }, { "epoch": 0.23386637369791666, "grad_norm": 8.938360214233398, "learning_rate": 4.35916012265999e-06, "loss": 3.3448, "step": 22990 }, { "epoch": 0.233917236328125, "grad_norm": 10.3394193649292, "learning_rate": 4.358892757073792e-06, "loss": 3.3876, "step": 22995 }, { "epoch": 0.23396809895833334, "grad_norm": 15.624671936035156, "learning_rate": 4.358625343928264e-06, "loss": 3.6473, "step": 23000 }, { "epoch": 0.23401896158854166, "grad_norm": 7.711795806884766, "learning_rate": 4.358357883230245e-06, "loss": 3.729, "step": 23005 }, { "epoch": 0.23406982421875, "grad_norm": 8.540888786315918, "learning_rate": 4.358090374986582e-06, "loss": 3.3981, "step": 23010 }, { "epoch": 0.23412068684895834, "grad_norm": 11.216827392578125, "learning_rate": 4.357822819204117e-06, "loss": 3.6308, "step": 23015 }, { "epoch": 0.23417154947916666, "grad_norm": 11.93607234954834, "learning_rate": 4.357555215889695e-06, "loss": 3.3311, "step": 23020 }, { "epoch": 0.234222412109375, "grad_norm": 14.416646957397461, "learning_rate": 4.3572875650501624e-06, "loss": 3.3338, "step": 23025 }, { "epoch": 0.23427327473958334, "grad_norm": 9.926932334899902, "learning_rate": 4.357019866692369e-06, "loss": 3.2852, "step": 23030 }, { "epoch": 0.23432413736979166, "grad_norm": 13.363643646240234, "learning_rate": 4.356752120823162e-06, "loss": 3.4826, "step": 23035 }, { "epoch": 0.234375, "grad_norm": 10.39242935180664, "learning_rate": 4.356484327449392e-06, "loss": 3.2185, "step": 23040 }, { "epoch": 0.23442586263020834, "grad_norm": 11.942859649658203, "learning_rate": 4.356216486577911e-06, "loss": 3.3092, "step": 23045 }, { "epoch": 0.23447672526041666, "grad_norm": 15.64292049407959, "learning_rate": 4.35594859821557e-06, "loss": 3.3923, "step": 23050 }, { "epoch": 0.234527587890625, "grad_norm": 14.945747375488281, "learning_rate": 4.355680662369225e-06, "loss": 3.5142, "step": 23055 }, { "epoch": 0.23457845052083334, "grad_norm": 12.805582046508789, "learning_rate": 4.355412679045729e-06, "loss": 3.4128, "step": 23060 }, { "epoch": 0.23462931315104166, "grad_norm": 9.039190292358398, "learning_rate": 4.355144648251941e-06, "loss": 3.5806, "step": 23065 }, { "epoch": 0.23468017578125, "grad_norm": 15.245207786560059, "learning_rate": 4.354876569994716e-06, "loss": 3.7038, "step": 23070 }, { "epoch": 0.23473103841145834, "grad_norm": 12.465885162353516, "learning_rate": 4.354608444280915e-06, "loss": 3.2708, "step": 23075 }, { "epoch": 0.23478190104166666, "grad_norm": 8.4971342086792, "learning_rate": 4.3543402711173946e-06, "loss": 3.0945, "step": 23080 }, { "epoch": 0.234832763671875, "grad_norm": 9.483534812927246, "learning_rate": 4.354072050511019e-06, "loss": 3.514, "step": 23085 }, { "epoch": 0.23488362630208334, "grad_norm": 10.665985107421875, "learning_rate": 4.353803782468648e-06, "loss": 3.7719, "step": 23090 }, { "epoch": 0.23493448893229166, "grad_norm": 12.816307067871094, "learning_rate": 4.353535466997149e-06, "loss": 3.7636, "step": 23095 }, { "epoch": 0.2349853515625, "grad_norm": 12.756634712219238, "learning_rate": 4.3532671041033825e-06, "loss": 3.3117, "step": 23100 }, { "epoch": 0.23503621419270834, "grad_norm": 8.485614776611328, "learning_rate": 4.352998693794217e-06, "loss": 3.102, "step": 23105 }, { "epoch": 0.23508707682291666, "grad_norm": 8.25635814666748, "learning_rate": 4.352730236076519e-06, "loss": 3.0136, "step": 23110 }, { "epoch": 0.235137939453125, "grad_norm": 9.216659545898438, "learning_rate": 4.352461730957157e-06, "loss": 3.7945, "step": 23115 }, { "epoch": 0.23518880208333334, "grad_norm": 11.367064476013184, "learning_rate": 4.352193178443001e-06, "loss": 3.3426, "step": 23120 }, { "epoch": 0.23523966471354166, "grad_norm": 12.334614753723145, "learning_rate": 4.351924578540922e-06, "loss": 3.4398, "step": 23125 }, { "epoch": 0.23529052734375, "grad_norm": 11.858649253845215, "learning_rate": 4.35165593125779e-06, "loss": 3.3179, "step": 23130 }, { "epoch": 0.23534138997395834, "grad_norm": 12.8947172164917, "learning_rate": 4.351387236600481e-06, "loss": 3.2333, "step": 23135 }, { "epoch": 0.23539225260416666, "grad_norm": 7.790979862213135, "learning_rate": 4.351118494575869e-06, "loss": 3.4224, "step": 23140 }, { "epoch": 0.235443115234375, "grad_norm": 9.988829612731934, "learning_rate": 4.350849705190828e-06, "loss": 3.4509, "step": 23145 }, { "epoch": 0.23549397786458334, "grad_norm": 8.369173049926758, "learning_rate": 4.350580868452237e-06, "loss": 3.169, "step": 23150 }, { "epoch": 0.23554484049479166, "grad_norm": 12.344426155090332, "learning_rate": 4.350311984366972e-06, "loss": 3.4007, "step": 23155 }, { "epoch": 0.235595703125, "grad_norm": 11.743609428405762, "learning_rate": 4.350043052941914e-06, "loss": 3.3544, "step": 23160 }, { "epoch": 0.23564656575520834, "grad_norm": 11.494827270507812, "learning_rate": 4.3497740741839425e-06, "loss": 3.342, "step": 23165 }, { "epoch": 0.23569742838541666, "grad_norm": 9.599040031433105, "learning_rate": 4.349505048099941e-06, "loss": 3.4375, "step": 23170 }, { "epoch": 0.235748291015625, "grad_norm": 12.405707359313965, "learning_rate": 4.34923597469679e-06, "loss": 3.9962, "step": 23175 }, { "epoch": 0.23579915364583334, "grad_norm": 12.194296836853027, "learning_rate": 4.348966853981376e-06, "loss": 3.4165, "step": 23180 }, { "epoch": 0.23585001627604166, "grad_norm": 9.456016540527344, "learning_rate": 4.348697685960582e-06, "loss": 3.6132, "step": 23185 }, { "epoch": 0.23590087890625, "grad_norm": 12.257650375366211, "learning_rate": 4.348428470641297e-06, "loss": 3.1384, "step": 23190 }, { "epoch": 0.23595174153645834, "grad_norm": 11.710000991821289, "learning_rate": 4.348159208030407e-06, "loss": 3.6273, "step": 23195 }, { "epoch": 0.23600260416666666, "grad_norm": 15.638426780700684, "learning_rate": 4.347889898134802e-06, "loss": 3.3879, "step": 23200 }, { "epoch": 0.236053466796875, "grad_norm": 11.080460548400879, "learning_rate": 4.3476205409613715e-06, "loss": 3.4117, "step": 23205 }, { "epoch": 0.23610432942708334, "grad_norm": 7.962300777435303, "learning_rate": 4.347351136517008e-06, "loss": 3.6079, "step": 23210 }, { "epoch": 0.23615519205729166, "grad_norm": 14.046839714050293, "learning_rate": 4.3470816848086025e-06, "loss": 3.049, "step": 23215 }, { "epoch": 0.2362060546875, "grad_norm": 9.567350387573242, "learning_rate": 4.346812185843051e-06, "loss": 3.103, "step": 23220 }, { "epoch": 0.23625691731770834, "grad_norm": 14.83440113067627, "learning_rate": 4.346542639627247e-06, "loss": 3.3158, "step": 23225 }, { "epoch": 0.23630777994791666, "grad_norm": 14.364097595214844, "learning_rate": 4.346273046168088e-06, "loss": 3.2224, "step": 23230 }, { "epoch": 0.236358642578125, "grad_norm": 9.735870361328125, "learning_rate": 4.34600340547247e-06, "loss": 3.7907, "step": 23235 }, { "epoch": 0.23640950520833334, "grad_norm": 11.21799373626709, "learning_rate": 4.3457337175472925e-06, "loss": 3.6273, "step": 23240 }, { "epoch": 0.23646036783854166, "grad_norm": 13.670818328857422, "learning_rate": 4.345463982399456e-06, "loss": 3.2087, "step": 23245 }, { "epoch": 0.23651123046875, "grad_norm": 10.433359146118164, "learning_rate": 4.3451942000358605e-06, "loss": 3.4824, "step": 23250 }, { "epoch": 0.23656209309895834, "grad_norm": 9.702804565429688, "learning_rate": 4.344924370463408e-06, "loss": 3.6147, "step": 23255 }, { "epoch": 0.23661295572916666, "grad_norm": 8.879476547241211, "learning_rate": 4.344654493689004e-06, "loss": 3.0391, "step": 23260 }, { "epoch": 0.236663818359375, "grad_norm": 7.819838523864746, "learning_rate": 4.344384569719551e-06, "loss": 3.643, "step": 23265 }, { "epoch": 0.23671468098958334, "grad_norm": 10.553860664367676, "learning_rate": 4.344114598561957e-06, "loss": 3.5373, "step": 23270 }, { "epoch": 0.23676554361979166, "grad_norm": 11.761865615844727, "learning_rate": 4.3438445802231264e-06, "loss": 3.2724, "step": 23275 }, { "epoch": 0.23681640625, "grad_norm": 12.256386756896973, "learning_rate": 4.34357451470997e-06, "loss": 3.3775, "step": 23280 }, { "epoch": 0.23686726888020834, "grad_norm": 14.110241889953613, "learning_rate": 4.343304402029398e-06, "loss": 3.7732, "step": 23285 }, { "epoch": 0.23691813151041666, "grad_norm": 8.55147933959961, "learning_rate": 4.343034242188318e-06, "loss": 3.3465, "step": 23290 }, { "epoch": 0.236968994140625, "grad_norm": 12.542463302612305, "learning_rate": 4.342764035193645e-06, "loss": 3.4555, "step": 23295 }, { "epoch": 0.23701985677083334, "grad_norm": 13.53172492980957, "learning_rate": 4.3424937810522904e-06, "loss": 3.3524, "step": 23300 }, { "epoch": 0.23707071940104166, "grad_norm": 12.750872611999512, "learning_rate": 4.342223479771169e-06, "loss": 3.7538, "step": 23305 }, { "epoch": 0.23712158203125, "grad_norm": 9.65660285949707, "learning_rate": 4.341953131357196e-06, "loss": 3.566, "step": 23310 }, { "epoch": 0.23717244466145834, "grad_norm": 15.46808910369873, "learning_rate": 4.341682735817289e-06, "loss": 3.6563, "step": 23315 }, { "epoch": 0.23722330729166666, "grad_norm": 15.83115291595459, "learning_rate": 4.341412293158367e-06, "loss": 3.3011, "step": 23320 }, { "epoch": 0.237274169921875, "grad_norm": 9.020452499389648, "learning_rate": 4.341141803387347e-06, "loss": 3.2142, "step": 23325 }, { "epoch": 0.23732503255208334, "grad_norm": 7.2499470710754395, "learning_rate": 4.3408712665111505e-06, "loss": 3.2466, "step": 23330 }, { "epoch": 0.23737589518229166, "grad_norm": 14.520453453063965, "learning_rate": 4.340600682536699e-06, "loss": 3.1733, "step": 23335 }, { "epoch": 0.2374267578125, "grad_norm": 7.980301856994629, "learning_rate": 4.340330051470915e-06, "loss": 3.0205, "step": 23340 }, { "epoch": 0.23747762044270834, "grad_norm": 7.818747520446777, "learning_rate": 4.340059373320723e-06, "loss": 3.4661, "step": 23345 }, { "epoch": 0.23752848307291666, "grad_norm": 8.310884475708008, "learning_rate": 4.339788648093048e-06, "loss": 2.7708, "step": 23350 }, { "epoch": 0.237579345703125, "grad_norm": 11.503134727478027, "learning_rate": 4.339517875794817e-06, "loss": 3.2503, "step": 23355 }, { "epoch": 0.23763020833333334, "grad_norm": 14.735780715942383, "learning_rate": 4.339247056432957e-06, "loss": 3.3462, "step": 23360 }, { "epoch": 0.23768107096354166, "grad_norm": 14.477924346923828, "learning_rate": 4.338976190014397e-06, "loss": 3.2735, "step": 23365 }, { "epoch": 0.23773193359375, "grad_norm": 14.508803367614746, "learning_rate": 4.338705276546067e-06, "loss": 3.169, "step": 23370 }, { "epoch": 0.23778279622395834, "grad_norm": 10.182372093200684, "learning_rate": 4.338434316034897e-06, "loss": 3.6902, "step": 23375 }, { "epoch": 0.23783365885416666, "grad_norm": 15.233953475952148, "learning_rate": 4.338163308487823e-06, "loss": 3.7186, "step": 23380 }, { "epoch": 0.237884521484375, "grad_norm": 8.143902778625488, "learning_rate": 4.337892253911775e-06, "loss": 3.2273, "step": 23385 }, { "epoch": 0.23793538411458334, "grad_norm": 15.452500343322754, "learning_rate": 4.33762115231369e-06, "loss": 3.5926, "step": 23390 }, { "epoch": 0.23798624674479166, "grad_norm": 14.97335147857666, "learning_rate": 4.337350003700502e-06, "loss": 3.2986, "step": 23395 }, { "epoch": 0.238037109375, "grad_norm": 11.603572845458984, "learning_rate": 4.3370788080791516e-06, "loss": 3.6042, "step": 23400 }, { "epoch": 0.23808797200520834, "grad_norm": 11.123287200927734, "learning_rate": 4.336807565456574e-06, "loss": 3.5009, "step": 23405 }, { "epoch": 0.23813883463541666, "grad_norm": 16.845380783081055, "learning_rate": 4.3365362758397115e-06, "loss": 3.2503, "step": 23410 }, { "epoch": 0.238189697265625, "grad_norm": 10.304829597473145, "learning_rate": 4.3362649392355026e-06, "loss": 3.478, "step": 23415 }, { "epoch": 0.23824055989583334, "grad_norm": 14.178970336914062, "learning_rate": 4.335993555650891e-06, "loss": 3.2514, "step": 23420 }, { "epoch": 0.23829142252604166, "grad_norm": 11.138680458068848, "learning_rate": 4.335722125092819e-06, "loss": 3.286, "step": 23425 }, { "epoch": 0.23834228515625, "grad_norm": 13.36595344543457, "learning_rate": 4.335450647568231e-06, "loss": 3.4917, "step": 23430 }, { "epoch": 0.23839314778645834, "grad_norm": 11.410798072814941, "learning_rate": 4.335179123084074e-06, "loss": 3.4145, "step": 23435 }, { "epoch": 0.23844401041666666, "grad_norm": 11.153116226196289, "learning_rate": 4.334907551647295e-06, "loss": 3.4745, "step": 23440 }, { "epoch": 0.238494873046875, "grad_norm": 10.488436698913574, "learning_rate": 4.33463593326484e-06, "loss": 3.4593, "step": 23445 }, { "epoch": 0.23854573567708334, "grad_norm": 15.080182075500488, "learning_rate": 4.3343642679436595e-06, "loss": 3.4577, "step": 23450 }, { "epoch": 0.23859659830729166, "grad_norm": 13.887222290039062, "learning_rate": 4.3340925556907045e-06, "loss": 3.0636, "step": 23455 }, { "epoch": 0.2386474609375, "grad_norm": 14.262229919433594, "learning_rate": 4.333820796512926e-06, "loss": 3.1407, "step": 23460 }, { "epoch": 0.23869832356770834, "grad_norm": 14.66751480102539, "learning_rate": 4.333548990417277e-06, "loss": 3.1232, "step": 23465 }, { "epoch": 0.23874918619791666, "grad_norm": 7.767457962036133, "learning_rate": 4.333277137410712e-06, "loss": 3.2363, "step": 23470 }, { "epoch": 0.238800048828125, "grad_norm": 13.721875190734863, "learning_rate": 4.333005237500186e-06, "loss": 3.167, "step": 23475 }, { "epoch": 0.23885091145833334, "grad_norm": 7.040370464324951, "learning_rate": 4.332733290692655e-06, "loss": 3.3249, "step": 23480 }, { "epoch": 0.23890177408854166, "grad_norm": 14.299421310424805, "learning_rate": 4.332461296995077e-06, "loss": 3.3313, "step": 23485 }, { "epoch": 0.23895263671875, "grad_norm": 12.305241584777832, "learning_rate": 4.3321892564144115e-06, "loss": 3.3294, "step": 23490 }, { "epoch": 0.23900349934895834, "grad_norm": 15.409032821655273, "learning_rate": 4.331917168957619e-06, "loss": 3.5023, "step": 23495 }, { "epoch": 0.23905436197916666, "grad_norm": 18.326412200927734, "learning_rate": 4.3316450346316586e-06, "loss": 3.599, "step": 23500 }, { "epoch": 0.239105224609375, "grad_norm": 13.42839527130127, "learning_rate": 4.331372853443495e-06, "loss": 3.3468, "step": 23505 }, { "epoch": 0.23915608723958334, "grad_norm": 13.067977905273438, "learning_rate": 4.33110062540009e-06, "loss": 3.1257, "step": 23510 }, { "epoch": 0.23920694986979166, "grad_norm": 10.786623001098633, "learning_rate": 4.33082835050841e-06, "loss": 3.0433, "step": 23515 }, { "epoch": 0.2392578125, "grad_norm": 16.42357635498047, "learning_rate": 4.330556028775421e-06, "loss": 3.241, "step": 23520 }, { "epoch": 0.23930867513020834, "grad_norm": 13.901268005371094, "learning_rate": 4.330283660208089e-06, "loss": 3.0375, "step": 23525 }, { "epoch": 0.23935953776041666, "grad_norm": 13.501899719238281, "learning_rate": 4.330011244813384e-06, "loss": 2.9452, "step": 23530 }, { "epoch": 0.239410400390625, "grad_norm": 12.320079803466797, "learning_rate": 4.329738782598276e-06, "loss": 3.4211, "step": 23535 }, { "epoch": 0.23946126302083334, "grad_norm": 12.208647727966309, "learning_rate": 4.329466273569734e-06, "loss": 3.389, "step": 23540 }, { "epoch": 0.23951212565104166, "grad_norm": 13.780562400817871, "learning_rate": 4.329193717734731e-06, "loss": 3.313, "step": 23545 }, { "epoch": 0.23956298828125, "grad_norm": 9.39169692993164, "learning_rate": 4.328921115100239e-06, "loss": 3.0223, "step": 23550 }, { "epoch": 0.23961385091145834, "grad_norm": 11.84803295135498, "learning_rate": 4.3286484656732365e-06, "loss": 3.1971, "step": 23555 }, { "epoch": 0.23966471354166666, "grad_norm": 8.828704833984375, "learning_rate": 4.328375769460694e-06, "loss": 3.4065, "step": 23560 }, { "epoch": 0.239715576171875, "grad_norm": 12.668590545654297, "learning_rate": 4.328103026469592e-06, "loss": 3.3995, "step": 23565 }, { "epoch": 0.23976643880208334, "grad_norm": 7.958857536315918, "learning_rate": 4.327830236706908e-06, "loss": 3.1953, "step": 23570 }, { "epoch": 0.23981730143229166, "grad_norm": 12.19107723236084, "learning_rate": 4.32755740017962e-06, "loss": 3.0576, "step": 23575 }, { "epoch": 0.2398681640625, "grad_norm": 10.372651100158691, "learning_rate": 4.327284516894708e-06, "loss": 3.195, "step": 23580 }, { "epoch": 0.23991902669270834, "grad_norm": 15.74683952331543, "learning_rate": 4.327011586859156e-06, "loss": 3.286, "step": 23585 }, { "epoch": 0.23996988932291666, "grad_norm": 7.834261417388916, "learning_rate": 4.326738610079946e-06, "loss": 3.4963, "step": 23590 }, { "epoch": 0.240020751953125, "grad_norm": 12.021536827087402, "learning_rate": 4.326465586564061e-06, "loss": 3.5939, "step": 23595 }, { "epoch": 0.24007161458333334, "grad_norm": 11.826726913452148, "learning_rate": 4.326192516318486e-06, "loss": 3.2587, "step": 23600 }, { "epoch": 0.24012247721354166, "grad_norm": 9.564510345458984, "learning_rate": 4.325919399350209e-06, "loss": 3.1422, "step": 23605 }, { "epoch": 0.24017333984375, "grad_norm": 15.419652938842773, "learning_rate": 4.3256462356662175e-06, "loss": 3.3998, "step": 23610 }, { "epoch": 0.24022420247395834, "grad_norm": 7.081390857696533, "learning_rate": 4.325373025273499e-06, "loss": 3.4022, "step": 23615 }, { "epoch": 0.24027506510416666, "grad_norm": 10.660130500793457, "learning_rate": 4.325099768179045e-06, "loss": 3.2061, "step": 23620 }, { "epoch": 0.240325927734375, "grad_norm": 12.876676559448242, "learning_rate": 4.324826464389846e-06, "loss": 3.2347, "step": 23625 }, { "epoch": 0.24037679036458334, "grad_norm": 10.950091361999512, "learning_rate": 4.324553113912894e-06, "loss": 3.2463, "step": 23630 }, { "epoch": 0.24042765299479166, "grad_norm": 13.289194107055664, "learning_rate": 4.324279716755183e-06, "loss": 3.704, "step": 23635 }, { "epoch": 0.240478515625, "grad_norm": 12.992833137512207, "learning_rate": 4.324006272923708e-06, "loss": 3.1545, "step": 23640 }, { "epoch": 0.24052937825520834, "grad_norm": 7.339069843292236, "learning_rate": 4.323732782425465e-06, "loss": 3.0488, "step": 23645 }, { "epoch": 0.24058024088541666, "grad_norm": 11.042210578918457, "learning_rate": 4.323459245267451e-06, "loss": 3.2029, "step": 23650 }, { "epoch": 0.240631103515625, "grad_norm": 13.536554336547852, "learning_rate": 4.323185661456665e-06, "loss": 3.2709, "step": 23655 }, { "epoch": 0.24068196614583334, "grad_norm": 9.641441345214844, "learning_rate": 4.322912031000105e-06, "loss": 3.6717, "step": 23660 }, { "epoch": 0.24073282877604166, "grad_norm": 13.114518165588379, "learning_rate": 4.322638353904773e-06, "loss": 3.4325, "step": 23665 }, { "epoch": 0.24078369140625, "grad_norm": 12.646187782287598, "learning_rate": 4.322364630177671e-06, "loss": 3.0339, "step": 23670 }, { "epoch": 0.24083455403645834, "grad_norm": 10.695713996887207, "learning_rate": 4.322090859825802e-06, "loss": 3.271, "step": 23675 }, { "epoch": 0.24088541666666666, "grad_norm": 8.131204605102539, "learning_rate": 4.3218170428561695e-06, "loss": 3.7232, "step": 23680 }, { "epoch": 0.240936279296875, "grad_norm": 9.798648834228516, "learning_rate": 4.3215431792757805e-06, "loss": 3.2187, "step": 23685 }, { "epoch": 0.24098714192708334, "grad_norm": 9.147968292236328, "learning_rate": 4.32126926909164e-06, "loss": 3.2248, "step": 23690 }, { "epoch": 0.24103800455729166, "grad_norm": 10.552434921264648, "learning_rate": 4.320995312310758e-06, "loss": 3.5436, "step": 23695 }, { "epoch": 0.2410888671875, "grad_norm": 11.929584503173828, "learning_rate": 4.320721308940142e-06, "loss": 3.6509, "step": 23700 }, { "epoch": 0.24113972981770834, "grad_norm": 12.67947006225586, "learning_rate": 4.320447258986804e-06, "loss": 3.2208, "step": 23705 }, { "epoch": 0.24119059244791666, "grad_norm": 10.882842063903809, "learning_rate": 4.320173162457754e-06, "loss": 3.2851, "step": 23710 }, { "epoch": 0.241241455078125, "grad_norm": 9.038286209106445, "learning_rate": 4.319899019360003e-06, "loss": 3.9344, "step": 23715 }, { "epoch": 0.24129231770833334, "grad_norm": 8.482138633728027, "learning_rate": 4.319624829700569e-06, "loss": 3.2898, "step": 23720 }, { "epoch": 0.24134318033854166, "grad_norm": 9.246583938598633, "learning_rate": 4.319350593486465e-06, "loss": 3.2805, "step": 23725 }, { "epoch": 0.24139404296875, "grad_norm": 14.592893600463867, "learning_rate": 4.319076310724707e-06, "loss": 3.9456, "step": 23730 }, { "epoch": 0.24144490559895834, "grad_norm": 13.710060119628906, "learning_rate": 4.318801981422312e-06, "loss": 3.4525, "step": 23735 }, { "epoch": 0.24149576822916666, "grad_norm": 11.734492301940918, "learning_rate": 4.3185276055863004e-06, "loss": 3.4413, "step": 23740 }, { "epoch": 0.241546630859375, "grad_norm": 10.637125015258789, "learning_rate": 4.31825318322369e-06, "loss": 3.4871, "step": 23745 }, { "epoch": 0.24159749348958334, "grad_norm": 11.061915397644043, "learning_rate": 4.317978714341504e-06, "loss": 3.3846, "step": 23750 }, { "epoch": 0.24164835611979166, "grad_norm": 9.801789283752441, "learning_rate": 4.317704198946763e-06, "loss": 3.3485, "step": 23755 }, { "epoch": 0.24169921875, "grad_norm": 13.971748352050781, "learning_rate": 4.3174296370464905e-06, "loss": 3.1945, "step": 23760 }, { "epoch": 0.24175008138020834, "grad_norm": 9.924613952636719, "learning_rate": 4.317155028647712e-06, "loss": 3.3264, "step": 23765 }, { "epoch": 0.24180094401041666, "grad_norm": 14.436348915100098, "learning_rate": 4.316880373757453e-06, "loss": 3.4252, "step": 23770 }, { "epoch": 0.241851806640625, "grad_norm": 16.445823669433594, "learning_rate": 4.31660567238274e-06, "loss": 3.5997, "step": 23775 }, { "epoch": 0.24190266927083334, "grad_norm": 14.193046569824219, "learning_rate": 4.316330924530601e-06, "loss": 3.13, "step": 23780 }, { "epoch": 0.24195353190104166, "grad_norm": 14.578299522399902, "learning_rate": 4.316056130208067e-06, "loss": 3.4323, "step": 23785 }, { "epoch": 0.24200439453125, "grad_norm": 13.892190933227539, "learning_rate": 4.315781289422168e-06, "loss": 3.5278, "step": 23790 }, { "epoch": 0.24205525716145834, "grad_norm": 15.20643424987793, "learning_rate": 4.315506402179933e-06, "loss": 3.3675, "step": 23795 }, { "epoch": 0.24210611979166666, "grad_norm": 11.651402473449707, "learning_rate": 4.3152314684883975e-06, "loss": 3.95, "step": 23800 }, { "epoch": 0.242156982421875, "grad_norm": 12.027578353881836, "learning_rate": 4.314956488354596e-06, "loss": 3.5839, "step": 23805 }, { "epoch": 0.24220784505208334, "grad_norm": 12.57933235168457, "learning_rate": 4.314681461785561e-06, "loss": 3.2394, "step": 23810 }, { "epoch": 0.24225870768229166, "grad_norm": 10.47960376739502, "learning_rate": 4.314406388788333e-06, "loss": 3.5905, "step": 23815 }, { "epoch": 0.2423095703125, "grad_norm": 11.346346855163574, "learning_rate": 4.3141312693699465e-06, "loss": 3.887, "step": 23820 }, { "epoch": 0.24236043294270834, "grad_norm": 12.971914291381836, "learning_rate": 4.313856103537442e-06, "loss": 3.6812, "step": 23825 }, { "epoch": 0.24241129557291666, "grad_norm": 12.668682098388672, "learning_rate": 4.313580891297859e-06, "loss": 3.2519, "step": 23830 }, { "epoch": 0.242462158203125, "grad_norm": 10.159984588623047, "learning_rate": 4.3133056326582386e-06, "loss": 3.1967, "step": 23835 }, { "epoch": 0.24251302083333334, "grad_norm": 15.020711898803711, "learning_rate": 4.313030327625623e-06, "loss": 3.1332, "step": 23840 }, { "epoch": 0.24256388346354166, "grad_norm": 11.418685913085938, "learning_rate": 4.312754976207058e-06, "loss": 3.2695, "step": 23845 }, { "epoch": 0.24261474609375, "grad_norm": 16.103302001953125, "learning_rate": 4.3124795784095845e-06, "loss": 3.1872, "step": 23850 }, { "epoch": 0.24266560872395834, "grad_norm": 8.224347114562988, "learning_rate": 4.312204134240252e-06, "loss": 3.4198, "step": 23855 }, { "epoch": 0.24271647135416666, "grad_norm": 16.922773361206055, "learning_rate": 4.311928643706105e-06, "loss": 3.6617, "step": 23860 }, { "epoch": 0.242767333984375, "grad_norm": 12.889124870300293, "learning_rate": 4.311653106814194e-06, "loss": 3.2375, "step": 23865 }, { "epoch": 0.24281819661458334, "grad_norm": 11.637298583984375, "learning_rate": 4.3113775235715675e-06, "loss": 3.4952, "step": 23870 }, { "epoch": 0.24286905924479166, "grad_norm": 15.415702819824219, "learning_rate": 4.3111018939852764e-06, "loss": 3.1809, "step": 23875 }, { "epoch": 0.242919921875, "grad_norm": 16.010303497314453, "learning_rate": 4.3108262180623724e-06, "loss": 3.384, "step": 23880 }, { "epoch": 0.24297078450520834, "grad_norm": 13.60275650024414, "learning_rate": 4.310550495809909e-06, "loss": 3.7173, "step": 23885 }, { "epoch": 0.24302164713541666, "grad_norm": 15.211081504821777, "learning_rate": 4.31027472723494e-06, "loss": 3.3849, "step": 23890 }, { "epoch": 0.243072509765625, "grad_norm": 15.576075553894043, "learning_rate": 4.309998912344522e-06, "loss": 3.3945, "step": 23895 }, { "epoch": 0.24312337239583334, "grad_norm": 6.691440582275391, "learning_rate": 4.30972305114571e-06, "loss": 3.64, "step": 23900 }, { "epoch": 0.24317423502604166, "grad_norm": 10.863690376281738, "learning_rate": 4.309447143645563e-06, "loss": 3.5055, "step": 23905 }, { "epoch": 0.24322509765625, "grad_norm": 14.350895881652832, "learning_rate": 4.30917118985114e-06, "loss": 3.259, "step": 23910 }, { "epoch": 0.24327596028645834, "grad_norm": 10.835892677307129, "learning_rate": 4.3088951897695e-06, "loss": 3.3739, "step": 23915 }, { "epoch": 0.24332682291666666, "grad_norm": 10.899023056030273, "learning_rate": 4.308619143407706e-06, "loss": 3.4605, "step": 23920 }, { "epoch": 0.243377685546875, "grad_norm": 17.81281280517578, "learning_rate": 4.30834305077282e-06, "loss": 3.6514, "step": 23925 }, { "epoch": 0.24342854817708334, "grad_norm": 10.215516090393066, "learning_rate": 4.3080669118719055e-06, "loss": 3.2631, "step": 23930 }, { "epoch": 0.24347941080729166, "grad_norm": 11.371179580688477, "learning_rate": 4.307790726712028e-06, "loss": 3.3525, "step": 23935 }, { "epoch": 0.2435302734375, "grad_norm": 7.968616962432861, "learning_rate": 4.307514495300253e-06, "loss": 3.1416, "step": 23940 }, { "epoch": 0.24358113606770834, "grad_norm": 11.859014511108398, "learning_rate": 4.307238217643648e-06, "loss": 3.1803, "step": 23945 }, { "epoch": 0.24363199869791666, "grad_norm": 7.970711708068848, "learning_rate": 4.306961893749281e-06, "loss": 3.1293, "step": 23950 }, { "epoch": 0.243682861328125, "grad_norm": 16.512006759643555, "learning_rate": 4.306685523624223e-06, "loss": 3.3174, "step": 23955 }, { "epoch": 0.24373372395833334, "grad_norm": 12.797045707702637, "learning_rate": 4.306409107275544e-06, "loss": 3.4595, "step": 23960 }, { "epoch": 0.24378458658854166, "grad_norm": 15.285560607910156, "learning_rate": 4.3061326447103155e-06, "loss": 3.0841, "step": 23965 }, { "epoch": 0.24383544921875, "grad_norm": 12.97689151763916, "learning_rate": 4.305856135935612e-06, "loss": 3.112, "step": 23970 }, { "epoch": 0.24388631184895834, "grad_norm": 13.685649871826172, "learning_rate": 4.3055795809585075e-06, "loss": 3.5289, "step": 23975 }, { "epoch": 0.24393717447916666, "grad_norm": 12.28869915008545, "learning_rate": 4.305302979786077e-06, "loss": 3.1983, "step": 23980 }, { "epoch": 0.243988037109375, "grad_norm": 7.516073226928711, "learning_rate": 4.305026332425398e-06, "loss": 3.5407, "step": 23985 }, { "epoch": 0.24403889973958334, "grad_norm": 10.212562561035156, "learning_rate": 4.304749638883548e-06, "loss": 3.4814, "step": 23990 }, { "epoch": 0.24408976236979166, "grad_norm": 15.381714820861816, "learning_rate": 4.304472899167607e-06, "loss": 3.2064, "step": 23995 }, { "epoch": 0.244140625, "grad_norm": 12.505240440368652, "learning_rate": 4.304196113284653e-06, "loss": 3.1553, "step": 24000 }, { "epoch": 0.24419148763020834, "grad_norm": 7.604684352874756, "learning_rate": 4.30391928124177e-06, "loss": 3.1687, "step": 24005 }, { "epoch": 0.24424235026041666, "grad_norm": 11.336790084838867, "learning_rate": 4.303642403046039e-06, "loss": 3.4772, "step": 24010 }, { "epoch": 0.244293212890625, "grad_norm": 17.307836532592773, "learning_rate": 4.3033654787045456e-06, "loss": 3.0816, "step": 24015 }, { "epoch": 0.24434407552083334, "grad_norm": 16.10969352722168, "learning_rate": 4.303088508224374e-06, "loss": 4.3131, "step": 24020 }, { "epoch": 0.24439493815104166, "grad_norm": 9.405745506286621, "learning_rate": 4.302811491612609e-06, "loss": 3.3127, "step": 24025 }, { "epoch": 0.24444580078125, "grad_norm": 13.108831405639648, "learning_rate": 4.30253442887634e-06, "loss": 3.1843, "step": 24030 }, { "epoch": 0.24449666341145834, "grad_norm": 9.459420204162598, "learning_rate": 4.302257320022655e-06, "loss": 3.6039, "step": 24035 }, { "epoch": 0.24454752604166666, "grad_norm": 17.34709358215332, "learning_rate": 4.301980165058643e-06, "loss": 3.4738, "step": 24040 }, { "epoch": 0.244598388671875, "grad_norm": 9.753969192504883, "learning_rate": 4.301702963991397e-06, "loss": 3.2752, "step": 24045 }, { "epoch": 0.24464925130208334, "grad_norm": 14.754100799560547, "learning_rate": 4.3014257168280074e-06, "loss": 2.8529, "step": 24050 }, { "epoch": 0.24470011393229166, "grad_norm": 9.242658615112305, "learning_rate": 4.3011484235755676e-06, "loss": 3.7791, "step": 24055 }, { "epoch": 0.2447509765625, "grad_norm": 13.81303882598877, "learning_rate": 4.300871084241172e-06, "loss": 3.5131, "step": 24060 }, { "epoch": 0.24480183919270834, "grad_norm": 14.629620552062988, "learning_rate": 4.3005936988319166e-06, "loss": 3.393, "step": 24065 }, { "epoch": 0.24485270182291666, "grad_norm": 13.942264556884766, "learning_rate": 4.3003162673548975e-06, "loss": 3.5995, "step": 24070 }, { "epoch": 0.244903564453125, "grad_norm": 12.826180458068848, "learning_rate": 4.300038789817214e-06, "loss": 3.2657, "step": 24075 }, { "epoch": 0.24495442708333334, "grad_norm": 14.900425910949707, "learning_rate": 4.299761266225965e-06, "loss": 3.4057, "step": 24080 }, { "epoch": 0.24500528971354166, "grad_norm": 16.51618766784668, "learning_rate": 4.299483696588251e-06, "loss": 3.4861, "step": 24085 }, { "epoch": 0.24505615234375, "grad_norm": 13.200966835021973, "learning_rate": 4.299206080911172e-06, "loss": 3.4621, "step": 24090 }, { "epoch": 0.24510701497395834, "grad_norm": 13.313547134399414, "learning_rate": 4.298928419201833e-06, "loss": 3.1582, "step": 24095 }, { "epoch": 0.24515787760416666, "grad_norm": 12.535401344299316, "learning_rate": 4.298650711467336e-06, "loss": 3.1646, "step": 24100 }, { "epoch": 0.245208740234375, "grad_norm": 15.257453918457031, "learning_rate": 4.298372957714787e-06, "loss": 3.2669, "step": 24105 }, { "epoch": 0.24525960286458334, "grad_norm": 16.784587860107422, "learning_rate": 4.298095157951292e-06, "loss": 3.5681, "step": 24110 }, { "epoch": 0.24531046549479166, "grad_norm": 10.863885879516602, "learning_rate": 4.297817312183959e-06, "loss": 3.6729, "step": 24115 }, { "epoch": 0.245361328125, "grad_norm": 12.638436317443848, "learning_rate": 4.2975394204198965e-06, "loss": 3.1696, "step": 24120 }, { "epoch": 0.24541219075520834, "grad_norm": 7.186621189117432, "learning_rate": 4.297261482666214e-06, "loss": 3.2565, "step": 24125 }, { "epoch": 0.24546305338541666, "grad_norm": 8.10990047454834, "learning_rate": 4.296983498930022e-06, "loss": 3.5558, "step": 24130 }, { "epoch": 0.245513916015625, "grad_norm": 9.806075096130371, "learning_rate": 4.296705469218432e-06, "loss": 3.3298, "step": 24135 }, { "epoch": 0.24556477864583334, "grad_norm": 15.463103294372559, "learning_rate": 4.29642739353856e-06, "loss": 3.4272, "step": 24140 }, { "epoch": 0.24561564127604166, "grad_norm": 15.613325119018555, "learning_rate": 4.296149271897519e-06, "loss": 3.311, "step": 24145 }, { "epoch": 0.24566650390625, "grad_norm": 10.683793067932129, "learning_rate": 4.295871104302424e-06, "loss": 3.4097, "step": 24150 }, { "epoch": 0.24571736653645834, "grad_norm": 8.91572093963623, "learning_rate": 4.295592890760392e-06, "loss": 3.3294, "step": 24155 }, { "epoch": 0.24576822916666666, "grad_norm": 13.804422378540039, "learning_rate": 4.295314631278542e-06, "loss": 3.3976, "step": 24160 }, { "epoch": 0.245819091796875, "grad_norm": 10.138822555541992, "learning_rate": 4.295036325863993e-06, "loss": 3.3067, "step": 24165 }, { "epoch": 0.24586995442708334, "grad_norm": 11.83923053741455, "learning_rate": 4.294757974523865e-06, "loss": 3.5395, "step": 24170 }, { "epoch": 0.24592081705729166, "grad_norm": 12.042837142944336, "learning_rate": 4.294479577265279e-06, "loss": 3.417, "step": 24175 }, { "epoch": 0.2459716796875, "grad_norm": 7.58932638168335, "learning_rate": 4.294201134095359e-06, "loss": 3.1013, "step": 24180 }, { "epoch": 0.24602254231770834, "grad_norm": 14.032426834106445, "learning_rate": 4.293922645021229e-06, "loss": 3.4314, "step": 24185 }, { "epoch": 0.24607340494791666, "grad_norm": 8.114699363708496, "learning_rate": 4.293644110050011e-06, "loss": 3.2654, "step": 24190 }, { "epoch": 0.246124267578125, "grad_norm": 12.10887622833252, "learning_rate": 4.293365529188835e-06, "loss": 3.2226, "step": 24195 }, { "epoch": 0.24617513020833334, "grad_norm": 11.954651832580566, "learning_rate": 4.2930869024448276e-06, "loss": 3.3103, "step": 24200 }, { "epoch": 0.24622599283854166, "grad_norm": 8.925844192504883, "learning_rate": 4.292808229825116e-06, "loss": 3.4017, "step": 24205 }, { "epoch": 0.24627685546875, "grad_norm": 15.956528663635254, "learning_rate": 4.292529511336831e-06, "loss": 3.2091, "step": 24210 }, { "epoch": 0.24632771809895834, "grad_norm": 14.59361457824707, "learning_rate": 4.292250746987104e-06, "loss": 3.3883, "step": 24215 }, { "epoch": 0.24637858072916666, "grad_norm": 15.532940864562988, "learning_rate": 4.2919719367830654e-06, "loss": 3.2584, "step": 24220 }, { "epoch": 0.246429443359375, "grad_norm": 15.537583351135254, "learning_rate": 4.29169308073185e-06, "loss": 3.459, "step": 24225 }, { "epoch": 0.24648030598958334, "grad_norm": 9.399313926696777, "learning_rate": 4.291414178840592e-06, "loss": 3.0459, "step": 24230 }, { "epoch": 0.24653116861979166, "grad_norm": 9.091721534729004, "learning_rate": 4.291135231116426e-06, "loss": 3.4614, "step": 24235 }, { "epoch": 0.24658203125, "grad_norm": 12.210503578186035, "learning_rate": 4.29085623756649e-06, "loss": 3.6569, "step": 24240 }, { "epoch": 0.24663289388020834, "grad_norm": 10.190805435180664, "learning_rate": 4.2905771981979225e-06, "loss": 3.3828, "step": 24245 }, { "epoch": 0.24668375651041666, "grad_norm": 8.21806812286377, "learning_rate": 4.290298113017861e-06, "loss": 3.4683, "step": 24250 }, { "epoch": 0.246734619140625, "grad_norm": 7.7201762199401855, "learning_rate": 4.290018982033447e-06, "loss": 3.3651, "step": 24255 }, { "epoch": 0.24678548177083334, "grad_norm": 14.318398475646973, "learning_rate": 4.289739805251821e-06, "loss": 3.1525, "step": 24260 }, { "epoch": 0.24683634440104166, "grad_norm": 8.810102462768555, "learning_rate": 4.2894605826801265e-06, "loss": 3.2851, "step": 24265 }, { "epoch": 0.24688720703125, "grad_norm": 13.633394241333008, "learning_rate": 4.289181314325508e-06, "loss": 3.3869, "step": 24270 }, { "epoch": 0.24693806966145834, "grad_norm": 17.578227996826172, "learning_rate": 4.288902000195108e-06, "loss": 3.2742, "step": 24275 }, { "epoch": 0.24698893229166666, "grad_norm": 12.528148651123047, "learning_rate": 4.288622640296076e-06, "loss": 3.379, "step": 24280 }, { "epoch": 0.247039794921875, "grad_norm": 12.188680648803711, "learning_rate": 4.288343234635557e-06, "loss": 3.4696, "step": 24285 }, { "epoch": 0.24709065755208334, "grad_norm": 9.514275550842285, "learning_rate": 4.2880637832207005e-06, "loss": 3.2483, "step": 24290 }, { "epoch": 0.24714152018229166, "grad_norm": 13.15481948852539, "learning_rate": 4.287784286058655e-06, "loss": 3.7612, "step": 24295 }, { "epoch": 0.2471923828125, "grad_norm": 13.95080280303955, "learning_rate": 4.287504743156574e-06, "loss": 3.2929, "step": 24300 }, { "epoch": 0.24724324544270834, "grad_norm": 10.395090103149414, "learning_rate": 4.287225154521606e-06, "loss": 3.3828, "step": 24305 }, { "epoch": 0.24729410807291666, "grad_norm": 13.436478614807129, "learning_rate": 4.286945520160907e-06, "loss": 3.0997, "step": 24310 }, { "epoch": 0.247344970703125, "grad_norm": 13.696346282958984, "learning_rate": 4.286665840081631e-06, "loss": 3.2578, "step": 24315 }, { "epoch": 0.24739583333333334, "grad_norm": 14.64127254486084, "learning_rate": 4.286386114290932e-06, "loss": 3.3801, "step": 24320 }, { "epoch": 0.24744669596354166, "grad_norm": 14.282286643981934, "learning_rate": 4.286106342795968e-06, "loss": 3.4712, "step": 24325 }, { "epoch": 0.24749755859375, "grad_norm": 12.41998291015625, "learning_rate": 4.285826525603896e-06, "loss": 3.613, "step": 24330 }, { "epoch": 0.24754842122395834, "grad_norm": 11.224750518798828, "learning_rate": 4.285546662721876e-06, "loss": 3.2357, "step": 24335 }, { "epoch": 0.24759928385416666, "grad_norm": 10.853987693786621, "learning_rate": 4.2852667541570685e-06, "loss": 3.1096, "step": 24340 }, { "epoch": 0.247650146484375, "grad_norm": 9.90202522277832, "learning_rate": 4.284986799916633e-06, "loss": 3.5553, "step": 24345 }, { "epoch": 0.24770100911458334, "grad_norm": 13.316431999206543, "learning_rate": 4.284706800007734e-06, "loss": 3.4482, "step": 24350 }, { "epoch": 0.24775187174479166, "grad_norm": 9.244454383850098, "learning_rate": 4.284426754437535e-06, "loss": 3.1333, "step": 24355 }, { "epoch": 0.247802734375, "grad_norm": 14.12641429901123, "learning_rate": 4.2841466632132e-06, "loss": 3.245, "step": 24360 }, { "epoch": 0.24785359700520834, "grad_norm": 14.708112716674805, "learning_rate": 4.283866526341895e-06, "loss": 3.0999, "step": 24365 }, { "epoch": 0.24790445963541666, "grad_norm": 8.532719612121582, "learning_rate": 4.283586343830789e-06, "loss": 3.2599, "step": 24370 }, { "epoch": 0.247955322265625, "grad_norm": 14.071338653564453, "learning_rate": 4.283306115687048e-06, "loss": 3.2315, "step": 24375 }, { "epoch": 0.24800618489583334, "grad_norm": 10.1383695602417, "learning_rate": 4.2830258419178436e-06, "loss": 3.3294, "step": 24380 }, { "epoch": 0.24805704752604166, "grad_norm": 14.600129127502441, "learning_rate": 4.2827455225303445e-06, "loss": 3.4252, "step": 24385 }, { "epoch": 0.24810791015625, "grad_norm": 12.649153709411621, "learning_rate": 4.2824651575317256e-06, "loss": 3.4132, "step": 24390 }, { "epoch": 0.24815877278645834, "grad_norm": 15.295096397399902, "learning_rate": 4.282184746929157e-06, "loss": 3.549, "step": 24395 }, { "epoch": 0.24820963541666666, "grad_norm": 16.297683715820312, "learning_rate": 4.2819042907298135e-06, "loss": 3.3736, "step": 24400 }, { "epoch": 0.248260498046875, "grad_norm": 14.560355186462402, "learning_rate": 4.281623788940872e-06, "loss": 3.2375, "step": 24405 }, { "epoch": 0.24831136067708334, "grad_norm": 11.617905616760254, "learning_rate": 4.281343241569508e-06, "loss": 3.1846, "step": 24410 }, { "epoch": 0.24836222330729166, "grad_norm": 12.611489295959473, "learning_rate": 4.281062648622899e-06, "loss": 3.4114, "step": 24415 }, { "epoch": 0.2484130859375, "grad_norm": 40.78852844238281, "learning_rate": 4.280782010108224e-06, "loss": 3.2481, "step": 24420 }, { "epoch": 0.24846394856770834, "grad_norm": 14.916101455688477, "learning_rate": 4.280501326032665e-06, "loss": 3.2941, "step": 24425 }, { "epoch": 0.24851481119791666, "grad_norm": 13.82861614227295, "learning_rate": 4.280220596403399e-06, "loss": 3.34, "step": 24430 }, { "epoch": 0.248565673828125, "grad_norm": 12.18250846862793, "learning_rate": 4.279939821227614e-06, "loss": 3.1754, "step": 24435 }, { "epoch": 0.24861653645833334, "grad_norm": 14.767477989196777, "learning_rate": 4.279659000512488e-06, "loss": 3.298, "step": 24440 }, { "epoch": 0.24866739908854166, "grad_norm": 13.104989051818848, "learning_rate": 4.27937813426521e-06, "loss": 3.068, "step": 24445 }, { "epoch": 0.24871826171875, "grad_norm": 7.446710586547852, "learning_rate": 4.279097222492963e-06, "loss": 2.9791, "step": 24450 }, { "epoch": 0.24876912434895834, "grad_norm": 8.23807144165039, "learning_rate": 4.278816265202936e-06, "loss": 3.1207, "step": 24455 }, { "epoch": 0.24881998697916666, "grad_norm": 14.00052547454834, "learning_rate": 4.278535262402316e-06, "loss": 3.3176, "step": 24460 }, { "epoch": 0.248870849609375, "grad_norm": 15.20098876953125, "learning_rate": 4.278254214098293e-06, "loss": 3.3625, "step": 24465 }, { "epoch": 0.24892171223958334, "grad_norm": 13.704755783081055, "learning_rate": 4.277973120298058e-06, "loss": 3.3232, "step": 24470 }, { "epoch": 0.24897257486979166, "grad_norm": 10.962750434875488, "learning_rate": 4.277691981008802e-06, "loss": 3.4346, "step": 24475 }, { "epoch": 0.2490234375, "grad_norm": 12.34006404876709, "learning_rate": 4.277410796237717e-06, "loss": 3.3349, "step": 24480 }, { "epoch": 0.24907430013020834, "grad_norm": 13.989150047302246, "learning_rate": 4.277129565991999e-06, "loss": 3.5294, "step": 24485 }, { "epoch": 0.24912516276041666, "grad_norm": 10.373230934143066, "learning_rate": 4.276848290278842e-06, "loss": 3.3079, "step": 24490 }, { "epoch": 0.249176025390625, "grad_norm": 10.260279655456543, "learning_rate": 4.276566969105443e-06, "loss": 3.2219, "step": 24495 }, { "epoch": 0.24922688802083334, "grad_norm": 9.731127738952637, "learning_rate": 4.276285602478999e-06, "loss": 3.3173, "step": 24500 }, { "epoch": 0.24927775065104166, "grad_norm": 11.35506534576416, "learning_rate": 4.276004190406708e-06, "loss": 3.2735, "step": 24505 }, { "epoch": 0.24932861328125, "grad_norm": 8.43806266784668, "learning_rate": 4.2757227328957715e-06, "loss": 3.1982, "step": 24510 }, { "epoch": 0.24937947591145834, "grad_norm": 9.247403144836426, "learning_rate": 4.27544122995339e-06, "loss": 3.5729, "step": 24515 }, { "epoch": 0.24943033854166666, "grad_norm": 14.659168243408203, "learning_rate": 4.275159681586766e-06, "loss": 3.3769, "step": 24520 }, { "epoch": 0.249481201171875, "grad_norm": 11.563913345336914, "learning_rate": 4.274878087803101e-06, "loss": 3.3972, "step": 24525 }, { "epoch": 0.24953206380208334, "grad_norm": 14.300829887390137, "learning_rate": 4.2745964486096015e-06, "loss": 2.9669, "step": 24530 }, { "epoch": 0.24958292643229166, "grad_norm": 8.588785171508789, "learning_rate": 4.274314764013472e-06, "loss": 3.1472, "step": 24535 }, { "epoch": 0.2496337890625, "grad_norm": 15.398540496826172, "learning_rate": 4.274033034021921e-06, "loss": 3.4659, "step": 24540 }, { "epoch": 0.24968465169270834, "grad_norm": 9.775829315185547, "learning_rate": 4.273751258642154e-06, "loss": 3.5144, "step": 24545 }, { "epoch": 0.24973551432291666, "grad_norm": 10.324780464172363, "learning_rate": 4.2734694378813825e-06, "loss": 2.9803, "step": 24550 }, { "epoch": 0.249786376953125, "grad_norm": 17.24770164489746, "learning_rate": 4.273187571746815e-06, "loss": 3.6072, "step": 24555 }, { "epoch": 0.24983723958333334, "grad_norm": 13.051507949829102, "learning_rate": 4.272905660245664e-06, "loss": 3.2217, "step": 24560 }, { "epoch": 0.24988810221354166, "grad_norm": 15.218391418457031, "learning_rate": 4.272623703385141e-06, "loss": 3.1289, "step": 24565 }, { "epoch": 0.24993896484375, "grad_norm": 11.163311004638672, "learning_rate": 4.272341701172462e-06, "loss": 3.6372, "step": 24570 }, { "epoch": 0.24998982747395834, "grad_norm": 8.303897857666016, "learning_rate": 4.27205965361484e-06, "loss": 3.2102, "step": 24575 }, { "epoch": 0.25, "eval_loss": 3.3746256828308105, "eval_runtime": 161.3971, "eval_samples_per_second": 12.435, "eval_steps_per_second": 12.435, "step": 24576 }, { "epoch": 0.2500406901041667, "grad_norm": 16.447832107543945, "learning_rate": 4.271777560719494e-06, "loss": 3.4576, "step": 24580 }, { "epoch": 0.250091552734375, "grad_norm": 14.935396194458008, "learning_rate": 4.271495422493636e-06, "loss": 2.9597, "step": 24585 }, { "epoch": 0.2501424153645833, "grad_norm": 12.183156967163086, "learning_rate": 4.271213238944489e-06, "loss": 3.1356, "step": 24590 }, { "epoch": 0.2501932779947917, "grad_norm": 9.555173873901367, "learning_rate": 4.270931010079271e-06, "loss": 3.418, "step": 24595 }, { "epoch": 0.250244140625, "grad_norm": 13.501853942871094, "learning_rate": 4.270648735905203e-06, "loss": 3.4476, "step": 24600 }, { "epoch": 0.2502950032552083, "grad_norm": 9.53705883026123, "learning_rate": 4.270366416429506e-06, "loss": 3.3436, "step": 24605 }, { "epoch": 0.2503458658854167, "grad_norm": 10.591668128967285, "learning_rate": 4.270084051659404e-06, "loss": 3.6996, "step": 24610 }, { "epoch": 0.250396728515625, "grad_norm": 12.271478652954102, "learning_rate": 4.2698016416021215e-06, "loss": 3.1625, "step": 24615 }, { "epoch": 0.2504475911458333, "grad_norm": 10.937945365905762, "learning_rate": 4.269519186264884e-06, "loss": 3.0254, "step": 24620 }, { "epoch": 0.2504984537760417, "grad_norm": 13.449241638183594, "learning_rate": 4.269236685654916e-06, "loss": 3.3296, "step": 24625 }, { "epoch": 0.25054931640625, "grad_norm": 13.686077117919922, "learning_rate": 4.268954139779447e-06, "loss": 3.2587, "step": 24630 }, { "epoch": 0.2506001790364583, "grad_norm": 10.223738670349121, "learning_rate": 4.268671548645707e-06, "loss": 3.5311, "step": 24635 }, { "epoch": 0.2506510416666667, "grad_norm": 8.637419700622559, "learning_rate": 4.268388912260924e-06, "loss": 3.1581, "step": 24640 }, { "epoch": 0.250701904296875, "grad_norm": 7.338236331939697, "learning_rate": 4.268106230632329e-06, "loss": 3.0383, "step": 24645 }, { "epoch": 0.2507527669270833, "grad_norm": 10.000212669372559, "learning_rate": 4.267823503767156e-06, "loss": 3.3966, "step": 24650 }, { "epoch": 0.2508036295572917, "grad_norm": 14.338170051574707, "learning_rate": 4.267540731672638e-06, "loss": 3.561, "step": 24655 }, { "epoch": 0.2508544921875, "grad_norm": 8.327690124511719, "learning_rate": 4.267257914356009e-06, "loss": 3.2426, "step": 24660 }, { "epoch": 0.2509053548177083, "grad_norm": 12.349832534790039, "learning_rate": 4.2669750518245055e-06, "loss": 3.118, "step": 24665 }, { "epoch": 0.2509562174479167, "grad_norm": 14.336140632629395, "learning_rate": 4.266692144085364e-06, "loss": 3.5699, "step": 24670 }, { "epoch": 0.251007080078125, "grad_norm": 13.961627960205078, "learning_rate": 4.266409191145822e-06, "loss": 3.4062, "step": 24675 }, { "epoch": 0.2510579427083333, "grad_norm": 17.38461685180664, "learning_rate": 4.266126193013121e-06, "loss": 3.4216, "step": 24680 }, { "epoch": 0.2511088053385417, "grad_norm": 14.52137565612793, "learning_rate": 4.265843149694499e-06, "loss": 3.3358, "step": 24685 }, { "epoch": 0.25115966796875, "grad_norm": 9.268922805786133, "learning_rate": 4.265560061197198e-06, "loss": 3.5275, "step": 24690 }, { "epoch": 0.2512105305989583, "grad_norm": 11.482409477233887, "learning_rate": 4.265276927528462e-06, "loss": 3.5046, "step": 24695 }, { "epoch": 0.2512613932291667, "grad_norm": 11.873368263244629, "learning_rate": 4.264993748695534e-06, "loss": 3.3458, "step": 24700 }, { "epoch": 0.251312255859375, "grad_norm": 8.131156921386719, "learning_rate": 4.264710524705659e-06, "loss": 3.0646, "step": 24705 }, { "epoch": 0.2513631184895833, "grad_norm": 7.6707048416137695, "learning_rate": 4.264427255566084e-06, "loss": 3.1548, "step": 24710 }, { "epoch": 0.2514139811197917, "grad_norm": 13.597451210021973, "learning_rate": 4.2641439412840555e-06, "loss": 3.3094, "step": 24715 }, { "epoch": 0.25146484375, "grad_norm": 9.331670761108398, "learning_rate": 4.263860581866823e-06, "loss": 3.2787, "step": 24720 }, { "epoch": 0.2515157063802083, "grad_norm": 12.325285911560059, "learning_rate": 4.263577177321635e-06, "loss": 3.5232, "step": 24725 }, { "epoch": 0.2515665690104167, "grad_norm": 10.015127182006836, "learning_rate": 4.2632937276557425e-06, "loss": 3.3396, "step": 24730 }, { "epoch": 0.251617431640625, "grad_norm": 11.384984970092773, "learning_rate": 4.263010232876399e-06, "loss": 3.4962, "step": 24735 }, { "epoch": 0.2516682942708333, "grad_norm": 15.124960899353027, "learning_rate": 4.262726692990856e-06, "loss": 3.1269, "step": 24740 }, { "epoch": 0.2517191569010417, "grad_norm": 11.73608112335205, "learning_rate": 4.262443108006369e-06, "loss": 3.5437, "step": 24745 }, { "epoch": 0.25177001953125, "grad_norm": 12.246577262878418, "learning_rate": 4.262159477930192e-06, "loss": 3.264, "step": 24750 }, { "epoch": 0.2518208821614583, "grad_norm": 15.663727760314941, "learning_rate": 4.261875802769583e-06, "loss": 3.2328, "step": 24755 }, { "epoch": 0.2518717447916667, "grad_norm": 10.710125923156738, "learning_rate": 4.261592082531799e-06, "loss": 3.3633, "step": 24760 }, { "epoch": 0.251922607421875, "grad_norm": 12.465362548828125, "learning_rate": 4.261308317224099e-06, "loss": 3.4696, "step": 24765 }, { "epoch": 0.2519734700520833, "grad_norm": 14.243422508239746, "learning_rate": 4.261024506853744e-06, "loss": 3.5099, "step": 24770 }, { "epoch": 0.2520243326822917, "grad_norm": 16.999818801879883, "learning_rate": 4.260740651427994e-06, "loss": 3.5493, "step": 24775 }, { "epoch": 0.2520751953125, "grad_norm": 13.684869766235352, "learning_rate": 4.260456750954111e-06, "loss": 3.6394, "step": 24780 }, { "epoch": 0.2521260579427083, "grad_norm": 11.110882759094238, "learning_rate": 4.26017280543936e-06, "loss": 3.1611, "step": 24785 }, { "epoch": 0.2521769205729167, "grad_norm": 9.618308067321777, "learning_rate": 4.259888814891004e-06, "loss": 3.4842, "step": 24790 }, { "epoch": 0.252227783203125, "grad_norm": 14.321953773498535, "learning_rate": 4.259604779316311e-06, "loss": 3.167, "step": 24795 }, { "epoch": 0.2522786458333333, "grad_norm": 7.829379558563232, "learning_rate": 4.2593206987225466e-06, "loss": 3.2783, "step": 24800 }, { "epoch": 0.2523295084635417, "grad_norm": 11.342432022094727, "learning_rate": 4.2590365731169784e-06, "loss": 3.3707, "step": 24805 }, { "epoch": 0.25238037109375, "grad_norm": 12.385947227478027, "learning_rate": 4.258752402506877e-06, "loss": 3.6793, "step": 24810 }, { "epoch": 0.2524312337239583, "grad_norm": 15.225542068481445, "learning_rate": 4.258468186899513e-06, "loss": 3.1762, "step": 24815 }, { "epoch": 0.2524820963541667, "grad_norm": 10.188759803771973, "learning_rate": 4.258183926302157e-06, "loss": 3.0373, "step": 24820 }, { "epoch": 0.252532958984375, "grad_norm": 12.267877578735352, "learning_rate": 4.257899620722081e-06, "loss": 3.3485, "step": 24825 }, { "epoch": 0.2525838216145833, "grad_norm": 8.663679122924805, "learning_rate": 4.25761527016656e-06, "loss": 3.5647, "step": 24830 }, { "epoch": 0.2526346842447917, "grad_norm": 11.342432022094727, "learning_rate": 4.257330874642869e-06, "loss": 3.8163, "step": 24835 }, { "epoch": 0.252685546875, "grad_norm": 14.044656753540039, "learning_rate": 4.257046434158284e-06, "loss": 3.6804, "step": 24840 }, { "epoch": 0.2527364095052083, "grad_norm": 14.019876480102539, "learning_rate": 4.256761948720083e-06, "loss": 3.2891, "step": 24845 }, { "epoch": 0.2527872721354167, "grad_norm": 8.586852073669434, "learning_rate": 4.256477418335543e-06, "loss": 3.3606, "step": 24850 }, { "epoch": 0.252838134765625, "grad_norm": 15.628178596496582, "learning_rate": 4.256192843011946e-06, "loss": 3.695, "step": 24855 }, { "epoch": 0.2528889973958333, "grad_norm": 12.91670036315918, "learning_rate": 4.2559082227565696e-06, "loss": 3.2321, "step": 24860 }, { "epoch": 0.2529398600260417, "grad_norm": 15.387182235717773, "learning_rate": 4.255623557576699e-06, "loss": 3.7515, "step": 24865 }, { "epoch": 0.25299072265625, "grad_norm": 11.862276077270508, "learning_rate": 4.255338847479615e-06, "loss": 3.1253, "step": 24870 }, { "epoch": 0.2530415852864583, "grad_norm": 16.38576889038086, "learning_rate": 4.255054092472604e-06, "loss": 3.1637, "step": 24875 }, { "epoch": 0.2530924479166667, "grad_norm": 8.539254188537598, "learning_rate": 4.254769292562948e-06, "loss": 3.4981, "step": 24880 }, { "epoch": 0.253143310546875, "grad_norm": 7.540289402008057, "learning_rate": 4.2544844477579375e-06, "loss": 3.4908, "step": 24885 }, { "epoch": 0.2531941731770833, "grad_norm": 11.539024353027344, "learning_rate": 4.254199558064857e-06, "loss": 3.0611, "step": 24890 }, { "epoch": 0.2532450358072917, "grad_norm": 9.188528060913086, "learning_rate": 4.2539146234909965e-06, "loss": 3.7936, "step": 24895 }, { "epoch": 0.2532958984375, "grad_norm": 13.862792015075684, "learning_rate": 4.2536296440436465e-06, "loss": 3.2935, "step": 24900 }, { "epoch": 0.2533467610677083, "grad_norm": 15.22266960144043, "learning_rate": 4.253344619730098e-06, "loss": 3.6186, "step": 24905 }, { "epoch": 0.2533976236979167, "grad_norm": 8.168689727783203, "learning_rate": 4.2530595505576425e-06, "loss": 3.4356, "step": 24910 }, { "epoch": 0.253448486328125, "grad_norm": 8.944549560546875, "learning_rate": 4.252774436533574e-06, "loss": 3.3316, "step": 24915 }, { "epoch": 0.2534993489583333, "grad_norm": 12.66011905670166, "learning_rate": 4.252489277665188e-06, "loss": 3.8579, "step": 24920 }, { "epoch": 0.2535502115885417, "grad_norm": 14.050813674926758, "learning_rate": 4.252204073959778e-06, "loss": 3.3374, "step": 24925 }, { "epoch": 0.25360107421875, "grad_norm": 13.361396789550781, "learning_rate": 4.251918825424642e-06, "loss": 3.3719, "step": 24930 }, { "epoch": 0.2536519368489583, "grad_norm": 11.718053817749023, "learning_rate": 4.251633532067079e-06, "loss": 3.3046, "step": 24935 }, { "epoch": 0.2537027994791667, "grad_norm": 12.97679615020752, "learning_rate": 4.251348193894386e-06, "loss": 3.5764, "step": 24940 }, { "epoch": 0.253753662109375, "grad_norm": 9.908891677856445, "learning_rate": 4.251062810913867e-06, "loss": 3.2527, "step": 24945 }, { "epoch": 0.2538045247395833, "grad_norm": 13.16048526763916, "learning_rate": 4.250777383132819e-06, "loss": 3.912, "step": 24950 }, { "epoch": 0.2538553873697917, "grad_norm": 15.14319896697998, "learning_rate": 4.250491910558546e-06, "loss": 3.3679, "step": 24955 }, { "epoch": 0.25390625, "grad_norm": 10.365280151367188, "learning_rate": 4.250206393198354e-06, "loss": 3.3091, "step": 24960 }, { "epoch": 0.2539571126302083, "grad_norm": 12.37360668182373, "learning_rate": 4.249920831059546e-06, "loss": 3.19, "step": 24965 }, { "epoch": 0.2540079752604167, "grad_norm": 10.952878952026367, "learning_rate": 4.249635224149428e-06, "loss": 3.2674, "step": 24970 }, { "epoch": 0.254058837890625, "grad_norm": 13.540122032165527, "learning_rate": 4.249349572475307e-06, "loss": 3.0913, "step": 24975 }, { "epoch": 0.2541097005208333, "grad_norm": 13.925262451171875, "learning_rate": 4.249063876044493e-06, "loss": 3.2861, "step": 24980 }, { "epoch": 0.2541605631510417, "grad_norm": 15.029991149902344, "learning_rate": 4.248778134864293e-06, "loss": 3.4239, "step": 24985 }, { "epoch": 0.25421142578125, "grad_norm": 14.305146217346191, "learning_rate": 4.248492348942021e-06, "loss": 3.9219, "step": 24990 }, { "epoch": 0.2542622884114583, "grad_norm": 8.43241024017334, "learning_rate": 4.248206518284984e-06, "loss": 3.3433, "step": 24995 }, { "epoch": 0.2543131510416667, "grad_norm": 8.676648139953613, "learning_rate": 4.247920642900499e-06, "loss": 3.4091, "step": 25000 }, { "epoch": 0.254364013671875, "grad_norm": 14.321340560913086, "learning_rate": 4.247634722795879e-06, "loss": 3.2965, "step": 25005 }, { "epoch": 0.2544148763020833, "grad_norm": 12.503641128540039, "learning_rate": 4.247348757978438e-06, "loss": 3.3245, "step": 25010 }, { "epoch": 0.2544657389322917, "grad_norm": 10.391291618347168, "learning_rate": 4.247062748455494e-06, "loss": 3.0437, "step": 25015 }, { "epoch": 0.2545166015625, "grad_norm": 9.460679054260254, "learning_rate": 4.246776694234364e-06, "loss": 3.0883, "step": 25020 }, { "epoch": 0.2545674641927083, "grad_norm": 9.043420791625977, "learning_rate": 4.2464905953223644e-06, "loss": 2.9897, "step": 25025 }, { "epoch": 0.2546183268229167, "grad_norm": 15.813292503356934, "learning_rate": 4.246204451726819e-06, "loss": 3.0896, "step": 25030 }, { "epoch": 0.254669189453125, "grad_norm": 10.58598804473877, "learning_rate": 4.245918263455045e-06, "loss": 3.1905, "step": 25035 }, { "epoch": 0.2547200520833333, "grad_norm": 7.66256856918335, "learning_rate": 4.2456320305143675e-06, "loss": 2.9339, "step": 25040 }, { "epoch": 0.2547709147135417, "grad_norm": 13.066410064697266, "learning_rate": 4.245345752912107e-06, "loss": 3.4187, "step": 25045 }, { "epoch": 0.25482177734375, "grad_norm": 10.616400718688965, "learning_rate": 4.245059430655589e-06, "loss": 3.6924, "step": 25050 }, { "epoch": 0.2548726399739583, "grad_norm": 9.606547355651855, "learning_rate": 4.24477306375214e-06, "loss": 3.4028, "step": 25055 }, { "epoch": 0.2549235026041667, "grad_norm": 12.106054306030273, "learning_rate": 4.244486652209086e-06, "loss": 3.0908, "step": 25060 }, { "epoch": 0.254974365234375, "grad_norm": 10.97599983215332, "learning_rate": 4.244200196033753e-06, "loss": 3.1756, "step": 25065 }, { "epoch": 0.2550252278645833, "grad_norm": 15.771632194519043, "learning_rate": 4.243913695233472e-06, "loss": 3.3499, "step": 25070 }, { "epoch": 0.2550760904947917, "grad_norm": 13.067724227905273, "learning_rate": 4.243627149815574e-06, "loss": 3.4849, "step": 25075 }, { "epoch": 0.255126953125, "grad_norm": 15.27235221862793, "learning_rate": 4.2433405597873865e-06, "loss": 3.3464, "step": 25080 }, { "epoch": 0.2551778157552083, "grad_norm": 14.676030158996582, "learning_rate": 4.243053925156245e-06, "loss": 3.5484, "step": 25085 }, { "epoch": 0.2552286783854167, "grad_norm": 14.75976276397705, "learning_rate": 4.242767245929482e-06, "loss": 3.4655, "step": 25090 }, { "epoch": 0.255279541015625, "grad_norm": 10.113024711608887, "learning_rate": 4.242480522114432e-06, "loss": 3.4032, "step": 25095 }, { "epoch": 0.2553304036458333, "grad_norm": 15.981361389160156, "learning_rate": 4.242193753718431e-06, "loss": 2.9224, "step": 25100 }, { "epoch": 0.2553812662760417, "grad_norm": 10.057245254516602, "learning_rate": 4.241906940748815e-06, "loss": 3.4517, "step": 25105 }, { "epoch": 0.25543212890625, "grad_norm": 13.309122085571289, "learning_rate": 4.241620083212924e-06, "loss": 3.3819, "step": 25110 }, { "epoch": 0.2554829915364583, "grad_norm": 13.638635635375977, "learning_rate": 4.2413331811180945e-06, "loss": 3.2491, "step": 25115 }, { "epoch": 0.2555338541666667, "grad_norm": 9.086438179016113, "learning_rate": 4.24104623447167e-06, "loss": 3.3659, "step": 25120 }, { "epoch": 0.255584716796875, "grad_norm": 15.72012710571289, "learning_rate": 4.240759243280989e-06, "loss": 3.2641, "step": 25125 }, { "epoch": 0.2556355794270833, "grad_norm": 8.95547866821289, "learning_rate": 4.240472207553395e-06, "loss": 2.916, "step": 25130 }, { "epoch": 0.2556864420572917, "grad_norm": 10.37775993347168, "learning_rate": 4.240185127296233e-06, "loss": 3.2412, "step": 25135 }, { "epoch": 0.2557373046875, "grad_norm": 12.915329933166504, "learning_rate": 4.239898002516848e-06, "loss": 3.3121, "step": 25140 }, { "epoch": 0.2557881673177083, "grad_norm": 14.557816505432129, "learning_rate": 4.239610833222583e-06, "loss": 3.604, "step": 25145 }, { "epoch": 0.2558390299479167, "grad_norm": 10.346381187438965, "learning_rate": 4.239323619420789e-06, "loss": 3.3416, "step": 25150 }, { "epoch": 0.255889892578125, "grad_norm": 9.316656112670898, "learning_rate": 4.239036361118811e-06, "loss": 3.5399, "step": 25155 }, { "epoch": 0.2559407552083333, "grad_norm": 10.786599159240723, "learning_rate": 4.238749058324001e-06, "loss": 3.5514, "step": 25160 }, { "epoch": 0.2559916178385417, "grad_norm": 12.696056365966797, "learning_rate": 4.238461711043708e-06, "loss": 3.1802, "step": 25165 }, { "epoch": 0.25604248046875, "grad_norm": 12.015348434448242, "learning_rate": 4.238174319285283e-06, "loss": 3.1336, "step": 25170 }, { "epoch": 0.2560933430989583, "grad_norm": 12.305672645568848, "learning_rate": 4.237886883056082e-06, "loss": 3.1156, "step": 25175 }, { "epoch": 0.2561442057291667, "grad_norm": 15.55685043334961, "learning_rate": 4.237599402363456e-06, "loss": 3.3255, "step": 25180 }, { "epoch": 0.256195068359375, "grad_norm": 13.057061195373535, "learning_rate": 4.237311877214761e-06, "loss": 3.5198, "step": 25185 }, { "epoch": 0.2562459309895833, "grad_norm": 13.559088706970215, "learning_rate": 4.237024307617354e-06, "loss": 3.0956, "step": 25190 }, { "epoch": 0.2562967936197917, "grad_norm": 8.187592506408691, "learning_rate": 4.236736693578591e-06, "loss": 3.4795, "step": 25195 }, { "epoch": 0.25634765625, "grad_norm": 10.342399597167969, "learning_rate": 4.236449035105832e-06, "loss": 3.3141, "step": 25200 }, { "epoch": 0.2563985188802083, "grad_norm": 12.40994930267334, "learning_rate": 4.236161332206436e-06, "loss": 3.7125, "step": 25205 }, { "epoch": 0.2564493815104167, "grad_norm": 14.115476608276367, "learning_rate": 4.235873584887764e-06, "loss": 3.8294, "step": 25210 }, { "epoch": 0.256500244140625, "grad_norm": 9.711331367492676, "learning_rate": 4.2355857931571765e-06, "loss": 2.9399, "step": 25215 }, { "epoch": 0.2565511067708333, "grad_norm": 11.328681945800781, "learning_rate": 4.235297957022039e-06, "loss": 3.3246, "step": 25220 }, { "epoch": 0.2566019694010417, "grad_norm": 9.998047828674316, "learning_rate": 4.235010076489715e-06, "loss": 3.3671, "step": 25225 }, { "epoch": 0.25665283203125, "grad_norm": 9.312052726745605, "learning_rate": 4.234722151567569e-06, "loss": 3.6494, "step": 25230 }, { "epoch": 0.2567036946614583, "grad_norm": 14.352344512939453, "learning_rate": 4.2344341822629675e-06, "loss": 3.1944, "step": 25235 }, { "epoch": 0.2567545572916667, "grad_norm": 14.686880111694336, "learning_rate": 4.234146168583278e-06, "loss": 3.1691, "step": 25240 }, { "epoch": 0.256805419921875, "grad_norm": 11.81808853149414, "learning_rate": 4.233858110535871e-06, "loss": 3.2245, "step": 25245 }, { "epoch": 0.2568562825520833, "grad_norm": 11.544474601745605, "learning_rate": 4.233570008128115e-06, "loss": 3.4465, "step": 25250 }, { "epoch": 0.2569071451822917, "grad_norm": 9.362020492553711, "learning_rate": 4.233281861367381e-06, "loss": 3.4905, "step": 25255 }, { "epoch": 0.2569580078125, "grad_norm": 14.878250122070312, "learning_rate": 4.232993670261041e-06, "loss": 3.437, "step": 25260 }, { "epoch": 0.2570088704427083, "grad_norm": 10.850860595703125, "learning_rate": 4.23270543481647e-06, "loss": 3.1745, "step": 25265 }, { "epoch": 0.2570597330729167, "grad_norm": 7.937302112579346, "learning_rate": 4.23241715504104e-06, "loss": 3.3978, "step": 25270 }, { "epoch": 0.257110595703125, "grad_norm": 11.461185455322266, "learning_rate": 4.232128830942128e-06, "loss": 3.3198, "step": 25275 }, { "epoch": 0.2571614583333333, "grad_norm": 12.847282409667969, "learning_rate": 4.231840462527111e-06, "loss": 3.3679, "step": 25280 }, { "epoch": 0.2572123209635417, "grad_norm": 8.424904823303223, "learning_rate": 4.231552049803366e-06, "loss": 3.529, "step": 25285 }, { "epoch": 0.25726318359375, "grad_norm": 8.810689926147461, "learning_rate": 4.231263592778271e-06, "loss": 3.1633, "step": 25290 }, { "epoch": 0.2573140462239583, "grad_norm": 12.490745544433594, "learning_rate": 4.230975091459209e-06, "loss": 3.3352, "step": 25295 }, { "epoch": 0.2573649088541667, "grad_norm": 9.57895278930664, "learning_rate": 4.230686545853558e-06, "loss": 3.1129, "step": 25300 }, { "epoch": 0.257415771484375, "grad_norm": 11.672348022460938, "learning_rate": 4.230397955968704e-06, "loss": 3.7327, "step": 25305 }, { "epoch": 0.2574666341145833, "grad_norm": 13.51906967163086, "learning_rate": 4.230109321812027e-06, "loss": 3.0903, "step": 25310 }, { "epoch": 0.2575174967447917, "grad_norm": 8.938597679138184, "learning_rate": 4.2298206433909135e-06, "loss": 3.3603, "step": 25315 }, { "epoch": 0.257568359375, "grad_norm": 13.414045333862305, "learning_rate": 4.229531920712748e-06, "loss": 2.9802, "step": 25320 }, { "epoch": 0.2576192220052083, "grad_norm": 11.90649127960205, "learning_rate": 4.2292431537849194e-06, "loss": 3.3947, "step": 25325 }, { "epoch": 0.2576700846354167, "grad_norm": 16.518070220947266, "learning_rate": 4.228954342614814e-06, "loss": 3.4593, "step": 25330 }, { "epoch": 0.257720947265625, "grad_norm": 13.547666549682617, "learning_rate": 4.2286654872098225e-06, "loss": 3.7998, "step": 25335 }, { "epoch": 0.2577718098958333, "grad_norm": 10.90760326385498, "learning_rate": 4.2283765875773325e-06, "loss": 3.3047, "step": 25340 }, { "epoch": 0.2578226725260417, "grad_norm": 17.5865535736084, "learning_rate": 4.228087643724738e-06, "loss": 3.4747, "step": 25345 }, { "epoch": 0.25787353515625, "grad_norm": 10.39489459991455, "learning_rate": 4.227798655659431e-06, "loss": 3.4802, "step": 25350 }, { "epoch": 0.2579243977864583, "grad_norm": 12.231903076171875, "learning_rate": 4.227509623388805e-06, "loss": 3.5976, "step": 25355 }, { "epoch": 0.2579752604166667, "grad_norm": 15.524836540222168, "learning_rate": 4.2272205469202544e-06, "loss": 3.0221, "step": 25360 }, { "epoch": 0.258026123046875, "grad_norm": 13.699244499206543, "learning_rate": 4.226931426261175e-06, "loss": 3.6252, "step": 25365 }, { "epoch": 0.2580769856770833, "grad_norm": 10.88588809967041, "learning_rate": 4.2266422614189656e-06, "loss": 3.4606, "step": 25370 }, { "epoch": 0.2581278483072917, "grad_norm": 12.582985877990723, "learning_rate": 4.226353052401023e-06, "loss": 4.0478, "step": 25375 }, { "epoch": 0.2581787109375, "grad_norm": 14.824645042419434, "learning_rate": 4.2260637992147455e-06, "loss": 3.245, "step": 25380 }, { "epoch": 0.2582295735677083, "grad_norm": 8.575550079345703, "learning_rate": 4.225774501867536e-06, "loss": 3.2326, "step": 25385 }, { "epoch": 0.2582804361979167, "grad_norm": 11.453362464904785, "learning_rate": 4.225485160366795e-06, "loss": 3.3931, "step": 25390 }, { "epoch": 0.258331298828125, "grad_norm": 12.148056030273438, "learning_rate": 4.2251957747199246e-06, "loss": 3.3584, "step": 25395 }, { "epoch": 0.2583821614583333, "grad_norm": 13.610539436340332, "learning_rate": 4.22490634493433e-06, "loss": 3.7102, "step": 25400 }, { "epoch": 0.2584330240885417, "grad_norm": 15.177289962768555, "learning_rate": 4.224616871017414e-06, "loss": 3.6817, "step": 25405 }, { "epoch": 0.25848388671875, "grad_norm": 6.891851902008057, "learning_rate": 4.224327352976585e-06, "loss": 3.2669, "step": 25410 }, { "epoch": 0.2585347493489583, "grad_norm": 14.693187713623047, "learning_rate": 4.224037790819249e-06, "loss": 3.4193, "step": 25415 }, { "epoch": 0.2585856119791667, "grad_norm": 12.441463470458984, "learning_rate": 4.223748184552815e-06, "loss": 3.5708, "step": 25420 }, { "epoch": 0.258636474609375, "grad_norm": 12.066529273986816, "learning_rate": 4.223458534184693e-06, "loss": 3.327, "step": 25425 }, { "epoch": 0.2586873372395833, "grad_norm": 13.036898612976074, "learning_rate": 4.223168839722293e-06, "loss": 3.4348, "step": 25430 }, { "epoch": 0.2587381998697917, "grad_norm": 8.6354341506958, "learning_rate": 4.222879101173025e-06, "loss": 3.3506, "step": 25435 }, { "epoch": 0.2587890625, "grad_norm": 15.209229469299316, "learning_rate": 4.222589318544305e-06, "loss": 3.3671, "step": 25440 }, { "epoch": 0.2588399251302083, "grad_norm": 8.996095657348633, "learning_rate": 4.222299491843544e-06, "loss": 3.3109, "step": 25445 }, { "epoch": 0.2588907877604167, "grad_norm": 15.306842803955078, "learning_rate": 4.2220096210781604e-06, "loss": 3.2306, "step": 25450 }, { "epoch": 0.258941650390625, "grad_norm": 10.3480863571167, "learning_rate": 4.221719706255568e-06, "loss": 3.7634, "step": 25455 }, { "epoch": 0.2589925130208333, "grad_norm": 8.632323265075684, "learning_rate": 4.221429747383185e-06, "loss": 3.5949, "step": 25460 }, { "epoch": 0.2590433756510417, "grad_norm": 7.815703392028809, "learning_rate": 4.221139744468431e-06, "loss": 3.553, "step": 25465 }, { "epoch": 0.25909423828125, "grad_norm": 11.302229881286621, "learning_rate": 4.220849697518723e-06, "loss": 3.4386, "step": 25470 }, { "epoch": 0.2591451009114583, "grad_norm": 14.221625328063965, "learning_rate": 4.2205596065414845e-06, "loss": 3.1683, "step": 25475 }, { "epoch": 0.2591959635416667, "grad_norm": 12.27514362335205, "learning_rate": 4.220269471544136e-06, "loss": 3.2244, "step": 25480 }, { "epoch": 0.259246826171875, "grad_norm": 13.549633026123047, "learning_rate": 4.2199792925341e-06, "loss": 3.4984, "step": 25485 }, { "epoch": 0.2592976888020833, "grad_norm": 10.52974796295166, "learning_rate": 4.219689069518802e-06, "loss": 3.3162, "step": 25490 }, { "epoch": 0.2593485514322917, "grad_norm": 10.306483268737793, "learning_rate": 4.219398802505668e-06, "loss": 2.8758, "step": 25495 }, { "epoch": 0.2593994140625, "grad_norm": 13.99836540222168, "learning_rate": 4.2191084915021215e-06, "loss": 3.3814, "step": 25500 }, { "epoch": 0.2594502766927083, "grad_norm": 14.140470504760742, "learning_rate": 4.218818136515593e-06, "loss": 3.4431, "step": 25505 }, { "epoch": 0.2595011393229167, "grad_norm": 9.10169506072998, "learning_rate": 4.2185277375535086e-06, "loss": 3.4482, "step": 25510 }, { "epoch": 0.259552001953125, "grad_norm": 16.0885009765625, "learning_rate": 4.218237294623301e-06, "loss": 3.5298, "step": 25515 }, { "epoch": 0.2596028645833333, "grad_norm": 17.33360481262207, "learning_rate": 4.217946807732398e-06, "loss": 3.2658, "step": 25520 }, { "epoch": 0.2596537272135417, "grad_norm": 11.709734916687012, "learning_rate": 4.217656276888233e-06, "loss": 3.4651, "step": 25525 }, { "epoch": 0.25970458984375, "grad_norm": 8.238667488098145, "learning_rate": 4.2173657020982405e-06, "loss": 3.2665, "step": 25530 }, { "epoch": 0.2597554524739583, "grad_norm": 6.8458099365234375, "learning_rate": 4.217075083369853e-06, "loss": 3.512, "step": 25535 }, { "epoch": 0.2598063151041667, "grad_norm": 15.638907432556152, "learning_rate": 4.216784420710506e-06, "loss": 3.6318, "step": 25540 }, { "epoch": 0.259857177734375, "grad_norm": 9.434146881103516, "learning_rate": 4.216493714127638e-06, "loss": 3.2256, "step": 25545 }, { "epoch": 0.2599080403645833, "grad_norm": 16.002166748046875, "learning_rate": 4.216202963628684e-06, "loss": 3.6784, "step": 25550 }, { "epoch": 0.2599589029947917, "grad_norm": 10.18635368347168, "learning_rate": 4.215912169221083e-06, "loss": 3.5994, "step": 25555 }, { "epoch": 0.260009765625, "grad_norm": 8.981634140014648, "learning_rate": 4.215621330912277e-06, "loss": 3.4051, "step": 25560 }, { "epoch": 0.2600606282552083, "grad_norm": 11.532533645629883, "learning_rate": 4.2153304487097065e-06, "loss": 3.3125, "step": 25565 }, { "epoch": 0.2601114908854167, "grad_norm": 11.587449073791504, "learning_rate": 4.215039522620813e-06, "loss": 3.2259, "step": 25570 }, { "epoch": 0.260162353515625, "grad_norm": 11.612677574157715, "learning_rate": 4.214748552653038e-06, "loss": 3.4293, "step": 25575 }, { "epoch": 0.2602132161458333, "grad_norm": 8.308237075805664, "learning_rate": 4.21445753881383e-06, "loss": 3.0921, "step": 25580 }, { "epoch": 0.2602640787760417, "grad_norm": 12.204816818237305, "learning_rate": 4.214166481110631e-06, "loss": 3.1295, "step": 25585 }, { "epoch": 0.26031494140625, "grad_norm": 12.297460556030273, "learning_rate": 4.213875379550889e-06, "loss": 3.623, "step": 25590 }, { "epoch": 0.2603658040364583, "grad_norm": 9.227754592895508, "learning_rate": 4.213584234142052e-06, "loss": 3.2325, "step": 25595 }, { "epoch": 0.2604166666666667, "grad_norm": 11.207293510437012, "learning_rate": 4.213293044891568e-06, "loss": 3.1893, "step": 25600 }, { "epoch": 0.260467529296875, "grad_norm": 7.930004596710205, "learning_rate": 4.213001811806888e-06, "loss": 3.0561, "step": 25605 }, { "epoch": 0.2605183919270833, "grad_norm": 14.136908531188965, "learning_rate": 4.212710534895462e-06, "loss": 3.5516, "step": 25610 }, { "epoch": 0.2605692545572917, "grad_norm": 14.007734298706055, "learning_rate": 4.212419214164744e-06, "loss": 3.3025, "step": 25615 }, { "epoch": 0.2606201171875, "grad_norm": 14.646519660949707, "learning_rate": 4.212127849622186e-06, "loss": 3.4518, "step": 25620 }, { "epoch": 0.2606709798177083, "grad_norm": 15.032630920410156, "learning_rate": 4.211836441275242e-06, "loss": 3.4148, "step": 25625 }, { "epoch": 0.2607218424479167, "grad_norm": 9.241325378417969, "learning_rate": 4.211544989131369e-06, "loss": 3.1756, "step": 25630 }, { "epoch": 0.260772705078125, "grad_norm": 13.654480934143066, "learning_rate": 4.211253493198023e-06, "loss": 3.1426, "step": 25635 }, { "epoch": 0.2608235677083333, "grad_norm": 10.502730369567871, "learning_rate": 4.210961953482662e-06, "loss": 3.0288, "step": 25640 }, { "epoch": 0.2608744303385417, "grad_norm": 11.17243480682373, "learning_rate": 4.210670369992745e-06, "loss": 3.2139, "step": 25645 }, { "epoch": 0.26092529296875, "grad_norm": 17.18389320373535, "learning_rate": 4.210378742735732e-06, "loss": 3.17, "step": 25650 }, { "epoch": 0.2609761555989583, "grad_norm": 10.495933532714844, "learning_rate": 4.210087071719084e-06, "loss": 3.1387, "step": 25655 }, { "epoch": 0.2610270182291667, "grad_norm": 21.2041015625, "learning_rate": 4.209795356950264e-06, "loss": 3.7895, "step": 25660 }, { "epoch": 0.261077880859375, "grad_norm": 13.002775192260742, "learning_rate": 4.209503598436735e-06, "loss": 3.4469, "step": 25665 }, { "epoch": 0.2611287434895833, "grad_norm": 14.954187393188477, "learning_rate": 4.209211796185961e-06, "loss": 3.4128, "step": 25670 }, { "epoch": 0.2611796061197917, "grad_norm": 12.93187141418457, "learning_rate": 4.20891995020541e-06, "loss": 3.4071, "step": 25675 }, { "epoch": 0.26123046875, "grad_norm": 13.916560173034668, "learning_rate": 4.208628060502545e-06, "loss": 3.174, "step": 25680 }, { "epoch": 0.2612813313802083, "grad_norm": 10.77669620513916, "learning_rate": 4.2083361270848374e-06, "loss": 3.2852, "step": 25685 }, { "epoch": 0.2613321940104167, "grad_norm": 9.5277099609375, "learning_rate": 4.2080441499597546e-06, "loss": 3.375, "step": 25690 }, { "epoch": 0.261383056640625, "grad_norm": 12.276413917541504, "learning_rate": 4.207752129134768e-06, "loss": 3.5693, "step": 25695 }, { "epoch": 0.2614339192708333, "grad_norm": 26.959915161132812, "learning_rate": 4.207460064617347e-06, "loss": 3.549, "step": 25700 }, { "epoch": 0.2614847819010417, "grad_norm": 11.67579460144043, "learning_rate": 4.207167956414965e-06, "loss": 3.703, "step": 25705 }, { "epoch": 0.26153564453125, "grad_norm": 10.390996932983398, "learning_rate": 4.206875804535095e-06, "loss": 3.3588, "step": 25710 }, { "epoch": 0.2615865071614583, "grad_norm": 17.176240921020508, "learning_rate": 4.206583608985213e-06, "loss": 3.6047, "step": 25715 }, { "epoch": 0.2616373697916667, "grad_norm": 8.651700019836426, "learning_rate": 4.206291369772793e-06, "loss": 3.5504, "step": 25720 }, { "epoch": 0.261688232421875, "grad_norm": 12.320103645324707, "learning_rate": 4.205999086905313e-06, "loss": 3.5346, "step": 25725 }, { "epoch": 0.2617390950520833, "grad_norm": 10.914161682128906, "learning_rate": 4.205706760390251e-06, "loss": 2.9514, "step": 25730 }, { "epoch": 0.2617899576822917, "grad_norm": 11.681282997131348, "learning_rate": 4.205414390235087e-06, "loss": 3.2611, "step": 25735 }, { "epoch": 0.2618408203125, "grad_norm": 13.976754188537598, "learning_rate": 4.205121976447298e-06, "loss": 3.4745, "step": 25740 }, { "epoch": 0.2618916829427083, "grad_norm": 11.412381172180176, "learning_rate": 4.204829519034368e-06, "loss": 2.935, "step": 25745 }, { "epoch": 0.2619425455729167, "grad_norm": 16.802989959716797, "learning_rate": 4.204537018003779e-06, "loss": 3.4396, "step": 25750 }, { "epoch": 0.261993408203125, "grad_norm": 13.803888320922852, "learning_rate": 4.204244473363015e-06, "loss": 3.4686, "step": 25755 }, { "epoch": 0.2620442708333333, "grad_norm": 13.405187606811523, "learning_rate": 4.203951885119559e-06, "loss": 3.2148, "step": 25760 }, { "epoch": 0.2620951334635417, "grad_norm": 11.889911651611328, "learning_rate": 4.2036592532808986e-06, "loss": 3.3572, "step": 25765 }, { "epoch": 0.26214599609375, "grad_norm": 16.20751190185547, "learning_rate": 4.20336657785452e-06, "loss": 3.1729, "step": 25770 }, { "epoch": 0.2621968587239583, "grad_norm": 8.080984115600586, "learning_rate": 4.203073858847912e-06, "loss": 3.1279, "step": 25775 }, { "epoch": 0.2622477213541667, "grad_norm": 13.503204345703125, "learning_rate": 4.202781096268561e-06, "loss": 3.3724, "step": 25780 }, { "epoch": 0.262298583984375, "grad_norm": 12.27587890625, "learning_rate": 4.20248829012396e-06, "loss": 3.5408, "step": 25785 }, { "epoch": 0.2623494466145833, "grad_norm": 8.207764625549316, "learning_rate": 4.202195440421601e-06, "loss": 3.4311, "step": 25790 }, { "epoch": 0.2624003092447917, "grad_norm": 7.626378536224365, "learning_rate": 4.201902547168974e-06, "loss": 3.2892, "step": 25795 }, { "epoch": 0.262451171875, "grad_norm": 7.448328495025635, "learning_rate": 4.201609610373573e-06, "loss": 3.3419, "step": 25800 }, { "epoch": 0.2625020345052083, "grad_norm": 9.728148460388184, "learning_rate": 4.201316630042895e-06, "loss": 3.623, "step": 25805 }, { "epoch": 0.2625528971354167, "grad_norm": 13.286189079284668, "learning_rate": 4.201023606184433e-06, "loss": 3.4971, "step": 25810 }, { "epoch": 0.262603759765625, "grad_norm": 9.246475219726562, "learning_rate": 4.200730538805687e-06, "loss": 3.9024, "step": 25815 }, { "epoch": 0.2626546223958333, "grad_norm": 8.17664909362793, "learning_rate": 4.200437427914151e-06, "loss": 3.3405, "step": 25820 }, { "epoch": 0.2627054850260417, "grad_norm": 9.446663856506348, "learning_rate": 4.200144273517328e-06, "loss": 3.332, "step": 25825 }, { "epoch": 0.26275634765625, "grad_norm": 10.07276725769043, "learning_rate": 4.1998510756227165e-06, "loss": 3.7568, "step": 25830 }, { "epoch": 0.2628072102864583, "grad_norm": 8.375320434570312, "learning_rate": 4.199557834237818e-06, "loss": 3.5829, "step": 25835 }, { "epoch": 0.2628580729166667, "grad_norm": 11.211957931518555, "learning_rate": 4.1992645493701346e-06, "loss": 3.3721, "step": 25840 }, { "epoch": 0.262908935546875, "grad_norm": 15.27626895904541, "learning_rate": 4.198971221027172e-06, "loss": 3.3733, "step": 25845 }, { "epoch": 0.2629597981770833, "grad_norm": 18.51883888244629, "learning_rate": 4.198677849216432e-06, "loss": 3.5998, "step": 25850 }, { "epoch": 0.2630106608072917, "grad_norm": 11.279624938964844, "learning_rate": 4.198384433945422e-06, "loss": 3.3106, "step": 25855 }, { "epoch": 0.2630615234375, "grad_norm": 8.512675285339355, "learning_rate": 4.198090975221649e-06, "loss": 2.9766, "step": 25860 }, { "epoch": 0.2631123860677083, "grad_norm": 11.04448413848877, "learning_rate": 4.1977974730526206e-06, "loss": 3.4403, "step": 25865 }, { "epoch": 0.2631632486979167, "grad_norm": 8.66542911529541, "learning_rate": 4.197503927445846e-06, "loss": 3.1101, "step": 25870 }, { "epoch": 0.263214111328125, "grad_norm": 14.436765670776367, "learning_rate": 4.197210338408837e-06, "loss": 3.4673, "step": 25875 }, { "epoch": 0.2632649739583333, "grad_norm": 12.47187614440918, "learning_rate": 4.196916705949103e-06, "loss": 3.1727, "step": 25880 }, { "epoch": 0.2633158365885417, "grad_norm": 14.223555564880371, "learning_rate": 4.196623030074158e-06, "loss": 3.6004, "step": 25885 }, { "epoch": 0.26336669921875, "grad_norm": 12.401254653930664, "learning_rate": 4.196329310791514e-06, "loss": 3.2072, "step": 25890 }, { "epoch": 0.2634175618489583, "grad_norm": 11.804680824279785, "learning_rate": 4.196035548108687e-06, "loss": 3.5086, "step": 25895 }, { "epoch": 0.2634684244791667, "grad_norm": 12.847404479980469, "learning_rate": 4.195741742033192e-06, "loss": 3.1709, "step": 25900 }, { "epoch": 0.263519287109375, "grad_norm": 12.488205909729004, "learning_rate": 4.195447892572548e-06, "loss": 3.2119, "step": 25905 }, { "epoch": 0.2635701497395833, "grad_norm": 11.74796199798584, "learning_rate": 4.19515399973427e-06, "loss": 3.4599, "step": 25910 }, { "epoch": 0.2636210123697917, "grad_norm": 11.54658317565918, "learning_rate": 4.1948600635258795e-06, "loss": 3.391, "step": 25915 }, { "epoch": 0.263671875, "grad_norm": 12.342092514038086, "learning_rate": 4.194566083954895e-06, "loss": 3.3041, "step": 25920 }, { "epoch": 0.2637227376302083, "grad_norm": 11.62911605834961, "learning_rate": 4.19427206102884e-06, "loss": 3.2533, "step": 25925 }, { "epoch": 0.2637736002604167, "grad_norm": 11.094088554382324, "learning_rate": 4.193977994755236e-06, "loss": 3.1159, "step": 25930 }, { "epoch": 0.263824462890625, "grad_norm": 13.771241188049316, "learning_rate": 4.193683885141606e-06, "loss": 3.7694, "step": 25935 }, { "epoch": 0.2638753255208333, "grad_norm": 14.977154731750488, "learning_rate": 4.193389732195476e-06, "loss": 3.5497, "step": 25940 }, { "epoch": 0.2639261881510417, "grad_norm": 11.603812217712402, "learning_rate": 4.19309553592437e-06, "loss": 3.3818, "step": 25945 }, { "epoch": 0.26397705078125, "grad_norm": 13.614372253417969, "learning_rate": 4.192801296335816e-06, "loss": 3.3199, "step": 25950 }, { "epoch": 0.2640279134114583, "grad_norm": 9.210861206054688, "learning_rate": 4.192507013437343e-06, "loss": 3.6322, "step": 25955 }, { "epoch": 0.2640787760416667, "grad_norm": 12.073121070861816, "learning_rate": 4.1922126872364774e-06, "loss": 3.5082, "step": 25960 }, { "epoch": 0.264129638671875, "grad_norm": 10.942256927490234, "learning_rate": 4.191918317740753e-06, "loss": 3.2529, "step": 25965 }, { "epoch": 0.2641805013020833, "grad_norm": 12.275918960571289, "learning_rate": 4.1916239049577e-06, "loss": 3.3314, "step": 25970 }, { "epoch": 0.2642313639322917, "grad_norm": 11.296648979187012, "learning_rate": 4.191329448894849e-06, "loss": 3.2901, "step": 25975 }, { "epoch": 0.2642822265625, "grad_norm": 13.480021476745605, "learning_rate": 4.191034949559735e-06, "loss": 3.3637, "step": 25980 }, { "epoch": 0.2643330891927083, "grad_norm": 12.317750930786133, "learning_rate": 4.190740406959893e-06, "loss": 3.5085, "step": 25985 }, { "epoch": 0.2643839518229167, "grad_norm": 14.043403625488281, "learning_rate": 4.190445821102858e-06, "loss": 3.1108, "step": 25990 }, { "epoch": 0.264434814453125, "grad_norm": 12.085020065307617, "learning_rate": 4.190151191996168e-06, "loss": 3.3186, "step": 25995 }, { "epoch": 0.2644856770833333, "grad_norm": 11.749665260314941, "learning_rate": 4.189856519647361e-06, "loss": 3.9153, "step": 26000 }, { "epoch": 0.2645365397135417, "grad_norm": 12.28537654876709, "learning_rate": 4.189561804063974e-06, "loss": 3.1333, "step": 26005 }, { "epoch": 0.26458740234375, "grad_norm": 10.340080261230469, "learning_rate": 4.18926704525355e-06, "loss": 3.9485, "step": 26010 }, { "epoch": 0.2646382649739583, "grad_norm": 9.838654518127441, "learning_rate": 4.188972243223628e-06, "loss": 3.3617, "step": 26015 }, { "epoch": 0.2646891276041667, "grad_norm": 8.775800704956055, "learning_rate": 4.188677397981752e-06, "loss": 3.5872, "step": 26020 }, { "epoch": 0.264739990234375, "grad_norm": 13.727091789245605, "learning_rate": 4.188382509535465e-06, "loss": 3.3762, "step": 26025 }, { "epoch": 0.2647908528645833, "grad_norm": 7.585343360900879, "learning_rate": 4.188087577892311e-06, "loss": 3.5484, "step": 26030 }, { "epoch": 0.2648417154947917, "grad_norm": 14.522687911987305, "learning_rate": 4.187792603059837e-06, "loss": 3.2677, "step": 26035 }, { "epoch": 0.264892578125, "grad_norm": 11.510234832763672, "learning_rate": 4.187497585045589e-06, "loss": 3.6247, "step": 26040 }, { "epoch": 0.2649434407552083, "grad_norm": 16.558944702148438, "learning_rate": 4.1872025238571155e-06, "loss": 3.324, "step": 26045 }, { "epoch": 0.2649943033854167, "grad_norm": 12.075737953186035, "learning_rate": 4.186907419501966e-06, "loss": 3.6676, "step": 26050 }, { "epoch": 0.265045166015625, "grad_norm": 10.342079162597656, "learning_rate": 4.1866122719876896e-06, "loss": 3.2716, "step": 26055 }, { "epoch": 0.2650960286458333, "grad_norm": 10.918512344360352, "learning_rate": 4.186317081321838e-06, "loss": 3.4466, "step": 26060 }, { "epoch": 0.2651468912760417, "grad_norm": 9.312711715698242, "learning_rate": 4.186021847511963e-06, "loss": 3.1628, "step": 26065 }, { "epoch": 0.26519775390625, "grad_norm": 14.83906078338623, "learning_rate": 4.185726570565619e-06, "loss": 3.2134, "step": 26070 }, { "epoch": 0.2652486165364583, "grad_norm": 16.265422821044922, "learning_rate": 4.18543125049036e-06, "loss": 3.5201, "step": 26075 }, { "epoch": 0.2652994791666667, "grad_norm": 10.572354316711426, "learning_rate": 4.185135887293743e-06, "loss": 3.1069, "step": 26080 }, { "epoch": 0.265350341796875, "grad_norm": 11.070435523986816, "learning_rate": 4.1848404809833235e-06, "loss": 3.2837, "step": 26085 }, { "epoch": 0.2654012044270833, "grad_norm": 11.942509651184082, "learning_rate": 4.184545031566659e-06, "loss": 3.6628, "step": 26090 }, { "epoch": 0.2654520670572917, "grad_norm": 15.380331039428711, "learning_rate": 4.18424953905131e-06, "loss": 3.5475, "step": 26095 }, { "epoch": 0.2655029296875, "grad_norm": 9.15284538269043, "learning_rate": 4.183954003444834e-06, "loss": 2.9877, "step": 26100 }, { "epoch": 0.2655537923177083, "grad_norm": 12.45242691040039, "learning_rate": 4.1836584247547955e-06, "loss": 3.514, "step": 26105 }, { "epoch": 0.2656046549479167, "grad_norm": 11.011215209960938, "learning_rate": 4.183362802988754e-06, "loss": 3.3444, "step": 26110 }, { "epoch": 0.265655517578125, "grad_norm": 9.054574012756348, "learning_rate": 4.183067138154275e-06, "loss": 3.3986, "step": 26115 }, { "epoch": 0.2657063802083333, "grad_norm": 9.417643547058105, "learning_rate": 4.182771430258923e-06, "loss": 3.2725, "step": 26120 }, { "epoch": 0.2657572428385417, "grad_norm": 11.767871856689453, "learning_rate": 4.182475679310262e-06, "loss": 3.3302, "step": 26125 }, { "epoch": 0.26580810546875, "grad_norm": 11.49780559539795, "learning_rate": 4.182179885315859e-06, "loss": 3.461, "step": 26130 }, { "epoch": 0.2658589680989583, "grad_norm": 20.007266998291016, "learning_rate": 4.181884048283283e-06, "loss": 3.3254, "step": 26135 }, { "epoch": 0.2659098307291667, "grad_norm": 15.877753257751465, "learning_rate": 4.181588168220102e-06, "loss": 3.3056, "step": 26140 }, { "epoch": 0.265960693359375, "grad_norm": 10.472766876220703, "learning_rate": 4.181292245133887e-06, "loss": 3.4033, "step": 26145 }, { "epoch": 0.2660115559895833, "grad_norm": 10.49460506439209, "learning_rate": 4.180996279032208e-06, "loss": 3.3645, "step": 26150 }, { "epoch": 0.2660624186197917, "grad_norm": 13.416446685791016, "learning_rate": 4.180700269922637e-06, "loss": 3.2229, "step": 26155 }, { "epoch": 0.26611328125, "grad_norm": 15.531248092651367, "learning_rate": 4.180404217812749e-06, "loss": 3.1068, "step": 26160 }, { "epoch": 0.2661641438802083, "grad_norm": 12.707284927368164, "learning_rate": 4.180108122710116e-06, "loss": 3.1724, "step": 26165 }, { "epoch": 0.2662150065104167, "grad_norm": 8.874678611755371, "learning_rate": 4.1798119846223164e-06, "loss": 3.7423, "step": 26170 }, { "epoch": 0.266265869140625, "grad_norm": 9.22908878326416, "learning_rate": 4.1795158035569235e-06, "loss": 3.5431, "step": 26175 }, { "epoch": 0.2663167317708333, "grad_norm": 12.564714431762695, "learning_rate": 4.179219579521519e-06, "loss": 3.1369, "step": 26180 }, { "epoch": 0.2663675944010417, "grad_norm": 11.059355735778809, "learning_rate": 4.178923312523679e-06, "loss": 3.4337, "step": 26185 }, { "epoch": 0.26641845703125, "grad_norm": 11.744783401489258, "learning_rate": 4.178627002570983e-06, "loss": 3.4429, "step": 26190 }, { "epoch": 0.2664693196614583, "grad_norm": 13.256790161132812, "learning_rate": 4.178330649671014e-06, "loss": 3.2407, "step": 26195 }, { "epoch": 0.2665201822916667, "grad_norm": 12.98029899597168, "learning_rate": 4.178034253831353e-06, "loss": 3.5527, "step": 26200 }, { "epoch": 0.266571044921875, "grad_norm": 13.788947105407715, "learning_rate": 4.177737815059582e-06, "loss": 3.3113, "step": 26205 }, { "epoch": 0.2666219075520833, "grad_norm": 15.371501922607422, "learning_rate": 4.177441333363289e-06, "loss": 3.4981, "step": 26210 }, { "epoch": 0.2666727701822917, "grad_norm": 12.833020210266113, "learning_rate": 4.177144808750055e-06, "loss": 3.3368, "step": 26215 }, { "epoch": 0.2667236328125, "grad_norm": 16.910667419433594, "learning_rate": 4.17684824122747e-06, "loss": 3.3971, "step": 26220 }, { "epoch": 0.2667744954427083, "grad_norm": 14.4556303024292, "learning_rate": 4.176551630803119e-06, "loss": 3.1049, "step": 26225 }, { "epoch": 0.2668253580729167, "grad_norm": 8.58405876159668, "learning_rate": 4.176254977484593e-06, "loss": 3.2514, "step": 26230 }, { "epoch": 0.266876220703125, "grad_norm": 9.659404754638672, "learning_rate": 4.175958281279481e-06, "loss": 3.3754, "step": 26235 }, { "epoch": 0.2669270833333333, "grad_norm": 10.980110168457031, "learning_rate": 4.1756615421953725e-06, "loss": 3.0858, "step": 26240 }, { "epoch": 0.2669779459635417, "grad_norm": 9.385514259338379, "learning_rate": 4.175364760239862e-06, "loss": 3.2213, "step": 26245 }, { "epoch": 0.26702880859375, "grad_norm": 8.674199104309082, "learning_rate": 4.1750679354205395e-06, "loss": 3.8795, "step": 26250 }, { "epoch": 0.2670796712239583, "grad_norm": 15.488489151000977, "learning_rate": 4.1747710677450015e-06, "loss": 3.4649, "step": 26255 }, { "epoch": 0.2671305338541667, "grad_norm": 7.275283336639404, "learning_rate": 4.174474157220843e-06, "loss": 2.7254, "step": 26260 }, { "epoch": 0.267181396484375, "grad_norm": 12.355480194091797, "learning_rate": 4.17417720385566e-06, "loss": 3.3009, "step": 26265 }, { "epoch": 0.2672322591145833, "grad_norm": 14.901166915893555, "learning_rate": 4.17388020765705e-06, "loss": 3.684, "step": 26270 }, { "epoch": 0.2672831217447917, "grad_norm": 15.172636985778809, "learning_rate": 4.1735831686326126e-06, "loss": 3.2967, "step": 26275 }, { "epoch": 0.267333984375, "grad_norm": 13.416927337646484, "learning_rate": 4.173286086789945e-06, "loss": 3.3368, "step": 26280 }, { "epoch": 0.2673848470052083, "grad_norm": 12.028944969177246, "learning_rate": 4.17298896213665e-06, "loss": 3.3686, "step": 26285 }, { "epoch": 0.2674357096354167, "grad_norm": 9.418871879577637, "learning_rate": 4.172691794680329e-06, "loss": 3.6334, "step": 26290 }, { "epoch": 0.267486572265625, "grad_norm": 11.656323432922363, "learning_rate": 4.172394584428585e-06, "loss": 3.1989, "step": 26295 }, { "epoch": 0.2675374348958333, "grad_norm": 8.896805763244629, "learning_rate": 4.172097331389022e-06, "loss": 3.1311, "step": 26300 }, { "epoch": 0.2675882975260417, "grad_norm": 11.2920560836792, "learning_rate": 4.171800035569245e-06, "loss": 3.4342, "step": 26305 }, { "epoch": 0.26763916015625, "grad_norm": 13.756390571594238, "learning_rate": 4.1715026969768605e-06, "loss": 3.7989, "step": 26310 }, { "epoch": 0.2676900227864583, "grad_norm": 10.483635902404785, "learning_rate": 4.171205315619476e-06, "loss": 3.3567, "step": 26315 }, { "epoch": 0.2677408854166667, "grad_norm": 7.852400779724121, "learning_rate": 4.170907891504699e-06, "loss": 3.1506, "step": 26320 }, { "epoch": 0.267791748046875, "grad_norm": 14.112519264221191, "learning_rate": 4.170610424640139e-06, "loss": 3.3308, "step": 26325 }, { "epoch": 0.2678426106770833, "grad_norm": 13.232032775878906, "learning_rate": 4.170312915033408e-06, "loss": 3.0412, "step": 26330 }, { "epoch": 0.2678934733072917, "grad_norm": 8.810139656066895, "learning_rate": 4.170015362692117e-06, "loss": 3.426, "step": 26335 }, { "epoch": 0.2679443359375, "grad_norm": 11.008728981018066, "learning_rate": 4.1697177676238786e-06, "loss": 3.0953, "step": 26340 }, { "epoch": 0.2679951985677083, "grad_norm": 15.006650924682617, "learning_rate": 4.169420129836307e-06, "loss": 3.2985, "step": 26345 }, { "epoch": 0.2680460611979167, "grad_norm": 19.160358428955078, "learning_rate": 4.1691224493370175e-06, "loss": 3.5361, "step": 26350 }, { "epoch": 0.268096923828125, "grad_norm": 12.861884117126465, "learning_rate": 4.168824726133625e-06, "loss": 3.2308, "step": 26355 }, { "epoch": 0.2681477864583333, "grad_norm": 10.119865417480469, "learning_rate": 4.1685269602337484e-06, "loss": 3.256, "step": 26360 }, { "epoch": 0.2681986490885417, "grad_norm": 12.688722610473633, "learning_rate": 4.168229151645004e-06, "loss": 3.7487, "step": 26365 }, { "epoch": 0.26824951171875, "grad_norm": 11.832856178283691, "learning_rate": 4.167931300375014e-06, "loss": 3.3695, "step": 26370 }, { "epoch": 0.2683003743489583, "grad_norm": 16.034059524536133, "learning_rate": 4.167633406431395e-06, "loss": 3.3954, "step": 26375 }, { "epoch": 0.2683512369791667, "grad_norm": 12.99596881866455, "learning_rate": 4.167335469821771e-06, "loss": 3.0622, "step": 26380 }, { "epoch": 0.268402099609375, "grad_norm": 8.741852760314941, "learning_rate": 4.167037490553764e-06, "loss": 3.5418, "step": 26385 }, { "epoch": 0.2684529622395833, "grad_norm": 15.765666007995605, "learning_rate": 4.1667394686349984e-06, "loss": 4.2059, "step": 26390 }, { "epoch": 0.2685038248697917, "grad_norm": 11.599302291870117, "learning_rate": 4.166441404073099e-06, "loss": 3.1983, "step": 26395 }, { "epoch": 0.2685546875, "grad_norm": 14.83521842956543, "learning_rate": 4.166143296875691e-06, "loss": 2.924, "step": 26400 }, { "epoch": 0.2686055501302083, "grad_norm": 13.936452865600586, "learning_rate": 4.165845147050401e-06, "loss": 3.3603, "step": 26405 }, { "epoch": 0.2686564127604167, "grad_norm": 14.793567657470703, "learning_rate": 4.165546954604859e-06, "loss": 3.4469, "step": 26410 }, { "epoch": 0.268707275390625, "grad_norm": 10.850431442260742, "learning_rate": 4.165248719546691e-06, "loss": 3.3443, "step": 26415 }, { "epoch": 0.2687581380208333, "grad_norm": 11.647077560424805, "learning_rate": 4.164950441883531e-06, "loss": 3.1829, "step": 26420 }, { "epoch": 0.2688090006510417, "grad_norm": 14.193302154541016, "learning_rate": 4.1646521216230075e-06, "loss": 3.4609, "step": 26425 }, { "epoch": 0.26885986328125, "grad_norm": 8.489500045776367, "learning_rate": 4.164353758772755e-06, "loss": 3.1324, "step": 26430 }, { "epoch": 0.2689107259114583, "grad_norm": 10.395947456359863, "learning_rate": 4.1640553533404046e-06, "loss": 3.8962, "step": 26435 }, { "epoch": 0.2689615885416667, "grad_norm": 9.547869682312012, "learning_rate": 4.163756905333594e-06, "loss": 2.9235, "step": 26440 }, { "epoch": 0.269012451171875, "grad_norm": 8.927133560180664, "learning_rate": 4.1634584147599564e-06, "loss": 3.3013, "step": 26445 }, { "epoch": 0.2690633138020833, "grad_norm": 9.086165428161621, "learning_rate": 4.16315988162713e-06, "loss": 3.7275, "step": 26450 }, { "epoch": 0.2691141764322917, "grad_norm": 14.933182716369629, "learning_rate": 4.162861305942753e-06, "loss": 3.4349, "step": 26455 }, { "epoch": 0.2691650390625, "grad_norm": 11.188149452209473, "learning_rate": 4.162562687714462e-06, "loss": 3.49, "step": 26460 }, { "epoch": 0.2692159016927083, "grad_norm": 14.859213829040527, "learning_rate": 4.1622640269499e-06, "loss": 3.5914, "step": 26465 }, { "epoch": 0.2692667643229167, "grad_norm": 14.470888137817383, "learning_rate": 4.161965323656706e-06, "loss": 4.0164, "step": 26470 }, { "epoch": 0.269317626953125, "grad_norm": 8.29883861541748, "learning_rate": 4.161666577842524e-06, "loss": 3.7569, "step": 26475 }, { "epoch": 0.2693684895833333, "grad_norm": 16.3638858795166, "learning_rate": 4.161367789514995e-06, "loss": 3.1554, "step": 26480 }, { "epoch": 0.2694193522135417, "grad_norm": 9.231216430664062, "learning_rate": 4.161068958681766e-06, "loss": 3.3017, "step": 26485 }, { "epoch": 0.26947021484375, "grad_norm": 14.40170669555664, "learning_rate": 4.160770085350481e-06, "loss": 3.4301, "step": 26490 }, { "epoch": 0.2695210774739583, "grad_norm": 9.079069137573242, "learning_rate": 4.1604711695287876e-06, "loss": 3.3494, "step": 26495 }, { "epoch": 0.2695719401041667, "grad_norm": 10.551512718200684, "learning_rate": 4.160172211224332e-06, "loss": 3.287, "step": 26500 }, { "epoch": 0.269622802734375, "grad_norm": 12.600727081298828, "learning_rate": 4.159873210444765e-06, "loss": 3.4167, "step": 26505 }, { "epoch": 0.2696736653645833, "grad_norm": 9.81279468536377, "learning_rate": 4.159574167197734e-06, "loss": 3.3034, "step": 26510 }, { "epoch": 0.2697245279947917, "grad_norm": 15.392224311828613, "learning_rate": 4.159275081490892e-06, "loss": 3.1249, "step": 26515 }, { "epoch": 0.269775390625, "grad_norm": 10.038593292236328, "learning_rate": 4.158975953331891e-06, "loss": 3.2412, "step": 26520 }, { "epoch": 0.2698262532552083, "grad_norm": 13.212175369262695, "learning_rate": 4.158676782728383e-06, "loss": 3.4358, "step": 26525 }, { "epoch": 0.2698771158854167, "grad_norm": 10.415067672729492, "learning_rate": 4.158377569688022e-06, "loss": 3.503, "step": 26530 }, { "epoch": 0.269927978515625, "grad_norm": 12.571605682373047, "learning_rate": 4.158078314218464e-06, "loss": 3.5547, "step": 26535 }, { "epoch": 0.2699788411458333, "grad_norm": 9.009819984436035, "learning_rate": 4.157779016327366e-06, "loss": 3.4935, "step": 26540 }, { "epoch": 0.2700297037760417, "grad_norm": 14.811439514160156, "learning_rate": 4.1574796760223845e-06, "loss": 3.4557, "step": 26545 }, { "epoch": 0.27008056640625, "grad_norm": 15.486289978027344, "learning_rate": 4.157180293311179e-06, "loss": 3.7938, "step": 26550 }, { "epoch": 0.2701314290364583, "grad_norm": 15.374136924743652, "learning_rate": 4.156880868201407e-06, "loss": 3.4213, "step": 26555 }, { "epoch": 0.2701822916666667, "grad_norm": 9.167612075805664, "learning_rate": 4.156581400700732e-06, "loss": 3.3672, "step": 26560 }, { "epoch": 0.270233154296875, "grad_norm": 9.546422958374023, "learning_rate": 4.156281890816814e-06, "loss": 3.5656, "step": 26565 }, { "epoch": 0.2702840169270833, "grad_norm": 16.142099380493164, "learning_rate": 4.155982338557317e-06, "loss": 3.25, "step": 26570 }, { "epoch": 0.2703348795572917, "grad_norm": 12.354496955871582, "learning_rate": 4.155682743929904e-06, "loss": 3.4914, "step": 26575 }, { "epoch": 0.2703857421875, "grad_norm": 10.914223670959473, "learning_rate": 4.15538310694224e-06, "loss": 3.4107, "step": 26580 }, { "epoch": 0.2704366048177083, "grad_norm": 12.899299621582031, "learning_rate": 4.155083427601992e-06, "loss": 3.2644, "step": 26585 }, { "epoch": 0.2704874674479167, "grad_norm": 14.942575454711914, "learning_rate": 4.154783705916827e-06, "loss": 3.1591, "step": 26590 }, { "epoch": 0.270538330078125, "grad_norm": 10.189476013183594, "learning_rate": 4.1544839418944124e-06, "loss": 3.1835, "step": 26595 }, { "epoch": 0.2705891927083333, "grad_norm": 13.850764274597168, "learning_rate": 4.154184135542419e-06, "loss": 3.2307, "step": 26600 }, { "epoch": 0.2706400553385417, "grad_norm": 10.06661605834961, "learning_rate": 4.1538842868685155e-06, "loss": 3.3825, "step": 26605 }, { "epoch": 0.27069091796875, "grad_norm": 7.886444568634033, "learning_rate": 4.153584395880376e-06, "loss": 2.9482, "step": 26610 }, { "epoch": 0.2707417805989583, "grad_norm": 13.357390403747559, "learning_rate": 4.153284462585672e-06, "loss": 3.6921, "step": 26615 }, { "epoch": 0.2707926432291667, "grad_norm": 12.01125431060791, "learning_rate": 4.152984486992075e-06, "loss": 3.6984, "step": 26620 }, { "epoch": 0.270843505859375, "grad_norm": 11.155698776245117, "learning_rate": 4.152684469107263e-06, "loss": 3.1299, "step": 26625 }, { "epoch": 0.2708943684895833, "grad_norm": 14.0589017868042, "learning_rate": 4.15238440893891e-06, "loss": 3.6016, "step": 26630 }, { "epoch": 0.2709452311197917, "grad_norm": 15.438796043395996, "learning_rate": 4.1520843064946936e-06, "loss": 3.5558, "step": 26635 }, { "epoch": 0.27099609375, "grad_norm": 11.46037483215332, "learning_rate": 4.151784161782292e-06, "loss": 3.6484, "step": 26640 }, { "epoch": 0.2710469563802083, "grad_norm": 15.121505737304688, "learning_rate": 4.1514839748093845e-06, "loss": 3.2945, "step": 26645 }, { "epoch": 0.2710978190104167, "grad_norm": 13.236750602722168, "learning_rate": 4.15118374558365e-06, "loss": 3.2456, "step": 26650 }, { "epoch": 0.271148681640625, "grad_norm": 16.323974609375, "learning_rate": 4.150883474112771e-06, "loss": 3.434, "step": 26655 }, { "epoch": 0.2711995442708333, "grad_norm": 14.29743766784668, "learning_rate": 4.150583160404431e-06, "loss": 3.3447, "step": 26660 }, { "epoch": 0.2712504069010417, "grad_norm": 14.510125160217285, "learning_rate": 4.150282804466311e-06, "loss": 3.2886, "step": 26665 }, { "epoch": 0.27130126953125, "grad_norm": 9.165349006652832, "learning_rate": 4.1499824063060965e-06, "loss": 3.3194, "step": 26670 }, { "epoch": 0.2713521321614583, "grad_norm": 9.51498031616211, "learning_rate": 4.149681965931473e-06, "loss": 3.72, "step": 26675 }, { "epoch": 0.2714029947916667, "grad_norm": 16.4527645111084, "learning_rate": 4.149381483350128e-06, "loss": 3.4032, "step": 26680 }, { "epoch": 0.271453857421875, "grad_norm": 13.104421615600586, "learning_rate": 4.149080958569749e-06, "loss": 3.4986, "step": 26685 }, { "epoch": 0.2715047200520833, "grad_norm": 12.464369773864746, "learning_rate": 4.148780391598023e-06, "loss": 3.7014, "step": 26690 }, { "epoch": 0.2715555826822917, "grad_norm": 12.509614944458008, "learning_rate": 4.148479782442642e-06, "loss": 3.1501, "step": 26695 }, { "epoch": 0.2716064453125, "grad_norm": 10.114399909973145, "learning_rate": 4.148179131111298e-06, "loss": 3.1244, "step": 26700 }, { "epoch": 0.2716573079427083, "grad_norm": 9.907551765441895, "learning_rate": 4.147878437611681e-06, "loss": 3.5617, "step": 26705 }, { "epoch": 0.2717081705729167, "grad_norm": 9.493293762207031, "learning_rate": 4.147577701951483e-06, "loss": 3.0425, "step": 26710 }, { "epoch": 0.271759033203125, "grad_norm": 12.182016372680664, "learning_rate": 4.147276924138402e-06, "loss": 3.1682, "step": 26715 }, { "epoch": 0.2718098958333333, "grad_norm": 9.14958667755127, "learning_rate": 4.14697610418013e-06, "loss": 3.8109, "step": 26720 }, { "epoch": 0.2718607584635417, "grad_norm": 12.60875415802002, "learning_rate": 4.1466752420843656e-06, "loss": 3.7398, "step": 26725 }, { "epoch": 0.27191162109375, "grad_norm": 13.810464859008789, "learning_rate": 4.146374337858805e-06, "loss": 3.2147, "step": 26730 }, { "epoch": 0.2719624837239583, "grad_norm": 10.453813552856445, "learning_rate": 4.146073391511148e-06, "loss": 3.7852, "step": 26735 }, { "epoch": 0.2720133463541667, "grad_norm": 12.554015159606934, "learning_rate": 4.145772403049093e-06, "loss": 3.589, "step": 26740 }, { "epoch": 0.272064208984375, "grad_norm": 10.385574340820312, "learning_rate": 4.145471372480341e-06, "loss": 3.2686, "step": 26745 }, { "epoch": 0.2721150716145833, "grad_norm": 7.913267612457275, "learning_rate": 4.145170299812594e-06, "loss": 3.4415, "step": 26750 }, { "epoch": 0.2721659342447917, "grad_norm": 9.897327423095703, "learning_rate": 4.144869185053555e-06, "loss": 3.3393, "step": 26755 }, { "epoch": 0.272216796875, "grad_norm": 10.874954223632812, "learning_rate": 4.144568028210928e-06, "loss": 3.4982, "step": 26760 }, { "epoch": 0.2722676595052083, "grad_norm": 15.309311866760254, "learning_rate": 4.144266829292417e-06, "loss": 3.4094, "step": 26765 }, { "epoch": 0.2723185221354167, "grad_norm": 9.066498756408691, "learning_rate": 4.1439655883057295e-06, "loss": 3.1665, "step": 26770 }, { "epoch": 0.272369384765625, "grad_norm": 9.98532772064209, "learning_rate": 4.1436643052585715e-06, "loss": 3.3225, "step": 26775 }, { "epoch": 0.2724202473958333, "grad_norm": 12.59506893157959, "learning_rate": 4.143362980158653e-06, "loss": 3.7703, "step": 26780 }, { "epoch": 0.2724711100260417, "grad_norm": 9.37546157836914, "learning_rate": 4.143061613013681e-06, "loss": 3.2978, "step": 26785 }, { "epoch": 0.27252197265625, "grad_norm": 7.499221324920654, "learning_rate": 4.142760203831367e-06, "loss": 3.4306, "step": 26790 }, { "epoch": 0.2725728352864583, "grad_norm": 10.164257049560547, "learning_rate": 4.142458752619423e-06, "loss": 3.6316, "step": 26795 }, { "epoch": 0.2726236979166667, "grad_norm": 10.297619819641113, "learning_rate": 4.142157259385562e-06, "loss": 3.3527, "step": 26800 }, { "epoch": 0.272674560546875, "grad_norm": 7.55012321472168, "learning_rate": 4.1418557241374955e-06, "loss": 3.4203, "step": 26805 }, { "epoch": 0.2727254231770833, "grad_norm": 18.338563919067383, "learning_rate": 4.14155414688294e-06, "loss": 2.9126, "step": 26810 }, { "epoch": 0.2727762858072917, "grad_norm": 13.562408447265625, "learning_rate": 4.141252527629611e-06, "loss": 3.3561, "step": 26815 }, { "epoch": 0.2728271484375, "grad_norm": 10.19615364074707, "learning_rate": 4.140950866385225e-06, "loss": 3.0776, "step": 26820 }, { "epoch": 0.2728780110677083, "grad_norm": 11.81718635559082, "learning_rate": 4.140649163157499e-06, "loss": 3.2956, "step": 26825 }, { "epoch": 0.2729288736979167, "grad_norm": 12.27664566040039, "learning_rate": 4.140347417954154e-06, "loss": 3.3226, "step": 26830 }, { "epoch": 0.272979736328125, "grad_norm": 15.785837173461914, "learning_rate": 4.14004563078291e-06, "loss": 3.2568, "step": 26835 }, { "epoch": 0.2730305989583333, "grad_norm": 13.030390739440918, "learning_rate": 4.1397438016514856e-06, "loss": 3.5478, "step": 26840 }, { "epoch": 0.2730814615885417, "grad_norm": 10.237311363220215, "learning_rate": 4.139441930567604e-06, "loss": 3.4174, "step": 26845 }, { "epoch": 0.27313232421875, "grad_norm": 14.9881591796875, "learning_rate": 4.139140017538992e-06, "loss": 4.0547, "step": 26850 }, { "epoch": 0.2731831868489583, "grad_norm": 15.239192008972168, "learning_rate": 4.13883806257337e-06, "loss": 3.1832, "step": 26855 }, { "epoch": 0.2732340494791667, "grad_norm": 14.115588188171387, "learning_rate": 4.138536065678463e-06, "loss": 3.3923, "step": 26860 }, { "epoch": 0.273284912109375, "grad_norm": 9.392797470092773, "learning_rate": 4.138234026862002e-06, "loss": 3.9802, "step": 26865 }, { "epoch": 0.2733357747395833, "grad_norm": 9.962571144104004, "learning_rate": 4.137931946131709e-06, "loss": 3.1848, "step": 26870 }, { "epoch": 0.2733866373697917, "grad_norm": 11.43811321258545, "learning_rate": 4.137629823495317e-06, "loss": 3.4973, "step": 26875 }, { "epoch": 0.2734375, "grad_norm": 13.599096298217773, "learning_rate": 4.1373276589605535e-06, "loss": 3.869, "step": 26880 }, { "epoch": 0.2734883626302083, "grad_norm": 14.69780158996582, "learning_rate": 4.13702545253515e-06, "loss": 3.1746, "step": 26885 }, { "epoch": 0.2735392252604167, "grad_norm": 15.275219917297363, "learning_rate": 4.1367232042268376e-06, "loss": 3.4442, "step": 26890 }, { "epoch": 0.273590087890625, "grad_norm": 11.607757568359375, "learning_rate": 4.136420914043352e-06, "loss": 3.6636, "step": 26895 }, { "epoch": 0.2736409505208333, "grad_norm": 10.504234313964844, "learning_rate": 4.136118581992423e-06, "loss": 3.2825, "step": 26900 }, { "epoch": 0.2736918131510417, "grad_norm": 9.813732147216797, "learning_rate": 4.13581620808179e-06, "loss": 3.2447, "step": 26905 }, { "epoch": 0.27374267578125, "grad_norm": 10.727784156799316, "learning_rate": 4.135513792319185e-06, "loss": 3.4474, "step": 26910 }, { "epoch": 0.2737935384114583, "grad_norm": 9.403905868530273, "learning_rate": 4.135211334712349e-06, "loss": 3.4208, "step": 26915 }, { "epoch": 0.2738444010416667, "grad_norm": 9.684918403625488, "learning_rate": 4.134908835269018e-06, "loss": 3.5309, "step": 26920 }, { "epoch": 0.273895263671875, "grad_norm": 10.809853553771973, "learning_rate": 4.1346062939969316e-06, "loss": 4.2717, "step": 26925 }, { "epoch": 0.2739461263020833, "grad_norm": 13.863726615905762, "learning_rate": 4.13430371090383e-06, "loss": 3.4833, "step": 26930 }, { "epoch": 0.2739969889322917, "grad_norm": 15.26383113861084, "learning_rate": 4.134001085997457e-06, "loss": 3.5963, "step": 26935 }, { "epoch": 0.2740478515625, "grad_norm": 15.077373504638672, "learning_rate": 4.133698419285552e-06, "loss": 3.7273, "step": 26940 }, { "epoch": 0.2740987141927083, "grad_norm": 10.0264892578125, "learning_rate": 4.13339571077586e-06, "loss": 3.2443, "step": 26945 }, { "epoch": 0.2741495768229167, "grad_norm": 9.452435493469238, "learning_rate": 4.1330929604761275e-06, "loss": 3.4279, "step": 26950 }, { "epoch": 0.274200439453125, "grad_norm": 15.747602462768555, "learning_rate": 4.132790168394098e-06, "loss": 3.5983, "step": 26955 }, { "epoch": 0.2742513020833333, "grad_norm": 12.317953109741211, "learning_rate": 4.1324873345375185e-06, "loss": 3.8896, "step": 26960 }, { "epoch": 0.2743021647135417, "grad_norm": 11.992025375366211, "learning_rate": 4.132184458914138e-06, "loss": 3.5027, "step": 26965 }, { "epoch": 0.27435302734375, "grad_norm": 14.02786922454834, "learning_rate": 4.131881541531705e-06, "loss": 4.0145, "step": 26970 }, { "epoch": 0.2744038899739583, "grad_norm": 18.570852279663086, "learning_rate": 4.131578582397969e-06, "loss": 3.5407, "step": 26975 }, { "epoch": 0.2744547526041667, "grad_norm": 9.13254165649414, "learning_rate": 4.131275581520683e-06, "loss": 3.531, "step": 26980 }, { "epoch": 0.274505615234375, "grad_norm": 9.889278411865234, "learning_rate": 4.130972538907597e-06, "loss": 3.6133, "step": 26985 }, { "epoch": 0.2745564778645833, "grad_norm": 14.381463050842285, "learning_rate": 4.130669454566465e-06, "loss": 3.3638, "step": 26990 }, { "epoch": 0.2746073404947917, "grad_norm": 14.76690673828125, "learning_rate": 4.130366328505041e-06, "loss": 3.4389, "step": 26995 }, { "epoch": 0.274658203125, "grad_norm": 14.930278778076172, "learning_rate": 4.1300631607310824e-06, "loss": 3.3233, "step": 27000 }, { "epoch": 0.2747090657552083, "grad_norm": 9.765680313110352, "learning_rate": 4.129759951252344e-06, "loss": 3.1376, "step": 27005 }, { "epoch": 0.2747599283854167, "grad_norm": 10.81482219696045, "learning_rate": 4.129456700076583e-06, "loss": 3.2255, "step": 27010 }, { "epoch": 0.274810791015625, "grad_norm": 10.286974906921387, "learning_rate": 4.129153407211558e-06, "loss": 3.4194, "step": 27015 }, { "epoch": 0.2748616536458333, "grad_norm": 13.686568260192871, "learning_rate": 4.128850072665029e-06, "loss": 3.1024, "step": 27020 }, { "epoch": 0.2749125162760417, "grad_norm": 15.8404541015625, "learning_rate": 4.128546696444759e-06, "loss": 3.2685, "step": 27025 }, { "epoch": 0.27496337890625, "grad_norm": 11.484049797058105, "learning_rate": 4.128243278558506e-06, "loss": 3.8495, "step": 27030 }, { "epoch": 0.2750142415364583, "grad_norm": 9.2147798538208, "learning_rate": 4.127939819014035e-06, "loss": 3.4059, "step": 27035 }, { "epoch": 0.2750651041666667, "grad_norm": 11.897221565246582, "learning_rate": 4.127636317819109e-06, "loss": 3.4944, "step": 27040 }, { "epoch": 0.275115966796875, "grad_norm": 14.68928050994873, "learning_rate": 4.127332774981495e-06, "loss": 3.1315, "step": 27045 }, { "epoch": 0.2751668294270833, "grad_norm": 16.452741622924805, "learning_rate": 4.127029190508957e-06, "loss": 3.4271, "step": 27050 }, { "epoch": 0.2752176920572917, "grad_norm": 14.647496223449707, "learning_rate": 4.126725564409263e-06, "loss": 3.5874, "step": 27055 }, { "epoch": 0.2752685546875, "grad_norm": 12.843924522399902, "learning_rate": 4.126421896690181e-06, "loss": 3.3549, "step": 27060 }, { "epoch": 0.2753194173177083, "grad_norm": 12.407615661621094, "learning_rate": 4.12611818735948e-06, "loss": 3.3919, "step": 27065 }, { "epoch": 0.2753702799479167, "grad_norm": 12.004281997680664, "learning_rate": 4.125814436424931e-06, "loss": 3.2099, "step": 27070 }, { "epoch": 0.275421142578125, "grad_norm": 12.618803024291992, "learning_rate": 4.1255106438943045e-06, "loss": 2.9556, "step": 27075 }, { "epoch": 0.2754720052083333, "grad_norm": 14.524580001831055, "learning_rate": 4.125206809775374e-06, "loss": 3.7834, "step": 27080 }, { "epoch": 0.2755228678385417, "grad_norm": 14.32988166809082, "learning_rate": 4.124902934075912e-06, "loss": 3.8865, "step": 27085 }, { "epoch": 0.27557373046875, "grad_norm": 11.782154083251953, "learning_rate": 4.124599016803695e-06, "loss": 3.2699, "step": 27090 }, { "epoch": 0.2756245930989583, "grad_norm": 10.000688552856445, "learning_rate": 4.124295057966496e-06, "loss": 3.0126, "step": 27095 }, { "epoch": 0.2756754557291667, "grad_norm": 11.379739761352539, "learning_rate": 4.123991057572092e-06, "loss": 3.6174, "step": 27100 }, { "epoch": 0.275726318359375, "grad_norm": 13.724488258361816, "learning_rate": 4.123687015628263e-06, "loss": 3.2716, "step": 27105 }, { "epoch": 0.2757771809895833, "grad_norm": 15.961641311645508, "learning_rate": 4.123382932142786e-06, "loss": 3.2398, "step": 27110 }, { "epoch": 0.2758280436197917, "grad_norm": 10.406798362731934, "learning_rate": 4.123078807123442e-06, "loss": 3.2444, "step": 27115 }, { "epoch": 0.27587890625, "grad_norm": 8.292187690734863, "learning_rate": 4.12277464057801e-06, "loss": 3.3237, "step": 27120 }, { "epoch": 0.2759297688802083, "grad_norm": 9.379560470581055, "learning_rate": 4.1224704325142754e-06, "loss": 3.5323, "step": 27125 }, { "epoch": 0.2759806315104167, "grad_norm": 12.531656265258789, "learning_rate": 4.122166182940018e-06, "loss": 3.2237, "step": 27130 }, { "epoch": 0.276031494140625, "grad_norm": 8.860665321350098, "learning_rate": 4.1218618918630235e-06, "loss": 2.8973, "step": 27135 }, { "epoch": 0.2760823567708333, "grad_norm": 18.059009552001953, "learning_rate": 4.121557559291078e-06, "loss": 3.2918, "step": 27140 }, { "epoch": 0.2761332194010417, "grad_norm": 7.78617525100708, "learning_rate": 4.121253185231966e-06, "loss": 3.0191, "step": 27145 }, { "epoch": 0.27618408203125, "grad_norm": 14.027008056640625, "learning_rate": 4.120948769693476e-06, "loss": 3.0125, "step": 27150 }, { "epoch": 0.2762349446614583, "grad_norm": 14.529438972473145, "learning_rate": 4.120644312683395e-06, "loss": 3.3326, "step": 27155 }, { "epoch": 0.2762858072916667, "grad_norm": 10.114949226379395, "learning_rate": 4.120339814209514e-06, "loss": 3.273, "step": 27160 }, { "epoch": 0.276336669921875, "grad_norm": 8.970900535583496, "learning_rate": 4.120035274279623e-06, "loss": 3.4991, "step": 27165 }, { "epoch": 0.2763875325520833, "grad_norm": 10.103755950927734, "learning_rate": 4.119730692901513e-06, "loss": 3.273, "step": 27170 }, { "epoch": 0.2764383951822917, "grad_norm": 11.475733757019043, "learning_rate": 4.119426070082977e-06, "loss": 3.2216, "step": 27175 }, { "epoch": 0.2764892578125, "grad_norm": 14.949260711669922, "learning_rate": 4.11912140583181e-06, "loss": 3.4254, "step": 27180 }, { "epoch": 0.2765401204427083, "grad_norm": 11.60977554321289, "learning_rate": 4.118816700155804e-06, "loss": 3.5087, "step": 27185 }, { "epoch": 0.2765909830729167, "grad_norm": 9.910012245178223, "learning_rate": 4.118511953062758e-06, "loss": 3.2046, "step": 27190 }, { "epoch": 0.276641845703125, "grad_norm": 9.686962127685547, "learning_rate": 4.1182071645604656e-06, "loss": 3.5146, "step": 27195 }, { "epoch": 0.2766927083333333, "grad_norm": 8.127184867858887, "learning_rate": 4.1179023346567286e-06, "loss": 3.4442, "step": 27200 }, { "epoch": 0.2767435709635417, "grad_norm": 10.926115036010742, "learning_rate": 4.117597463359342e-06, "loss": 3.1616, "step": 27205 }, { "epoch": 0.27679443359375, "grad_norm": 11.111457824707031, "learning_rate": 4.117292550676108e-06, "loss": 3.1595, "step": 27210 }, { "epoch": 0.2768452962239583, "grad_norm": 9.984879493713379, "learning_rate": 4.116987596614828e-06, "loss": 3.5258, "step": 27215 }, { "epoch": 0.2768961588541667, "grad_norm": 10.812644958496094, "learning_rate": 4.116682601183304e-06, "loss": 3.4125, "step": 27220 }, { "epoch": 0.276947021484375, "grad_norm": 10.24685287475586, "learning_rate": 4.1163775643893374e-06, "loss": 3.5382, "step": 27225 }, { "epoch": 0.2769978841145833, "grad_norm": 11.69446849822998, "learning_rate": 4.116072486240735e-06, "loss": 3.1572, "step": 27230 }, { "epoch": 0.2770487467447917, "grad_norm": 11.58246898651123, "learning_rate": 4.115767366745301e-06, "loss": 3.2686, "step": 27235 }, { "epoch": 0.277099609375, "grad_norm": 13.597831726074219, "learning_rate": 4.1154622059108415e-06, "loss": 3.3648, "step": 27240 }, { "epoch": 0.2771504720052083, "grad_norm": 13.547455787658691, "learning_rate": 4.1151570037451645e-06, "loss": 3.2043, "step": 27245 }, { "epoch": 0.2772013346354167, "grad_norm": 8.566726684570312, "learning_rate": 4.114851760256079e-06, "loss": 3.4433, "step": 27250 }, { "epoch": 0.277252197265625, "grad_norm": 13.299729347229004, "learning_rate": 4.114546475451394e-06, "loss": 3.7028, "step": 27255 }, { "epoch": 0.2773030598958333, "grad_norm": 13.903881072998047, "learning_rate": 4.114241149338919e-06, "loss": 3.248, "step": 27260 }, { "epoch": 0.2773539225260417, "grad_norm": 9.658771514892578, "learning_rate": 4.113935781926468e-06, "loss": 3.2362, "step": 27265 }, { "epoch": 0.27740478515625, "grad_norm": 14.61761474609375, "learning_rate": 4.113630373221852e-06, "loss": 3.3815, "step": 27270 }, { "epoch": 0.2774556477864583, "grad_norm": 8.722289085388184, "learning_rate": 4.113324923232886e-06, "loss": 3.3268, "step": 27275 }, { "epoch": 0.2775065104166667, "grad_norm": 14.244725227355957, "learning_rate": 4.113019431967383e-06, "loss": 3.3132, "step": 27280 }, { "epoch": 0.277557373046875, "grad_norm": 10.710943222045898, "learning_rate": 4.112713899433161e-06, "loss": 3.4074, "step": 27285 }, { "epoch": 0.2776082356770833, "grad_norm": 16.3253116607666, "learning_rate": 4.1124083256380354e-06, "loss": 3.2259, "step": 27290 }, { "epoch": 0.2776590983072917, "grad_norm": 13.641079902648926, "learning_rate": 4.112102710589826e-06, "loss": 3.0738, "step": 27295 }, { "epoch": 0.2777099609375, "grad_norm": 13.473037719726562, "learning_rate": 4.1117970542963504e-06, "loss": 3.2011, "step": 27300 }, { "epoch": 0.2777608235677083, "grad_norm": 16.01197624206543, "learning_rate": 4.111491356765429e-06, "loss": 3.3992, "step": 27305 }, { "epoch": 0.2778116861979167, "grad_norm": 8.830290794372559, "learning_rate": 4.111185618004884e-06, "loss": 3.1679, "step": 27310 }, { "epoch": 0.277862548828125, "grad_norm": 9.501608848571777, "learning_rate": 4.110879838022536e-06, "loss": 3.3934, "step": 27315 }, { "epoch": 0.2779134114583333, "grad_norm": 10.511950492858887, "learning_rate": 4.1105740168262085e-06, "loss": 3.1209, "step": 27320 }, { "epoch": 0.2779642740885417, "grad_norm": 13.050411224365234, "learning_rate": 4.110268154423728e-06, "loss": 3.3571, "step": 27325 }, { "epoch": 0.27801513671875, "grad_norm": 7.376755714416504, "learning_rate": 4.109962250822918e-06, "loss": 3.3481, "step": 27330 }, { "epoch": 0.2780659993489583, "grad_norm": 13.177421569824219, "learning_rate": 4.109656306031606e-06, "loss": 3.0446, "step": 27335 }, { "epoch": 0.2781168619791667, "grad_norm": 10.911126136779785, "learning_rate": 4.109350320057617e-06, "loss": 3.1723, "step": 27340 }, { "epoch": 0.278167724609375, "grad_norm": 15.355131149291992, "learning_rate": 4.109044292908783e-06, "loss": 3.5127, "step": 27345 }, { "epoch": 0.2782185872395833, "grad_norm": 9.465426445007324, "learning_rate": 4.108738224592932e-06, "loss": 3.5795, "step": 27350 }, { "epoch": 0.2782694498697917, "grad_norm": 11.438790321350098, "learning_rate": 4.108432115117894e-06, "loss": 3.7179, "step": 27355 }, { "epoch": 0.2783203125, "grad_norm": 15.0741605758667, "learning_rate": 4.108125964491503e-06, "loss": 3.372, "step": 27360 }, { "epoch": 0.2783711751302083, "grad_norm": 13.863125801086426, "learning_rate": 4.10781977272159e-06, "loss": 3.4886, "step": 27365 }, { "epoch": 0.2784220377604167, "grad_norm": 9.793328285217285, "learning_rate": 4.1075135398159885e-06, "loss": 3.2055, "step": 27370 }, { "epoch": 0.278472900390625, "grad_norm": 12.542889595031738, "learning_rate": 4.107207265782535e-06, "loss": 3.18, "step": 27375 }, { "epoch": 0.2785237630208333, "grad_norm": 10.684040069580078, "learning_rate": 4.106900950629063e-06, "loss": 3.6244, "step": 27380 }, { "epoch": 0.2785746256510417, "grad_norm": 11.289875984191895, "learning_rate": 4.106594594363413e-06, "loss": 3.3408, "step": 27385 }, { "epoch": 0.27862548828125, "grad_norm": 17.948379516601562, "learning_rate": 4.10628819699342e-06, "loss": 3.3196, "step": 27390 }, { "epoch": 0.2786763509114583, "grad_norm": 12.828132629394531, "learning_rate": 4.105981758526924e-06, "loss": 3.6498, "step": 27395 }, { "epoch": 0.2787272135416667, "grad_norm": 11.889634132385254, "learning_rate": 4.105675278971766e-06, "loss": 3.6316, "step": 27400 }, { "epoch": 0.278778076171875, "grad_norm": 9.463981628417969, "learning_rate": 4.105368758335787e-06, "loss": 3.2058, "step": 27405 }, { "epoch": 0.2788289388020833, "grad_norm": 12.077744483947754, "learning_rate": 4.105062196626828e-06, "loss": 3.7315, "step": 27410 }, { "epoch": 0.2788798014322917, "grad_norm": 17.923763275146484, "learning_rate": 4.104755593852733e-06, "loss": 3.3367, "step": 27415 }, { "epoch": 0.2789306640625, "grad_norm": 15.344260215759277, "learning_rate": 4.104448950021347e-06, "loss": 3.0758, "step": 27420 }, { "epoch": 0.2789815266927083, "grad_norm": 13.663217544555664, "learning_rate": 4.104142265140515e-06, "loss": 3.0715, "step": 27425 }, { "epoch": 0.2790323893229167, "grad_norm": 11.255102157592773, "learning_rate": 4.103835539218083e-06, "loss": 3.7433, "step": 27430 }, { "epoch": 0.279083251953125, "grad_norm": 11.063042640686035, "learning_rate": 4.103528772261899e-06, "loss": 3.2718, "step": 27435 }, { "epoch": 0.2791341145833333, "grad_norm": 15.842066764831543, "learning_rate": 4.103221964279811e-06, "loss": 3.7132, "step": 27440 }, { "epoch": 0.2791849772135417, "grad_norm": 13.824544906616211, "learning_rate": 4.10291511527967e-06, "loss": 3.6161, "step": 27445 }, { "epoch": 0.27923583984375, "grad_norm": 13.159097671508789, "learning_rate": 4.102608225269324e-06, "loss": 3.4805, "step": 27450 }, { "epoch": 0.2792867024739583, "grad_norm": 14.674468994140625, "learning_rate": 4.1023012942566285e-06, "loss": 3.423, "step": 27455 }, { "epoch": 0.2793375651041667, "grad_norm": 8.352295875549316, "learning_rate": 4.101994322249433e-06, "loss": 3.0751, "step": 27460 }, { "epoch": 0.279388427734375, "grad_norm": 10.348424911499023, "learning_rate": 4.1016873092555935e-06, "loss": 3.8127, "step": 27465 }, { "epoch": 0.2794392903645833, "grad_norm": 10.092679977416992, "learning_rate": 4.101380255282963e-06, "loss": 3.8686, "step": 27470 }, { "epoch": 0.2794901529947917, "grad_norm": 9.561445236206055, "learning_rate": 4.101073160339398e-06, "loss": 3.1852, "step": 27475 }, { "epoch": 0.279541015625, "grad_norm": 12.506081581115723, "learning_rate": 4.100766024432756e-06, "loss": 3.4527, "step": 27480 }, { "epoch": 0.2795918782552083, "grad_norm": 11.840296745300293, "learning_rate": 4.100458847570895e-06, "loss": 3.3029, "step": 27485 }, { "epoch": 0.2796427408854167, "grad_norm": 10.97950267791748, "learning_rate": 4.100151629761673e-06, "loss": 3.3937, "step": 27490 }, { "epoch": 0.279693603515625, "grad_norm": 12.873710632324219, "learning_rate": 4.099844371012951e-06, "loss": 3.4339, "step": 27495 }, { "epoch": 0.2797444661458333, "grad_norm": 10.452871322631836, "learning_rate": 4.099537071332591e-06, "loss": 3.7318, "step": 27500 }, { "epoch": 0.2797953287760417, "grad_norm": 10.990396499633789, "learning_rate": 4.099229730728453e-06, "loss": 3.7192, "step": 27505 }, { "epoch": 0.27984619140625, "grad_norm": 14.749526977539062, "learning_rate": 4.098922349208402e-06, "loss": 3.0498, "step": 27510 }, { "epoch": 0.2798970540364583, "grad_norm": 15.220508575439453, "learning_rate": 4.098614926780301e-06, "loss": 3.1953, "step": 27515 }, { "epoch": 0.2799479166666667, "grad_norm": 9.671578407287598, "learning_rate": 4.098307463452017e-06, "loss": 3.2799, "step": 27520 }, { "epoch": 0.279998779296875, "grad_norm": 14.070199966430664, "learning_rate": 4.097999959231414e-06, "loss": 3.4482, "step": 27525 }, { "epoch": 0.2800496419270833, "grad_norm": 11.59659194946289, "learning_rate": 4.097692414126363e-06, "loss": 3.2175, "step": 27530 }, { "epoch": 0.2801005045572917, "grad_norm": 10.20411491394043, "learning_rate": 4.097384828144728e-06, "loss": 3.68, "step": 27535 }, { "epoch": 0.2801513671875, "grad_norm": 9.166175842285156, "learning_rate": 4.097077201294383e-06, "loss": 3.2877, "step": 27540 }, { "epoch": 0.2802022298177083, "grad_norm": 11.604389190673828, "learning_rate": 4.096769533583195e-06, "loss": 3.4169, "step": 27545 }, { "epoch": 0.2802530924479167, "grad_norm": 9.63940715789795, "learning_rate": 4.096461825019037e-06, "loss": 3.1978, "step": 27550 }, { "epoch": 0.280303955078125, "grad_norm": 13.217351913452148, "learning_rate": 4.096154075609781e-06, "loss": 3.2307, "step": 27555 }, { "epoch": 0.2803548177083333, "grad_norm": 8.332501411437988, "learning_rate": 4.095846285363302e-06, "loss": 3.5514, "step": 27560 }, { "epoch": 0.2804056803385417, "grad_norm": 7.972536563873291, "learning_rate": 4.0955384542874745e-06, "loss": 3.111, "step": 27565 }, { "epoch": 0.28045654296875, "grad_norm": 13.895954132080078, "learning_rate": 4.0952305823901726e-06, "loss": 3.4349, "step": 27570 }, { "epoch": 0.2805074055989583, "grad_norm": 16.52075958251953, "learning_rate": 4.094922669679275e-06, "loss": 3.6657, "step": 27575 }, { "epoch": 0.2805582682291667, "grad_norm": 13.944071769714355, "learning_rate": 4.09461471616266e-06, "loss": 3.4333, "step": 27580 }, { "epoch": 0.280609130859375, "grad_norm": 15.831037521362305, "learning_rate": 4.0943067218482044e-06, "loss": 3.2349, "step": 27585 }, { "epoch": 0.2806599934895833, "grad_norm": 10.248038291931152, "learning_rate": 4.09399868674379e-06, "loss": 3.6537, "step": 27590 }, { "epoch": 0.2807108561197917, "grad_norm": 8.459166526794434, "learning_rate": 4.0936906108572955e-06, "loss": 3.6726, "step": 27595 }, { "epoch": 0.28076171875, "grad_norm": 12.822853088378906, "learning_rate": 4.0933824941966055e-06, "loss": 3.4818, "step": 27600 }, { "epoch": 0.2808125813802083, "grad_norm": 13.79715633392334, "learning_rate": 4.093074336769601e-06, "loss": 3.5654, "step": 27605 }, { "epoch": 0.2808634440104167, "grad_norm": 12.120914459228516, "learning_rate": 4.092766138584169e-06, "loss": 2.9587, "step": 27610 }, { "epoch": 0.280914306640625, "grad_norm": 10.529516220092773, "learning_rate": 4.092457899648192e-06, "loss": 3.1997, "step": 27615 }, { "epoch": 0.2809651692708333, "grad_norm": 12.962397575378418, "learning_rate": 4.092149619969556e-06, "loss": 3.8154, "step": 27620 }, { "epoch": 0.2810160319010417, "grad_norm": 11.064664840698242, "learning_rate": 4.091841299556151e-06, "loss": 3.0046, "step": 27625 }, { "epoch": 0.28106689453125, "grad_norm": 14.863337516784668, "learning_rate": 4.091532938415863e-06, "loss": 3.0863, "step": 27630 }, { "epoch": 0.2811177571614583, "grad_norm": 8.072478294372559, "learning_rate": 4.091224536556583e-06, "loss": 3.5172, "step": 27635 }, { "epoch": 0.2811686197916667, "grad_norm": 9.187117576599121, "learning_rate": 4.090916093986199e-06, "loss": 3.6613, "step": 27640 }, { "epoch": 0.281219482421875, "grad_norm": 10.171202659606934, "learning_rate": 4.090607610712604e-06, "loss": 3.2175, "step": 27645 }, { "epoch": 0.2812703450520833, "grad_norm": 15.561723709106445, "learning_rate": 4.090299086743691e-06, "loss": 3.1775, "step": 27650 }, { "epoch": 0.2813212076822917, "grad_norm": 12.204340934753418, "learning_rate": 4.089990522087352e-06, "loss": 3.1616, "step": 27655 }, { "epoch": 0.2813720703125, "grad_norm": 17.98369026184082, "learning_rate": 4.089681916751483e-06, "loss": 3.4742, "step": 27660 }, { "epoch": 0.2814229329427083, "grad_norm": 12.7793607711792, "learning_rate": 4.089373270743978e-06, "loss": 2.9907, "step": 27665 }, { "epoch": 0.2814737955729167, "grad_norm": 17.77541732788086, "learning_rate": 4.089064584072735e-06, "loss": 3.446, "step": 27670 }, { "epoch": 0.281524658203125, "grad_norm": 13.95572280883789, "learning_rate": 4.088755856745652e-06, "loss": 3.262, "step": 27675 }, { "epoch": 0.2815755208333333, "grad_norm": 16.80682373046875, "learning_rate": 4.088447088770627e-06, "loss": 3.6142, "step": 27680 }, { "epoch": 0.2816263834635417, "grad_norm": 14.185280799865723, "learning_rate": 4.088138280155559e-06, "loss": 3.7258, "step": 27685 }, { "epoch": 0.28167724609375, "grad_norm": 10.341408729553223, "learning_rate": 4.08782943090835e-06, "loss": 3.1562, "step": 27690 }, { "epoch": 0.2817281087239583, "grad_norm": 12.043773651123047, "learning_rate": 4.087520541036901e-06, "loss": 3.1961, "step": 27695 }, { "epoch": 0.2817789713541667, "grad_norm": 12.786687850952148, "learning_rate": 4.087211610549115e-06, "loss": 3.3973, "step": 27700 }, { "epoch": 0.281829833984375, "grad_norm": 14.750553131103516, "learning_rate": 4.086902639452897e-06, "loss": 3.3499, "step": 27705 }, { "epoch": 0.2818806966145833, "grad_norm": 7.420515537261963, "learning_rate": 4.08659362775615e-06, "loss": 3.2789, "step": 27710 }, { "epoch": 0.2819315592447917, "grad_norm": 8.589488983154297, "learning_rate": 4.086284575466783e-06, "loss": 3.4395, "step": 27715 }, { "epoch": 0.281982421875, "grad_norm": 14.356674194335938, "learning_rate": 4.085975482592699e-06, "loss": 3.8196, "step": 27720 }, { "epoch": 0.2820332845052083, "grad_norm": 10.523839950561523, "learning_rate": 4.085666349141809e-06, "loss": 3.5714, "step": 27725 }, { "epoch": 0.2820841471354167, "grad_norm": 14.31343936920166, "learning_rate": 4.085357175122022e-06, "loss": 3.2955, "step": 27730 }, { "epoch": 0.282135009765625, "grad_norm": 13.042136192321777, "learning_rate": 4.085047960541247e-06, "loss": 3.2577, "step": 27735 }, { "epoch": 0.2821858723958333, "grad_norm": 13.704607963562012, "learning_rate": 4.084738705407395e-06, "loss": 3.2682, "step": 27740 }, { "epoch": 0.2822367350260417, "grad_norm": 13.314915657043457, "learning_rate": 4.084429409728379e-06, "loss": 4.1069, "step": 27745 }, { "epoch": 0.28228759765625, "grad_norm": 14.6658935546875, "learning_rate": 4.084120073512113e-06, "loss": 3.1926, "step": 27750 }, { "epoch": 0.2823384602864583, "grad_norm": 11.541994094848633, "learning_rate": 4.083810696766509e-06, "loss": 3.8473, "step": 27755 }, { "epoch": 0.2823893229166667, "grad_norm": 9.510300636291504, "learning_rate": 4.083501279499485e-06, "loss": 3.2756, "step": 27760 }, { "epoch": 0.282440185546875, "grad_norm": 11.580392837524414, "learning_rate": 4.083191821718956e-06, "loss": 3.4178, "step": 27765 }, { "epoch": 0.2824910481770833, "grad_norm": 9.93171501159668, "learning_rate": 4.082882323432839e-06, "loss": 3.6801, "step": 27770 }, { "epoch": 0.2825419108072917, "grad_norm": 15.171422958374023, "learning_rate": 4.082572784649053e-06, "loss": 3.283, "step": 27775 }, { "epoch": 0.2825927734375, "grad_norm": 13.232819557189941, "learning_rate": 4.082263205375517e-06, "loss": 3.6217, "step": 27780 }, { "epoch": 0.2826436360677083, "grad_norm": 11.675466537475586, "learning_rate": 4.081953585620153e-06, "loss": 3.4335, "step": 27785 }, { "epoch": 0.2826944986979167, "grad_norm": 8.862447738647461, "learning_rate": 4.081643925390881e-06, "loss": 3.3755, "step": 27790 }, { "epoch": 0.282745361328125, "grad_norm": 10.922281265258789, "learning_rate": 4.081334224695623e-06, "loss": 3.6146, "step": 27795 }, { "epoch": 0.2827962239583333, "grad_norm": 9.969596862792969, "learning_rate": 4.081024483542306e-06, "loss": 3.6792, "step": 27800 }, { "epoch": 0.2828470865885417, "grad_norm": 11.884249687194824, "learning_rate": 4.08071470193885e-06, "loss": 3.3235, "step": 27805 }, { "epoch": 0.28289794921875, "grad_norm": 14.386703491210938, "learning_rate": 4.080404879893184e-06, "loss": 3.3408, "step": 27810 }, { "epoch": 0.2829488118489583, "grad_norm": 15.549338340759277, "learning_rate": 4.080095017413234e-06, "loss": 3.3848, "step": 27815 }, { "epoch": 0.2829996744791667, "grad_norm": 11.88217830657959, "learning_rate": 4.079785114506927e-06, "loss": 3.8501, "step": 27820 }, { "epoch": 0.283050537109375, "grad_norm": 16.217618942260742, "learning_rate": 4.079475171182193e-06, "loss": 3.5115, "step": 27825 }, { "epoch": 0.2831013997395833, "grad_norm": 14.34183406829834, "learning_rate": 4.0791651874469605e-06, "loss": 3.622, "step": 27830 }, { "epoch": 0.2831522623697917, "grad_norm": 14.53591251373291, "learning_rate": 4.078855163309161e-06, "loss": 3.2665, "step": 27835 }, { "epoch": 0.283203125, "grad_norm": 10.941570281982422, "learning_rate": 4.078545098776726e-06, "loss": 3.858, "step": 27840 }, { "epoch": 0.2832539876302083, "grad_norm": 9.548334121704102, "learning_rate": 4.07823499385759e-06, "loss": 3.6304, "step": 27845 }, { "epoch": 0.2833048502604167, "grad_norm": 13.581596374511719, "learning_rate": 4.077924848559685e-06, "loss": 3.3858, "step": 27850 }, { "epoch": 0.283355712890625, "grad_norm": 11.762258529663086, "learning_rate": 4.077614662890947e-06, "loss": 3.2353, "step": 27855 }, { "epoch": 0.2834065755208333, "grad_norm": 15.425300598144531, "learning_rate": 4.077304436859311e-06, "loss": 3.2407, "step": 27860 }, { "epoch": 0.2834574381510417, "grad_norm": 16.47221565246582, "learning_rate": 4.0769941704727154e-06, "loss": 3.4532, "step": 27865 }, { "epoch": 0.28350830078125, "grad_norm": 10.942659378051758, "learning_rate": 4.076683863739098e-06, "loss": 3.0886, "step": 27870 }, { "epoch": 0.2835591634114583, "grad_norm": 15.116689682006836, "learning_rate": 4.076373516666398e-06, "loss": 3.561, "step": 27875 }, { "epoch": 0.2836100260416667, "grad_norm": 12.724804878234863, "learning_rate": 4.076063129262554e-06, "loss": 3.1554, "step": 27880 }, { "epoch": 0.283660888671875, "grad_norm": 15.09601879119873, "learning_rate": 4.07575270153551e-06, "loss": 3.1598, "step": 27885 }, { "epoch": 0.2837117513020833, "grad_norm": 10.501201629638672, "learning_rate": 4.0754422334932054e-06, "loss": 2.8968, "step": 27890 }, { "epoch": 0.2837626139322917, "grad_norm": 13.239876747131348, "learning_rate": 4.075131725143585e-06, "loss": 3.1013, "step": 27895 }, { "epoch": 0.2838134765625, "grad_norm": 9.330605506896973, "learning_rate": 4.074821176494592e-06, "loss": 3.3436, "step": 27900 }, { "epoch": 0.2838643391927083, "grad_norm": 13.973194122314453, "learning_rate": 4.074510587554173e-06, "loss": 3.3293, "step": 27905 }, { "epoch": 0.2839152018229167, "grad_norm": 16.430049896240234, "learning_rate": 4.074199958330275e-06, "loss": 3.6129, "step": 27910 }, { "epoch": 0.283966064453125, "grad_norm": 9.183777809143066, "learning_rate": 4.073889288830843e-06, "loss": 3.6742, "step": 27915 }, { "epoch": 0.2840169270833333, "grad_norm": 9.213640213012695, "learning_rate": 4.073578579063827e-06, "loss": 3.4222, "step": 27920 }, { "epoch": 0.2840677897135417, "grad_norm": 14.945640563964844, "learning_rate": 4.073267829037175e-06, "loss": 3.3423, "step": 27925 }, { "epoch": 0.28411865234375, "grad_norm": 9.454371452331543, "learning_rate": 4.07295703875884e-06, "loss": 3.3122, "step": 27930 }, { "epoch": 0.2841695149739583, "grad_norm": 17.040342330932617, "learning_rate": 4.072646208236772e-06, "loss": 3.4337, "step": 27935 }, { "epoch": 0.2842203776041667, "grad_norm": 15.587324142456055, "learning_rate": 4.072335337478922e-06, "loss": 3.4444, "step": 27940 }, { "epoch": 0.284271240234375, "grad_norm": 85.70834350585938, "learning_rate": 4.072024426493246e-06, "loss": 3.3656, "step": 27945 }, { "epoch": 0.2843221028645833, "grad_norm": 7.995316505432129, "learning_rate": 4.071713475287698e-06, "loss": 3.1318, "step": 27950 }, { "epoch": 0.2843729654947917, "grad_norm": 13.728548049926758, "learning_rate": 4.071402483870233e-06, "loss": 3.6891, "step": 27955 }, { "epoch": 0.284423828125, "grad_norm": 18.6165828704834, "learning_rate": 4.071091452248807e-06, "loss": 3.1881, "step": 27960 }, { "epoch": 0.2844746907552083, "grad_norm": 12.543664932250977, "learning_rate": 4.070780380431379e-06, "loss": 3.5005, "step": 27965 }, { "epoch": 0.2845255533854167, "grad_norm": 15.050885200500488, "learning_rate": 4.070469268425909e-06, "loss": 3.4116, "step": 27970 }, { "epoch": 0.284576416015625, "grad_norm": 10.877278327941895, "learning_rate": 4.070158116240353e-06, "loss": 3.8081, "step": 27975 }, { "epoch": 0.2846272786458333, "grad_norm": 14.296137809753418, "learning_rate": 4.069846923882674e-06, "loss": 3.4596, "step": 27980 }, { "epoch": 0.2846781412760417, "grad_norm": 7.9767560958862305, "learning_rate": 4.069535691360835e-06, "loss": 3.3673, "step": 27985 }, { "epoch": 0.28472900390625, "grad_norm": 11.449370384216309, "learning_rate": 4.069224418682796e-06, "loss": 2.9643, "step": 27990 }, { "epoch": 0.2847798665364583, "grad_norm": 11.578384399414062, "learning_rate": 4.068913105856522e-06, "loss": 3.1005, "step": 27995 }, { "epoch": 0.2848307291666667, "grad_norm": 13.277164459228516, "learning_rate": 4.068601752889979e-06, "loss": 3.0292, "step": 28000 }, { "epoch": 0.284881591796875, "grad_norm": 14.91826343536377, "learning_rate": 4.068290359791131e-06, "loss": 3.2388, "step": 28005 }, { "epoch": 0.2849324544270833, "grad_norm": 28.24471092224121, "learning_rate": 4.067978926567947e-06, "loss": 3.4851, "step": 28010 }, { "epoch": 0.2849833170572917, "grad_norm": 11.40623950958252, "learning_rate": 4.0676674532283936e-06, "loss": 3.4192, "step": 28015 }, { "epoch": 0.2850341796875, "grad_norm": 7.215197563171387, "learning_rate": 4.067355939780439e-06, "loss": 2.9501, "step": 28020 }, { "epoch": 0.2850850423177083, "grad_norm": 13.303850173950195, "learning_rate": 4.067044386232055e-06, "loss": 3.3592, "step": 28025 }, { "epoch": 0.2851359049479167, "grad_norm": 11.901801109313965, "learning_rate": 4.066732792591211e-06, "loss": 3.1835, "step": 28030 }, { "epoch": 0.285186767578125, "grad_norm": 11.704801559448242, "learning_rate": 4.066421158865881e-06, "loss": 3.851, "step": 28035 }, { "epoch": 0.2852376302083333, "grad_norm": 9.540196418762207, "learning_rate": 4.066109485064037e-06, "loss": 3.4988, "step": 28040 }, { "epoch": 0.2852884928385417, "grad_norm": 7.771119117736816, "learning_rate": 4.065797771193652e-06, "loss": 3.3013, "step": 28045 }, { "epoch": 0.28533935546875, "grad_norm": 9.430291175842285, "learning_rate": 4.065486017262703e-06, "loss": 3.1248, "step": 28050 }, { "epoch": 0.2853902180989583, "grad_norm": 10.184014320373535, "learning_rate": 4.065174223279165e-06, "loss": 3.3792, "step": 28055 }, { "epoch": 0.2854410807291667, "grad_norm": 14.535456657409668, "learning_rate": 4.0648623892510145e-06, "loss": 3.9619, "step": 28060 }, { "epoch": 0.285491943359375, "grad_norm": 11.507352828979492, "learning_rate": 4.064550515186232e-06, "loss": 3.2716, "step": 28065 }, { "epoch": 0.2855428059895833, "grad_norm": 10.623822212219238, "learning_rate": 4.064238601092795e-06, "loss": 3.1316, "step": 28070 }, { "epoch": 0.2855936686197917, "grad_norm": 10.219310760498047, "learning_rate": 4.063926646978684e-06, "loss": 3.2253, "step": 28075 }, { "epoch": 0.28564453125, "grad_norm": 11.938286781311035, "learning_rate": 4.06361465285188e-06, "loss": 3.2599, "step": 28080 }, { "epoch": 0.2856953938802083, "grad_norm": 15.574570655822754, "learning_rate": 4.063302618720367e-06, "loss": 3.5626, "step": 28085 }, { "epoch": 0.2857462565104167, "grad_norm": 10.333740234375, "learning_rate": 4.062990544592126e-06, "loss": 3.8863, "step": 28090 }, { "epoch": 0.285797119140625, "grad_norm": 17.025602340698242, "learning_rate": 4.062678430475143e-06, "loss": 3.6249, "step": 28095 }, { "epoch": 0.2858479817708333, "grad_norm": 9.703519821166992, "learning_rate": 4.062366276377403e-06, "loss": 3.3165, "step": 28100 }, { "epoch": 0.2858988444010417, "grad_norm": 14.47970962524414, "learning_rate": 4.062054082306891e-06, "loss": 3.3594, "step": 28105 }, { "epoch": 0.28594970703125, "grad_norm": 15.501633644104004, "learning_rate": 4.061741848271597e-06, "loss": 3.4847, "step": 28110 }, { "epoch": 0.2860005696614583, "grad_norm": 12.506375312805176, "learning_rate": 4.061429574279506e-06, "loss": 3.8149, "step": 28115 }, { "epoch": 0.2860514322916667, "grad_norm": 8.678794860839844, "learning_rate": 4.061117260338611e-06, "loss": 3.5832, "step": 28120 }, { "epoch": 0.286102294921875, "grad_norm": 16.247833251953125, "learning_rate": 4.0608049064569e-06, "loss": 2.9734, "step": 28125 }, { "epoch": 0.2861531575520833, "grad_norm": 12.02649974822998, "learning_rate": 4.060492512642366e-06, "loss": 3.3525, "step": 28130 }, { "epoch": 0.2862040201822917, "grad_norm": 7.934184551239014, "learning_rate": 4.060180078903001e-06, "loss": 3.7371, "step": 28135 }, { "epoch": 0.2862548828125, "grad_norm": 8.472125053405762, "learning_rate": 4.059867605246799e-06, "loss": 3.0991, "step": 28140 }, { "epoch": 0.2863057454427083, "grad_norm": 12.102945327758789, "learning_rate": 4.059555091681753e-06, "loss": 3.4988, "step": 28145 }, { "epoch": 0.2863566080729167, "grad_norm": 12.248966217041016, "learning_rate": 4.05924253821586e-06, "loss": 3.1905, "step": 28150 }, { "epoch": 0.286407470703125, "grad_norm": 10.715188026428223, "learning_rate": 4.0589299448571164e-06, "loss": 3.6046, "step": 28155 }, { "epoch": 0.2864583333333333, "grad_norm": 13.544321060180664, "learning_rate": 4.05861731161352e-06, "loss": 3.216, "step": 28160 }, { "epoch": 0.2865091959635417, "grad_norm": 12.897768020629883, "learning_rate": 4.058304638493068e-06, "loss": 3.0793, "step": 28165 }, { "epoch": 0.28656005859375, "grad_norm": 9.566934585571289, "learning_rate": 4.057991925503762e-06, "loss": 3.435, "step": 28170 }, { "epoch": 0.2866109212239583, "grad_norm": 15.859289169311523, "learning_rate": 4.0576791726536015e-06, "loss": 3.0893, "step": 28175 }, { "epoch": 0.2866617838541667, "grad_norm": 9.841944694519043, "learning_rate": 4.057366379950589e-06, "loss": 3.0007, "step": 28180 }, { "epoch": 0.286712646484375, "grad_norm": 14.592065811157227, "learning_rate": 4.057053547402726e-06, "loss": 3.0607, "step": 28185 }, { "epoch": 0.2867635091145833, "grad_norm": 7.597635746002197, "learning_rate": 4.056740675018018e-06, "loss": 3.4154, "step": 28190 }, { "epoch": 0.2868143717447917, "grad_norm": 11.658868789672852, "learning_rate": 4.056427762804468e-06, "loss": 3.5037, "step": 28195 }, { "epoch": 0.286865234375, "grad_norm": 14.114136695861816, "learning_rate": 4.0561148107700836e-06, "loss": 3.3819, "step": 28200 }, { "epoch": 0.2869160970052083, "grad_norm": 9.104647636413574, "learning_rate": 4.05580181892287e-06, "loss": 3.049, "step": 28205 }, { "epoch": 0.2869669596354167, "grad_norm": 8.600118637084961, "learning_rate": 4.055488787270836e-06, "loss": 3.2881, "step": 28210 }, { "epoch": 0.287017822265625, "grad_norm": 8.562788009643555, "learning_rate": 4.055175715821989e-06, "loss": 3.3843, "step": 28215 }, { "epoch": 0.2870686848958333, "grad_norm": 9.191661834716797, "learning_rate": 4.054862604584341e-06, "loss": 3.013, "step": 28220 }, { "epoch": 0.2871195475260417, "grad_norm": 9.98511028289795, "learning_rate": 4.054549453565902e-06, "loss": 3.4819, "step": 28225 }, { "epoch": 0.28717041015625, "grad_norm": 11.296266555786133, "learning_rate": 4.0542362627746825e-06, "loss": 3.2387, "step": 28230 }, { "epoch": 0.2872212727864583, "grad_norm": 15.573591232299805, "learning_rate": 4.053923032218698e-06, "loss": 3.583, "step": 28235 }, { "epoch": 0.2872721354166667, "grad_norm": 14.96290111541748, "learning_rate": 4.05360976190596e-06, "loss": 3.3793, "step": 28240 }, { "epoch": 0.287322998046875, "grad_norm": 13.247879981994629, "learning_rate": 4.053296451844485e-06, "loss": 3.342, "step": 28245 }, { "epoch": 0.2873738606770833, "grad_norm": 10.612177848815918, "learning_rate": 4.052983102042288e-06, "loss": 3.2241, "step": 28250 }, { "epoch": 0.2874247233072917, "grad_norm": 13.62741756439209, "learning_rate": 4.0526697125073876e-06, "loss": 3.4002, "step": 28255 }, { "epoch": 0.2874755859375, "grad_norm": 13.484682083129883, "learning_rate": 4.052356283247799e-06, "loss": 3.1952, "step": 28260 }, { "epoch": 0.2875264485677083, "grad_norm": 14.832202911376953, "learning_rate": 4.052042814271544e-06, "loss": 3.1964, "step": 28265 }, { "epoch": 0.2875773111979167, "grad_norm": 13.506696701049805, "learning_rate": 4.051729305586641e-06, "loss": 3.4984, "step": 28270 }, { "epoch": 0.287628173828125, "grad_norm": 13.403464317321777, "learning_rate": 4.051415757201111e-06, "loss": 3.051, "step": 28275 }, { "epoch": 0.2876790364583333, "grad_norm": 13.91868782043457, "learning_rate": 4.051102169122977e-06, "loss": 3.4574, "step": 28280 }, { "epoch": 0.2877298990885417, "grad_norm": 12.299845695495605, "learning_rate": 4.050788541360262e-06, "loss": 3.5358, "step": 28285 }, { "epoch": 0.28778076171875, "grad_norm": 13.38621997833252, "learning_rate": 4.05047487392099e-06, "loss": 3.3492, "step": 28290 }, { "epoch": 0.2878316243489583, "grad_norm": 14.945199966430664, "learning_rate": 4.050161166813186e-06, "loss": 3.8799, "step": 28295 }, { "epoch": 0.2878824869791667, "grad_norm": 10.105154991149902, "learning_rate": 4.049847420044874e-06, "loss": 3.613, "step": 28300 }, { "epoch": 0.287933349609375, "grad_norm": 11.531231880187988, "learning_rate": 4.049533633624086e-06, "loss": 3.4646, "step": 28305 }, { "epoch": 0.2879842122395833, "grad_norm": 12.640409469604492, "learning_rate": 4.049219807558845e-06, "loss": 3.1404, "step": 28310 }, { "epoch": 0.2880350748697917, "grad_norm": 14.797945976257324, "learning_rate": 4.048905941857183e-06, "loss": 3.8047, "step": 28315 }, { "epoch": 0.2880859375, "grad_norm": 7.843355655670166, "learning_rate": 4.048592036527131e-06, "loss": 3.6593, "step": 28320 }, { "epoch": 0.2881368001302083, "grad_norm": 13.126662254333496, "learning_rate": 4.048278091576716e-06, "loss": 3.5298, "step": 28325 }, { "epoch": 0.2881876627604167, "grad_norm": 7.7960429191589355, "learning_rate": 4.047964107013976e-06, "loss": 3.4729, "step": 28330 }, { "epoch": 0.288238525390625, "grad_norm": 14.168012619018555, "learning_rate": 4.047650082846939e-06, "loss": 3.2386, "step": 28335 }, { "epoch": 0.2882893880208333, "grad_norm": 12.29859447479248, "learning_rate": 4.047336019083642e-06, "loss": 3.6913, "step": 28340 }, { "epoch": 0.2883402506510417, "grad_norm": 12.323288917541504, "learning_rate": 4.04702191573212e-06, "loss": 3.2937, "step": 28345 }, { "epoch": 0.28839111328125, "grad_norm": 17.101675033569336, "learning_rate": 4.046707772800409e-06, "loss": 3.4794, "step": 28350 }, { "epoch": 0.2884419759114583, "grad_norm": 8.433690071105957, "learning_rate": 4.046393590296547e-06, "loss": 3.2544, "step": 28355 }, { "epoch": 0.2884928385416667, "grad_norm": 11.87043285369873, "learning_rate": 4.046079368228571e-06, "loss": 3.274, "step": 28360 }, { "epoch": 0.288543701171875, "grad_norm": 14.319379806518555, "learning_rate": 4.04576510660452e-06, "loss": 3.1518, "step": 28365 }, { "epoch": 0.2885945638020833, "grad_norm": 16.121383666992188, "learning_rate": 4.045450805432436e-06, "loss": 3.5974, "step": 28370 }, { "epoch": 0.2886454264322917, "grad_norm": 10.960174560546875, "learning_rate": 4.045136464720358e-06, "loss": 3.4076, "step": 28375 }, { "epoch": 0.2886962890625, "grad_norm": 17.95393180847168, "learning_rate": 4.044822084476332e-06, "loss": 3.2632, "step": 28380 }, { "epoch": 0.2887471516927083, "grad_norm": 13.769865989685059, "learning_rate": 4.044507664708398e-06, "loss": 3.3918, "step": 28385 }, { "epoch": 0.2887980143229167, "grad_norm": 7.544826984405518, "learning_rate": 4.0441932054246015e-06, "loss": 3.4184, "step": 28390 }, { "epoch": 0.288848876953125, "grad_norm": 7.78319787979126, "learning_rate": 4.043878706632988e-06, "loss": 3.4036, "step": 28395 }, { "epoch": 0.2888997395833333, "grad_norm": 10.946330070495605, "learning_rate": 4.0435641683416035e-06, "loss": 3.235, "step": 28400 }, { "epoch": 0.2889506022135417, "grad_norm": 15.540562629699707, "learning_rate": 4.043249590558496e-06, "loss": 3.3337, "step": 28405 }, { "epoch": 0.28900146484375, "grad_norm": 11.995061874389648, "learning_rate": 4.042934973291713e-06, "loss": 3.3849, "step": 28410 }, { "epoch": 0.2890523274739583, "grad_norm": 9.146485328674316, "learning_rate": 4.042620316549305e-06, "loss": 3.2122, "step": 28415 }, { "epoch": 0.2891031901041667, "grad_norm": 10.783262252807617, "learning_rate": 4.0423056203393204e-06, "loss": 3.1673, "step": 28420 }, { "epoch": 0.289154052734375, "grad_norm": 14.132058143615723, "learning_rate": 4.041990884669814e-06, "loss": 3.2879, "step": 28425 }, { "epoch": 0.2892049153645833, "grad_norm": 13.759737968444824, "learning_rate": 4.041676109548835e-06, "loss": 3.6415, "step": 28430 }, { "epoch": 0.2892557779947917, "grad_norm": 12.73385238647461, "learning_rate": 4.041361294984439e-06, "loss": 3.3467, "step": 28435 }, { "epoch": 0.289306640625, "grad_norm": 13.97321605682373, "learning_rate": 4.04104644098468e-06, "loss": 3.6723, "step": 28440 }, { "epoch": 0.2893575032552083, "grad_norm": 9.306679725646973, "learning_rate": 4.040731547557612e-06, "loss": 3.3643, "step": 28445 }, { "epoch": 0.2894083658854167, "grad_norm": 15.250093460083008, "learning_rate": 4.040416614711293e-06, "loss": 3.3897, "step": 28450 }, { "epoch": 0.289459228515625, "grad_norm": 12.86093807220459, "learning_rate": 4.04010164245378e-06, "loss": 3.2651, "step": 28455 }, { "epoch": 0.2895100911458333, "grad_norm": 17.20577621459961, "learning_rate": 4.039786630793131e-06, "loss": 3.1926, "step": 28460 }, { "epoch": 0.2895609537760417, "grad_norm": 10.533615112304688, "learning_rate": 4.039471579737407e-06, "loss": 2.9851, "step": 28465 }, { "epoch": 0.28961181640625, "grad_norm": 14.544201850891113, "learning_rate": 4.039156489294667e-06, "loss": 3.0023, "step": 28470 }, { "epoch": 0.2896626790364583, "grad_norm": 11.979805946350098, "learning_rate": 4.038841359472973e-06, "loss": 3.6672, "step": 28475 }, { "epoch": 0.2897135416666667, "grad_norm": 13.206119537353516, "learning_rate": 4.038526190280387e-06, "loss": 3.3212, "step": 28480 }, { "epoch": 0.289764404296875, "grad_norm": 15.055665969848633, "learning_rate": 4.038210981724975e-06, "loss": 3.6517, "step": 28485 }, { "epoch": 0.2898152669270833, "grad_norm": 10.177913665771484, "learning_rate": 4.037895733814797e-06, "loss": 3.0735, "step": 28490 }, { "epoch": 0.2898661295572917, "grad_norm": 12.19504451751709, "learning_rate": 4.037580446557923e-06, "loss": 3.3941, "step": 28495 }, { "epoch": 0.2899169921875, "grad_norm": 9.21094799041748, "learning_rate": 4.037265119962417e-06, "loss": 2.9818, "step": 28500 }, { "epoch": 0.2899678548177083, "grad_norm": 9.886246681213379, "learning_rate": 4.036949754036348e-06, "loss": 3.5824, "step": 28505 }, { "epoch": 0.2900187174479167, "grad_norm": 11.721346855163574, "learning_rate": 4.036634348787783e-06, "loss": 3.245, "step": 28510 }, { "epoch": 0.290069580078125, "grad_norm": 12.692378997802734, "learning_rate": 4.036318904224793e-06, "loss": 3.122, "step": 28515 }, { "epoch": 0.2901204427083333, "grad_norm": 14.110976219177246, "learning_rate": 4.036003420355447e-06, "loss": 3.3814, "step": 28520 }, { "epoch": 0.2901713053385417, "grad_norm": 10.645319938659668, "learning_rate": 4.035687897187819e-06, "loss": 3.4385, "step": 28525 }, { "epoch": 0.29022216796875, "grad_norm": 10.692234992980957, "learning_rate": 4.03537233472998e-06, "loss": 3.0013, "step": 28530 }, { "epoch": 0.2902730305989583, "grad_norm": 11.266454696655273, "learning_rate": 4.035056732990003e-06, "loss": 3.7197, "step": 28535 }, { "epoch": 0.2903238932291667, "grad_norm": 10.607219696044922, "learning_rate": 4.034741091975963e-06, "loss": 3.2842, "step": 28540 }, { "epoch": 0.290374755859375, "grad_norm": 11.192341804504395, "learning_rate": 4.034425411695937e-06, "loss": 3.2919, "step": 28545 }, { "epoch": 0.2904256184895833, "grad_norm": 12.919684410095215, "learning_rate": 4.0341096921580005e-06, "loss": 3.4981, "step": 28550 }, { "epoch": 0.2904764811197917, "grad_norm": 8.975446701049805, "learning_rate": 4.03379393337023e-06, "loss": 3.1235, "step": 28555 }, { "epoch": 0.29052734375, "grad_norm": 9.11768627166748, "learning_rate": 4.033478135340706e-06, "loss": 3.2628, "step": 28560 }, { "epoch": 0.2905782063802083, "grad_norm": 7.787058353424072, "learning_rate": 4.033162298077507e-06, "loss": 3.5017, "step": 28565 }, { "epoch": 0.2906290690104167, "grad_norm": 8.185754776000977, "learning_rate": 4.0328464215887145e-06, "loss": 3.0679, "step": 28570 }, { "epoch": 0.290679931640625, "grad_norm": 15.081027030944824, "learning_rate": 4.032530505882409e-06, "loss": 3.4288, "step": 28575 }, { "epoch": 0.2907307942708333, "grad_norm": 15.316882133483887, "learning_rate": 4.032214550966674e-06, "loss": 3.5306, "step": 28580 }, { "epoch": 0.2907816569010417, "grad_norm": 11.870295524597168, "learning_rate": 4.031898556849594e-06, "loss": 3.7683, "step": 28585 }, { "epoch": 0.29083251953125, "grad_norm": 10.215821266174316, "learning_rate": 4.0315825235392516e-06, "loss": 3.4146, "step": 28590 }, { "epoch": 0.2908833821614583, "grad_norm": 9.163251876831055, "learning_rate": 4.0312664510437325e-06, "loss": 3.5628, "step": 28595 }, { "epoch": 0.2909342447916667, "grad_norm": 11.067456245422363, "learning_rate": 4.030950339371125e-06, "loss": 3.2844, "step": 28600 }, { "epoch": 0.290985107421875, "grad_norm": 15.394378662109375, "learning_rate": 4.030634188529517e-06, "loss": 3.2655, "step": 28605 }, { "epoch": 0.2910359700520833, "grad_norm": 12.825260162353516, "learning_rate": 4.030317998526995e-06, "loss": 3.3786, "step": 28610 }, { "epoch": 0.2910868326822917, "grad_norm": 8.954317092895508, "learning_rate": 4.03000176937165e-06, "loss": 3.397, "step": 28615 }, { "epoch": 0.2911376953125, "grad_norm": 10.459576606750488, "learning_rate": 4.029685501071572e-06, "loss": 3.3017, "step": 28620 }, { "epoch": 0.2911885579427083, "grad_norm": 9.525602340698242, "learning_rate": 4.029369193634853e-06, "loss": 3.4694, "step": 28625 }, { "epoch": 0.2912394205729167, "grad_norm": 20.42050552368164, "learning_rate": 4.029052847069585e-06, "loss": 3.3009, "step": 28630 }, { "epoch": 0.291290283203125, "grad_norm": 12.305190086364746, "learning_rate": 4.028736461383864e-06, "loss": 3.5505, "step": 28635 }, { "epoch": 0.2913411458333333, "grad_norm": 10.575671195983887, "learning_rate": 4.028420036585782e-06, "loss": 3.5277, "step": 28640 }, { "epoch": 0.2913920084635417, "grad_norm": 11.413809776306152, "learning_rate": 4.0281035726834364e-06, "loss": 3.5491, "step": 28645 }, { "epoch": 0.29144287109375, "grad_norm": 15.26525592803955, "learning_rate": 4.027787069684922e-06, "loss": 3.4472, "step": 28650 }, { "epoch": 0.2914937337239583, "grad_norm": 13.700064659118652, "learning_rate": 4.0274705275983385e-06, "loss": 3.7096, "step": 28655 }, { "epoch": 0.2915445963541667, "grad_norm": 11.321172714233398, "learning_rate": 4.027153946431783e-06, "loss": 3.2004, "step": 28660 }, { "epoch": 0.291595458984375, "grad_norm": 11.454513549804688, "learning_rate": 4.026837326193357e-06, "loss": 3.331, "step": 28665 }, { "epoch": 0.2916463216145833, "grad_norm": 8.657747268676758, "learning_rate": 4.026520666891158e-06, "loss": 3.2785, "step": 28670 }, { "epoch": 0.2916971842447917, "grad_norm": 13.227458953857422, "learning_rate": 4.026203968533291e-06, "loss": 3.4965, "step": 28675 }, { "epoch": 0.291748046875, "grad_norm": 13.470231056213379, "learning_rate": 4.025887231127856e-06, "loss": 3.1858, "step": 28680 }, { "epoch": 0.2917989095052083, "grad_norm": 10.020124435424805, "learning_rate": 4.025570454682959e-06, "loss": 3.2221, "step": 28685 }, { "epoch": 0.2918497721354167, "grad_norm": 8.721534729003906, "learning_rate": 4.025253639206703e-06, "loss": 3.3351, "step": 28690 }, { "epoch": 0.291900634765625, "grad_norm": 7.433453559875488, "learning_rate": 4.024936784707194e-06, "loss": 3.0843, "step": 28695 }, { "epoch": 0.2919514973958333, "grad_norm": 7.928054332733154, "learning_rate": 4.02461989119254e-06, "loss": 3.2303, "step": 28700 }, { "epoch": 0.2920023600260417, "grad_norm": 14.633429527282715, "learning_rate": 4.0243029586708455e-06, "loss": 3.2592, "step": 28705 }, { "epoch": 0.29205322265625, "grad_norm": 11.753192901611328, "learning_rate": 4.023985987150222e-06, "loss": 3.2326, "step": 28710 }, { "epoch": 0.2921040852864583, "grad_norm": 18.38849449157715, "learning_rate": 4.023668976638778e-06, "loss": 3.6247, "step": 28715 }, { "epoch": 0.2921549479166667, "grad_norm": 7.707657337188721, "learning_rate": 4.023351927144625e-06, "loss": 3.3972, "step": 28720 }, { "epoch": 0.292205810546875, "grad_norm": 11.268938064575195, "learning_rate": 4.023034838675874e-06, "loss": 3.0399, "step": 28725 }, { "epoch": 0.2922566731770833, "grad_norm": 10.834413528442383, "learning_rate": 4.022717711240637e-06, "loss": 3.2737, "step": 28730 }, { "epoch": 0.2923075358072917, "grad_norm": 9.380146026611328, "learning_rate": 4.022400544847028e-06, "loss": 3.3967, "step": 28735 }, { "epoch": 0.2923583984375, "grad_norm": 9.512231826782227, "learning_rate": 4.022083339503164e-06, "loss": 2.9652, "step": 28740 }, { "epoch": 0.2924092610677083, "grad_norm": 13.55062484741211, "learning_rate": 4.021766095217157e-06, "loss": 2.9839, "step": 28745 }, { "epoch": 0.2924601236979167, "grad_norm": 11.842476844787598, "learning_rate": 4.021448811997125e-06, "loss": 3.5804, "step": 28750 }, { "epoch": 0.292510986328125, "grad_norm": 14.86185073852539, "learning_rate": 4.021131489851186e-06, "loss": 3.2041, "step": 28755 }, { "epoch": 0.2925618489583333, "grad_norm": 10.015793800354004, "learning_rate": 4.020814128787458e-06, "loss": 3.4947, "step": 28760 }, { "epoch": 0.2926127115885417, "grad_norm": 16.945362091064453, "learning_rate": 4.020496728814062e-06, "loss": 3.4086, "step": 28765 }, { "epoch": 0.29266357421875, "grad_norm": 14.16063404083252, "learning_rate": 4.020179289939117e-06, "loss": 3.505, "step": 28770 }, { "epoch": 0.2927144368489583, "grad_norm": 13.34571361541748, "learning_rate": 4.019861812170744e-06, "loss": 3.2015, "step": 28775 }, { "epoch": 0.2927652994791667, "grad_norm": 8.515129089355469, "learning_rate": 4.019544295517069e-06, "loss": 3.3871, "step": 28780 }, { "epoch": 0.292816162109375, "grad_norm": 10.501129150390625, "learning_rate": 4.019226739986212e-06, "loss": 3.1915, "step": 28785 }, { "epoch": 0.2928670247395833, "grad_norm": 15.085137367248535, "learning_rate": 4.0189091455862995e-06, "loss": 3.245, "step": 28790 }, { "epoch": 0.2929178873697917, "grad_norm": 12.195261001586914, "learning_rate": 4.018591512325456e-06, "loss": 3.7411, "step": 28795 }, { "epoch": 0.29296875, "grad_norm": 12.424592018127441, "learning_rate": 4.018273840211809e-06, "loss": 3.7522, "step": 28800 }, { "epoch": 0.2930196126302083, "grad_norm": 10.4650297164917, "learning_rate": 4.0179561292534855e-06, "loss": 3.5745, "step": 28805 }, { "epoch": 0.2930704752604167, "grad_norm": 8.884088516235352, "learning_rate": 4.017638379458614e-06, "loss": 3.3963, "step": 28810 }, { "epoch": 0.293121337890625, "grad_norm": 11.254535675048828, "learning_rate": 4.017320590835325e-06, "loss": 3.4626, "step": 28815 }, { "epoch": 0.2931722005208333, "grad_norm": 10.561436653137207, "learning_rate": 4.0170027633917474e-06, "loss": 3.747, "step": 28820 }, { "epoch": 0.2932230631510417, "grad_norm": 10.550532341003418, "learning_rate": 4.016684897136015e-06, "loss": 3.5104, "step": 28825 }, { "epoch": 0.29327392578125, "grad_norm": 13.52685546875, "learning_rate": 4.0163669920762574e-06, "loss": 3.0843, "step": 28830 }, { "epoch": 0.2933247884114583, "grad_norm": 15.23268985748291, "learning_rate": 4.016049048220611e-06, "loss": 3.21, "step": 28835 }, { "epoch": 0.2933756510416667, "grad_norm": 14.624536514282227, "learning_rate": 4.015731065577209e-06, "loss": 3.3289, "step": 28840 }, { "epoch": 0.293426513671875, "grad_norm": 9.752080917358398, "learning_rate": 4.015413044154186e-06, "loss": 3.5604, "step": 28845 }, { "epoch": 0.2934773763020833, "grad_norm": 13.152545928955078, "learning_rate": 4.01509498395968e-06, "loss": 3.1857, "step": 28850 }, { "epoch": 0.2935282389322917, "grad_norm": 12.336877822875977, "learning_rate": 4.014776885001828e-06, "loss": 3.0739, "step": 28855 }, { "epoch": 0.2935791015625, "grad_norm": 14.324713706970215, "learning_rate": 4.014458747288768e-06, "loss": 3.1812, "step": 28860 }, { "epoch": 0.2936299641927083, "grad_norm": 8.907907485961914, "learning_rate": 4.014140570828641e-06, "loss": 3.2912, "step": 28865 }, { "epoch": 0.2936808268229167, "grad_norm": 14.255101203918457, "learning_rate": 4.0138223556295855e-06, "loss": 3.4148, "step": 28870 }, { "epoch": 0.293731689453125, "grad_norm": 10.45189094543457, "learning_rate": 4.013504101699743e-06, "loss": 3.2952, "step": 28875 }, { "epoch": 0.2937825520833333, "grad_norm": 11.531671524047852, "learning_rate": 4.013185809047259e-06, "loss": 3.132, "step": 28880 }, { "epoch": 0.2938334147135417, "grad_norm": 9.269369125366211, "learning_rate": 4.012867477680273e-06, "loss": 3.3035, "step": 28885 }, { "epoch": 0.29388427734375, "grad_norm": 13.622968673706055, "learning_rate": 4.012549107606931e-06, "loss": 3.4165, "step": 28890 }, { "epoch": 0.2939351399739583, "grad_norm": 12.69628620147705, "learning_rate": 4.01223069883538e-06, "loss": 3.1884, "step": 28895 }, { "epoch": 0.2939860026041667, "grad_norm": 12.34487533569336, "learning_rate": 4.0119122513737645e-06, "loss": 3.3732, "step": 28900 }, { "epoch": 0.294036865234375, "grad_norm": 12.57749080657959, "learning_rate": 4.011593765230232e-06, "loss": 3.0678, "step": 28905 }, { "epoch": 0.2940877278645833, "grad_norm": 9.764747619628906, "learning_rate": 4.011275240412932e-06, "loss": 3.3889, "step": 28910 }, { "epoch": 0.2941385904947917, "grad_norm": 14.425663948059082, "learning_rate": 4.010956676930013e-06, "loss": 3.7294, "step": 28915 }, { "epoch": 0.294189453125, "grad_norm": 10.81959056854248, "learning_rate": 4.0106380747896245e-06, "loss": 3.803, "step": 28920 }, { "epoch": 0.2942403157552083, "grad_norm": 14.788504600524902, "learning_rate": 4.01031943399992e-06, "loss": 2.9475, "step": 28925 }, { "epoch": 0.2942911783854167, "grad_norm": 9.9393949508667, "learning_rate": 4.01000075456905e-06, "loss": 3.0866, "step": 28930 }, { "epoch": 0.294342041015625, "grad_norm": 8.403119087219238, "learning_rate": 4.009682036505169e-06, "loss": 3.4695, "step": 28935 }, { "epoch": 0.2943929036458333, "grad_norm": 9.470560073852539, "learning_rate": 4.009363279816431e-06, "loss": 3.2326, "step": 28940 }, { "epoch": 0.2944437662760417, "grad_norm": 9.08181381225586, "learning_rate": 4.009044484510991e-06, "loss": 3.6787, "step": 28945 }, { "epoch": 0.29449462890625, "grad_norm": 9.185422897338867, "learning_rate": 4.0087256505970065e-06, "loss": 3.2821, "step": 28950 }, { "epoch": 0.2945454915364583, "grad_norm": 16.905046463012695, "learning_rate": 4.0084067780826326e-06, "loss": 3.5609, "step": 28955 }, { "epoch": 0.2945963541666667, "grad_norm": 15.509016036987305, "learning_rate": 4.008087866976029e-06, "loss": 3.1874, "step": 28960 }, { "epoch": 0.294647216796875, "grad_norm": 14.498684883117676, "learning_rate": 4.007768917285356e-06, "loss": 3.7175, "step": 28965 }, { "epoch": 0.2946980794270833, "grad_norm": 11.822076797485352, "learning_rate": 4.007449929018772e-06, "loss": 3.1827, "step": 28970 }, { "epoch": 0.2947489420572917, "grad_norm": 14.711019515991211, "learning_rate": 4.007130902184438e-06, "loss": 3.1797, "step": 28975 }, { "epoch": 0.2947998046875, "grad_norm": 9.385677337646484, "learning_rate": 4.006811836790518e-06, "loss": 3.3217, "step": 28980 }, { "epoch": 0.2948506673177083, "grad_norm": 15.740737915039062, "learning_rate": 4.006492732845174e-06, "loss": 3.4122, "step": 28985 }, { "epoch": 0.2949015299479167, "grad_norm": 15.037389755249023, "learning_rate": 4.00617359035657e-06, "loss": 3.2279, "step": 28990 }, { "epoch": 0.294952392578125, "grad_norm": 880.1937255859375, "learning_rate": 4.005854409332872e-06, "loss": 3.9876, "step": 28995 }, { "epoch": 0.2950032552083333, "grad_norm": 12.129514694213867, "learning_rate": 4.0055351897822464e-06, "loss": 3.2343, "step": 29000 }, { "epoch": 0.2950541178385417, "grad_norm": 12.854998588562012, "learning_rate": 4.00521593171286e-06, "loss": 3.4128, "step": 29005 }, { "epoch": 0.29510498046875, "grad_norm": 12.726030349731445, "learning_rate": 4.00489663513288e-06, "loss": 3.4242, "step": 29010 }, { "epoch": 0.2951558430989583, "grad_norm": 9.434267044067383, "learning_rate": 4.004577300050476e-06, "loss": 3.6865, "step": 29015 }, { "epoch": 0.2952067057291667, "grad_norm": 14.372798919677734, "learning_rate": 4.004257926473819e-06, "loss": 3.5117, "step": 29020 }, { "epoch": 0.295257568359375, "grad_norm": 9.412242889404297, "learning_rate": 4.00393851441108e-06, "loss": 3.2831, "step": 29025 }, { "epoch": 0.2953084309895833, "grad_norm": 17.29656982421875, "learning_rate": 4.00361906387043e-06, "loss": 3.8513, "step": 29030 }, { "epoch": 0.2953592936197917, "grad_norm": 12.378039360046387, "learning_rate": 4.003299574860042e-06, "loss": 3.2805, "step": 29035 }, { "epoch": 0.29541015625, "grad_norm": 9.60687255859375, "learning_rate": 4.002980047388092e-06, "loss": 3.25, "step": 29040 }, { "epoch": 0.2954610188802083, "grad_norm": 8.89541244506836, "learning_rate": 4.002660481462753e-06, "loss": 3.3592, "step": 29045 }, { "epoch": 0.2955118815104167, "grad_norm": 8.48711109161377, "learning_rate": 4.002340877092202e-06, "loss": 3.995, "step": 29050 }, { "epoch": 0.295562744140625, "grad_norm": 16.27423095703125, "learning_rate": 4.0020212342846155e-06, "loss": 3.4849, "step": 29055 }, { "epoch": 0.2956136067708333, "grad_norm": 15.46958065032959, "learning_rate": 4.001701553048172e-06, "loss": 3.3011, "step": 29060 }, { "epoch": 0.2956644694010417, "grad_norm": 10.370869636535645, "learning_rate": 4.0013818333910505e-06, "loss": 3.3733, "step": 29065 }, { "epoch": 0.29571533203125, "grad_norm": 16.184940338134766, "learning_rate": 4.00106207532143e-06, "loss": 4.0936, "step": 29070 }, { "epoch": 0.2957661946614583, "grad_norm": 12.648850440979004, "learning_rate": 4.000742278847492e-06, "loss": 3.0688, "step": 29075 }, { "epoch": 0.2958170572916667, "grad_norm": 8.855330467224121, "learning_rate": 4.0004224439774185e-06, "loss": 3.2634, "step": 29080 }, { "epoch": 0.295867919921875, "grad_norm": 16.087480545043945, "learning_rate": 4.000102570719393e-06, "loss": 3.2605, "step": 29085 }, { "epoch": 0.2959187825520833, "grad_norm": 13.387033462524414, "learning_rate": 3.999782659081598e-06, "loss": 3.2089, "step": 29090 }, { "epoch": 0.2959696451822917, "grad_norm": 12.484994888305664, "learning_rate": 3.99946270907222e-06, "loss": 3.3006, "step": 29095 }, { "epoch": 0.2960205078125, "grad_norm": 14.613950729370117, "learning_rate": 3.999142720699444e-06, "loss": 3.2682, "step": 29100 }, { "epoch": 0.2960713704427083, "grad_norm": 11.924738883972168, "learning_rate": 3.998822693971456e-06, "loss": 3.3534, "step": 29105 }, { "epoch": 0.2961222330729167, "grad_norm": 11.536297798156738, "learning_rate": 3.998502628896446e-06, "loss": 3.3467, "step": 29110 }, { "epoch": 0.296173095703125, "grad_norm": 11.012533187866211, "learning_rate": 3.9981825254826e-06, "loss": 3.0033, "step": 29115 }, { "epoch": 0.2962239583333333, "grad_norm": 8.716772079467773, "learning_rate": 3.99786238373811e-06, "loss": 3.2317, "step": 29120 }, { "epoch": 0.2962748209635417, "grad_norm": 11.559467315673828, "learning_rate": 3.997542203671165e-06, "loss": 3.5196, "step": 29125 }, { "epoch": 0.29632568359375, "grad_norm": 13.896491050720215, "learning_rate": 3.997221985289959e-06, "loss": 3.5077, "step": 29130 }, { "epoch": 0.2963765462239583, "grad_norm": 12.800115585327148, "learning_rate": 3.996901728602683e-06, "loss": 3.2497, "step": 29135 }, { "epoch": 0.2964274088541667, "grad_norm": 16.33082389831543, "learning_rate": 3.996581433617531e-06, "loss": 3.2995, "step": 29140 }, { "epoch": 0.296478271484375, "grad_norm": 11.650870323181152, "learning_rate": 3.996261100342697e-06, "loss": 3.3726, "step": 29145 }, { "epoch": 0.2965291341145833, "grad_norm": 9.717093467712402, "learning_rate": 3.995940728786378e-06, "loss": 3.4764, "step": 29150 }, { "epoch": 0.2965799967447917, "grad_norm": 10.83946704864502, "learning_rate": 3.99562031895677e-06, "loss": 3.3277, "step": 29155 }, { "epoch": 0.296630859375, "grad_norm": 12.137735366821289, "learning_rate": 3.9952998708620714e-06, "loss": 3.5005, "step": 29160 }, { "epoch": 0.2966817220052083, "grad_norm": 9.967763900756836, "learning_rate": 3.994979384510479e-06, "loss": 3.1285, "step": 29165 }, { "epoch": 0.2967325846354167, "grad_norm": 10.213086128234863, "learning_rate": 3.994658859910193e-06, "loss": 3.1881, "step": 29170 }, { "epoch": 0.296783447265625, "grad_norm": 12.435432434082031, "learning_rate": 3.9943382970694155e-06, "loss": 3.5875, "step": 29175 }, { "epoch": 0.2968343098958333, "grad_norm": 9.136934280395508, "learning_rate": 3.994017695996346e-06, "loss": 3.713, "step": 29180 }, { "epoch": 0.2968851725260417, "grad_norm": 13.187419891357422, "learning_rate": 3.993697056699188e-06, "loss": 3.2372, "step": 29185 }, { "epoch": 0.29693603515625, "grad_norm": 7.720186233520508, "learning_rate": 3.993376379186145e-06, "loss": 3.278, "step": 29190 }, { "epoch": 0.2969868977864583, "grad_norm": 9.980789184570312, "learning_rate": 3.993055663465422e-06, "loss": 3.4021, "step": 29195 }, { "epoch": 0.2970377604166667, "grad_norm": 14.341978073120117, "learning_rate": 3.9927349095452235e-06, "loss": 3.3752, "step": 29200 }, { "epoch": 0.297088623046875, "grad_norm": 9.22407341003418, "learning_rate": 3.992414117433756e-06, "loss": 3.1495, "step": 29205 }, { "epoch": 0.2971394856770833, "grad_norm": 14.73730754852295, "learning_rate": 3.992093287139226e-06, "loss": 3.2453, "step": 29210 }, { "epoch": 0.2971903483072917, "grad_norm": 12.117033004760742, "learning_rate": 3.991772418669844e-06, "loss": 3.467, "step": 29215 }, { "epoch": 0.2972412109375, "grad_norm": 12.779738426208496, "learning_rate": 3.991451512033818e-06, "loss": 3.425, "step": 29220 }, { "epoch": 0.2972920735677083, "grad_norm": 12.695642471313477, "learning_rate": 3.991130567239359e-06, "loss": 3.3072, "step": 29225 }, { "epoch": 0.2973429361979167, "grad_norm": 14.683784484863281, "learning_rate": 3.990809584294677e-06, "loss": 3.7767, "step": 29230 }, { "epoch": 0.297393798828125, "grad_norm": 14.795294761657715, "learning_rate": 3.990488563207986e-06, "loss": 3.3255, "step": 29235 }, { "epoch": 0.2974446614583333, "grad_norm": 12.686258316040039, "learning_rate": 3.990167503987498e-06, "loss": 3.7086, "step": 29240 }, { "epoch": 0.2974955240885417, "grad_norm": 8.185772895812988, "learning_rate": 3.989846406641428e-06, "loss": 3.4043, "step": 29245 }, { "epoch": 0.29754638671875, "grad_norm": 10.710786819458008, "learning_rate": 3.98952527117799e-06, "loss": 3.1477, "step": 29250 }, { "epoch": 0.2975972493489583, "grad_norm": 11.759841918945312, "learning_rate": 3.989204097605402e-06, "loss": 3.4075, "step": 29255 }, { "epoch": 0.2976481119791667, "grad_norm": 13.554819107055664, "learning_rate": 3.98888288593188e-06, "loss": 3.1757, "step": 29260 }, { "epoch": 0.297698974609375, "grad_norm": 12.05898666381836, "learning_rate": 3.988561636165641e-06, "loss": 3.0917, "step": 29265 }, { "epoch": 0.2977498372395833, "grad_norm": 11.725159645080566, "learning_rate": 3.988240348314907e-06, "loss": 3.2702, "step": 29270 }, { "epoch": 0.2978006998697917, "grad_norm": 15.247992515563965, "learning_rate": 3.987919022387895e-06, "loss": 3.6453, "step": 29275 }, { "epoch": 0.2978515625, "grad_norm": 14.956148147583008, "learning_rate": 3.987597658392828e-06, "loss": 3.6442, "step": 29280 }, { "epoch": 0.2979024251302083, "grad_norm": 13.976456642150879, "learning_rate": 3.987276256337928e-06, "loss": 3.8726, "step": 29285 }, { "epoch": 0.2979532877604167, "grad_norm": 13.797755241394043, "learning_rate": 3.9869548162314176e-06, "loss": 3.4543, "step": 29290 }, { "epoch": 0.298004150390625, "grad_norm": 9.575790405273438, "learning_rate": 3.98663333808152e-06, "loss": 3.2062, "step": 29295 }, { "epoch": 0.2980550130208333, "grad_norm": 12.933478355407715, "learning_rate": 3.9863118218964606e-06, "loss": 3.2992, "step": 29300 }, { "epoch": 0.2981058756510417, "grad_norm": 11.104291915893555, "learning_rate": 3.985990267684466e-06, "loss": 2.822, "step": 29305 }, { "epoch": 0.29815673828125, "grad_norm": 7.797368049621582, "learning_rate": 3.985668675453763e-06, "loss": 3.5728, "step": 29310 }, { "epoch": 0.2982076009114583, "grad_norm": 13.247109413146973, "learning_rate": 3.985347045212578e-06, "loss": 3.2314, "step": 29315 }, { "epoch": 0.2982584635416667, "grad_norm": 14.105443000793457, "learning_rate": 3.985025376969143e-06, "loss": 3.4454, "step": 29320 }, { "epoch": 0.298309326171875, "grad_norm": 15.554372787475586, "learning_rate": 3.984703670731683e-06, "loss": 3.7341, "step": 29325 }, { "epoch": 0.2983601888020833, "grad_norm": 9.164495468139648, "learning_rate": 3.9843819265084334e-06, "loss": 3.2357, "step": 29330 }, { "epoch": 0.2984110514322917, "grad_norm": 11.580939292907715, "learning_rate": 3.984060144307623e-06, "loss": 3.3563, "step": 29335 }, { "epoch": 0.2984619140625, "grad_norm": 8.120573043823242, "learning_rate": 3.983738324137486e-06, "loss": 3.5399, "step": 29340 }, { "epoch": 0.2985127766927083, "grad_norm": 9.155670166015625, "learning_rate": 3.983416466006255e-06, "loss": 3.4847, "step": 29345 }, { "epoch": 0.2985636393229167, "grad_norm": 17.27814483642578, "learning_rate": 3.9830945699221674e-06, "loss": 3.273, "step": 29350 }, { "epoch": 0.298614501953125, "grad_norm": 15.654536247253418, "learning_rate": 3.982772635893456e-06, "loss": 3.5948, "step": 29355 }, { "epoch": 0.2986653645833333, "grad_norm": 10.65638256072998, "learning_rate": 3.9824506639283564e-06, "loss": 3.2658, "step": 29360 }, { "epoch": 0.2987162272135417, "grad_norm": 10.578728675842285, "learning_rate": 3.98212865403511e-06, "loss": 3.1781, "step": 29365 }, { "epoch": 0.29876708984375, "grad_norm": 7.720913410186768, "learning_rate": 3.981806606221952e-06, "loss": 3.4242, "step": 29370 }, { "epoch": 0.2988179524739583, "grad_norm": 10.320748329162598, "learning_rate": 3.981484520497123e-06, "loss": 3.3708, "step": 29375 }, { "epoch": 0.2988688151041667, "grad_norm": 15.515741348266602, "learning_rate": 3.981162396868865e-06, "loss": 3.2143, "step": 29380 }, { "epoch": 0.298919677734375, "grad_norm": 14.272456169128418, "learning_rate": 3.980840235345417e-06, "loss": 3.2696, "step": 29385 }, { "epoch": 0.2989705403645833, "grad_norm": 10.035221099853516, "learning_rate": 3.980518035935023e-06, "loss": 3.7437, "step": 29390 }, { "epoch": 0.2990214029947917, "grad_norm": 10.790133476257324, "learning_rate": 3.980195798645926e-06, "loss": 3.1117, "step": 29395 }, { "epoch": 0.299072265625, "grad_norm": 17.080608367919922, "learning_rate": 3.979873523486371e-06, "loss": 3.6969, "step": 29400 }, { "epoch": 0.2991231282552083, "grad_norm": 12.20949935913086, "learning_rate": 3.979551210464602e-06, "loss": 4.0681, "step": 29405 }, { "epoch": 0.2991739908854167, "grad_norm": 11.205544471740723, "learning_rate": 3.9792288595888655e-06, "loss": 3.3577, "step": 29410 }, { "epoch": 0.299224853515625, "grad_norm": 14.382440567016602, "learning_rate": 3.978906470867409e-06, "loss": 3.1179, "step": 29415 }, { "epoch": 0.2992757161458333, "grad_norm": 10.04643726348877, "learning_rate": 3.978584044308482e-06, "loss": 3.174, "step": 29420 }, { "epoch": 0.2993265787760417, "grad_norm": 12.206974029541016, "learning_rate": 3.978261579920333e-06, "loss": 3.5778, "step": 29425 }, { "epoch": 0.29937744140625, "grad_norm": 9.708235740661621, "learning_rate": 3.977939077711211e-06, "loss": 3.1949, "step": 29430 }, { "epoch": 0.2994283040364583, "grad_norm": 10.987796783447266, "learning_rate": 3.977616537689368e-06, "loss": 3.5749, "step": 29435 }, { "epoch": 0.2994791666666667, "grad_norm": 8.089970588684082, "learning_rate": 3.977293959863057e-06, "loss": 3.5678, "step": 29440 }, { "epoch": 0.299530029296875, "grad_norm": 14.080187797546387, "learning_rate": 3.97697134424053e-06, "loss": 3.7299, "step": 29445 }, { "epoch": 0.2995808919270833, "grad_norm": 14.45607852935791, "learning_rate": 3.97664869083004e-06, "loss": 3.6074, "step": 29450 }, { "epoch": 0.2996317545572917, "grad_norm": 9.266926765441895, "learning_rate": 3.976325999639844e-06, "loss": 3.5337, "step": 29455 }, { "epoch": 0.2996826171875, "grad_norm": 8.380191802978516, "learning_rate": 3.976003270678197e-06, "loss": 3.3772, "step": 29460 }, { "epoch": 0.2997334798177083, "grad_norm": 9.378839492797852, "learning_rate": 3.975680503953358e-06, "loss": 3.5624, "step": 29465 }, { "epoch": 0.2997843424479167, "grad_norm": 7.663854598999023, "learning_rate": 3.97535769947358e-06, "loss": 3.4436, "step": 29470 }, { "epoch": 0.299835205078125, "grad_norm": 8.099380493164062, "learning_rate": 3.9750348572471276e-06, "loss": 3.0022, "step": 29475 }, { "epoch": 0.2998860677083333, "grad_norm": 10.462038040161133, "learning_rate": 3.974711977282256e-06, "loss": 3.0147, "step": 29480 }, { "epoch": 0.2999369303385417, "grad_norm": 8.75016975402832, "learning_rate": 3.974389059587229e-06, "loss": 3.4312, "step": 29485 }, { "epoch": 0.29998779296875, "grad_norm": 15.210886001586914, "learning_rate": 3.9740661041703075e-06, "loss": 3.013, "step": 29490 }, { "epoch": 0.3000386555989583, "grad_norm": 10.11583423614502, "learning_rate": 3.973743111039754e-06, "loss": 3.249, "step": 29495 }, { "epoch": 0.3000895182291667, "grad_norm": 16.003278732299805, "learning_rate": 3.973420080203832e-06, "loss": 3.3855, "step": 29500 }, { "epoch": 0.300140380859375, "grad_norm": 9.147961616516113, "learning_rate": 3.9730970116708065e-06, "loss": 3.2473, "step": 29505 }, { "epoch": 0.3001912434895833, "grad_norm": 9.03951358795166, "learning_rate": 3.9727739054489436e-06, "loss": 3.0429, "step": 29510 }, { "epoch": 0.3002421061197917, "grad_norm": 12.876129150390625, "learning_rate": 3.972450761546509e-06, "loss": 3.327, "step": 29515 }, { "epoch": 0.30029296875, "grad_norm": 11.136457443237305, "learning_rate": 3.972127579971771e-06, "loss": 3.2739, "step": 29520 }, { "epoch": 0.3003438313802083, "grad_norm": 13.348973274230957, "learning_rate": 3.971804360732997e-06, "loss": 3.2297, "step": 29525 }, { "epoch": 0.3003946940104167, "grad_norm": 10.658791542053223, "learning_rate": 3.971481103838459e-06, "loss": 3.3988, "step": 29530 }, { "epoch": 0.300445556640625, "grad_norm": 12.942093849182129, "learning_rate": 3.971157809296424e-06, "loss": 3.246, "step": 29535 }, { "epoch": 0.3004964192708333, "grad_norm": 8.947063446044922, "learning_rate": 3.970834477115167e-06, "loss": 3.2981, "step": 29540 }, { "epoch": 0.3005472819010417, "grad_norm": 14.3213472366333, "learning_rate": 3.970511107302956e-06, "loss": 3.0184, "step": 29545 }, { "epoch": 0.30059814453125, "grad_norm": 11.473852157592773, "learning_rate": 3.970187699868069e-06, "loss": 3.3741, "step": 29550 }, { "epoch": 0.3006490071614583, "grad_norm": 16.973127365112305, "learning_rate": 3.969864254818777e-06, "loss": 3.4917, "step": 29555 }, { "epoch": 0.3006998697916667, "grad_norm": 10.866402626037598, "learning_rate": 3.969540772163357e-06, "loss": 3.0928, "step": 29560 }, { "epoch": 0.300750732421875, "grad_norm": 12.010110855102539, "learning_rate": 3.969217251910084e-06, "loss": 3.0374, "step": 29565 }, { "epoch": 0.3008015950520833, "grad_norm": 15.12334156036377, "learning_rate": 3.968893694067236e-06, "loss": 3.4492, "step": 29570 }, { "epoch": 0.3008524576822917, "grad_norm": 12.803681373596191, "learning_rate": 3.968570098643092e-06, "loss": 3.2817, "step": 29575 }, { "epoch": 0.3009033203125, "grad_norm": 8.338359832763672, "learning_rate": 3.968246465645929e-06, "loss": 3.0263, "step": 29580 }, { "epoch": 0.3009541829427083, "grad_norm": 15.683090209960938, "learning_rate": 3.967922795084029e-06, "loss": 3.5276, "step": 29585 }, { "epoch": 0.3010050455729167, "grad_norm": 11.639715194702148, "learning_rate": 3.967599086965671e-06, "loss": 3.413, "step": 29590 }, { "epoch": 0.301055908203125, "grad_norm": 12.234172821044922, "learning_rate": 3.967275341299139e-06, "loss": 2.8704, "step": 29595 }, { "epoch": 0.3011067708333333, "grad_norm": 8.11499309539795, "learning_rate": 3.9669515580927155e-06, "loss": 3.2947, "step": 29600 }, { "epoch": 0.3011576334635417, "grad_norm": 9.019293785095215, "learning_rate": 3.9666277373546844e-06, "loss": 3.1113, "step": 29605 }, { "epoch": 0.30120849609375, "grad_norm": 14.087760925292969, "learning_rate": 3.966303879093329e-06, "loss": 3.0913, "step": 29610 }, { "epoch": 0.3012593587239583, "grad_norm": 10.682214736938477, "learning_rate": 3.9659799833169374e-06, "loss": 3.7827, "step": 29615 }, { "epoch": 0.3013102213541667, "grad_norm": 8.64210033416748, "learning_rate": 3.9656560500337945e-06, "loss": 3.07, "step": 29620 }, { "epoch": 0.301361083984375, "grad_norm": 11.499073028564453, "learning_rate": 3.96533207925219e-06, "loss": 3.3049, "step": 29625 }, { "epoch": 0.3014119466145833, "grad_norm": 14.486063003540039, "learning_rate": 3.965008070980411e-06, "loss": 3.6134, "step": 29630 }, { "epoch": 0.3014628092447917, "grad_norm": 11.338687896728516, "learning_rate": 3.9646840252267485e-06, "loss": 3.2003, "step": 29635 }, { "epoch": 0.301513671875, "grad_norm": 9.439972877502441, "learning_rate": 3.964359941999491e-06, "loss": 3.3831, "step": 29640 }, { "epoch": 0.3015645345052083, "grad_norm": 10.964188575744629, "learning_rate": 3.964035821306933e-06, "loss": 3.5014, "step": 29645 }, { "epoch": 0.3016153971354167, "grad_norm": 14.766501426696777, "learning_rate": 3.963711663157365e-06, "loss": 3.0886, "step": 29650 }, { "epoch": 0.301666259765625, "grad_norm": 11.056827545166016, "learning_rate": 3.96338746755908e-06, "loss": 3.1309, "step": 29655 }, { "epoch": 0.3017171223958333, "grad_norm": 13.033686637878418, "learning_rate": 3.963063234520374e-06, "loss": 3.3055, "step": 29660 }, { "epoch": 0.3017679850260417, "grad_norm": 15.789066314697266, "learning_rate": 3.962738964049543e-06, "loss": 2.9897, "step": 29665 }, { "epoch": 0.30181884765625, "grad_norm": 13.167356491088867, "learning_rate": 3.962414656154882e-06, "loss": 3.3225, "step": 29670 }, { "epoch": 0.3018697102864583, "grad_norm": 12.949690818786621, "learning_rate": 3.962090310844687e-06, "loss": 3.186, "step": 29675 }, { "epoch": 0.3019205729166667, "grad_norm": 16.214996337890625, "learning_rate": 3.96176592812726e-06, "loss": 3.8273, "step": 29680 }, { "epoch": 0.301971435546875, "grad_norm": 16.74712371826172, "learning_rate": 3.961441508010897e-06, "loss": 3.3157, "step": 29685 }, { "epoch": 0.3020222981770833, "grad_norm": 12.501587867736816, "learning_rate": 3.9611170505039e-06, "loss": 2.9707, "step": 29690 }, { "epoch": 0.3020731608072917, "grad_norm": 13.367961883544922, "learning_rate": 3.960792555614569e-06, "loss": 3.5666, "step": 29695 }, { "epoch": 0.3021240234375, "grad_norm": 10.78051471710205, "learning_rate": 3.960468023351207e-06, "loss": 3.4304, "step": 29700 }, { "epoch": 0.3021748860677083, "grad_norm": 13.978754997253418, "learning_rate": 3.960143453722117e-06, "loss": 3.1024, "step": 29705 }, { "epoch": 0.3022257486979167, "grad_norm": 13.29320240020752, "learning_rate": 3.959818846735602e-06, "loss": 3.649, "step": 29710 }, { "epoch": 0.302276611328125, "grad_norm": 7.485118389129639, "learning_rate": 3.959494202399969e-06, "loss": 3.5782, "step": 29715 }, { "epoch": 0.3023274739583333, "grad_norm": 11.284222602844238, "learning_rate": 3.9591695207235225e-06, "loss": 3.4861, "step": 29720 }, { "epoch": 0.3023783365885417, "grad_norm": 17.11857032775879, "learning_rate": 3.958844801714569e-06, "loss": 3.1366, "step": 29725 }, { "epoch": 0.30242919921875, "grad_norm": 13.37563705444336, "learning_rate": 3.958520045381417e-06, "loss": 3.2703, "step": 29730 }, { "epoch": 0.3024800618489583, "grad_norm": 8.232620239257812, "learning_rate": 3.958195251732376e-06, "loss": 3.2489, "step": 29735 }, { "epoch": 0.3025309244791667, "grad_norm": 10.953129768371582, "learning_rate": 3.957870420775756e-06, "loss": 3.5771, "step": 29740 }, { "epoch": 0.302581787109375, "grad_norm": 16.61271095275879, "learning_rate": 3.9575455525198655e-06, "loss": 3.2988, "step": 29745 }, { "epoch": 0.3026326497395833, "grad_norm": 9.787528991699219, "learning_rate": 3.957220646973018e-06, "loss": 3.1724, "step": 29750 }, { "epoch": 0.3026835123697917, "grad_norm": 10.20058822631836, "learning_rate": 3.956895704143525e-06, "loss": 3.4975, "step": 29755 }, { "epoch": 0.302734375, "grad_norm": 10.130928039550781, "learning_rate": 3.956570724039701e-06, "loss": 3.2884, "step": 29760 }, { "epoch": 0.3027852376302083, "grad_norm": 10.151378631591797, "learning_rate": 3.9562457066698615e-06, "loss": 3.2238, "step": 29765 }, { "epoch": 0.3028361002604167, "grad_norm": 10.487987518310547, "learning_rate": 3.955920652042319e-06, "loss": 3.35, "step": 29770 }, { "epoch": 0.302886962890625, "grad_norm": 10.246733665466309, "learning_rate": 3.955595560165392e-06, "loss": 4.0199, "step": 29775 }, { "epoch": 0.3029378255208333, "grad_norm": 11.14050006866455, "learning_rate": 3.955270431047398e-06, "loss": 3.4622, "step": 29780 }, { "epoch": 0.3029886881510417, "grad_norm": 15.32699966430664, "learning_rate": 3.954945264696656e-06, "loss": 3.5683, "step": 29785 }, { "epoch": 0.30303955078125, "grad_norm": 9.070558547973633, "learning_rate": 3.954620061121482e-06, "loss": 3.5751, "step": 29790 }, { "epoch": 0.3030904134114583, "grad_norm": 12.999809265136719, "learning_rate": 3.9542948203302e-06, "loss": 3.3605, "step": 29795 }, { "epoch": 0.3031412760416667, "grad_norm": 10.635300636291504, "learning_rate": 3.953969542331129e-06, "loss": 3.2415, "step": 29800 }, { "epoch": 0.303192138671875, "grad_norm": 15.898357391357422, "learning_rate": 3.953644227132593e-06, "loss": 3.1742, "step": 29805 }, { "epoch": 0.3032430013020833, "grad_norm": 11.910710334777832, "learning_rate": 3.9533188747429135e-06, "loss": 3.3774, "step": 29810 }, { "epoch": 0.3032938639322917, "grad_norm": 7.786906719207764, "learning_rate": 3.9529934851704155e-06, "loss": 3.1679, "step": 29815 }, { "epoch": 0.3033447265625, "grad_norm": 16.13606071472168, "learning_rate": 3.952668058423422e-06, "loss": 3.0755, "step": 29820 }, { "epoch": 0.3033955891927083, "grad_norm": 9.732821464538574, "learning_rate": 3.952342594510262e-06, "loss": 3.0813, "step": 29825 }, { "epoch": 0.3034464518229167, "grad_norm": 10.42857837677002, "learning_rate": 3.952017093439261e-06, "loss": 3.399, "step": 29830 }, { "epoch": 0.303497314453125, "grad_norm": 7.4075727462768555, "learning_rate": 3.951691555218746e-06, "loss": 3.5959, "step": 29835 }, { "epoch": 0.3035481770833333, "grad_norm": 8.515162467956543, "learning_rate": 3.951365979857047e-06, "loss": 3.36, "step": 29840 }, { "epoch": 0.3035990397135417, "grad_norm": 11.024301528930664, "learning_rate": 3.951040367362493e-06, "loss": 3.3992, "step": 29845 }, { "epoch": 0.30364990234375, "grad_norm": 12.866596221923828, "learning_rate": 3.9507147177434155e-06, "loss": 3.2026, "step": 29850 }, { "epoch": 0.3037007649739583, "grad_norm": 10.392648696899414, "learning_rate": 3.950389031008147e-06, "loss": 3.6532, "step": 29855 }, { "epoch": 0.3037516276041667, "grad_norm": 11.186022758483887, "learning_rate": 3.950063307165018e-06, "loss": 3.365, "step": 29860 }, { "epoch": 0.303802490234375, "grad_norm": 11.300045013427734, "learning_rate": 3.949737546222362e-06, "loss": 3.1194, "step": 29865 }, { "epoch": 0.3038533528645833, "grad_norm": 15.685585975646973, "learning_rate": 3.949411748188515e-06, "loss": 3.2783, "step": 29870 }, { "epoch": 0.3039042154947917, "grad_norm": 11.391825675964355, "learning_rate": 3.949085913071812e-06, "loss": 3.4238, "step": 29875 }, { "epoch": 0.303955078125, "grad_norm": 10.095256805419922, "learning_rate": 3.948760040880591e-06, "loss": 3.1203, "step": 29880 }, { "epoch": 0.3040059407552083, "grad_norm": 12.93649673461914, "learning_rate": 3.948434131623185e-06, "loss": 3.2666, "step": 29885 }, { "epoch": 0.3040568033854167, "grad_norm": 12.856670379638672, "learning_rate": 3.948108185307937e-06, "loss": 3.0006, "step": 29890 }, { "epoch": 0.304107666015625, "grad_norm": 13.006121635437012, "learning_rate": 3.947782201943183e-06, "loss": 3.2726, "step": 29895 }, { "epoch": 0.3041585286458333, "grad_norm": 8.508830070495605, "learning_rate": 3.9474561815372655e-06, "loss": 2.7996, "step": 29900 }, { "epoch": 0.3042093912760417, "grad_norm": 11.361810684204102, "learning_rate": 3.947130124098525e-06, "loss": 3.5956, "step": 29905 }, { "epoch": 0.30426025390625, "grad_norm": 15.425206184387207, "learning_rate": 3.946804029635303e-06, "loss": 3.8641, "step": 29910 }, { "epoch": 0.3043111165364583, "grad_norm": 10.67442512512207, "learning_rate": 3.9464778981559425e-06, "loss": 3.4641, "step": 29915 }, { "epoch": 0.3043619791666667, "grad_norm": 11.199278831481934, "learning_rate": 3.946151729668788e-06, "loss": 3.3431, "step": 29920 }, { "epoch": 0.304412841796875, "grad_norm": 11.408109664916992, "learning_rate": 3.9458255241821835e-06, "loss": 3.2934, "step": 29925 }, { "epoch": 0.3044637044270833, "grad_norm": 11.57914924621582, "learning_rate": 3.9454992817044765e-06, "loss": 3.0696, "step": 29930 }, { "epoch": 0.3045145670572917, "grad_norm": 8.558871269226074, "learning_rate": 3.9451730022440125e-06, "loss": 3.0752, "step": 29935 }, { "epoch": 0.3045654296875, "grad_norm": 11.476302146911621, "learning_rate": 3.944846685809141e-06, "loss": 3.289, "step": 29940 }, { "epoch": 0.3046162923177083, "grad_norm": 12.977760314941406, "learning_rate": 3.944520332408208e-06, "loss": 3.3298, "step": 29945 }, { "epoch": 0.3046671549479167, "grad_norm": 17.571115493774414, "learning_rate": 3.9441939420495655e-06, "loss": 3.5423, "step": 29950 }, { "epoch": 0.304718017578125, "grad_norm": 11.36111068725586, "learning_rate": 3.943867514741563e-06, "loss": 3.1054, "step": 29955 }, { "epoch": 0.3047688802083333, "grad_norm": 12.079353332519531, "learning_rate": 3.943541050492552e-06, "loss": 3.0162, "step": 29960 }, { "epoch": 0.3048197428385417, "grad_norm": 10.957902908325195, "learning_rate": 3.9432145493108864e-06, "loss": 3.2614, "step": 29965 }, { "epoch": 0.30487060546875, "grad_norm": 13.214820861816406, "learning_rate": 3.942888011204918e-06, "loss": 3.2373, "step": 29970 }, { "epoch": 0.3049214680989583, "grad_norm": 11.408329010009766, "learning_rate": 3.942561436183002e-06, "loss": 3.0805, "step": 29975 }, { "epoch": 0.3049723307291667, "grad_norm": 9.606331825256348, "learning_rate": 3.9422348242534935e-06, "loss": 3.462, "step": 29980 }, { "epoch": 0.305023193359375, "grad_norm": 9.041865348815918, "learning_rate": 3.941908175424749e-06, "loss": 3.6158, "step": 29985 }, { "epoch": 0.3050740559895833, "grad_norm": 12.085333824157715, "learning_rate": 3.941581489705125e-06, "loss": 3.2969, "step": 29990 }, { "epoch": 0.3051249186197917, "grad_norm": 14.818300247192383, "learning_rate": 3.941254767102981e-06, "loss": 3.5366, "step": 29995 }, { "epoch": 0.30517578125, "grad_norm": 12.06135082244873, "learning_rate": 3.940928007626675e-06, "loss": 3.5027, "step": 30000 }, { "epoch": 0.3052266438802083, "grad_norm": 8.41988754272461, "learning_rate": 3.940601211284568e-06, "loss": 3.5845, "step": 30005 }, { "epoch": 0.3052775065104167, "grad_norm": 9.827239990234375, "learning_rate": 3.94027437808502e-06, "loss": 3.5265, "step": 30010 }, { "epoch": 0.305328369140625, "grad_norm": 15.531473159790039, "learning_rate": 3.939947508036394e-06, "loss": 3.2988, "step": 30015 }, { "epoch": 0.3053792317708333, "grad_norm": 16.49957847595215, "learning_rate": 3.939620601147051e-06, "loss": 3.6751, "step": 30020 }, { "epoch": 0.3054300944010417, "grad_norm": 17.067161560058594, "learning_rate": 3.939293657425357e-06, "loss": 4.2174, "step": 30025 }, { "epoch": 0.30548095703125, "grad_norm": 10.203935623168945, "learning_rate": 3.9389666768796765e-06, "loss": 3.4124, "step": 30030 }, { "epoch": 0.3055318196614583, "grad_norm": 8.776480674743652, "learning_rate": 3.938639659518374e-06, "loss": 3.111, "step": 30035 }, { "epoch": 0.3055826822916667, "grad_norm": 16.952741622924805, "learning_rate": 3.938312605349817e-06, "loss": 3.3653, "step": 30040 }, { "epoch": 0.305633544921875, "grad_norm": 13.235602378845215, "learning_rate": 3.937985514382372e-06, "loss": 3.3832, "step": 30045 }, { "epoch": 0.3056844075520833, "grad_norm": 12.165931701660156, "learning_rate": 3.93765838662441e-06, "loss": 3.6152, "step": 30050 }, { "epoch": 0.3057352701822917, "grad_norm": 12.84066104888916, "learning_rate": 3.937331222084298e-06, "loss": 3.1008, "step": 30055 }, { "epoch": 0.3057861328125, "grad_norm": 11.93371868133545, "learning_rate": 3.937004020770407e-06, "loss": 3.1214, "step": 30060 }, { "epoch": 0.3058369954427083, "grad_norm": 10.240893363952637, "learning_rate": 3.936676782691109e-06, "loss": 3.5683, "step": 30065 }, { "epoch": 0.3058878580729167, "grad_norm": 14.546442031860352, "learning_rate": 3.9363495078547765e-06, "loss": 3.4732, "step": 30070 }, { "epoch": 0.305938720703125, "grad_norm": 14.076205253601074, "learning_rate": 3.936022196269782e-06, "loss": 3.0446, "step": 30075 }, { "epoch": 0.3059895833333333, "grad_norm": 8.096552848815918, "learning_rate": 3.9356948479445e-06, "loss": 3.5257, "step": 30080 }, { "epoch": 0.3060404459635417, "grad_norm": 8.671744346618652, "learning_rate": 3.935367462887306e-06, "loss": 3.302, "step": 30085 }, { "epoch": 0.30609130859375, "grad_norm": 13.429875373840332, "learning_rate": 3.935040041106576e-06, "loss": 3.1311, "step": 30090 }, { "epoch": 0.3061421712239583, "grad_norm": 13.481354713439941, "learning_rate": 3.934712582610686e-06, "loss": 3.5084, "step": 30095 }, { "epoch": 0.3061930338541667, "grad_norm": 12.646124839782715, "learning_rate": 3.934385087408014e-06, "loss": 3.1246, "step": 30100 }, { "epoch": 0.306243896484375, "grad_norm": 10.231154441833496, "learning_rate": 3.93405755550694e-06, "loss": 3.0921, "step": 30105 }, { "epoch": 0.3062947591145833, "grad_norm": 10.196928024291992, "learning_rate": 3.9337299869158445e-06, "loss": 3.5011, "step": 30110 }, { "epoch": 0.3063456217447917, "grad_norm": 13.01209545135498, "learning_rate": 3.933402381643106e-06, "loss": 3.5474, "step": 30115 }, { "epoch": 0.306396484375, "grad_norm": 7.661176681518555, "learning_rate": 3.933074739697107e-06, "loss": 3.8555, "step": 30120 }, { "epoch": 0.3064473470052083, "grad_norm": 14.038602828979492, "learning_rate": 3.9327470610862305e-06, "loss": 3.0677, "step": 30125 }, { "epoch": 0.3064982096354167, "grad_norm": 15.984299659729004, "learning_rate": 3.932419345818861e-06, "loss": 3.0514, "step": 30130 }, { "epoch": 0.306549072265625, "grad_norm": 13.001832962036133, "learning_rate": 3.932091593903381e-06, "loss": 3.3576, "step": 30135 }, { "epoch": 0.3065999348958333, "grad_norm": 15.798726081848145, "learning_rate": 3.931763805348179e-06, "loss": 3.5285, "step": 30140 }, { "epoch": 0.3066507975260417, "grad_norm": 12.089749336242676, "learning_rate": 3.9314359801616374e-06, "loss": 3.4186, "step": 30145 }, { "epoch": 0.30670166015625, "grad_norm": 14.937867164611816, "learning_rate": 3.931108118352146e-06, "loss": 2.9777, "step": 30150 }, { "epoch": 0.3067525227864583, "grad_norm": 7.658392429351807, "learning_rate": 3.930780219928093e-06, "loss": 3.2119, "step": 30155 }, { "epoch": 0.3068033854166667, "grad_norm": 13.586400985717773, "learning_rate": 3.930452284897867e-06, "loss": 3.2893, "step": 30160 }, { "epoch": 0.306854248046875, "grad_norm": 14.8994722366333, "learning_rate": 3.930124313269858e-06, "loss": 3.5276, "step": 30165 }, { "epoch": 0.3069051106770833, "grad_norm": 12.488736152648926, "learning_rate": 3.929796305052458e-06, "loss": 3.8872, "step": 30170 }, { "epoch": 0.3069559733072917, "grad_norm": 8.962054252624512, "learning_rate": 3.929468260254058e-06, "loss": 3.234, "step": 30175 }, { "epoch": 0.3070068359375, "grad_norm": 9.514458656311035, "learning_rate": 3.9291401788830505e-06, "loss": 3.4633, "step": 30180 }, { "epoch": 0.3070576985677083, "grad_norm": 12.32824420928955, "learning_rate": 3.928812060947832e-06, "loss": 3.6234, "step": 30185 }, { "epoch": 0.3071085611979167, "grad_norm": 7.72622013092041, "learning_rate": 3.928483906456795e-06, "loss": 3.188, "step": 30190 }, { "epoch": 0.307159423828125, "grad_norm": 16.456998825073242, "learning_rate": 3.9281557154183355e-06, "loss": 3.5812, "step": 30195 }, { "epoch": 0.3072102864583333, "grad_norm": 7.127664089202881, "learning_rate": 3.927827487840851e-06, "loss": 3.2705, "step": 30200 }, { "epoch": 0.3072611490885417, "grad_norm": 13.026443481445312, "learning_rate": 3.927499223732738e-06, "loss": 3.2092, "step": 30205 }, { "epoch": 0.30731201171875, "grad_norm": 12.97545051574707, "learning_rate": 3.927170923102396e-06, "loss": 3.5209, "step": 30210 }, { "epoch": 0.3073628743489583, "grad_norm": 7.901330947875977, "learning_rate": 3.926842585958225e-06, "loss": 3.4217, "step": 30215 }, { "epoch": 0.3074137369791667, "grad_norm": 11.98694896697998, "learning_rate": 3.926514212308624e-06, "loss": 3.086, "step": 30220 }, { "epoch": 0.307464599609375, "grad_norm": 11.968120574951172, "learning_rate": 3.926185802161995e-06, "loss": 3.5508, "step": 30225 }, { "epoch": 0.3075154622395833, "grad_norm": 9.247671127319336, "learning_rate": 3.92585735552674e-06, "loss": 3.6386, "step": 30230 }, { "epoch": 0.3075663248697917, "grad_norm": 9.899497985839844, "learning_rate": 3.925528872411263e-06, "loss": 3.4517, "step": 30235 }, { "epoch": 0.3076171875, "grad_norm": 13.091283798217773, "learning_rate": 3.925200352823967e-06, "loss": 3.8267, "step": 30240 }, { "epoch": 0.3076680501302083, "grad_norm": 12.776516914367676, "learning_rate": 3.924871796773259e-06, "loss": 3.7947, "step": 30245 }, { "epoch": 0.3077189127604167, "grad_norm": 7.171541690826416, "learning_rate": 3.924543204267542e-06, "loss": 3.1933, "step": 30250 }, { "epoch": 0.307769775390625, "grad_norm": 17.34246826171875, "learning_rate": 3.924214575315226e-06, "loss": 3.6964, "step": 30255 }, { "epoch": 0.3078206380208333, "grad_norm": 17.20662498474121, "learning_rate": 3.923885909924717e-06, "loss": 3.5014, "step": 30260 }, { "epoch": 0.3078715006510417, "grad_norm": 10.750012397766113, "learning_rate": 3.923557208104426e-06, "loss": 3.4796, "step": 30265 }, { "epoch": 0.30792236328125, "grad_norm": 14.636932373046875, "learning_rate": 3.92322846986276e-06, "loss": 2.8819, "step": 30270 }, { "epoch": 0.3079732259114583, "grad_norm": 13.901700973510742, "learning_rate": 3.922899695208131e-06, "loss": 3.3566, "step": 30275 }, { "epoch": 0.3080240885416667, "grad_norm": 32.55217742919922, "learning_rate": 3.922570884148951e-06, "loss": 3.2527, "step": 30280 }, { "epoch": 0.308074951171875, "grad_norm": 16.40342903137207, "learning_rate": 3.922242036693632e-06, "loss": 3.8343, "step": 30285 }, { "epoch": 0.3081258138020833, "grad_norm": 12.78229808807373, "learning_rate": 3.921913152850587e-06, "loss": 3.2929, "step": 30290 }, { "epoch": 0.3081766764322917, "grad_norm": 14.18374252319336, "learning_rate": 3.921584232628232e-06, "loss": 3.3324, "step": 30295 }, { "epoch": 0.3082275390625, "grad_norm": 13.564252853393555, "learning_rate": 3.92125527603498e-06, "loss": 3.1763, "step": 30300 }, { "epoch": 0.3082784016927083, "grad_norm": 10.080500602722168, "learning_rate": 3.920926283079249e-06, "loss": 3.3237, "step": 30305 }, { "epoch": 0.3083292643229167, "grad_norm": 15.610957145690918, "learning_rate": 3.920597253769457e-06, "loss": 3.3335, "step": 30310 }, { "epoch": 0.308380126953125, "grad_norm": 11.604490280151367, "learning_rate": 3.9202681881140206e-06, "loss": 3.3937, "step": 30315 }, { "epoch": 0.3084309895833333, "grad_norm": 12.033234596252441, "learning_rate": 3.919939086121359e-06, "loss": 3.5559, "step": 30320 }, { "epoch": 0.3084818522135417, "grad_norm": 10.802011489868164, "learning_rate": 3.919609947799892e-06, "loss": 3.1012, "step": 30325 }, { "epoch": 0.30853271484375, "grad_norm": 12.17712116241455, "learning_rate": 3.9192807731580415e-06, "loss": 3.3434, "step": 30330 }, { "epoch": 0.3085835774739583, "grad_norm": 11.824995994567871, "learning_rate": 3.9189515622042285e-06, "loss": 3.5515, "step": 30335 }, { "epoch": 0.3086344401041667, "grad_norm": 9.751220703125, "learning_rate": 3.9186223149468764e-06, "loss": 3.212, "step": 30340 }, { "epoch": 0.308685302734375, "grad_norm": 12.710297584533691, "learning_rate": 3.918293031394408e-06, "loss": 3.1981, "step": 30345 }, { "epoch": 0.3087361653645833, "grad_norm": 8.323152542114258, "learning_rate": 3.917963711555249e-06, "loss": 3.2414, "step": 30350 }, { "epoch": 0.3087870279947917, "grad_norm": 7.017961025238037, "learning_rate": 3.917634355437825e-06, "loss": 3.3119, "step": 30355 }, { "epoch": 0.308837890625, "grad_norm": 15.37889289855957, "learning_rate": 3.917304963050562e-06, "loss": 3.4028, "step": 30360 }, { "epoch": 0.3088887532552083, "grad_norm": 8.823172569274902, "learning_rate": 3.916975534401887e-06, "loss": 3.4427, "step": 30365 }, { "epoch": 0.3089396158854167, "grad_norm": 11.566137313842773, "learning_rate": 3.916646069500228e-06, "loss": 3.1481, "step": 30370 }, { "epoch": 0.308990478515625, "grad_norm": 13.274648666381836, "learning_rate": 3.916316568354017e-06, "loss": 3.1607, "step": 30375 }, { "epoch": 0.3090413411458333, "grad_norm": 9.833974838256836, "learning_rate": 3.915987030971681e-06, "loss": 3.4893, "step": 30380 }, { "epoch": 0.3090922037760417, "grad_norm": 9.17192554473877, "learning_rate": 3.915657457361652e-06, "loss": 3.3514, "step": 30385 }, { "epoch": 0.30914306640625, "grad_norm": 10.900861740112305, "learning_rate": 3.915327847532364e-06, "loss": 3.1907, "step": 30390 }, { "epoch": 0.3091939290364583, "grad_norm": 10.886656761169434, "learning_rate": 3.914998201492248e-06, "loss": 3.4113, "step": 30395 }, { "epoch": 0.3092447916666667, "grad_norm": 10.764690399169922, "learning_rate": 3.914668519249738e-06, "loss": 3.2688, "step": 30400 }, { "epoch": 0.309295654296875, "grad_norm": 16.22494888305664, "learning_rate": 3.9143388008132695e-06, "loss": 3.357, "step": 30405 }, { "epoch": 0.3093465169270833, "grad_norm": 8.67312240600586, "learning_rate": 3.914009046191277e-06, "loss": 3.7883, "step": 30410 }, { "epoch": 0.3093973795572917, "grad_norm": 9.882831573486328, "learning_rate": 3.9136792553922e-06, "loss": 3.2242, "step": 30415 }, { "epoch": 0.3094482421875, "grad_norm": 12.467557907104492, "learning_rate": 3.913349428424473e-06, "loss": 3.2608, "step": 30420 }, { "epoch": 0.3094991048177083, "grad_norm": 13.472617149353027, "learning_rate": 3.913019565296536e-06, "loss": 3.4081, "step": 30425 }, { "epoch": 0.3095499674479167, "grad_norm": 13.341675758361816, "learning_rate": 3.912689666016829e-06, "loss": 3.3591, "step": 30430 }, { "epoch": 0.309600830078125, "grad_norm": 15.18631649017334, "learning_rate": 3.9123597305937915e-06, "loss": 3.2089, "step": 30435 }, { "epoch": 0.3096516927083333, "grad_norm": 10.999125480651855, "learning_rate": 3.912029759035864e-06, "loss": 3.2369, "step": 30440 }, { "epoch": 0.3097025553385417, "grad_norm": 11.49664306640625, "learning_rate": 3.911699751351492e-06, "loss": 3.3434, "step": 30445 }, { "epoch": 0.30975341796875, "grad_norm": 9.98498249053955, "learning_rate": 3.911369707549114e-06, "loss": 3.2589, "step": 30450 }, { "epoch": 0.3098042805989583, "grad_norm": 10.603560447692871, "learning_rate": 3.911039627637179e-06, "loss": 2.9951, "step": 30455 }, { "epoch": 0.3098551432291667, "grad_norm": 8.391109466552734, "learning_rate": 3.910709511624128e-06, "loss": 3.558, "step": 30460 }, { "epoch": 0.309906005859375, "grad_norm": 15.02904224395752, "learning_rate": 3.910379359518409e-06, "loss": 4.0405, "step": 30465 }, { "epoch": 0.3099568684895833, "grad_norm": 11.91486644744873, "learning_rate": 3.910049171328468e-06, "loss": 3.6384, "step": 30470 }, { "epoch": 0.3100077311197917, "grad_norm": 13.936861991882324, "learning_rate": 3.9097189470627535e-06, "loss": 3.1747, "step": 30475 }, { "epoch": 0.31005859375, "grad_norm": 18.80107307434082, "learning_rate": 3.909388686729714e-06, "loss": 3.248, "step": 30480 }, { "epoch": 0.3101094563802083, "grad_norm": 12.20251750946045, "learning_rate": 3.909058390337799e-06, "loss": 3.5339, "step": 30485 }, { "epoch": 0.3101603190104167, "grad_norm": 12.874309539794922, "learning_rate": 3.908728057895459e-06, "loss": 3.1153, "step": 30490 }, { "epoch": 0.310211181640625, "grad_norm": 12.415948867797852, "learning_rate": 3.908397689411146e-06, "loss": 3.4896, "step": 30495 }, { "epoch": 0.3102620442708333, "grad_norm": 8.233391761779785, "learning_rate": 3.908067284893311e-06, "loss": 3.1929, "step": 30500 }, { "epoch": 0.3103129069010417, "grad_norm": 15.04599380493164, "learning_rate": 3.907736844350408e-06, "loss": 3.385, "step": 30505 }, { "epoch": 0.31036376953125, "grad_norm": 9.692916870117188, "learning_rate": 3.907406367790892e-06, "loss": 3.16, "step": 30510 }, { "epoch": 0.3104146321614583, "grad_norm": 13.403112411499023, "learning_rate": 3.907075855223218e-06, "loss": 3.398, "step": 30515 }, { "epoch": 0.3104654947916667, "grad_norm": 18.633678436279297, "learning_rate": 3.9067453066558415e-06, "loss": 3.4533, "step": 30520 }, { "epoch": 0.310516357421875, "grad_norm": 13.57737922668457, "learning_rate": 3.90641472209722e-06, "loss": 3.2045, "step": 30525 }, { "epoch": 0.3105672200520833, "grad_norm": 14.787510871887207, "learning_rate": 3.906084101555812e-06, "loss": 3.4004, "step": 30530 }, { "epoch": 0.3106180826822917, "grad_norm": 8.80350112915039, "learning_rate": 3.9057534450400735e-06, "loss": 3.3955, "step": 30535 }, { "epoch": 0.3106689453125, "grad_norm": 10.163858413696289, "learning_rate": 3.905422752558468e-06, "loss": 3.324, "step": 30540 }, { "epoch": 0.3107198079427083, "grad_norm": 9.57807731628418, "learning_rate": 3.905092024119454e-06, "loss": 3.5297, "step": 30545 }, { "epoch": 0.3107706705729167, "grad_norm": 9.638726234436035, "learning_rate": 3.904761259731493e-06, "loss": 3.2324, "step": 30550 }, { "epoch": 0.310821533203125, "grad_norm": 11.510026931762695, "learning_rate": 3.904430459403049e-06, "loss": 3.2802, "step": 30555 }, { "epoch": 0.3108723958333333, "grad_norm": 14.225361824035645, "learning_rate": 3.904099623142584e-06, "loss": 4.1202, "step": 30560 }, { "epoch": 0.3109232584635417, "grad_norm": 8.770647048950195, "learning_rate": 3.903768750958563e-06, "loss": 3.258, "step": 30565 }, { "epoch": 0.31097412109375, "grad_norm": 11.526542663574219, "learning_rate": 3.903437842859451e-06, "loss": 3.1181, "step": 30570 }, { "epoch": 0.3110249837239583, "grad_norm": 11.786979675292969, "learning_rate": 3.903106898853715e-06, "loss": 2.9532, "step": 30575 }, { "epoch": 0.3110758463541667, "grad_norm": 10.150199890136719, "learning_rate": 3.9027759189498215e-06, "loss": 3.2941, "step": 30580 }, { "epoch": 0.311126708984375, "grad_norm": 13.700420379638672, "learning_rate": 3.902444903156239e-06, "loss": 3.4857, "step": 30585 }, { "epoch": 0.3111775716145833, "grad_norm": 14.847107887268066, "learning_rate": 3.902113851481435e-06, "loss": 3.3416, "step": 30590 }, { "epoch": 0.3112284342447917, "grad_norm": 15.067854881286621, "learning_rate": 3.901782763933881e-06, "loss": 3.1923, "step": 30595 }, { "epoch": 0.311279296875, "grad_norm": 13.336148262023926, "learning_rate": 3.901451640522047e-06, "loss": 3.0086, "step": 30600 }, { "epoch": 0.3113301595052083, "grad_norm": 9.487920761108398, "learning_rate": 3.901120481254405e-06, "loss": 3.1972, "step": 30605 }, { "epoch": 0.3113810221354167, "grad_norm": 10.81859302520752, "learning_rate": 3.900789286139429e-06, "loss": 3.1985, "step": 30610 }, { "epoch": 0.311431884765625, "grad_norm": 8.982028007507324, "learning_rate": 3.900458055185589e-06, "loss": 3.1001, "step": 30615 }, { "epoch": 0.3114827473958333, "grad_norm": 9.588761329650879, "learning_rate": 3.900126788401363e-06, "loss": 3.204, "step": 30620 }, { "epoch": 0.3115336100260417, "grad_norm": 9.439041137695312, "learning_rate": 3.899795485795225e-06, "loss": 3.0524, "step": 30625 }, { "epoch": 0.31158447265625, "grad_norm": 12.190141677856445, "learning_rate": 3.899464147375651e-06, "loss": 3.6372, "step": 30630 }, { "epoch": 0.3116353352864583, "grad_norm": 13.552200317382812, "learning_rate": 3.899132773151118e-06, "loss": 3.2865, "step": 30635 }, { "epoch": 0.3116861979166667, "grad_norm": 13.921281814575195, "learning_rate": 3.898801363130105e-06, "loss": 3.3674, "step": 30640 }, { "epoch": 0.311737060546875, "grad_norm": 8.771903038024902, "learning_rate": 3.898469917321091e-06, "loss": 3.2259, "step": 30645 }, { "epoch": 0.3117879231770833, "grad_norm": 13.268839836120605, "learning_rate": 3.8981384357325545e-06, "loss": 3.368, "step": 30650 }, { "epoch": 0.3118387858072917, "grad_norm": 13.863824844360352, "learning_rate": 3.897806918372978e-06, "loss": 3.2181, "step": 30655 }, { "epoch": 0.3118896484375, "grad_norm": 15.39890193939209, "learning_rate": 3.8974753652508436e-06, "loss": 3.374, "step": 30660 }, { "epoch": 0.3119405110677083, "grad_norm": 13.512394905090332, "learning_rate": 3.897143776374633e-06, "loss": 3.0666, "step": 30665 }, { "epoch": 0.3119913736979167, "grad_norm": 11.035284042358398, "learning_rate": 3.89681215175283e-06, "loss": 3.2409, "step": 30670 }, { "epoch": 0.312042236328125, "grad_norm": 12.44509506225586, "learning_rate": 3.896480491393918e-06, "loss": 3.638, "step": 30675 }, { "epoch": 0.3120930989583333, "grad_norm": 14.145005226135254, "learning_rate": 3.8961487953063845e-06, "loss": 3.1806, "step": 30680 }, { "epoch": 0.3121439615885417, "grad_norm": 7.920605182647705, "learning_rate": 3.895817063498716e-06, "loss": 3.5746, "step": 30685 }, { "epoch": 0.31219482421875, "grad_norm": 11.769000053405762, "learning_rate": 3.895485295979398e-06, "loss": 3.2961, "step": 30690 }, { "epoch": 0.3122456868489583, "grad_norm": 11.063861846923828, "learning_rate": 3.895153492756919e-06, "loss": 3.9124, "step": 30695 }, { "epoch": 0.3122965494791667, "grad_norm": 14.211889266967773, "learning_rate": 3.894821653839769e-06, "loss": 3.4248, "step": 30700 }, { "epoch": 0.312347412109375, "grad_norm": 12.15110969543457, "learning_rate": 3.8944897792364366e-06, "loss": 3.1158, "step": 30705 }, { "epoch": 0.3123982747395833, "grad_norm": 14.808752059936523, "learning_rate": 3.894157868955415e-06, "loss": 3.3555, "step": 30710 }, { "epoch": 0.3124491373697917, "grad_norm": 14.335022926330566, "learning_rate": 3.893825923005195e-06, "loss": 3.2379, "step": 30715 }, { "epoch": 0.3125, "grad_norm": 14.32910442352295, "learning_rate": 3.893493941394268e-06, "loss": 3.1627, "step": 30720 }, { "epoch": 0.3125508626302083, "grad_norm": 11.805767059326172, "learning_rate": 3.893161924131129e-06, "loss": 3.5045, "step": 30725 }, { "epoch": 0.3126017252604167, "grad_norm": 10.202608108520508, "learning_rate": 3.892829871224273e-06, "loss": 3.4099, "step": 30730 }, { "epoch": 0.312652587890625, "grad_norm": 10.266390800476074, "learning_rate": 3.892497782682195e-06, "loss": 3.1493, "step": 30735 }, { "epoch": 0.3127034505208333, "grad_norm": 12.03805160522461, "learning_rate": 3.89216565851339e-06, "loss": 3.225, "step": 30740 }, { "epoch": 0.3127543131510417, "grad_norm": 11.497048377990723, "learning_rate": 3.891833498726359e-06, "loss": 2.9702, "step": 30745 }, { "epoch": 0.31280517578125, "grad_norm": 11.640868186950684, "learning_rate": 3.891501303329596e-06, "loss": 3.4517, "step": 30750 }, { "epoch": 0.3128560384114583, "grad_norm": 14.435894966125488, "learning_rate": 3.891169072331601e-06, "loss": 3.4013, "step": 30755 }, { "epoch": 0.3129069010416667, "grad_norm": 14.759724617004395, "learning_rate": 3.890836805740877e-06, "loss": 3.4514, "step": 30760 }, { "epoch": 0.312957763671875, "grad_norm": 9.112564086914062, "learning_rate": 3.890504503565922e-06, "loss": 3.2215, "step": 30765 }, { "epoch": 0.3130086263020833, "grad_norm": 12.83095645904541, "learning_rate": 3.890172165815239e-06, "loss": 3.357, "step": 30770 }, { "epoch": 0.3130594889322917, "grad_norm": 11.383550643920898, "learning_rate": 3.889839792497331e-06, "loss": 3.113, "step": 30775 }, { "epoch": 0.3131103515625, "grad_norm": 11.741923332214355, "learning_rate": 3.8895073836207015e-06, "loss": 3.3635, "step": 30780 }, { "epoch": 0.3131612141927083, "grad_norm": 8.802398681640625, "learning_rate": 3.889174939193855e-06, "loss": 3.3568, "step": 30785 }, { "epoch": 0.3132120768229167, "grad_norm": 14.020269393920898, "learning_rate": 3.888842459225296e-06, "loss": 3.4345, "step": 30790 }, { "epoch": 0.313262939453125, "grad_norm": 12.648975372314453, "learning_rate": 3.8885099437235325e-06, "loss": 3.5363, "step": 30795 }, { "epoch": 0.3133138020833333, "grad_norm": 13.394015312194824, "learning_rate": 3.888177392697071e-06, "loss": 3.3288, "step": 30800 }, { "epoch": 0.3133646647135417, "grad_norm": 10.144981384277344, "learning_rate": 3.8878448061544204e-06, "loss": 3.1019, "step": 30805 }, { "epoch": 0.31341552734375, "grad_norm": 11.660987854003906, "learning_rate": 3.887512184104088e-06, "loss": 3.2372, "step": 30810 }, { "epoch": 0.3134663899739583, "grad_norm": 14.93466854095459, "learning_rate": 3.8871795265545875e-06, "loss": 3.1339, "step": 30815 }, { "epoch": 0.3135172526041667, "grad_norm": 13.46787166595459, "learning_rate": 3.886846833514426e-06, "loss": 3.6266, "step": 30820 }, { "epoch": 0.313568115234375, "grad_norm": 14.1391019821167, "learning_rate": 3.886514104992117e-06, "loss": 3.4762, "step": 30825 }, { "epoch": 0.3136189778645833, "grad_norm": 16.144411087036133, "learning_rate": 3.886181340996174e-06, "loss": 3.6282, "step": 30830 }, { "epoch": 0.3136698404947917, "grad_norm": 13.948810577392578, "learning_rate": 3.88584854153511e-06, "loss": 3.3651, "step": 30835 }, { "epoch": 0.313720703125, "grad_norm": 7.992741584777832, "learning_rate": 3.885515706617439e-06, "loss": 3.6157, "step": 30840 }, { "epoch": 0.3137715657552083, "grad_norm": 12.564249992370605, "learning_rate": 3.885182836251678e-06, "loss": 3.2923, "step": 30845 }, { "epoch": 0.3138224283854167, "grad_norm": 13.613372802734375, "learning_rate": 3.884849930446341e-06, "loss": 3.39, "step": 30850 }, { "epoch": 0.313873291015625, "grad_norm": 13.717891693115234, "learning_rate": 3.884516989209948e-06, "loss": 3.0935, "step": 30855 }, { "epoch": 0.3139241536458333, "grad_norm": 14.83971881866455, "learning_rate": 3.8841840125510145e-06, "loss": 3.3069, "step": 30860 }, { "epoch": 0.3139750162760417, "grad_norm": 14.060420036315918, "learning_rate": 3.8838510004780615e-06, "loss": 3.3003, "step": 30865 }, { "epoch": 0.31402587890625, "grad_norm": 12.112072944641113, "learning_rate": 3.88351795299961e-06, "loss": 3.2948, "step": 30870 }, { "epoch": 0.3140767415364583, "grad_norm": 12.657612800598145, "learning_rate": 3.883184870124178e-06, "loss": 2.8017, "step": 30875 }, { "epoch": 0.3141276041666667, "grad_norm": 9.51637077331543, "learning_rate": 3.88285175186029e-06, "loss": 3.451, "step": 30880 }, { "epoch": 0.314178466796875, "grad_norm": 8.902334213256836, "learning_rate": 3.882518598216467e-06, "loss": 3.4797, "step": 30885 }, { "epoch": 0.3142293294270833, "grad_norm": 11.905548095703125, "learning_rate": 3.8821854092012345e-06, "loss": 3.1486, "step": 30890 }, { "epoch": 0.3142801920572917, "grad_norm": 11.69454574584961, "learning_rate": 3.8818521848231155e-06, "loss": 3.357, "step": 30895 }, { "epoch": 0.3143310546875, "grad_norm": 9.603013038635254, "learning_rate": 3.881518925090636e-06, "loss": 3.7997, "step": 30900 }, { "epoch": 0.3143819173177083, "grad_norm": 8.937087059020996, "learning_rate": 3.881185630012321e-06, "loss": 3.197, "step": 30905 }, { "epoch": 0.3144327799479167, "grad_norm": 14.387102127075195, "learning_rate": 3.8808522995967e-06, "loss": 3.3608, "step": 30910 }, { "epoch": 0.314483642578125, "grad_norm": 8.974607467651367, "learning_rate": 3.880518933852301e-06, "loss": 3.3294, "step": 30915 }, { "epoch": 0.3145345052083333, "grad_norm": 16.349624633789062, "learning_rate": 3.880185532787651e-06, "loss": 3.7891, "step": 30920 }, { "epoch": 0.3145853678385417, "grad_norm": 10.930258750915527, "learning_rate": 3.879852096411282e-06, "loss": 3.6586, "step": 30925 }, { "epoch": 0.31463623046875, "grad_norm": 12.886204719543457, "learning_rate": 3.879518624731724e-06, "loss": 3.3628, "step": 30930 }, { "epoch": 0.3146870930989583, "grad_norm": 12.439726829528809, "learning_rate": 3.8791851177575085e-06, "loss": 3.2021, "step": 30935 }, { "epoch": 0.3147379557291667, "grad_norm": 8.862081527709961, "learning_rate": 3.878851575497169e-06, "loss": 3.4902, "step": 30940 }, { "epoch": 0.314788818359375, "grad_norm": 10.594961166381836, "learning_rate": 3.878517997959239e-06, "loss": 3.1558, "step": 30945 }, { "epoch": 0.3148396809895833, "grad_norm": 11.894721984863281, "learning_rate": 3.8781843851522525e-06, "loss": 3.5946, "step": 30950 }, { "epoch": 0.3148905436197917, "grad_norm": 9.851780891418457, "learning_rate": 3.877850737084745e-06, "loss": 3.2834, "step": 30955 }, { "epoch": 0.31494140625, "grad_norm": 14.598539352416992, "learning_rate": 3.877517053765253e-06, "loss": 3.8892, "step": 30960 }, { "epoch": 0.3149922688802083, "grad_norm": 12.510114669799805, "learning_rate": 3.877183335202314e-06, "loss": 3.0889, "step": 30965 }, { "epoch": 0.3150431315104167, "grad_norm": 10.286641120910645, "learning_rate": 3.876849581404466e-06, "loss": 3.2251, "step": 30970 }, { "epoch": 0.315093994140625, "grad_norm": 14.571932792663574, "learning_rate": 3.876515792380247e-06, "loss": 3.2544, "step": 30975 }, { "epoch": 0.3151448567708333, "grad_norm": 8.331310272216797, "learning_rate": 3.876181968138199e-06, "loss": 3.6741, "step": 30980 }, { "epoch": 0.3151957194010417, "grad_norm": 14.296549797058105, "learning_rate": 3.8758481086868605e-06, "loss": 3.3286, "step": 30985 }, { "epoch": 0.31524658203125, "grad_norm": 10.8761568069458, "learning_rate": 3.875514214034775e-06, "loss": 3.6252, "step": 30990 }, { "epoch": 0.3152974446614583, "grad_norm": 11.81722640991211, "learning_rate": 3.875180284190483e-06, "loss": 3.6989, "step": 30995 }, { "epoch": 0.3153483072916667, "grad_norm": 14.4451904296875, "learning_rate": 3.874846319162531e-06, "loss": 3.2001, "step": 31000 }, { "epoch": 0.315399169921875, "grad_norm": 11.83335018157959, "learning_rate": 3.87451231895946e-06, "loss": 3.3927, "step": 31005 }, { "epoch": 0.3154500325520833, "grad_norm": 18.12177276611328, "learning_rate": 3.874178283589819e-06, "loss": 3.4793, "step": 31010 }, { "epoch": 0.3155008951822917, "grad_norm": 13.053730010986328, "learning_rate": 3.873844213062151e-06, "loss": 3.3342, "step": 31015 }, { "epoch": 0.3155517578125, "grad_norm": 11.309110641479492, "learning_rate": 3.873510107385004e-06, "loss": 3.9116, "step": 31020 }, { "epoch": 0.3156026204427083, "grad_norm": 14.312835693359375, "learning_rate": 3.873175966566929e-06, "loss": 3.5496, "step": 31025 }, { "epoch": 0.3156534830729167, "grad_norm": 13.165633201599121, "learning_rate": 3.87284179061647e-06, "loss": 3.8198, "step": 31030 }, { "epoch": 0.315704345703125, "grad_norm": 10.730169296264648, "learning_rate": 3.872507579542181e-06, "loss": 3.2504, "step": 31035 }, { "epoch": 0.3157552083333333, "grad_norm": 14.646590232849121, "learning_rate": 3.872173333352609e-06, "loss": 3.1502, "step": 31040 }, { "epoch": 0.3158060709635417, "grad_norm": 9.55855655670166, "learning_rate": 3.871839052056309e-06, "loss": 3.1742, "step": 31045 }, { "epoch": 0.31585693359375, "grad_norm": 15.399552345275879, "learning_rate": 3.871504735661831e-06, "loss": 3.1138, "step": 31050 }, { "epoch": 0.3159077962239583, "grad_norm": 9.473359107971191, "learning_rate": 3.87117038417773e-06, "loss": 3.4158, "step": 31055 }, { "epoch": 0.3159586588541667, "grad_norm": 13.662787437438965, "learning_rate": 3.870835997612559e-06, "loss": 2.8579, "step": 31060 }, { "epoch": 0.316009521484375, "grad_norm": 15.330162048339844, "learning_rate": 3.870501575974875e-06, "loss": 3.4082, "step": 31065 }, { "epoch": 0.3160603841145833, "grad_norm": 10.36662483215332, "learning_rate": 3.870167119273233e-06, "loss": 3.5759, "step": 31070 }, { "epoch": 0.3161112467447917, "grad_norm": 12.250576972961426, "learning_rate": 3.869832627516188e-06, "loss": 3.5613, "step": 31075 }, { "epoch": 0.316162109375, "grad_norm": 10.067193031311035, "learning_rate": 3.8694981007123025e-06, "loss": 3.4331, "step": 31080 }, { "epoch": 0.3162129720052083, "grad_norm": 10.213749885559082, "learning_rate": 3.869163538870131e-06, "loss": 3.1883, "step": 31085 }, { "epoch": 0.3162638346354167, "grad_norm": 10.69174861907959, "learning_rate": 3.868828941998236e-06, "loss": 3.4121, "step": 31090 }, { "epoch": 0.316314697265625, "grad_norm": 11.320060729980469, "learning_rate": 3.8684943101051754e-06, "loss": 3.309, "step": 31095 }, { "epoch": 0.3163655598958333, "grad_norm": 15.44389820098877, "learning_rate": 3.868159643199514e-06, "loss": 3.4406, "step": 31100 }, { "epoch": 0.3164164225260417, "grad_norm": 12.214240074157715, "learning_rate": 3.867824941289812e-06, "loss": 3.4774, "step": 31105 }, { "epoch": 0.31646728515625, "grad_norm": 10.657689094543457, "learning_rate": 3.867490204384632e-06, "loss": 3.3771, "step": 31110 }, { "epoch": 0.3165181477864583, "grad_norm": 11.51058578491211, "learning_rate": 3.8671554324925405e-06, "loss": 3.4663, "step": 31115 }, { "epoch": 0.3165690104166667, "grad_norm": 9.098102569580078, "learning_rate": 3.8668206256221005e-06, "loss": 3.5193, "step": 31120 }, { "epoch": 0.316619873046875, "grad_norm": 13.172540664672852, "learning_rate": 3.866485783781879e-06, "loss": 3.3625, "step": 31125 }, { "epoch": 0.3166707356770833, "grad_norm": 10.455148696899414, "learning_rate": 3.866150906980444e-06, "loss": 3.1612, "step": 31130 }, { "epoch": 0.3167215983072917, "grad_norm": 12.411478042602539, "learning_rate": 3.865815995226359e-06, "loss": 3.2446, "step": 31135 }, { "epoch": 0.3167724609375, "grad_norm": 11.790022850036621, "learning_rate": 3.865481048528198e-06, "loss": 3.383, "step": 31140 }, { "epoch": 0.3168233235677083, "grad_norm": 9.37701416015625, "learning_rate": 3.8651460668945274e-06, "loss": 3.336, "step": 31145 }, { "epoch": 0.3168741861979167, "grad_norm": 13.848721504211426, "learning_rate": 3.864811050333917e-06, "loss": 3.6014, "step": 31150 }, { "epoch": 0.316925048828125, "grad_norm": 14.876688003540039, "learning_rate": 3.864475998854941e-06, "loss": 3.5189, "step": 31155 }, { "epoch": 0.3169759114583333, "grad_norm": 11.406241416931152, "learning_rate": 3.864140912466169e-06, "loss": 3.3236, "step": 31160 }, { "epoch": 0.3170267740885417, "grad_norm": 15.621129035949707, "learning_rate": 3.863805791176176e-06, "loss": 3.897, "step": 31165 }, { "epoch": 0.31707763671875, "grad_norm": 12.045602798461914, "learning_rate": 3.8634706349935344e-06, "loss": 3.459, "step": 31170 }, { "epoch": 0.3171284993489583, "grad_norm": 7.246892929077148, "learning_rate": 3.8631354439268195e-06, "loss": 3.3646, "step": 31175 }, { "epoch": 0.3171793619791667, "grad_norm": 15.568187713623047, "learning_rate": 3.862800217984608e-06, "loss": 3.4646, "step": 31180 }, { "epoch": 0.317230224609375, "grad_norm": 9.922911643981934, "learning_rate": 3.862464957175476e-06, "loss": 3.3717, "step": 31185 }, { "epoch": 0.3172810872395833, "grad_norm": 12.449997901916504, "learning_rate": 3.862129661508001e-06, "loss": 3.4387, "step": 31190 }, { "epoch": 0.3173319498697917, "grad_norm": 16.325824737548828, "learning_rate": 3.8617943309907605e-06, "loss": 3.14, "step": 31195 }, { "epoch": 0.3173828125, "grad_norm": 15.284937858581543, "learning_rate": 3.861458965632335e-06, "loss": 3.3294, "step": 31200 }, { "epoch": 0.3174336751302083, "grad_norm": 11.433347702026367, "learning_rate": 3.861123565441306e-06, "loss": 3.334, "step": 31205 }, { "epoch": 0.3174845377604167, "grad_norm": 12.585484504699707, "learning_rate": 3.860788130426252e-06, "loss": 3.3748, "step": 31210 }, { "epoch": 0.317535400390625, "grad_norm": 16.614295959472656, "learning_rate": 3.860452660595757e-06, "loss": 3.1318, "step": 31215 }, { "epoch": 0.3175862630208333, "grad_norm": 9.032590866088867, "learning_rate": 3.860117155958403e-06, "loss": 3.4407, "step": 31220 }, { "epoch": 0.3176371256510417, "grad_norm": 6.906585693359375, "learning_rate": 3.859781616522773e-06, "loss": 3.8926, "step": 31225 }, { "epoch": 0.31768798828125, "grad_norm": 15.229521751403809, "learning_rate": 3.859446042297453e-06, "loss": 3.289, "step": 31230 }, { "epoch": 0.3177388509114583, "grad_norm": 8.980449676513672, "learning_rate": 3.859110433291029e-06, "loss": 3.2042, "step": 31235 }, { "epoch": 0.3177897135416667, "grad_norm": 12.09065055847168, "learning_rate": 3.858774789512086e-06, "loss": 3.2158, "step": 31240 }, { "epoch": 0.317840576171875, "grad_norm": 10.273479461669922, "learning_rate": 3.858439110969212e-06, "loss": 3.4836, "step": 31245 }, { "epoch": 0.3178914388020833, "grad_norm": 12.64652156829834, "learning_rate": 3.858103397670996e-06, "loss": 3.1187, "step": 31250 }, { "epoch": 0.3179423014322917, "grad_norm": 14.755744934082031, "learning_rate": 3.8577676496260255e-06, "loss": 3.2408, "step": 31255 }, { "epoch": 0.3179931640625, "grad_norm": 11.015496253967285, "learning_rate": 3.8574318668428925e-06, "loss": 3.2791, "step": 31260 }, { "epoch": 0.3180440266927083, "grad_norm": 11.528286933898926, "learning_rate": 3.857096049330186e-06, "loss": 3.1186, "step": 31265 }, { "epoch": 0.3180948893229167, "grad_norm": 11.770857810974121, "learning_rate": 3.8567601970964995e-06, "loss": 3.3614, "step": 31270 }, { "epoch": 0.318145751953125, "grad_norm": 16.619205474853516, "learning_rate": 3.856424310150425e-06, "loss": 3.3688, "step": 31275 }, { "epoch": 0.3181966145833333, "grad_norm": 14.829237937927246, "learning_rate": 3.856088388500555e-06, "loss": 3.5595, "step": 31280 }, { "epoch": 0.3182474772135417, "grad_norm": 10.562813758850098, "learning_rate": 3.855752432155486e-06, "loss": 2.9822, "step": 31285 }, { "epoch": 0.31829833984375, "grad_norm": 14.399613380432129, "learning_rate": 3.855416441123813e-06, "loss": 3.4562, "step": 31290 }, { "epoch": 0.3183492024739583, "grad_norm": 9.176456451416016, "learning_rate": 3.85508041541413e-06, "loss": 3.3423, "step": 31295 }, { "epoch": 0.3184000651041667, "grad_norm": 11.619348526000977, "learning_rate": 3.8547443550350364e-06, "loss": 3.552, "step": 31300 }, { "epoch": 0.318450927734375, "grad_norm": 11.135879516601562, "learning_rate": 3.85440825999513e-06, "loss": 3.3815, "step": 31305 }, { "epoch": 0.3185017903645833, "grad_norm": 12.544413566589355, "learning_rate": 3.8540721303030095e-06, "loss": 3.3132, "step": 31310 }, { "epoch": 0.3185526529947917, "grad_norm": 12.913429260253906, "learning_rate": 3.8537359659672735e-06, "loss": 3.1569, "step": 31315 }, { "epoch": 0.318603515625, "grad_norm": 11.757790565490723, "learning_rate": 3.853399766996524e-06, "loss": 3.0326, "step": 31320 }, { "epoch": 0.3186543782552083, "grad_norm": 11.700207710266113, "learning_rate": 3.853063533399363e-06, "loss": 3.2504, "step": 31325 }, { "epoch": 0.3187052408854167, "grad_norm": 8.301576614379883, "learning_rate": 3.852727265184391e-06, "loss": 3.5017, "step": 31330 }, { "epoch": 0.318756103515625, "grad_norm": 12.666961669921875, "learning_rate": 3.852390962360213e-06, "loss": 3.9952, "step": 31335 }, { "epoch": 0.3188069661458333, "grad_norm": 14.605937957763672, "learning_rate": 3.852054624935433e-06, "loss": 3.2524, "step": 31340 }, { "epoch": 0.3188578287760417, "grad_norm": 11.621174812316895, "learning_rate": 3.851718252918656e-06, "loss": 3.6715, "step": 31345 }, { "epoch": 0.31890869140625, "grad_norm": 9.262434959411621, "learning_rate": 3.8513818463184874e-06, "loss": 3.0173, "step": 31350 }, { "epoch": 0.3189595540364583, "grad_norm": 13.999554634094238, "learning_rate": 3.851045405143535e-06, "loss": 3.2864, "step": 31355 }, { "epoch": 0.3190104166666667, "grad_norm": 12.603280067443848, "learning_rate": 3.850708929402406e-06, "loss": 3.3036, "step": 31360 }, { "epoch": 0.319061279296875, "grad_norm": 11.629323959350586, "learning_rate": 3.8503724191037095e-06, "loss": 3.4563, "step": 31365 }, { "epoch": 0.3191121419270833, "grad_norm": 10.602560043334961, "learning_rate": 3.850035874256054e-06, "loss": 3.4163, "step": 31370 }, { "epoch": 0.3191630045572917, "grad_norm": 14.061223030090332, "learning_rate": 3.849699294868051e-06, "loss": 3.0912, "step": 31375 }, { "epoch": 0.3192138671875, "grad_norm": 9.001642227172852, "learning_rate": 3.849362680948312e-06, "loss": 3.3378, "step": 31380 }, { "epoch": 0.3192647298177083, "grad_norm": 12.85893726348877, "learning_rate": 3.849026032505449e-06, "loss": 3.6936, "step": 31385 }, { "epoch": 0.3193155924479167, "grad_norm": 15.685237884521484, "learning_rate": 3.848689349548073e-06, "loss": 3.2416, "step": 31390 }, { "epoch": 0.319366455078125, "grad_norm": 11.73633861541748, "learning_rate": 3.848352632084801e-06, "loss": 3.5264, "step": 31395 }, { "epoch": 0.3194173177083333, "grad_norm": 14.574155807495117, "learning_rate": 3.8480158801242464e-06, "loss": 3.4237, "step": 31400 }, { "epoch": 0.3194681803385417, "grad_norm": 9.669441223144531, "learning_rate": 3.847679093675024e-06, "loss": 3.1957, "step": 31405 }, { "epoch": 0.31951904296875, "grad_norm": 17.44133186340332, "learning_rate": 3.847342272745752e-06, "loss": 3.8577, "step": 31410 }, { "epoch": 0.3195699055989583, "grad_norm": 8.022843360900879, "learning_rate": 3.847005417345048e-06, "loss": 3.0239, "step": 31415 }, { "epoch": 0.3196207682291667, "grad_norm": 9.9092435836792, "learning_rate": 3.846668527481529e-06, "loss": 2.864, "step": 31420 }, { "epoch": 0.319671630859375, "grad_norm": 15.318598747253418, "learning_rate": 3.846331603163815e-06, "loss": 3.4041, "step": 31425 }, { "epoch": 0.3197224934895833, "grad_norm": 9.314645767211914, "learning_rate": 3.845994644400526e-06, "loss": 3.4976, "step": 31430 }, { "epoch": 0.3197733561197917, "grad_norm": 14.026188850402832, "learning_rate": 3.8456576512002836e-06, "loss": 3.3014, "step": 31435 }, { "epoch": 0.31982421875, "grad_norm": 15.728029251098633, "learning_rate": 3.845320623571708e-06, "loss": 2.8761, "step": 31440 }, { "epoch": 0.3198750813802083, "grad_norm": 8.416905403137207, "learning_rate": 3.8449835615234244e-06, "loss": 3.8231, "step": 31445 }, { "epoch": 0.3199259440104167, "grad_norm": 10.172744750976562, "learning_rate": 3.844646465064055e-06, "loss": 3.4616, "step": 31450 }, { "epoch": 0.319976806640625, "grad_norm": 12.25190258026123, "learning_rate": 3.844309334202223e-06, "loss": 3.1986, "step": 31455 }, { "epoch": 0.3200276692708333, "grad_norm": 12.856034278869629, "learning_rate": 3.843972168946557e-06, "loss": 3.1889, "step": 31460 }, { "epoch": 0.3200785319010417, "grad_norm": 14.786962509155273, "learning_rate": 3.843634969305681e-06, "loss": 3.4426, "step": 31465 }, { "epoch": 0.32012939453125, "grad_norm": 12.744573593139648, "learning_rate": 3.8432977352882225e-06, "loss": 3.2549, "step": 31470 }, { "epoch": 0.3201802571614583, "grad_norm": 14.236984252929688, "learning_rate": 3.842960466902811e-06, "loss": 3.6449, "step": 31475 }, { "epoch": 0.3202311197916667, "grad_norm": 14.349587440490723, "learning_rate": 3.8426231641580725e-06, "loss": 3.1954, "step": 31480 }, { "epoch": 0.320281982421875, "grad_norm": 13.996590614318848, "learning_rate": 3.84228582706264e-06, "loss": 3.6826, "step": 31485 }, { "epoch": 0.3203328450520833, "grad_norm": 10.456219673156738, "learning_rate": 3.841948455625142e-06, "loss": 3.6421, "step": 31490 }, { "epoch": 0.3203837076822917, "grad_norm": 10.800615310668945, "learning_rate": 3.841611049854211e-06, "loss": 3.5236, "step": 31495 }, { "epoch": 0.3204345703125, "grad_norm": 11.609787940979004, "learning_rate": 3.84127360975848e-06, "loss": 3.4777, "step": 31500 }, { "epoch": 0.3204854329427083, "grad_norm": 6.661224842071533, "learning_rate": 3.84093613534658e-06, "loss": 3.431, "step": 31505 }, { "epoch": 0.3205362955729167, "grad_norm": 14.083808898925781, "learning_rate": 3.840598626627149e-06, "loss": 3.5221, "step": 31510 }, { "epoch": 0.320587158203125, "grad_norm": 12.521286964416504, "learning_rate": 3.840261083608818e-06, "loss": 3.9192, "step": 31515 }, { "epoch": 0.3206380208333333, "grad_norm": 15.08007526397705, "learning_rate": 3.839923506300226e-06, "loss": 3.5092, "step": 31520 }, { "epoch": 0.3206888834635417, "grad_norm": 14.394757270812988, "learning_rate": 3.839585894710009e-06, "loss": 3.295, "step": 31525 }, { "epoch": 0.32073974609375, "grad_norm": 13.734356880187988, "learning_rate": 3.839248248846803e-06, "loss": 3.5945, "step": 31530 }, { "epoch": 0.3207906087239583, "grad_norm": 16.14764976501465, "learning_rate": 3.838910568719249e-06, "loss": 3.6131, "step": 31535 }, { "epoch": 0.3208414713541667, "grad_norm": 14.201319694519043, "learning_rate": 3.838572854335985e-06, "loss": 3.2369, "step": 31540 }, { "epoch": 0.320892333984375, "grad_norm": 13.397239685058594, "learning_rate": 3.838235105705651e-06, "loss": 3.8006, "step": 31545 }, { "epoch": 0.3209431966145833, "grad_norm": 12.374748229980469, "learning_rate": 3.837897322836891e-06, "loss": 3.2664, "step": 31550 }, { "epoch": 0.3209940592447917, "grad_norm": 14.16633129119873, "learning_rate": 3.8375595057383445e-06, "loss": 3.7635, "step": 31555 }, { "epoch": 0.321044921875, "grad_norm": 13.520079612731934, "learning_rate": 3.837221654418654e-06, "loss": 3.4169, "step": 31560 }, { "epoch": 0.3210957845052083, "grad_norm": 14.925244331359863, "learning_rate": 3.836883768886465e-06, "loss": 3.5138, "step": 31565 }, { "epoch": 0.3211466471354167, "grad_norm": 10.590404510498047, "learning_rate": 3.836545849150421e-06, "loss": 3.2101, "step": 31570 }, { "epoch": 0.321197509765625, "grad_norm": 14.485934257507324, "learning_rate": 3.836207895219169e-06, "loss": 3.1994, "step": 31575 }, { "epoch": 0.3212483723958333, "grad_norm": 9.394538879394531, "learning_rate": 3.835869907101355e-06, "loss": 3.2738, "step": 31580 }, { "epoch": 0.3212992350260417, "grad_norm": 9.60177230834961, "learning_rate": 3.835531884805626e-06, "loss": 3.0774, "step": 31585 }, { "epoch": 0.32135009765625, "grad_norm": 17.322458267211914, "learning_rate": 3.835193828340629e-06, "loss": 3.6144, "step": 31590 }, { "epoch": 0.3214009602864583, "grad_norm": 13.787790298461914, "learning_rate": 3.834855737715016e-06, "loss": 3.3578, "step": 31595 }, { "epoch": 0.3214518229166667, "grad_norm": 8.987893104553223, "learning_rate": 3.834517612937435e-06, "loss": 3.3803, "step": 31600 }, { "epoch": 0.321502685546875, "grad_norm": 16.33568572998047, "learning_rate": 3.834179454016537e-06, "loss": 3.4638, "step": 31605 }, { "epoch": 0.3215535481770833, "grad_norm": 11.19406509399414, "learning_rate": 3.833841260960973e-06, "loss": 3.3542, "step": 31610 }, { "epoch": 0.3216044108072917, "grad_norm": 14.263209342956543, "learning_rate": 3.833503033779398e-06, "loss": 3.1218, "step": 31615 }, { "epoch": 0.3216552734375, "grad_norm": 6.953857898712158, "learning_rate": 3.833164772480463e-06, "loss": 3.3436, "step": 31620 }, { "epoch": 0.3217061360677083, "grad_norm": 8.796978950500488, "learning_rate": 3.832826477072824e-06, "loss": 3.6858, "step": 31625 }, { "epoch": 0.3217569986979167, "grad_norm": 12.743858337402344, "learning_rate": 3.832488147565135e-06, "loss": 3.4351, "step": 31630 }, { "epoch": 0.321807861328125, "grad_norm": 10.61341667175293, "learning_rate": 3.832149783966053e-06, "loss": 3.5864, "step": 31635 }, { "epoch": 0.3218587239583333, "grad_norm": 12.128693580627441, "learning_rate": 3.831811386284234e-06, "loss": 3.103, "step": 31640 }, { "epoch": 0.3219095865885417, "grad_norm": 13.619905471801758, "learning_rate": 3.831472954528336e-06, "loss": 3.085, "step": 31645 }, { "epoch": 0.32196044921875, "grad_norm": 8.86436653137207, "learning_rate": 3.831134488707019e-06, "loss": 3.3038, "step": 31650 }, { "epoch": 0.3220113118489583, "grad_norm": 10.96633529663086, "learning_rate": 3.830795988828941e-06, "loss": 3.3176, "step": 31655 }, { "epoch": 0.3220621744791667, "grad_norm": 11.57568073272705, "learning_rate": 3.830457454902763e-06, "loss": 3.305, "step": 31660 }, { "epoch": 0.322113037109375, "grad_norm": 14.083612442016602, "learning_rate": 3.830118886937146e-06, "loss": 4.0346, "step": 31665 }, { "epoch": 0.3221638997395833, "grad_norm": 9.294708251953125, "learning_rate": 3.829780284940755e-06, "loss": 3.2512, "step": 31670 }, { "epoch": 0.3222147623697917, "grad_norm": 19.22852325439453, "learning_rate": 3.829441648922247e-06, "loss": 3.9869, "step": 31675 }, { "epoch": 0.322265625, "grad_norm": 14.847890853881836, "learning_rate": 3.8291029788902925e-06, "loss": 2.9014, "step": 31680 }, { "epoch": 0.3223164876302083, "grad_norm": 15.327040672302246, "learning_rate": 3.828764274853552e-06, "loss": 3.4145, "step": 31685 }, { "epoch": 0.3223673502604167, "grad_norm": 16.938589096069336, "learning_rate": 3.828425536820693e-06, "loss": 3.2736, "step": 31690 }, { "epoch": 0.322418212890625, "grad_norm": 10.017430305480957, "learning_rate": 3.828086764800381e-06, "loss": 3.7319, "step": 31695 }, { "epoch": 0.3224690755208333, "grad_norm": 14.776453018188477, "learning_rate": 3.827747958801283e-06, "loss": 3.64, "step": 31700 }, { "epoch": 0.3225199381510417, "grad_norm": 7.89909553527832, "learning_rate": 3.8274091188320695e-06, "loss": 3.294, "step": 31705 }, { "epoch": 0.32257080078125, "grad_norm": 13.75835132598877, "learning_rate": 3.8270702449014085e-06, "loss": 3.744, "step": 31710 }, { "epoch": 0.3226216634114583, "grad_norm": 10.914870262145996, "learning_rate": 3.82673133701797e-06, "loss": 3.1938, "step": 31715 }, { "epoch": 0.3226725260416667, "grad_norm": 17.479778289794922, "learning_rate": 3.826392395190424e-06, "loss": 3.3668, "step": 31720 }, { "epoch": 0.322723388671875, "grad_norm": 11.222243309020996, "learning_rate": 3.826053419427443e-06, "loss": 3.3123, "step": 31725 }, { "epoch": 0.3227742513020833, "grad_norm": 11.842264175415039, "learning_rate": 3.825714409737699e-06, "loss": 3.3255, "step": 31730 }, { "epoch": 0.3228251139322917, "grad_norm": 7.541260719299316, "learning_rate": 3.825375366129867e-06, "loss": 3.4824, "step": 31735 }, { "epoch": 0.3228759765625, "grad_norm": 11.848604202270508, "learning_rate": 3.82503628861262e-06, "loss": 3.7925, "step": 31740 }, { "epoch": 0.3229268391927083, "grad_norm": 11.389389038085938, "learning_rate": 3.824697177194634e-06, "loss": 3.0986, "step": 31745 }, { "epoch": 0.3229777018229167, "grad_norm": 9.12468147277832, "learning_rate": 3.824358031884584e-06, "loss": 3.3481, "step": 31750 }, { "epoch": 0.323028564453125, "grad_norm": 10.102771759033203, "learning_rate": 3.824018852691148e-06, "loss": 3.6813, "step": 31755 }, { "epoch": 0.3230794270833333, "grad_norm": 9.482272148132324, "learning_rate": 3.823679639623004e-06, "loss": 3.5337, "step": 31760 }, { "epoch": 0.3231302897135417, "grad_norm": 9.679491996765137, "learning_rate": 3.823340392688829e-06, "loss": 3.4032, "step": 31765 }, { "epoch": 0.32318115234375, "grad_norm": 13.405425071716309, "learning_rate": 3.823001111897304e-06, "loss": 3.4294, "step": 31770 }, { "epoch": 0.3232320149739583, "grad_norm": 7.808204174041748, "learning_rate": 3.822661797257111e-06, "loss": 3.0086, "step": 31775 }, { "epoch": 0.3232828776041667, "grad_norm": 12.230095863342285, "learning_rate": 3.822322448776927e-06, "loss": 3.2176, "step": 31780 }, { "epoch": 0.323333740234375, "grad_norm": 10.221451759338379, "learning_rate": 3.821983066465438e-06, "loss": 3.34, "step": 31785 }, { "epoch": 0.3233846028645833, "grad_norm": 11.985438346862793, "learning_rate": 3.821643650331325e-06, "loss": 2.9988, "step": 31790 }, { "epoch": 0.3234354654947917, "grad_norm": 18.49512481689453, "learning_rate": 3.821304200383273e-06, "loss": 3.5417, "step": 31795 }, { "epoch": 0.323486328125, "grad_norm": 9.309732437133789, "learning_rate": 3.820964716629965e-06, "loss": 3.3165, "step": 31800 }, { "epoch": 0.3235371907552083, "grad_norm": 10.926666259765625, "learning_rate": 3.8206251990800894e-06, "loss": 3.4721, "step": 31805 }, { "epoch": 0.3235880533854167, "grad_norm": 11.638176918029785, "learning_rate": 3.82028564774233e-06, "loss": 3.2567, "step": 31810 }, { "epoch": 0.323638916015625, "grad_norm": 15.134770393371582, "learning_rate": 3.819946062625376e-06, "loss": 3.7382, "step": 31815 }, { "epoch": 0.3236897786458333, "grad_norm": 8.549654006958008, "learning_rate": 3.819606443737914e-06, "loss": 3.0723, "step": 31820 }, { "epoch": 0.3237406412760417, "grad_norm": 9.45890998840332, "learning_rate": 3.819266791088635e-06, "loss": 3.608, "step": 31825 }, { "epoch": 0.32379150390625, "grad_norm": 10.157885551452637, "learning_rate": 3.818927104686226e-06, "loss": 3.3638, "step": 31830 }, { "epoch": 0.3238423665364583, "grad_norm": 12.008575439453125, "learning_rate": 3.818587384539381e-06, "loss": 2.9182, "step": 31835 }, { "epoch": 0.3238932291666667, "grad_norm": 13.765893936157227, "learning_rate": 3.818247630656791e-06, "loss": 3.6027, "step": 31840 }, { "epoch": 0.323944091796875, "grad_norm": 12.659904479980469, "learning_rate": 3.817907843047148e-06, "loss": 3.6605, "step": 31845 }, { "epoch": 0.3239949544270833, "grad_norm": 12.302201271057129, "learning_rate": 3.817568021719143e-06, "loss": 3.5059, "step": 31850 }, { "epoch": 0.3240458170572917, "grad_norm": 8.679214477539062, "learning_rate": 3.817228166681474e-06, "loss": 3.3955, "step": 31855 }, { "epoch": 0.3240966796875, "grad_norm": 11.400491714477539, "learning_rate": 3.816888277942834e-06, "loss": 3.4571, "step": 31860 }, { "epoch": 0.3241475423177083, "grad_norm": 11.593133926391602, "learning_rate": 3.81654835551192e-06, "loss": 3.5486, "step": 31865 }, { "epoch": 0.3241984049479167, "grad_norm": 16.467374801635742, "learning_rate": 3.816208399397428e-06, "loss": 3.3701, "step": 31870 }, { "epoch": 0.324249267578125, "grad_norm": 16.079572677612305, "learning_rate": 3.815868409608056e-06, "loss": 3.5682, "step": 31875 }, { "epoch": 0.3243001302083333, "grad_norm": 17.888187408447266, "learning_rate": 3.815528386152504e-06, "loss": 3.4161, "step": 31880 }, { "epoch": 0.3243509928385417, "grad_norm": 17.085041046142578, "learning_rate": 3.815188329039468e-06, "loss": 3.2796, "step": 31885 }, { "epoch": 0.32440185546875, "grad_norm": 12.836435317993164, "learning_rate": 3.814848238277652e-06, "loss": 3.4633, "step": 31890 }, { "epoch": 0.3244527180989583, "grad_norm": 18.368680953979492, "learning_rate": 3.8145081138757543e-06, "loss": 3.8005, "step": 31895 }, { "epoch": 0.3245035807291667, "grad_norm": 11.071703910827637, "learning_rate": 3.8141679558424786e-06, "loss": 3.3738, "step": 31900 }, { "epoch": 0.324554443359375, "grad_norm": 13.304140090942383, "learning_rate": 3.8138277641865274e-06, "loss": 2.9373, "step": 31905 }, { "epoch": 0.3246053059895833, "grad_norm": 53.66118621826172, "learning_rate": 3.813487538916604e-06, "loss": 3.8442, "step": 31910 }, { "epoch": 0.3246561686197917, "grad_norm": 10.714417457580566, "learning_rate": 3.813147280041414e-06, "loss": 3.2688, "step": 31915 }, { "epoch": 0.32470703125, "grad_norm": 10.948854446411133, "learning_rate": 3.8128069875696603e-06, "loss": 3.278, "step": 31920 }, { "epoch": 0.3247578938802083, "grad_norm": 8.774290084838867, "learning_rate": 3.8124666615100523e-06, "loss": 3.4067, "step": 31925 }, { "epoch": 0.3248087565104167, "grad_norm": 8.86909294128418, "learning_rate": 3.812126301871296e-06, "loss": 3.3751, "step": 31930 }, { "epoch": 0.324859619140625, "grad_norm": 11.547953605651855, "learning_rate": 3.8117859086620988e-06, "loss": 3.2366, "step": 31935 }, { "epoch": 0.3249104817708333, "grad_norm": 7.339703559875488, "learning_rate": 3.8114454818911707e-06, "loss": 3.2675, "step": 31940 }, { "epoch": 0.3249613444010417, "grad_norm": 13.766386032104492, "learning_rate": 3.8111050215672196e-06, "loss": 3.3403, "step": 31945 }, { "epoch": 0.32501220703125, "grad_norm": 8.798249244689941, "learning_rate": 3.810764527698958e-06, "loss": 3.3275, "step": 31950 }, { "epoch": 0.3250630696614583, "grad_norm": 12.045044898986816, "learning_rate": 3.8104240002950963e-06, "loss": 3.8319, "step": 31955 }, { "epoch": 0.3251139322916667, "grad_norm": 13.24450969696045, "learning_rate": 3.810083439364347e-06, "loss": 3.4244, "step": 31960 }, { "epoch": 0.325164794921875, "grad_norm": 12.002233505249023, "learning_rate": 3.809742844915425e-06, "loss": 3.5424, "step": 31965 }, { "epoch": 0.3252156575520833, "grad_norm": 7.133974552154541, "learning_rate": 3.8094022169570412e-06, "loss": 3.2509, "step": 31970 }, { "epoch": 0.3252665201822917, "grad_norm": 12.657421112060547, "learning_rate": 3.8090615554979127e-06, "loss": 3.1012, "step": 31975 }, { "epoch": 0.3253173828125, "grad_norm": 14.409326553344727, "learning_rate": 3.808720860546754e-06, "loss": 3.3739, "step": 31980 }, { "epoch": 0.3253682454427083, "grad_norm": 11.968620300292969, "learning_rate": 3.8083801321122824e-06, "loss": 3.1848, "step": 31985 }, { "epoch": 0.3254191080729167, "grad_norm": 11.809328079223633, "learning_rate": 3.808039370203216e-06, "loss": 3.3059, "step": 31990 }, { "epoch": 0.325469970703125, "grad_norm": 17.407115936279297, "learning_rate": 3.8076985748282725e-06, "loss": 3.6285, "step": 31995 }, { "epoch": 0.3255208333333333, "grad_norm": 7.9313483238220215, "learning_rate": 3.8073577459961697e-06, "loss": 3.3734, "step": 32000 }, { "epoch": 0.3255716959635417, "grad_norm": 8.173722267150879, "learning_rate": 3.8070168837156294e-06, "loss": 3.6385, "step": 32005 }, { "epoch": 0.32562255859375, "grad_norm": 11.985635757446289, "learning_rate": 3.806675987995372e-06, "loss": 3.2194, "step": 32010 }, { "epoch": 0.3256734212239583, "grad_norm": 10.79389762878418, "learning_rate": 3.8063350588441194e-06, "loss": 3.2867, "step": 32015 }, { "epoch": 0.3257242838541667, "grad_norm": 11.685125350952148, "learning_rate": 3.8059940962705934e-06, "loss": 3.2666, "step": 32020 }, { "epoch": 0.325775146484375, "grad_norm": 11.623699188232422, "learning_rate": 3.8056531002835184e-06, "loss": 3.4345, "step": 32025 }, { "epoch": 0.3258260091145833, "grad_norm": 8.528718948364258, "learning_rate": 3.805312070891618e-06, "loss": 3.0183, "step": 32030 }, { "epoch": 0.3258768717447917, "grad_norm": 8.97779655456543, "learning_rate": 3.8049710081036174e-06, "loss": 3.2587, "step": 32035 }, { "epoch": 0.325927734375, "grad_norm": 8.91556167602539, "learning_rate": 3.8046299119282437e-06, "loss": 3.3258, "step": 32040 }, { "epoch": 0.3259785970052083, "grad_norm": 9.004982948303223, "learning_rate": 3.8042887823742224e-06, "loss": 3.235, "step": 32045 }, { "epoch": 0.3260294596354167, "grad_norm": 12.783254623413086, "learning_rate": 3.8039476194502816e-06, "loss": 3.56, "step": 32050 }, { "epoch": 0.326080322265625, "grad_norm": 8.640666961669922, "learning_rate": 3.8036064231651502e-06, "loss": 3.0825, "step": 32055 }, { "epoch": 0.3261311848958333, "grad_norm": 15.606740951538086, "learning_rate": 3.8032651935275583e-06, "loss": 3.2792, "step": 32060 }, { "epoch": 0.3261820475260417, "grad_norm": 12.018491744995117, "learning_rate": 3.802923930546234e-06, "loss": 3.4059, "step": 32065 }, { "epoch": 0.32623291015625, "grad_norm": 13.65191650390625, "learning_rate": 3.802582634229911e-06, "loss": 3.4325, "step": 32070 }, { "epoch": 0.3262837727864583, "grad_norm": 11.970004081726074, "learning_rate": 3.802241304587319e-06, "loss": 3.4715, "step": 32075 }, { "epoch": 0.3263346354166667, "grad_norm": 11.346258163452148, "learning_rate": 3.8018999416271924e-06, "loss": 3.4341, "step": 32080 }, { "epoch": 0.326385498046875, "grad_norm": 8.91111946105957, "learning_rate": 3.8015585453582645e-06, "loss": 3.7919, "step": 32085 }, { "epoch": 0.3264363606770833, "grad_norm": 10.042716026306152, "learning_rate": 3.801217115789269e-06, "loss": 2.9715, "step": 32090 }, { "epoch": 0.3264872233072917, "grad_norm": 8.29838752746582, "learning_rate": 3.8008756529289424e-06, "loss": 3.8137, "step": 32095 }, { "epoch": 0.3265380859375, "grad_norm": 13.774858474731445, "learning_rate": 3.800534156786021e-06, "loss": 3.4684, "step": 32100 }, { "epoch": 0.3265889485677083, "grad_norm": 10.080721855163574, "learning_rate": 3.800192627369241e-06, "loss": 3.6454, "step": 32105 }, { "epoch": 0.3266398111979167, "grad_norm": 11.651310920715332, "learning_rate": 3.799851064687341e-06, "loss": 3.1652, "step": 32110 }, { "epoch": 0.326690673828125, "grad_norm": 13.935715675354004, "learning_rate": 3.799509468749059e-06, "loss": 3.2123, "step": 32115 }, { "epoch": 0.3267415364583333, "grad_norm": 10.228445053100586, "learning_rate": 3.7991678395631366e-06, "loss": 3.958, "step": 32120 }, { "epoch": 0.3267923990885417, "grad_norm": 9.072430610656738, "learning_rate": 3.7988261771383115e-06, "loss": 3.7092, "step": 32125 }, { "epoch": 0.32684326171875, "grad_norm": 10.914796829223633, "learning_rate": 3.7984844814833277e-06, "loss": 3.1457, "step": 32130 }, { "epoch": 0.3268941243489583, "grad_norm": 13.16501522064209, "learning_rate": 3.798142752606926e-06, "loss": 2.9826, "step": 32135 }, { "epoch": 0.3269449869791667, "grad_norm": 8.828996658325195, "learning_rate": 3.797800990517849e-06, "loss": 2.97, "step": 32140 }, { "epoch": 0.326995849609375, "grad_norm": 11.060222625732422, "learning_rate": 3.797459195224842e-06, "loss": 3.0853, "step": 32145 }, { "epoch": 0.3270467122395833, "grad_norm": 11.040933609008789, "learning_rate": 3.7971173667366478e-06, "loss": 3.1886, "step": 32150 }, { "epoch": 0.3270975748697917, "grad_norm": 8.803873062133789, "learning_rate": 3.796775505062014e-06, "loss": 3.4695, "step": 32155 }, { "epoch": 0.3271484375, "grad_norm": 11.375222206115723, "learning_rate": 3.7964336102096867e-06, "loss": 3.1505, "step": 32160 }, { "epoch": 0.3271993001302083, "grad_norm": 12.583060264587402, "learning_rate": 3.7960916821884132e-06, "loss": 3.1935, "step": 32165 }, { "epoch": 0.3272501627604167, "grad_norm": 9.795934677124023, "learning_rate": 3.7957497210069395e-06, "loss": 3.2419, "step": 32170 }, { "epoch": 0.327301025390625, "grad_norm": 13.176041603088379, "learning_rate": 3.7954077266740176e-06, "loss": 3.3261, "step": 32175 }, { "epoch": 0.3273518880208333, "grad_norm": 10.81923770904541, "learning_rate": 3.795065699198395e-06, "loss": 3.2257, "step": 32180 }, { "epoch": 0.3274027506510417, "grad_norm": 15.314313888549805, "learning_rate": 3.794723638588825e-06, "loss": 3.5417, "step": 32185 }, { "epoch": 0.32745361328125, "grad_norm": 12.650420188903809, "learning_rate": 3.7943815448540566e-06, "loss": 3.4859, "step": 32190 }, { "epoch": 0.3275044759114583, "grad_norm": 14.460564613342285, "learning_rate": 3.7940394180028427e-06, "loss": 3.2375, "step": 32195 }, { "epoch": 0.3275553385416667, "grad_norm": 11.314863204956055, "learning_rate": 3.793697258043938e-06, "loss": 3.3251, "step": 32200 }, { "epoch": 0.327606201171875, "grad_norm": 10.726693153381348, "learning_rate": 3.793355064986095e-06, "loss": 3.1633, "step": 32205 }, { "epoch": 0.3276570638020833, "grad_norm": 14.347328186035156, "learning_rate": 3.7930128388380694e-06, "loss": 3.3347, "step": 32210 }, { "epoch": 0.3277079264322917, "grad_norm": 9.588699340820312, "learning_rate": 3.792670579608617e-06, "loss": 3.4328, "step": 32215 }, { "epoch": 0.3277587890625, "grad_norm": 13.1727294921875, "learning_rate": 3.7923282873064937e-06, "loss": 3.1322, "step": 32220 }, { "epoch": 0.3278096516927083, "grad_norm": 12.08580493927002, "learning_rate": 3.7919859619404586e-06, "loss": 3.4606, "step": 32225 }, { "epoch": 0.3278605143229167, "grad_norm": 16.556385040283203, "learning_rate": 3.791643603519268e-06, "loss": 3.7167, "step": 32230 }, { "epoch": 0.327911376953125, "grad_norm": 9.649636268615723, "learning_rate": 3.7913012120516823e-06, "loss": 3.774, "step": 32235 }, { "epoch": 0.3279622395833333, "grad_norm": 13.704182624816895, "learning_rate": 3.7909587875464602e-06, "loss": 3.5504, "step": 32240 }, { "epoch": 0.3280131022135417, "grad_norm": 7.9661126136779785, "learning_rate": 3.7906163300123645e-06, "loss": 3.4497, "step": 32245 }, { "epoch": 0.32806396484375, "grad_norm": 10.539923667907715, "learning_rate": 3.7902738394581563e-06, "loss": 3.3076, "step": 32250 }, { "epoch": 0.3281148274739583, "grad_norm": 13.280152320861816, "learning_rate": 3.7899313158925967e-06, "loss": 3.2912, "step": 32255 }, { "epoch": 0.3281656901041667, "grad_norm": 16.510393142700195, "learning_rate": 3.7895887593244506e-06, "loss": 2.9562, "step": 32260 }, { "epoch": 0.328216552734375, "grad_norm": 8.455154418945312, "learning_rate": 3.7892461697624814e-06, "loss": 3.0784, "step": 32265 }, { "epoch": 0.3282674153645833, "grad_norm": 16.78911590576172, "learning_rate": 3.7889035472154552e-06, "loss": 3.3725, "step": 32270 }, { "epoch": 0.3283182779947917, "grad_norm": 15.698395729064941, "learning_rate": 3.7885608916921366e-06, "loss": 3.5096, "step": 32275 }, { "epoch": 0.328369140625, "grad_norm": 11.11893367767334, "learning_rate": 3.7882182032012936e-06, "loss": 3.0646, "step": 32280 }, { "epoch": 0.3284200032552083, "grad_norm": 12.249351501464844, "learning_rate": 3.7878754817516927e-06, "loss": 3.1635, "step": 32285 }, { "epoch": 0.3284708658854167, "grad_norm": 11.541364669799805, "learning_rate": 3.7875327273521035e-06, "loss": 3.1003, "step": 32290 }, { "epoch": 0.328521728515625, "grad_norm": 13.217779159545898, "learning_rate": 3.787189940011294e-06, "loss": 3.5588, "step": 32295 }, { "epoch": 0.3285725911458333, "grad_norm": 15.351944923400879, "learning_rate": 3.7868471197380353e-06, "loss": 3.5397, "step": 32300 }, { "epoch": 0.3286234537760417, "grad_norm": 7.318637847900391, "learning_rate": 3.786504266541098e-06, "loss": 3.4449, "step": 32305 }, { "epoch": 0.32867431640625, "grad_norm": 17.64267349243164, "learning_rate": 3.7861613804292542e-06, "loss": 3.6639, "step": 32310 }, { "epoch": 0.3287251790364583, "grad_norm": 10.956125259399414, "learning_rate": 3.7858184614112758e-06, "loss": 3.1958, "step": 32315 }, { "epoch": 0.3287760416666667, "grad_norm": 11.594647407531738, "learning_rate": 3.7854755094959374e-06, "loss": 3.473, "step": 32320 }, { "epoch": 0.328826904296875, "grad_norm": 13.282804489135742, "learning_rate": 3.7851325246920117e-06, "loss": 3.3825, "step": 32325 }, { "epoch": 0.3288777669270833, "grad_norm": 10.976999282836914, "learning_rate": 3.7847895070082763e-06, "loss": 3.06, "step": 32330 }, { "epoch": 0.3289286295572917, "grad_norm": 12.472411155700684, "learning_rate": 3.784446456453505e-06, "loss": 3.4135, "step": 32335 }, { "epoch": 0.3289794921875, "grad_norm": 12.325407981872559, "learning_rate": 3.7841033730364764e-06, "loss": 3.239, "step": 32340 }, { "epoch": 0.3290303548177083, "grad_norm": 7.367898464202881, "learning_rate": 3.783760256765967e-06, "loss": 3.4794, "step": 32345 }, { "epoch": 0.3290812174479167, "grad_norm": 7.337851524353027, "learning_rate": 3.7834171076507546e-06, "loss": 3.1509, "step": 32350 }, { "epoch": 0.329132080078125, "grad_norm": 15.461119651794434, "learning_rate": 3.7830739256996213e-06, "loss": 2.9971, "step": 32355 }, { "epoch": 0.3291829427083333, "grad_norm": 11.367182731628418, "learning_rate": 3.7827307109213453e-06, "loss": 3.182, "step": 32360 }, { "epoch": 0.3292338053385417, "grad_norm": 12.533026695251465, "learning_rate": 3.7823874633247075e-06, "loss": 3.354, "step": 32365 }, { "epoch": 0.32928466796875, "grad_norm": 16.5407657623291, "learning_rate": 3.7820441829184907e-06, "loss": 3.2569, "step": 32370 }, { "epoch": 0.3293355305989583, "grad_norm": 14.216818809509277, "learning_rate": 3.781700869711478e-06, "loss": 3.6429, "step": 32375 }, { "epoch": 0.3293863932291667, "grad_norm": 15.732698440551758, "learning_rate": 3.781357523712451e-06, "loss": 3.5927, "step": 32380 }, { "epoch": 0.329437255859375, "grad_norm": 9.038349151611328, "learning_rate": 3.7810141449301976e-06, "loss": 3.4536, "step": 32385 }, { "epoch": 0.3294881184895833, "grad_norm": 14.39390754699707, "learning_rate": 3.780670733373499e-06, "loss": 3.2767, "step": 32390 }, { "epoch": 0.3295389811197917, "grad_norm": 9.732683181762695, "learning_rate": 3.7803272890511437e-06, "loss": 3.4086, "step": 32395 }, { "epoch": 0.32958984375, "grad_norm": 15.71953296661377, "learning_rate": 3.779983811971919e-06, "loss": 3.1739, "step": 32400 }, { "epoch": 0.3296407063802083, "grad_norm": 14.279335021972656, "learning_rate": 3.7796403021446106e-06, "loss": 3.3621, "step": 32405 }, { "epoch": 0.3296915690104167, "grad_norm": 12.024027824401855, "learning_rate": 3.7792967595780095e-06, "loss": 3.6476, "step": 32410 }, { "epoch": 0.329742431640625, "grad_norm": 10.159197807312012, "learning_rate": 3.7789531842809034e-06, "loss": 3.4376, "step": 32415 }, { "epoch": 0.3297932942708333, "grad_norm": 13.340349197387695, "learning_rate": 3.7786095762620836e-06, "loss": 3.6374, "step": 32420 }, { "epoch": 0.3298441569010417, "grad_norm": 8.4384126663208, "learning_rate": 3.7782659355303397e-06, "loss": 3.2591, "step": 32425 }, { "epoch": 0.32989501953125, "grad_norm": 14.894301414489746, "learning_rate": 3.777922262094466e-06, "loss": 3.2045, "step": 32430 }, { "epoch": 0.3299458821614583, "grad_norm": 9.043319702148438, "learning_rate": 3.777578555963254e-06, "loss": 3.133, "step": 32435 }, { "epoch": 0.3299967447916667, "grad_norm": 7.9150919914245605, "learning_rate": 3.777234817145497e-06, "loss": 3.0001, "step": 32440 }, { "epoch": 0.330047607421875, "grad_norm": 13.222947120666504, "learning_rate": 3.77689104564999e-06, "loss": 3.2136, "step": 32445 }, { "epoch": 0.3300984700520833, "grad_norm": 11.708751678466797, "learning_rate": 3.7765472414855294e-06, "loss": 3.4354, "step": 32450 }, { "epoch": 0.3301493326822917, "grad_norm": 15.85578727722168, "learning_rate": 3.7762034046609085e-06, "loss": 3.5524, "step": 32455 }, { "epoch": 0.3302001953125, "grad_norm": 7.159616470336914, "learning_rate": 3.7758595351849276e-06, "loss": 3.0285, "step": 32460 }, { "epoch": 0.3302510579427083, "grad_norm": 8.465569496154785, "learning_rate": 3.7755156330663824e-06, "loss": 3.2453, "step": 32465 }, { "epoch": 0.3303019205729167, "grad_norm": 6.593746662139893, "learning_rate": 3.7751716983140713e-06, "loss": 3.0596, "step": 32470 }, { "epoch": 0.330352783203125, "grad_norm": 11.99484634399414, "learning_rate": 3.7748277309367946e-06, "loss": 3.342, "step": 32475 }, { "epoch": 0.3304036458333333, "grad_norm": 13.411246299743652, "learning_rate": 3.7744837309433534e-06, "loss": 3.2973, "step": 32480 }, { "epoch": 0.3304545084635417, "grad_norm": 10.368703842163086, "learning_rate": 3.774139698342547e-06, "loss": 3.322, "step": 32485 }, { "epoch": 0.33050537109375, "grad_norm": 11.17487621307373, "learning_rate": 3.773795633143179e-06, "loss": 2.9814, "step": 32490 }, { "epoch": 0.3305562337239583, "grad_norm": 13.313066482543945, "learning_rate": 3.7734515353540526e-06, "loss": 3.746, "step": 32495 }, { "epoch": 0.3306070963541667, "grad_norm": 17.076797485351562, "learning_rate": 3.7731074049839695e-06, "loss": 3.3898, "step": 32500 }, { "epoch": 0.330657958984375, "grad_norm": 9.995706558227539, "learning_rate": 3.772763242041735e-06, "loss": 3.1957, "step": 32505 }, { "epoch": 0.3307088216145833, "grad_norm": 12.41891098022461, "learning_rate": 3.7724190465361553e-06, "loss": 3.2141, "step": 32510 }, { "epoch": 0.3307596842447917, "grad_norm": 9.968422889709473, "learning_rate": 3.7720748184760353e-06, "loss": 3.4549, "step": 32515 }, { "epoch": 0.330810546875, "grad_norm": 9.146455764770508, "learning_rate": 3.7717305578701824e-06, "loss": 3.6653, "step": 32520 }, { "epoch": 0.3308614095052083, "grad_norm": 7.412753105163574, "learning_rate": 3.7713862647274054e-06, "loss": 3.9118, "step": 32525 }, { "epoch": 0.3309122721354167, "grad_norm": 11.762595176696777, "learning_rate": 3.7710419390565117e-06, "loss": 3.6556, "step": 32530 }, { "epoch": 0.330963134765625, "grad_norm": 8.641430854797363, "learning_rate": 3.7706975808663117e-06, "loss": 3.6632, "step": 32535 }, { "epoch": 0.3310139973958333, "grad_norm": 12.438641548156738, "learning_rate": 3.770353190165614e-06, "loss": 3.4598, "step": 32540 }, { "epoch": 0.3310648600260417, "grad_norm": 78.32935333251953, "learning_rate": 3.7700087669632325e-06, "loss": 3.5554, "step": 32545 }, { "epoch": 0.33111572265625, "grad_norm": 7.5797295570373535, "learning_rate": 3.769664311267977e-06, "loss": 3.04, "step": 32550 }, { "epoch": 0.3311665852864583, "grad_norm": 14.218374252319336, "learning_rate": 3.769319823088662e-06, "loss": 2.6611, "step": 32555 }, { "epoch": 0.3312174479166667, "grad_norm": 15.772465705871582, "learning_rate": 3.7689753024340987e-06, "loss": 3.4001, "step": 32560 }, { "epoch": 0.331268310546875, "grad_norm": 12.117385864257812, "learning_rate": 3.7686307493131043e-06, "loss": 3.4332, "step": 32565 }, { "epoch": 0.3313191731770833, "grad_norm": 10.31899356842041, "learning_rate": 3.7682861637344914e-06, "loss": 3.3407, "step": 32570 }, { "epoch": 0.3313700358072917, "grad_norm": 15.768966674804688, "learning_rate": 3.7679415457070787e-06, "loss": 3.4107, "step": 32575 }, { "epoch": 0.3314208984375, "grad_norm": 11.954106330871582, "learning_rate": 3.7675968952396824e-06, "loss": 3.4454, "step": 32580 }, { "epoch": 0.3314717610677083, "grad_norm": 12.425500869750977, "learning_rate": 3.7672522123411194e-06, "loss": 3.7169, "step": 32585 }, { "epoch": 0.3315226236979167, "grad_norm": 15.04654598236084, "learning_rate": 3.7669074970202092e-06, "loss": 3.389, "step": 32590 }, { "epoch": 0.331573486328125, "grad_norm": 12.327473640441895, "learning_rate": 3.7665627492857702e-06, "loss": 2.9167, "step": 32595 }, { "epoch": 0.3316243489583333, "grad_norm": 12.949674606323242, "learning_rate": 3.766217969146625e-06, "loss": 3.2564, "step": 32600 }, { "epoch": 0.3316752115885417, "grad_norm": 10.511869430541992, "learning_rate": 3.7658731566115924e-06, "loss": 3.2482, "step": 32605 }, { "epoch": 0.33172607421875, "grad_norm": 13.890068054199219, "learning_rate": 3.7655283116894947e-06, "loss": 3.3871, "step": 32610 }, { "epoch": 0.3317769368489583, "grad_norm": 15.479687690734863, "learning_rate": 3.765183434389156e-06, "loss": 3.3261, "step": 32615 }, { "epoch": 0.3318277994791667, "grad_norm": 10.50954532623291, "learning_rate": 3.764838524719398e-06, "loss": 3.1074, "step": 32620 }, { "epoch": 0.331878662109375, "grad_norm": 16.40087127685547, "learning_rate": 3.764493582689047e-06, "loss": 3.1189, "step": 32625 }, { "epoch": 0.3319295247395833, "grad_norm": 11.862818717956543, "learning_rate": 3.764148608306927e-06, "loss": 3.6677, "step": 32630 }, { "epoch": 0.3319803873697917, "grad_norm": 15.727245330810547, "learning_rate": 3.763803601581864e-06, "loss": 3.3587, "step": 32635 }, { "epoch": 0.33203125, "grad_norm": 12.317474365234375, "learning_rate": 3.763458562522686e-06, "loss": 3.4326, "step": 32640 }, { "epoch": 0.3320821126302083, "grad_norm": 9.582719802856445, "learning_rate": 3.7631134911382196e-06, "loss": 4.0108, "step": 32645 }, { "epoch": 0.3321329752604167, "grad_norm": 15.248978614807129, "learning_rate": 3.7627683874372943e-06, "loss": 3.1654, "step": 32650 }, { "epoch": 0.332183837890625, "grad_norm": 14.71537971496582, "learning_rate": 3.7624232514287396e-06, "loss": 3.4238, "step": 32655 }, { "epoch": 0.3322347005208333, "grad_norm": 10.918207168579102, "learning_rate": 3.7620780831213842e-06, "loss": 3.5747, "step": 32660 }, { "epoch": 0.3322855631510417, "grad_norm": 12.324572563171387, "learning_rate": 3.7617328825240614e-06, "loss": 3.325, "step": 32665 }, { "epoch": 0.33233642578125, "grad_norm": 8.128783226013184, "learning_rate": 3.7613876496456014e-06, "loss": 3.8681, "step": 32670 }, { "epoch": 0.3323872884114583, "grad_norm": 12.606327056884766, "learning_rate": 3.7610423844948367e-06, "loss": 3.4388, "step": 32675 }, { "epoch": 0.3324381510416667, "grad_norm": 8.71562671661377, "learning_rate": 3.7606970870806024e-06, "loss": 3.1369, "step": 32680 }, { "epoch": 0.332489013671875, "grad_norm": 17.26483917236328, "learning_rate": 3.760351757411732e-06, "loss": 3.4954, "step": 32685 }, { "epoch": 0.3325398763020833, "grad_norm": 14.267714500427246, "learning_rate": 3.76000639549706e-06, "loss": 3.2069, "step": 32690 }, { "epoch": 0.3325907389322917, "grad_norm": 7.968595504760742, "learning_rate": 3.7596610013454226e-06, "loss": 3.4987, "step": 32695 }, { "epoch": 0.3326416015625, "grad_norm": 12.251919746398926, "learning_rate": 3.7593155749656583e-06, "loss": 3.4392, "step": 32700 }, { "epoch": 0.3326924641927083, "grad_norm": 11.02563190460205, "learning_rate": 3.7589701163666026e-06, "loss": 2.8879, "step": 32705 }, { "epoch": 0.3327433268229167, "grad_norm": 10.374553680419922, "learning_rate": 3.758624625557096e-06, "loss": 3.5271, "step": 32710 }, { "epoch": 0.332794189453125, "grad_norm": 11.383188247680664, "learning_rate": 3.7582791025459753e-06, "loss": 3.1762, "step": 32715 }, { "epoch": 0.3328450520833333, "grad_norm": 14.80070972442627, "learning_rate": 3.757933547342083e-06, "loss": 3.7152, "step": 32720 }, { "epoch": 0.3328959147135417, "grad_norm": 10.358442306518555, "learning_rate": 3.7575879599542574e-06, "loss": 3.2842, "step": 32725 }, { "epoch": 0.33294677734375, "grad_norm": 11.465337753295898, "learning_rate": 3.7572423403913437e-06, "loss": 3.3594, "step": 32730 }, { "epoch": 0.3329976399739583, "grad_norm": 16.327484130859375, "learning_rate": 3.7568966886621827e-06, "loss": 3.2038, "step": 32735 }, { "epoch": 0.3330485026041667, "grad_norm": 11.995901107788086, "learning_rate": 3.7565510047756165e-06, "loss": 3.1667, "step": 32740 }, { "epoch": 0.333099365234375, "grad_norm": 11.180763244628906, "learning_rate": 3.756205288740492e-06, "loss": 3.2824, "step": 32745 }, { "epoch": 0.3331502278645833, "grad_norm": 13.001599311828613, "learning_rate": 3.755859540565652e-06, "loss": 3.4875, "step": 32750 }, { "epoch": 0.3332010904947917, "grad_norm": 12.533217430114746, "learning_rate": 3.755513760259944e-06, "loss": 3.0541, "step": 32755 }, { "epoch": 0.333251953125, "grad_norm": 14.233641624450684, "learning_rate": 3.755167947832213e-06, "loss": 2.9513, "step": 32760 }, { "epoch": 0.3333028157552083, "grad_norm": 10.958354949951172, "learning_rate": 3.754822103291309e-06, "loss": 3.5791, "step": 32765 }, { "epoch": 0.3333536783854167, "grad_norm": 8.330187797546387, "learning_rate": 3.7544762266460784e-06, "loss": 3.4526, "step": 32770 }, { "epoch": 0.333404541015625, "grad_norm": 9.813884735107422, "learning_rate": 3.754130317905371e-06, "loss": 3.0888, "step": 32775 }, { "epoch": 0.3334554036458333, "grad_norm": 10.979777336120605, "learning_rate": 3.753784377078036e-06, "loss": 3.5022, "step": 32780 }, { "epoch": 0.3335062662760417, "grad_norm": 14.47551155090332, "learning_rate": 3.7534384041729255e-06, "loss": 3.3508, "step": 32785 }, { "epoch": 0.33355712890625, "grad_norm": 11.598821640014648, "learning_rate": 3.75309239919889e-06, "loss": 3.2422, "step": 32790 }, { "epoch": 0.3336079915364583, "grad_norm": 12.611983299255371, "learning_rate": 3.752746362164783e-06, "loss": 3.2404, "step": 32795 }, { "epoch": 0.3336588541666667, "grad_norm": 13.306591987609863, "learning_rate": 3.7524002930794567e-06, "loss": 3.3322, "step": 32800 }, { "epoch": 0.333709716796875, "grad_norm": 14.114723205566406, "learning_rate": 3.752054191951766e-06, "loss": 3.4821, "step": 32805 }, { "epoch": 0.3337605794270833, "grad_norm": 8.142168045043945, "learning_rate": 3.751708058790566e-06, "loss": 3.6567, "step": 32810 }, { "epoch": 0.3338114420572917, "grad_norm": 16.089536666870117, "learning_rate": 3.7513618936047116e-06, "loss": 3.0567, "step": 32815 }, { "epoch": 0.3338623046875, "grad_norm": 13.037550926208496, "learning_rate": 3.7510156964030597e-06, "loss": 3.6844, "step": 32820 }, { "epoch": 0.3339131673177083, "grad_norm": 13.749690055847168, "learning_rate": 3.750669467194469e-06, "loss": 3.5359, "step": 32825 }, { "epoch": 0.3339640299479167, "grad_norm": 11.21825122833252, "learning_rate": 3.7503232059877947e-06, "loss": 3.0431, "step": 32830 }, { "epoch": 0.334014892578125, "grad_norm": 10.482390403747559, "learning_rate": 3.7499769127918977e-06, "loss": 3.1787, "step": 32835 }, { "epoch": 0.3340657552083333, "grad_norm": 14.753950119018555, "learning_rate": 3.7496305876156385e-06, "loss": 3.9245, "step": 32840 }, { "epoch": 0.3341166178385417, "grad_norm": 15.145987510681152, "learning_rate": 3.749284230467876e-06, "loss": 3.4858, "step": 32845 }, { "epoch": 0.33416748046875, "grad_norm": 9.231215476989746, "learning_rate": 3.7489378413574735e-06, "loss": 3.7131, "step": 32850 }, { "epoch": 0.3342183430989583, "grad_norm": 14.36355972290039, "learning_rate": 3.748591420293292e-06, "loss": 3.2538, "step": 32855 }, { "epoch": 0.3342692057291667, "grad_norm": 11.919439315795898, "learning_rate": 3.748244967284195e-06, "loss": 3.6638, "step": 32860 }, { "epoch": 0.334320068359375, "grad_norm": 10.983847618103027, "learning_rate": 3.7478984823390465e-06, "loss": 3.1181, "step": 32865 }, { "epoch": 0.3343709309895833, "grad_norm": 13.097049713134766, "learning_rate": 3.74755196546671e-06, "loss": 3.3605, "step": 32870 }, { "epoch": 0.3344217936197917, "grad_norm": 20.51844024658203, "learning_rate": 3.7472054166760535e-06, "loss": 3.0601, "step": 32875 }, { "epoch": 0.33447265625, "grad_norm": 12.644318580627441, "learning_rate": 3.7468588359759416e-06, "loss": 3.4744, "step": 32880 }, { "epoch": 0.3345235188802083, "grad_norm": 10.257139205932617, "learning_rate": 3.7465122233752415e-06, "loss": 3.3923, "step": 32885 }, { "epoch": 0.3345743815104167, "grad_norm": 12.080252647399902, "learning_rate": 3.7461655788828227e-06, "loss": 3.308, "step": 32890 }, { "epoch": 0.334625244140625, "grad_norm": 11.510242462158203, "learning_rate": 3.745818902507552e-06, "loss": 3.2991, "step": 32895 }, { "epoch": 0.3346761067708333, "grad_norm": 11.790918350219727, "learning_rate": 3.7454721942583007e-06, "loss": 3.4248, "step": 32900 }, { "epoch": 0.3347269694010417, "grad_norm": 10.164443016052246, "learning_rate": 3.745125454143938e-06, "loss": 3.2437, "step": 32905 }, { "epoch": 0.33477783203125, "grad_norm": 15.713869094848633, "learning_rate": 3.7447786821733356e-06, "loss": 3.3894, "step": 32910 }, { "epoch": 0.3348286946614583, "grad_norm": 10.7219877243042, "learning_rate": 3.744431878355366e-06, "loss": 3.2601, "step": 32915 }, { "epoch": 0.3348795572916667, "grad_norm": 15.252687454223633, "learning_rate": 3.7440850426989013e-06, "loss": 4.0186, "step": 32920 }, { "epoch": 0.334930419921875, "grad_norm": 14.942272186279297, "learning_rate": 3.743738175212816e-06, "loss": 3.5481, "step": 32925 }, { "epoch": 0.3349812825520833, "grad_norm": 14.428038597106934, "learning_rate": 3.743391275905984e-06, "loss": 3.4527, "step": 32930 }, { "epoch": 0.3350321451822917, "grad_norm": 12.873485565185547, "learning_rate": 3.743044344787281e-06, "loss": 3.0538, "step": 32935 }, { "epoch": 0.3350830078125, "grad_norm": 14.081838607788086, "learning_rate": 3.7426973818655833e-06, "loss": 3.5227, "step": 32940 }, { "epoch": 0.3351338704427083, "grad_norm": 10.624444007873535, "learning_rate": 3.7423503871497674e-06, "loss": 3.285, "step": 32945 }, { "epoch": 0.3351847330729167, "grad_norm": 11.455140113830566, "learning_rate": 3.7420033606487114e-06, "loss": 3.121, "step": 32950 }, { "epoch": 0.335235595703125, "grad_norm": 10.438578605651855, "learning_rate": 3.741656302371294e-06, "loss": 3.1361, "step": 32955 }, { "epoch": 0.3352864583333333, "grad_norm": 12.183462142944336, "learning_rate": 3.741309212326394e-06, "loss": 3.1173, "step": 32960 }, { "epoch": 0.3353373209635417, "grad_norm": 11.119400978088379, "learning_rate": 3.7409620905228927e-06, "loss": 2.9163, "step": 32965 }, { "epoch": 0.33538818359375, "grad_norm": 13.267557144165039, "learning_rate": 3.7406149369696704e-06, "loss": 2.92, "step": 32970 }, { "epoch": 0.3354390462239583, "grad_norm": 7.646348476409912, "learning_rate": 3.740267751675608e-06, "loss": 3.6382, "step": 32975 }, { "epoch": 0.3354899088541667, "grad_norm": 10.166210174560547, "learning_rate": 3.7399205346495905e-06, "loss": 3.2228, "step": 32980 }, { "epoch": 0.335540771484375, "grad_norm": 9.681705474853516, "learning_rate": 3.739573285900499e-06, "loss": 3.2517, "step": 32985 }, { "epoch": 0.3355916341145833, "grad_norm": 14.030284881591797, "learning_rate": 3.739226005437219e-06, "loss": 3.2506, "step": 32990 }, { "epoch": 0.3356424967447917, "grad_norm": 13.2785062789917, "learning_rate": 3.738878693268636e-06, "loss": 3.5483, "step": 32995 }, { "epoch": 0.335693359375, "grad_norm": 10.970511436462402, "learning_rate": 3.738531349403634e-06, "loss": 3.4073, "step": 33000 }, { "epoch": 0.3357442220052083, "grad_norm": 14.737278938293457, "learning_rate": 3.7381839738511024e-06, "loss": 3.2362, "step": 33005 }, { "epoch": 0.3357950846354167, "grad_norm": 10.005160331726074, "learning_rate": 3.737836566619927e-06, "loss": 3.5164, "step": 33010 }, { "epoch": 0.335845947265625, "grad_norm": 13.465721130371094, "learning_rate": 3.7374891277189957e-06, "loss": 3.4245, "step": 33015 }, { "epoch": 0.3358968098958333, "grad_norm": 15.173518180847168, "learning_rate": 3.7371416571571983e-06, "loss": 3.0978, "step": 33020 }, { "epoch": 0.3359476725260417, "grad_norm": 14.85016918182373, "learning_rate": 3.7367941549434254e-06, "loss": 3.4838, "step": 33025 }, { "epoch": 0.33599853515625, "grad_norm": 11.801966667175293, "learning_rate": 3.7364466210865676e-06, "loss": 3.839, "step": 33030 }, { "epoch": 0.3360493977864583, "grad_norm": 10.579994201660156, "learning_rate": 3.736099055595515e-06, "loss": 3.2351, "step": 33035 }, { "epoch": 0.3361002604166667, "grad_norm": 12.842473983764648, "learning_rate": 3.735751458479162e-06, "loss": 3.5836, "step": 33040 }, { "epoch": 0.336151123046875, "grad_norm": 12.5321683883667, "learning_rate": 3.7354038297464e-06, "loss": 3.1939, "step": 33045 }, { "epoch": 0.3362019856770833, "grad_norm": 10.775299072265625, "learning_rate": 3.7350561694061245e-06, "loss": 3.3556, "step": 33050 }, { "epoch": 0.3362528483072917, "grad_norm": 11.678428649902344, "learning_rate": 3.7347084774672295e-06, "loss": 3.8341, "step": 33055 }, { "epoch": 0.3363037109375, "grad_norm": 15.017143249511719, "learning_rate": 3.734360753938611e-06, "loss": 3.647, "step": 33060 }, { "epoch": 0.3363545735677083, "grad_norm": 14.71731185913086, "learning_rate": 3.7340129988291645e-06, "loss": 3.2912, "step": 33065 }, { "epoch": 0.3364054361979167, "grad_norm": 12.676302909851074, "learning_rate": 3.733665212147789e-06, "loss": 3.313, "step": 33070 }, { "epoch": 0.336456298828125, "grad_norm": 10.882745742797852, "learning_rate": 3.7333173939033807e-06, "loss": 2.9863, "step": 33075 }, { "epoch": 0.3365071614583333, "grad_norm": 11.048257827758789, "learning_rate": 3.732969544104839e-06, "loss": 3.0796, "step": 33080 }, { "epoch": 0.3365580240885417, "grad_norm": 13.560098648071289, "learning_rate": 3.732621662761064e-06, "loss": 3.1773, "step": 33085 }, { "epoch": 0.33660888671875, "grad_norm": 10.831151962280273, "learning_rate": 3.732273749880956e-06, "loss": 3.264, "step": 33090 }, { "epoch": 0.3366597493489583, "grad_norm": 12.574798583984375, "learning_rate": 3.7319258054734166e-06, "loss": 3.4186, "step": 33095 }, { "epoch": 0.3367106119791667, "grad_norm": 12.322128295898438, "learning_rate": 3.7315778295473467e-06, "loss": 3.4011, "step": 33100 }, { "epoch": 0.336761474609375, "grad_norm": 11.991111755371094, "learning_rate": 3.73122982211165e-06, "loss": 3.152, "step": 33105 }, { "epoch": 0.3368123372395833, "grad_norm": 9.92126178741455, "learning_rate": 3.730881783175231e-06, "loss": 3.0965, "step": 33110 }, { "epoch": 0.3368631998697917, "grad_norm": 7.607785224914551, "learning_rate": 3.730533712746992e-06, "loss": 3.1248, "step": 33115 }, { "epoch": 0.3369140625, "grad_norm": 10.90238094329834, "learning_rate": 3.7301856108358407e-06, "loss": 3.5398, "step": 33120 }, { "epoch": 0.3369649251302083, "grad_norm": 11.631036758422852, "learning_rate": 3.729837477450682e-06, "loss": 3.4372, "step": 33125 }, { "epoch": 0.3370157877604167, "grad_norm": 10.736931800842285, "learning_rate": 3.729489312600423e-06, "loss": 3.4555, "step": 33130 }, { "epoch": 0.337066650390625, "grad_norm": 11.401076316833496, "learning_rate": 3.7291411162939705e-06, "loss": 3.1215, "step": 33135 }, { "epoch": 0.3371175130208333, "grad_norm": 13.46949577331543, "learning_rate": 3.728792888540235e-06, "loss": 3.5968, "step": 33140 }, { "epoch": 0.3371683756510417, "grad_norm": 11.45217227935791, "learning_rate": 3.7284446293481235e-06, "loss": 3.2885, "step": 33145 }, { "epoch": 0.33721923828125, "grad_norm": 13.1810941696167, "learning_rate": 3.7280963387265484e-06, "loss": 3.298, "step": 33150 }, { "epoch": 0.3372701009114583, "grad_norm": 6.902253150939941, "learning_rate": 3.727748016684419e-06, "loss": 3.6278, "step": 33155 }, { "epoch": 0.3373209635416667, "grad_norm": 7.337198734283447, "learning_rate": 3.7273996632306473e-06, "loss": 3.5548, "step": 33160 }, { "epoch": 0.337371826171875, "grad_norm": 9.131155014038086, "learning_rate": 3.7270512783741475e-06, "loss": 3.3623, "step": 33165 }, { "epoch": 0.3374226888020833, "grad_norm": 13.159061431884766, "learning_rate": 3.7267028621238305e-06, "loss": 3.4383, "step": 33170 }, { "epoch": 0.3374735514322917, "grad_norm": 10.10805606842041, "learning_rate": 3.7263544144886123e-06, "loss": 3.2849, "step": 33175 }, { "epoch": 0.3375244140625, "grad_norm": 13.516425132751465, "learning_rate": 3.726005935477406e-06, "loss": 3.5075, "step": 33180 }, { "epoch": 0.3375752766927083, "grad_norm": 16.516651153564453, "learning_rate": 3.725657425099129e-06, "loss": 3.4631, "step": 33185 }, { "epoch": 0.3376261393229167, "grad_norm": 10.670194625854492, "learning_rate": 3.7253088833626973e-06, "loss": 2.9451, "step": 33190 }, { "epoch": 0.337677001953125, "grad_norm": 13.951617240905762, "learning_rate": 3.724960310277028e-06, "loss": 3.5035, "step": 33195 }, { "epoch": 0.3377278645833333, "grad_norm": 9.054759979248047, "learning_rate": 3.72461170585104e-06, "loss": 3.4928, "step": 33200 }, { "epoch": 0.3377787272135417, "grad_norm": 11.50307846069336, "learning_rate": 3.724263070093651e-06, "loss": 3.1345, "step": 33205 }, { "epoch": 0.33782958984375, "grad_norm": 6.693308353424072, "learning_rate": 3.723914403013782e-06, "loss": 3.3025, "step": 33210 }, { "epoch": 0.3378804524739583, "grad_norm": 14.62735652923584, "learning_rate": 3.7235657046203537e-06, "loss": 3.4618, "step": 33215 }, { "epoch": 0.3379313151041667, "grad_norm": 10.572195053100586, "learning_rate": 3.7232169749222856e-06, "loss": 3.9669, "step": 33220 }, { "epoch": 0.337982177734375, "grad_norm": 11.943432807922363, "learning_rate": 3.722868213928502e-06, "loss": 3.3552, "step": 33225 }, { "epoch": 0.3380330403645833, "grad_norm": 9.331600189208984, "learning_rate": 3.7225194216479255e-06, "loss": 3.3776, "step": 33230 }, { "epoch": 0.3380839029947917, "grad_norm": 13.232575416564941, "learning_rate": 3.722170598089478e-06, "loss": 3.1956, "step": 33235 }, { "epoch": 0.338134765625, "grad_norm": 11.602453231811523, "learning_rate": 3.7218217432620863e-06, "loss": 3.5115, "step": 33240 }, { "epoch": 0.3381856282552083, "grad_norm": 9.096488952636719, "learning_rate": 3.721472857174675e-06, "loss": 3.6, "step": 33245 }, { "epoch": 0.3382364908854167, "grad_norm": 8.760087013244629, "learning_rate": 3.7211239398361697e-06, "loss": 3.2964, "step": 33250 }, { "epoch": 0.338287353515625, "grad_norm": 12.91967487335205, "learning_rate": 3.720774991255498e-06, "loss": 3.1863, "step": 33255 }, { "epoch": 0.3383382161458333, "grad_norm": 15.063009262084961, "learning_rate": 3.7204260114415875e-06, "loss": 3.2829, "step": 33260 }, { "epoch": 0.3383890787760417, "grad_norm": 6.675595760345459, "learning_rate": 3.7200770004033666e-06, "loss": 3.0992, "step": 33265 }, { "epoch": 0.33843994140625, "grad_norm": 11.452447891235352, "learning_rate": 3.719727958149765e-06, "loss": 3.5164, "step": 33270 }, { "epoch": 0.3384908040364583, "grad_norm": 9.369376182556152, "learning_rate": 3.7193788846897126e-06, "loss": 3.4509, "step": 33275 }, { "epoch": 0.3385416666666667, "grad_norm": 11.180233001708984, "learning_rate": 3.719029780032141e-06, "loss": 3.1632, "step": 33280 }, { "epoch": 0.338592529296875, "grad_norm": 7.883488655090332, "learning_rate": 3.7186806441859806e-06, "loss": 3.4346, "step": 33285 }, { "epoch": 0.3386433919270833, "grad_norm": 9.978160858154297, "learning_rate": 3.7183314771601652e-06, "loss": 3.2653, "step": 33290 }, { "epoch": 0.3386942545572917, "grad_norm": 15.136893272399902, "learning_rate": 3.7179822789636276e-06, "loss": 3.1239, "step": 33295 }, { "epoch": 0.3387451171875, "grad_norm": 8.271352767944336, "learning_rate": 3.7176330496053026e-06, "loss": 3.0803, "step": 33300 }, { "epoch": 0.3387959798177083, "grad_norm": 14.263900756835938, "learning_rate": 3.7172837890941243e-06, "loss": 3.3083, "step": 33305 }, { "epoch": 0.3388468424479167, "grad_norm": 13.307559967041016, "learning_rate": 3.7169344974390286e-06, "loss": 3.3729, "step": 33310 }, { "epoch": 0.338897705078125, "grad_norm": 11.805386543273926, "learning_rate": 3.716585174648952e-06, "loss": 3.2258, "step": 33315 }, { "epoch": 0.3389485677083333, "grad_norm": 12.272468566894531, "learning_rate": 3.716235820732833e-06, "loss": 3.3413, "step": 33320 }, { "epoch": 0.3389994303385417, "grad_norm": 8.26090145111084, "learning_rate": 3.7158864356996076e-06, "loss": 3.3389, "step": 33325 }, { "epoch": 0.33905029296875, "grad_norm": 11.451239585876465, "learning_rate": 3.7155370195582167e-06, "loss": 3.2675, "step": 33330 }, { "epoch": 0.3391011555989583, "grad_norm": 13.9033203125, "learning_rate": 3.715187572317599e-06, "loss": 3.3937, "step": 33335 }, { "epoch": 0.3391520182291667, "grad_norm": 11.666393280029297, "learning_rate": 3.7148380939866956e-06, "loss": 3.2749, "step": 33340 }, { "epoch": 0.339202880859375, "grad_norm": 7.004508018493652, "learning_rate": 3.714488584574448e-06, "loss": 3.6018, "step": 33345 }, { "epoch": 0.3392537434895833, "grad_norm": 11.859896659851074, "learning_rate": 3.7141390440897964e-06, "loss": 3.2948, "step": 33350 }, { "epoch": 0.3393046061197917, "grad_norm": 13.420574188232422, "learning_rate": 3.7137894725416863e-06, "loss": 3.476, "step": 33355 }, { "epoch": 0.33935546875, "grad_norm": 14.50550651550293, "learning_rate": 3.7134398699390597e-06, "loss": 3.2388, "step": 33360 }, { "epoch": 0.3394063313802083, "grad_norm": 14.22556209564209, "learning_rate": 3.7130902362908616e-06, "loss": 3.5979, "step": 33365 }, { "epoch": 0.3394571940104167, "grad_norm": 8.849228858947754, "learning_rate": 3.7127405716060376e-06, "loss": 3.4067, "step": 33370 }, { "epoch": 0.339508056640625, "grad_norm": 14.5337553024292, "learning_rate": 3.7123908758935332e-06, "loss": 3.3617, "step": 33375 }, { "epoch": 0.3395589192708333, "grad_norm": 11.952080726623535, "learning_rate": 3.712041149162296e-06, "loss": 3.4079, "step": 33380 }, { "epoch": 0.3396097819010417, "grad_norm": 8.4275484085083, "learning_rate": 3.7116913914212737e-06, "loss": 3.1598, "step": 33385 }, { "epoch": 0.33966064453125, "grad_norm": 17.485008239746094, "learning_rate": 3.7113416026794135e-06, "loss": 3.5281, "step": 33390 }, { "epoch": 0.3397115071614583, "grad_norm": 12.192771911621094, "learning_rate": 3.710991782945666e-06, "loss": 3.6066, "step": 33395 }, { "epoch": 0.3397623697916667, "grad_norm": 7.462334632873535, "learning_rate": 3.7106419322289807e-06, "loss": 3.3792, "step": 33400 }, { "epoch": 0.339813232421875, "grad_norm": 11.422493934631348, "learning_rate": 3.710292050538308e-06, "loss": 3.5719, "step": 33405 }, { "epoch": 0.3398640950520833, "grad_norm": 9.912327766418457, "learning_rate": 3.7099421378826004e-06, "loss": 3.5519, "step": 33410 }, { "epoch": 0.3399149576822917, "grad_norm": 11.72535228729248, "learning_rate": 3.7095921942708103e-06, "loss": 3.3383, "step": 33415 }, { "epoch": 0.3399658203125, "grad_norm": 8.569254875183105, "learning_rate": 3.7092422197118905e-06, "loss": 3.2259, "step": 33420 }, { "epoch": 0.3400166829427083, "grad_norm": 13.561548233032227, "learning_rate": 3.708892214214795e-06, "loss": 3.586, "step": 33425 }, { "epoch": 0.3400675455729167, "grad_norm": 16.515724182128906, "learning_rate": 3.7085421777884786e-06, "loss": 3.584, "step": 33430 }, { "epoch": 0.340118408203125, "grad_norm": 12.934800148010254, "learning_rate": 3.7081921104418974e-06, "loss": 3.8999, "step": 33435 }, { "epoch": 0.3401692708333333, "grad_norm": 7.906087398529053, "learning_rate": 3.7078420121840063e-06, "loss": 3.283, "step": 33440 }, { "epoch": 0.3402201334635417, "grad_norm": 15.088556289672852, "learning_rate": 3.7074918830237647e-06, "loss": 3.113, "step": 33445 }, { "epoch": 0.34027099609375, "grad_norm": 9.234477043151855, "learning_rate": 3.70714172297013e-06, "loss": 3.3398, "step": 33450 }, { "epoch": 0.3403218587239583, "grad_norm": 13.867288589477539, "learning_rate": 3.706791532032059e-06, "loss": 3.2631, "step": 33455 }, { "epoch": 0.3403727213541667, "grad_norm": 12.773273468017578, "learning_rate": 3.7064413102185137e-06, "loss": 3.3552, "step": 33460 }, { "epoch": 0.340423583984375, "grad_norm": 8.415233612060547, "learning_rate": 3.7060910575384534e-06, "loss": 3.3519, "step": 33465 }, { "epoch": 0.3404744466145833, "grad_norm": 14.105103492736816, "learning_rate": 3.705740774000839e-06, "loss": 3.4592, "step": 33470 }, { "epoch": 0.3405253092447917, "grad_norm": 7.179952621459961, "learning_rate": 3.705390459614633e-06, "loss": 3.0682, "step": 33475 }, { "epoch": 0.340576171875, "grad_norm": 8.724873542785645, "learning_rate": 3.705040114388797e-06, "loss": 3.6412, "step": 33480 }, { "epoch": 0.3406270345052083, "grad_norm": 13.953536033630371, "learning_rate": 3.7046897383322962e-06, "loss": 3.4858, "step": 33485 }, { "epoch": 0.3406778971354167, "grad_norm": 11.59171199798584, "learning_rate": 3.704339331454093e-06, "loss": 3.3904, "step": 33490 }, { "epoch": 0.340728759765625, "grad_norm": 9.839463233947754, "learning_rate": 3.703988893763154e-06, "loss": 3.2637, "step": 33495 }, { "epoch": 0.3407796223958333, "grad_norm": 12.875106811523438, "learning_rate": 3.7036384252684454e-06, "loss": 3.5458, "step": 33500 }, { "epoch": 0.3408304850260417, "grad_norm": 12.247830390930176, "learning_rate": 3.7032879259789315e-06, "loss": 3.0541, "step": 33505 }, { "epoch": 0.34088134765625, "grad_norm": 13.42300033569336, "learning_rate": 3.7029373959035825e-06, "loss": 3.296, "step": 33510 }, { "epoch": 0.3409322102864583, "grad_norm": 13.851029396057129, "learning_rate": 3.702586835051364e-06, "loss": 3.9352, "step": 33515 }, { "epoch": 0.3409830729166667, "grad_norm": 11.831171989440918, "learning_rate": 3.7022362434312475e-06, "loss": 3.2998, "step": 33520 }, { "epoch": 0.341033935546875, "grad_norm": 11.240053176879883, "learning_rate": 3.7018856210522013e-06, "loss": 4.1, "step": 33525 }, { "epoch": 0.3410847981770833, "grad_norm": 11.248284339904785, "learning_rate": 3.7015349679231964e-06, "loss": 3.0341, "step": 33530 }, { "epoch": 0.3411356608072917, "grad_norm": 12.049551963806152, "learning_rate": 3.7011842840532042e-06, "loss": 3.5037, "step": 33535 }, { "epoch": 0.3411865234375, "grad_norm": 14.031254768371582, "learning_rate": 3.7008335694511966e-06, "loss": 3.1509, "step": 33540 }, { "epoch": 0.3412373860677083, "grad_norm": 12.392552375793457, "learning_rate": 3.700482824126147e-06, "loss": 3.2808, "step": 33545 }, { "epoch": 0.3412882486979167, "grad_norm": 9.247160911560059, "learning_rate": 3.7001320480870286e-06, "loss": 3.1487, "step": 33550 }, { "epoch": 0.341339111328125, "grad_norm": 11.892711639404297, "learning_rate": 3.699781241342817e-06, "loss": 3.4006, "step": 33555 }, { "epoch": 0.3413899739583333, "grad_norm": 8.088289260864258, "learning_rate": 3.699430403902485e-06, "loss": 3.5574, "step": 33560 }, { "epoch": 0.3414408365885417, "grad_norm": 8.417235374450684, "learning_rate": 3.699079535775012e-06, "loss": 3.5206, "step": 33565 }, { "epoch": 0.34149169921875, "grad_norm": 14.191048622131348, "learning_rate": 3.6987286369693724e-06, "loss": 3.2619, "step": 33570 }, { "epoch": 0.3415425618489583, "grad_norm": 12.321990013122559, "learning_rate": 3.698377707494546e-06, "loss": 3.454, "step": 33575 }, { "epoch": 0.3415934244791667, "grad_norm": 13.851666450500488, "learning_rate": 3.698026747359509e-06, "loss": 3.1697, "step": 33580 }, { "epoch": 0.341644287109375, "grad_norm": 14.189324378967285, "learning_rate": 3.6976757565732413e-06, "loss": 3.4847, "step": 33585 }, { "epoch": 0.3416951497395833, "grad_norm": 9.495662689208984, "learning_rate": 3.697324735144724e-06, "loss": 3.3849, "step": 33590 }, { "epoch": 0.3417460123697917, "grad_norm": 12.85490894317627, "learning_rate": 3.6969736830829367e-06, "loss": 3.681, "step": 33595 }, { "epoch": 0.341796875, "grad_norm": 10.4622802734375, "learning_rate": 3.6966226003968613e-06, "loss": 3.3093, "step": 33600 }, { "epoch": 0.3418477376302083, "grad_norm": 13.98997974395752, "learning_rate": 3.6962714870954815e-06, "loss": 3.1084, "step": 33605 }, { "epoch": 0.3418986002604167, "grad_norm": 9.652063369750977, "learning_rate": 3.6959203431877777e-06, "loss": 3.8275, "step": 33610 }, { "epoch": 0.341949462890625, "grad_norm": 9.276095390319824, "learning_rate": 3.6955691686827367e-06, "loss": 3.2222, "step": 33615 }, { "epoch": 0.3420003255208333, "grad_norm": 13.359003067016602, "learning_rate": 3.695217963589341e-06, "loss": 3.5332, "step": 33620 }, { "epoch": 0.3420511881510417, "grad_norm": 9.17261028289795, "learning_rate": 3.694866727916576e-06, "loss": 3.1112, "step": 33625 }, { "epoch": 0.34210205078125, "grad_norm": 13.203807830810547, "learning_rate": 3.6945154616734315e-06, "loss": 3.4515, "step": 33630 }, { "epoch": 0.3421529134114583, "grad_norm": 12.045472145080566, "learning_rate": 3.6941641648688895e-06, "loss": 3.5353, "step": 33635 }, { "epoch": 0.3422037760416667, "grad_norm": 14.34790325164795, "learning_rate": 3.6938128375119425e-06, "loss": 3.4131, "step": 33640 }, { "epoch": 0.342254638671875, "grad_norm": 12.461421966552734, "learning_rate": 3.6934614796115764e-06, "loss": 3.2753, "step": 33645 }, { "epoch": 0.3423055013020833, "grad_norm": 11.347406387329102, "learning_rate": 3.69311009117678e-06, "loss": 3.6192, "step": 33650 }, { "epoch": 0.3423563639322917, "grad_norm": 9.508161544799805, "learning_rate": 3.6927586722165466e-06, "loss": 3.2605, "step": 33655 }, { "epoch": 0.3424072265625, "grad_norm": 11.28418254852295, "learning_rate": 3.692407222739864e-06, "loss": 3.005, "step": 33660 }, { "epoch": 0.3424580891927083, "grad_norm": 8.46851634979248, "learning_rate": 3.6920557427557257e-06, "loss": 3.5675, "step": 33665 }, { "epoch": 0.3425089518229167, "grad_norm": 12.884858131408691, "learning_rate": 3.691704232273124e-06, "loss": 3.572, "step": 33670 }, { "epoch": 0.342559814453125, "grad_norm": 12.269371032714844, "learning_rate": 3.691352691301051e-06, "loss": 3.5664, "step": 33675 }, { "epoch": 0.3426106770833333, "grad_norm": 12.720943450927734, "learning_rate": 3.691001119848503e-06, "loss": 3.2918, "step": 33680 }, { "epoch": 0.3426615397135417, "grad_norm": 8.249712944030762, "learning_rate": 3.690649517924473e-06, "loss": 3.2787, "step": 33685 }, { "epoch": 0.34271240234375, "grad_norm": 11.139851570129395, "learning_rate": 3.6902978855379577e-06, "loss": 3.1602, "step": 33690 }, { "epoch": 0.3427632649739583, "grad_norm": 9.868725776672363, "learning_rate": 3.689946222697953e-06, "loss": 3.0968, "step": 33695 }, { "epoch": 0.3428141276041667, "grad_norm": 19.89038848876953, "learning_rate": 3.6895945294134556e-06, "loss": 3.4411, "step": 33700 }, { "epoch": 0.342864990234375, "grad_norm": 13.837854385375977, "learning_rate": 3.689242805693465e-06, "loss": 3.3441, "step": 33705 }, { "epoch": 0.3429158528645833, "grad_norm": 13.5803804397583, "learning_rate": 3.688891051546979e-06, "loss": 3.2884, "step": 33710 }, { "epoch": 0.3429667154947917, "grad_norm": 15.511903762817383, "learning_rate": 3.688539266982996e-06, "loss": 3.4876, "step": 33715 }, { "epoch": 0.343017578125, "grad_norm": 11.785426139831543, "learning_rate": 3.688187452010519e-06, "loss": 3.4428, "step": 33720 }, { "epoch": 0.3430684407552083, "grad_norm": 14.247519493103027, "learning_rate": 3.6878356066385467e-06, "loss": 3.3219, "step": 33725 }, { "epoch": 0.3431193033854167, "grad_norm": 11.359977722167969, "learning_rate": 3.6874837308760823e-06, "loss": 3.2945, "step": 33730 }, { "epoch": 0.343170166015625, "grad_norm": 17.293371200561523, "learning_rate": 3.6871318247321288e-06, "loss": 3.3167, "step": 33735 }, { "epoch": 0.3432210286458333, "grad_norm": 10.258779525756836, "learning_rate": 3.686779888215688e-06, "loss": 3.1372, "step": 33740 }, { "epoch": 0.3432718912760417, "grad_norm": 11.674057006835938, "learning_rate": 3.6864279213357655e-06, "loss": 3.477, "step": 33745 }, { "epoch": 0.34332275390625, "grad_norm": 15.193865776062012, "learning_rate": 3.686075924101366e-06, "loss": 3.3141, "step": 33750 }, { "epoch": 0.3433736165364583, "grad_norm": 9.285595893859863, "learning_rate": 3.685723896521495e-06, "loss": 3.4461, "step": 33755 }, { "epoch": 0.3434244791666667, "grad_norm": 11.371384620666504, "learning_rate": 3.685371838605159e-06, "loss": 3.3281, "step": 33760 }, { "epoch": 0.343475341796875, "grad_norm": 16.047948837280273, "learning_rate": 3.6850197503613656e-06, "loss": 3.1408, "step": 33765 }, { "epoch": 0.3435262044270833, "grad_norm": 10.679207801818848, "learning_rate": 3.6846676317991227e-06, "loss": 3.4766, "step": 33770 }, { "epoch": 0.3435770670572917, "grad_norm": 11.644343376159668, "learning_rate": 3.6843154829274397e-06, "loss": 3.4585, "step": 33775 }, { "epoch": 0.3436279296875, "grad_norm": 12.160526275634766, "learning_rate": 3.6839633037553245e-06, "loss": 3.1787, "step": 33780 }, { "epoch": 0.3436787923177083, "grad_norm": 13.829774856567383, "learning_rate": 3.68361109429179e-06, "loss": 3.2787, "step": 33785 }, { "epoch": 0.3437296549479167, "grad_norm": 10.009960174560547, "learning_rate": 3.6832588545458454e-06, "loss": 3.3127, "step": 33790 }, { "epoch": 0.343780517578125, "grad_norm": 17.351730346679688, "learning_rate": 3.6829065845265043e-06, "loss": 3.5596, "step": 33795 }, { "epoch": 0.3438313802083333, "grad_norm": 14.011494636535645, "learning_rate": 3.682554284242778e-06, "loss": 3.2904, "step": 33800 }, { "epoch": 0.3438822428385417, "grad_norm": 7.7989959716796875, "learning_rate": 3.6822019537036814e-06, "loss": 3.4226, "step": 33805 }, { "epoch": 0.34393310546875, "grad_norm": 9.405271530151367, "learning_rate": 3.681849592918227e-06, "loss": 4.0221, "step": 33810 }, { "epoch": 0.3439839680989583, "grad_norm": 14.77958869934082, "learning_rate": 3.6814972018954313e-06, "loss": 3.4452, "step": 33815 }, { "epoch": 0.3440348307291667, "grad_norm": 12.135478019714355, "learning_rate": 3.68114478064431e-06, "loss": 3.0434, "step": 33820 }, { "epoch": 0.344085693359375, "grad_norm": 13.508832931518555, "learning_rate": 3.680792329173879e-06, "loss": 3.3546, "step": 33825 }, { "epoch": 0.3441365559895833, "grad_norm": 14.546008110046387, "learning_rate": 3.6804398474931563e-06, "loss": 2.841, "step": 33830 }, { "epoch": 0.3441874186197917, "grad_norm": 10.957643508911133, "learning_rate": 3.68008733561116e-06, "loss": 3.2414, "step": 33835 }, { "epoch": 0.34423828125, "grad_norm": 10.273690223693848, "learning_rate": 3.679734793536909e-06, "loss": 3.2178, "step": 33840 }, { "epoch": 0.3442891438802083, "grad_norm": 15.164803504943848, "learning_rate": 3.6793822212794227e-06, "loss": 3.2884, "step": 33845 }, { "epoch": 0.3443400065104167, "grad_norm": 11.058144569396973, "learning_rate": 3.6790296188477228e-06, "loss": 3.6806, "step": 33850 }, { "epoch": 0.344390869140625, "grad_norm": 7.803112506866455, "learning_rate": 3.6786769862508282e-06, "loss": 3.3499, "step": 33855 }, { "epoch": 0.3444417317708333, "grad_norm": 9.694926261901855, "learning_rate": 3.678324323497763e-06, "loss": 4.0269, "step": 33860 }, { "epoch": 0.3444925944010417, "grad_norm": 15.972514152526855, "learning_rate": 3.6779716305975487e-06, "loss": 3.8841, "step": 33865 }, { "epoch": 0.34454345703125, "grad_norm": 15.146234512329102, "learning_rate": 3.67761890755921e-06, "loss": 3.0953, "step": 33870 }, { "epoch": 0.3445943196614583, "grad_norm": 14.086612701416016, "learning_rate": 3.6772661543917703e-06, "loss": 3.8149, "step": 33875 }, { "epoch": 0.3446451822916667, "grad_norm": 10.04620361328125, "learning_rate": 3.6769133711042556e-06, "loss": 3.4665, "step": 33880 }, { "epoch": 0.344696044921875, "grad_norm": 10.64113998413086, "learning_rate": 3.6765605577056907e-06, "loss": 3.5521, "step": 33885 }, { "epoch": 0.3447469075520833, "grad_norm": 13.816940307617188, "learning_rate": 3.6762077142051034e-06, "loss": 3.4866, "step": 33890 }, { "epoch": 0.3447977701822917, "grad_norm": 8.723909378051758, "learning_rate": 3.6758548406115196e-06, "loss": 3.1867, "step": 33895 }, { "epoch": 0.3448486328125, "grad_norm": 10.090558052062988, "learning_rate": 3.6755019369339695e-06, "loss": 3.2891, "step": 33900 }, { "epoch": 0.3448994954427083, "grad_norm": 9.237555503845215, "learning_rate": 3.67514900318148e-06, "loss": 3.3041, "step": 33905 }, { "epoch": 0.3449503580729167, "grad_norm": 9.231812477111816, "learning_rate": 3.674796039363083e-06, "loss": 3.2584, "step": 33910 }, { "epoch": 0.345001220703125, "grad_norm": 11.18399715423584, "learning_rate": 3.674443045487807e-06, "loss": 3.2969, "step": 33915 }, { "epoch": 0.3450520833333333, "grad_norm": 13.937256813049316, "learning_rate": 3.6740900215646836e-06, "loss": 3.417, "step": 33920 }, { "epoch": 0.3451029459635417, "grad_norm": 13.297497749328613, "learning_rate": 3.6737369676027457e-06, "loss": 3.1705, "step": 33925 }, { "epoch": 0.34515380859375, "grad_norm": 9.702543258666992, "learning_rate": 3.6733838836110258e-06, "loss": 3.1908, "step": 33930 }, { "epoch": 0.3452046712239583, "grad_norm": 9.547173500061035, "learning_rate": 3.6730307695985575e-06, "loss": 3.3946, "step": 33935 }, { "epoch": 0.3452555338541667, "grad_norm": 11.548481941223145, "learning_rate": 3.6726776255743744e-06, "loss": 3.4355, "step": 33940 }, { "epoch": 0.345306396484375, "grad_norm": 16.107242584228516, "learning_rate": 3.6723244515475126e-06, "loss": 3.2942, "step": 33945 }, { "epoch": 0.3453572591145833, "grad_norm": 8.494128227233887, "learning_rate": 3.6719712475270075e-06, "loss": 3.4592, "step": 33950 }, { "epoch": 0.3454081217447917, "grad_norm": 13.408146858215332, "learning_rate": 3.6716180135218965e-06, "loss": 3.3184, "step": 33955 }, { "epoch": 0.345458984375, "grad_norm": 14.734374046325684, "learning_rate": 3.6712647495412153e-06, "loss": 3.2729, "step": 33960 }, { "epoch": 0.3455098470052083, "grad_norm": 18.635473251342773, "learning_rate": 3.670911455594004e-06, "loss": 3.3193, "step": 33965 }, { "epoch": 0.3455607096354167, "grad_norm": 15.248563766479492, "learning_rate": 3.6705581316893e-06, "loss": 3.3903, "step": 33970 }, { "epoch": 0.345611572265625, "grad_norm": 14.444631576538086, "learning_rate": 3.670204777836144e-06, "loss": 3.2452, "step": 33975 }, { "epoch": 0.3456624348958333, "grad_norm": 9.606155395507812, "learning_rate": 3.669851394043576e-06, "loss": 3.0582, "step": 33980 }, { "epoch": 0.3457132975260417, "grad_norm": 18.810192108154297, "learning_rate": 3.669497980320637e-06, "loss": 2.8645, "step": 33985 }, { "epoch": 0.34576416015625, "grad_norm": 8.963079452514648, "learning_rate": 3.66914453667637e-06, "loss": 3.222, "step": 33990 }, { "epoch": 0.3458150227864583, "grad_norm": 10.067646026611328, "learning_rate": 3.668791063119817e-06, "loss": 3.4052, "step": 33995 }, { "epoch": 0.3458658854166667, "grad_norm": 7.015271186828613, "learning_rate": 3.6684375596600215e-06, "loss": 3.2415, "step": 34000 }, { "epoch": 0.345916748046875, "grad_norm": 14.001973152160645, "learning_rate": 3.668084026306028e-06, "loss": 3.3947, "step": 34005 }, { "epoch": 0.3459676106770833, "grad_norm": 11.566573143005371, "learning_rate": 3.667730463066882e-06, "loss": 3.3749, "step": 34010 }, { "epoch": 0.3460184733072917, "grad_norm": 13.14124870300293, "learning_rate": 3.667376869951628e-06, "loss": 3.2533, "step": 34015 }, { "epoch": 0.3460693359375, "grad_norm": 9.880794525146484, "learning_rate": 3.667023246969314e-06, "loss": 3.0014, "step": 34020 }, { "epoch": 0.3461201985677083, "grad_norm": 13.688362121582031, "learning_rate": 3.6666695941289866e-06, "loss": 3.4169, "step": 34025 }, { "epoch": 0.3461710611979167, "grad_norm": 13.356725692749023, "learning_rate": 3.6663159114396946e-06, "loss": 3.3283, "step": 34030 }, { "epoch": 0.346221923828125, "grad_norm": 15.20695686340332, "learning_rate": 3.665962198910486e-06, "loss": 3.1513, "step": 34035 }, { "epoch": 0.3462727864583333, "grad_norm": 16.854326248168945, "learning_rate": 3.665608456550411e-06, "loss": 3.7669, "step": 34040 }, { "epoch": 0.3463236490885417, "grad_norm": 10.738151550292969, "learning_rate": 3.66525468436852e-06, "loss": 3.5412, "step": 34045 }, { "epoch": 0.34637451171875, "grad_norm": 30.94281578063965, "learning_rate": 3.664900882373864e-06, "loss": 3.4419, "step": 34050 }, { "epoch": 0.3464253743489583, "grad_norm": 10.15609359741211, "learning_rate": 3.6645470505754954e-06, "loss": 3.7038, "step": 34055 }, { "epoch": 0.3464762369791667, "grad_norm": 10.925048828125, "learning_rate": 3.6641931889824666e-06, "loss": 3.0991, "step": 34060 }, { "epoch": 0.346527099609375, "grad_norm": 15.451886177062988, "learning_rate": 3.6638392976038304e-06, "loss": 3.3793, "step": 34065 }, { "epoch": 0.3465779622395833, "grad_norm": 10.374808311462402, "learning_rate": 3.663485376448642e-06, "loss": 3.9683, "step": 34070 }, { "epoch": 0.3466288248697917, "grad_norm": 10.30376148223877, "learning_rate": 3.6631314255259563e-06, "loss": 3.2515, "step": 34075 }, { "epoch": 0.3466796875, "grad_norm": 10.25160026550293, "learning_rate": 3.6627774448448284e-06, "loss": 3.009, "step": 34080 }, { "epoch": 0.3467305501302083, "grad_norm": 11.332584381103516, "learning_rate": 3.662423434414315e-06, "loss": 3.1738, "step": 34085 }, { "epoch": 0.3467814127604167, "grad_norm": 10.428305625915527, "learning_rate": 3.662069394243474e-06, "loss": 3.2948, "step": 34090 }, { "epoch": 0.346832275390625, "grad_norm": 11.851875305175781, "learning_rate": 3.661715324341363e-06, "loss": 3.5217, "step": 34095 }, { "epoch": 0.3468831380208333, "grad_norm": 12.3597993850708, "learning_rate": 3.6613612247170407e-06, "loss": 3.642, "step": 34100 }, { "epoch": 0.3469340006510417, "grad_norm": 16.413686752319336, "learning_rate": 3.6610070953795662e-06, "loss": 3.5137, "step": 34105 }, { "epoch": 0.34698486328125, "grad_norm": 8.804444313049316, "learning_rate": 3.660652936338001e-06, "loss": 3.2862, "step": 34110 }, { "epoch": 0.3470357259114583, "grad_norm": 17.43308448791504, "learning_rate": 3.6602987476014045e-06, "loss": 3.6762, "step": 34115 }, { "epoch": 0.3470865885416667, "grad_norm": 15.472068786621094, "learning_rate": 3.6599445291788404e-06, "loss": 3.5438, "step": 34120 }, { "epoch": 0.347137451171875, "grad_norm": 16.01087188720703, "learning_rate": 3.659590281079371e-06, "loss": 3.5225, "step": 34125 }, { "epoch": 0.3471883138020833, "grad_norm": 14.543718338012695, "learning_rate": 3.659236003312058e-06, "loss": 3.8949, "step": 34130 }, { "epoch": 0.3472391764322917, "grad_norm": 13.618134498596191, "learning_rate": 3.6588816958859673e-06, "loss": 3.3487, "step": 34135 }, { "epoch": 0.3472900390625, "grad_norm": 13.667912483215332, "learning_rate": 3.6585273588101634e-06, "loss": 3.2609, "step": 34140 }, { "epoch": 0.3473409016927083, "grad_norm": 12.12238883972168, "learning_rate": 3.658172992093711e-06, "loss": 3.2387, "step": 34145 }, { "epoch": 0.3473917643229167, "grad_norm": 9.653349876403809, "learning_rate": 3.6578185957456767e-06, "loss": 3.2573, "step": 34150 }, { "epoch": 0.347442626953125, "grad_norm": 9.517043113708496, "learning_rate": 3.657464169775129e-06, "loss": 3.5676, "step": 34155 }, { "epoch": 0.3474934895833333, "grad_norm": 12.061582565307617, "learning_rate": 3.6571097141911342e-06, "loss": 3.4772, "step": 34160 }, { "epoch": 0.3475443522135417, "grad_norm": 8.128222465515137, "learning_rate": 3.6567552290027626e-06, "loss": 4.0439, "step": 34165 }, { "epoch": 0.34759521484375, "grad_norm": 82.14265441894531, "learning_rate": 3.6564007142190818e-06, "loss": 3.4596, "step": 34170 }, { "epoch": 0.3476460774739583, "grad_norm": 9.599939346313477, "learning_rate": 3.6560461698491627e-06, "loss": 3.1819, "step": 34175 }, { "epoch": 0.3476969401041667, "grad_norm": 10.134929656982422, "learning_rate": 3.6556915959020766e-06, "loss": 3.5317, "step": 34180 }, { "epoch": 0.347747802734375, "grad_norm": 12.05319881439209, "learning_rate": 3.6553369923868953e-06, "loss": 3.8354, "step": 34185 }, { "epoch": 0.3477986653645833, "grad_norm": 16.20045280456543, "learning_rate": 3.6549823593126905e-06, "loss": 3.3641, "step": 34190 }, { "epoch": 0.3478495279947917, "grad_norm": 13.195075035095215, "learning_rate": 3.654627696688536e-06, "loss": 3.6895, "step": 34195 }, { "epoch": 0.347900390625, "grad_norm": 10.847936630249023, "learning_rate": 3.654273004523505e-06, "loss": 3.9151, "step": 34200 }, { "epoch": 0.3479512532552083, "grad_norm": 11.85213851928711, "learning_rate": 3.6539182828266736e-06, "loss": 3.8494, "step": 34205 }, { "epoch": 0.3480021158854167, "grad_norm": 12.77187442779541, "learning_rate": 3.653563531607116e-06, "loss": 3.1948, "step": 34210 }, { "epoch": 0.348052978515625, "grad_norm": 11.966257095336914, "learning_rate": 3.653208750873909e-06, "loss": 3.4879, "step": 34215 }, { "epoch": 0.3481038411458333, "grad_norm": 10.672539710998535, "learning_rate": 3.652853940636129e-06, "loss": 3.4875, "step": 34220 }, { "epoch": 0.3481547037760417, "grad_norm": 11.228493690490723, "learning_rate": 3.6524991009028544e-06, "loss": 3.4064, "step": 34225 }, { "epoch": 0.34820556640625, "grad_norm": 10.102757453918457, "learning_rate": 3.6521442316831645e-06, "loss": 3.3489, "step": 34230 }, { "epoch": 0.3482564290364583, "grad_norm": 10.402213096618652, "learning_rate": 3.651789332986136e-06, "loss": 3.0321, "step": 34235 }, { "epoch": 0.3483072916666667, "grad_norm": 15.23025894165039, "learning_rate": 3.6514344048208518e-06, "loss": 3.3532, "step": 34240 }, { "epoch": 0.348358154296875, "grad_norm": 126.61017608642578, "learning_rate": 3.6510794471963903e-06, "loss": 3.3617, "step": 34245 }, { "epoch": 0.3484090169270833, "grad_norm": 9.169806480407715, "learning_rate": 3.650724460121834e-06, "loss": 3.3471, "step": 34250 }, { "epoch": 0.3484598795572917, "grad_norm": 16.55471420288086, "learning_rate": 3.650369443606265e-06, "loss": 3.2522, "step": 34255 }, { "epoch": 0.3485107421875, "grad_norm": 13.121299743652344, "learning_rate": 3.650014397658767e-06, "loss": 3.428, "step": 34260 }, { "epoch": 0.3485616048177083, "grad_norm": 11.240519523620605, "learning_rate": 3.649659322288423e-06, "loss": 3.2809, "step": 34265 }, { "epoch": 0.3486124674479167, "grad_norm": 13.24062728881836, "learning_rate": 3.6493042175043182e-06, "loss": 3.2971, "step": 34270 }, { "epoch": 0.348663330078125, "grad_norm": 9.990971565246582, "learning_rate": 3.6489490833155365e-06, "loss": 3.0945, "step": 34275 }, { "epoch": 0.3487141927083333, "grad_norm": 17.911039352416992, "learning_rate": 3.6485939197311658e-06, "loss": 3.2916, "step": 34280 }, { "epoch": 0.3487650553385417, "grad_norm": 8.286005020141602, "learning_rate": 3.648238726760291e-06, "loss": 3.2104, "step": 34285 }, { "epoch": 0.34881591796875, "grad_norm": 11.795724868774414, "learning_rate": 3.6478835044120016e-06, "loss": 3.0933, "step": 34290 }, { "epoch": 0.3488667805989583, "grad_norm": 11.091285705566406, "learning_rate": 3.6475282526953838e-06, "loss": 3.4352, "step": 34295 }, { "epoch": 0.3489176432291667, "grad_norm": 7.997657775878906, "learning_rate": 3.6471729716195283e-06, "loss": 3.3882, "step": 34300 }, { "epoch": 0.348968505859375, "grad_norm": 7.602253437042236, "learning_rate": 3.6468176611935237e-06, "loss": 3.1502, "step": 34305 }, { "epoch": 0.3490193684895833, "grad_norm": 9.574580192565918, "learning_rate": 3.6464623214264618e-06, "loss": 3.1318, "step": 34310 }, { "epoch": 0.3490702311197917, "grad_norm": 14.666267395019531, "learning_rate": 3.6461069523274324e-06, "loss": 3.381, "step": 34315 }, { "epoch": 0.34912109375, "grad_norm": 12.042957305908203, "learning_rate": 3.6457515539055284e-06, "loss": 3.4057, "step": 34320 }, { "epoch": 0.3491719563802083, "grad_norm": 12.966578483581543, "learning_rate": 3.6453961261698424e-06, "loss": 3.361, "step": 34325 }, { "epoch": 0.3492228190104167, "grad_norm": 12.08524227142334, "learning_rate": 3.645040669129468e-06, "loss": 3.4151, "step": 34330 }, { "epoch": 0.349273681640625, "grad_norm": 9.36681079864502, "learning_rate": 3.6446851827935e-06, "loss": 3.6036, "step": 34335 }, { "epoch": 0.3493245442708333, "grad_norm": 11.86538028717041, "learning_rate": 3.6443296671710326e-06, "loss": 3.5037, "step": 34340 }, { "epoch": 0.3493754069010417, "grad_norm": 8.338165283203125, "learning_rate": 3.643974122271162e-06, "loss": 3.2591, "step": 34345 }, { "epoch": 0.34942626953125, "grad_norm": 12.40888786315918, "learning_rate": 3.643618548102984e-06, "loss": 3.3841, "step": 34350 }, { "epoch": 0.3494771321614583, "grad_norm": 15.538311958312988, "learning_rate": 3.643262944675598e-06, "loss": 3.423, "step": 34355 }, { "epoch": 0.3495279947916667, "grad_norm": 13.414130210876465, "learning_rate": 3.6429073119980994e-06, "loss": 3.4831, "step": 34360 }, { "epoch": 0.349578857421875, "grad_norm": 10.631868362426758, "learning_rate": 3.6425516500795883e-06, "loss": 3.6589, "step": 34365 }, { "epoch": 0.3496297200520833, "grad_norm": 10.415438652038574, "learning_rate": 3.642195958929164e-06, "loss": 3.522, "step": 34370 }, { "epoch": 0.3496805826822917, "grad_norm": 11.739583015441895, "learning_rate": 3.641840238555927e-06, "loss": 3.6878, "step": 34375 }, { "epoch": 0.3497314453125, "grad_norm": 7.453786849975586, "learning_rate": 3.6414844889689784e-06, "loss": 3.1575, "step": 34380 }, { "epoch": 0.3497823079427083, "grad_norm": 13.174354553222656, "learning_rate": 3.6411287101774194e-06, "loss": 3.3389, "step": 34385 }, { "epoch": 0.3498331705729167, "grad_norm": 15.388251304626465, "learning_rate": 3.640772902190352e-06, "loss": 3.2237, "step": 34390 }, { "epoch": 0.349884033203125, "grad_norm": 13.908624649047852, "learning_rate": 3.6404170650168817e-06, "loss": 3.4687, "step": 34395 }, { "epoch": 0.3499348958333333, "grad_norm": 13.464629173278809, "learning_rate": 3.6400611986661107e-06, "loss": 3.3808, "step": 34400 }, { "epoch": 0.3499857584635417, "grad_norm": 15.773457527160645, "learning_rate": 3.6397053031471435e-06, "loss": 3.3681, "step": 34405 }, { "epoch": 0.35003662109375, "grad_norm": 10.64908218383789, "learning_rate": 3.639349378469087e-06, "loss": 3.5906, "step": 34410 }, { "epoch": 0.3500874837239583, "grad_norm": 10.5111722946167, "learning_rate": 3.6389934246410456e-06, "loss": 3.145, "step": 34415 }, { "epoch": 0.3501383463541667, "grad_norm": 11.134115219116211, "learning_rate": 3.6386374416721286e-06, "loss": 2.9944, "step": 34420 }, { "epoch": 0.350189208984375, "grad_norm": 10.555818557739258, "learning_rate": 3.638281429571442e-06, "loss": 3.4728, "step": 34425 }, { "epoch": 0.3502400716145833, "grad_norm": 16.187204360961914, "learning_rate": 3.637925388348095e-06, "loss": 3.3397, "step": 34430 }, { "epoch": 0.3502909342447917, "grad_norm": 8.328694343566895, "learning_rate": 3.6375693180111965e-06, "loss": 3.543, "step": 34435 }, { "epoch": 0.350341796875, "grad_norm": 10.651793479919434, "learning_rate": 3.637213218569857e-06, "loss": 3.9065, "step": 34440 }, { "epoch": 0.3503926595052083, "grad_norm": 14.604371070861816, "learning_rate": 3.6368570900331866e-06, "loss": 3.3349, "step": 34445 }, { "epoch": 0.3504435221354167, "grad_norm": 8.624008178710938, "learning_rate": 3.6365009324102975e-06, "loss": 3.1049, "step": 34450 }, { "epoch": 0.350494384765625, "grad_norm": 8.160076141357422, "learning_rate": 3.6361447457103004e-06, "loss": 3.5228, "step": 34455 }, { "epoch": 0.3505452473958333, "grad_norm": 7.852463245391846, "learning_rate": 3.63578852994231e-06, "loss": 3.3375, "step": 34460 }, { "epoch": 0.3505961100260417, "grad_norm": 12.908072471618652, "learning_rate": 3.6354322851154397e-06, "loss": 3.2781, "step": 34465 }, { "epoch": 0.35064697265625, "grad_norm": 13.951038360595703, "learning_rate": 3.6350760112388024e-06, "loss": 3.3372, "step": 34470 }, { "epoch": 0.3506978352864583, "grad_norm": 11.072229385375977, "learning_rate": 3.6347197083215147e-06, "loss": 3.3124, "step": 34475 }, { "epoch": 0.3507486979166667, "grad_norm": 9.052788734436035, "learning_rate": 3.634363376372693e-06, "loss": 3.3465, "step": 34480 }, { "epoch": 0.350799560546875, "grad_norm": 7.021347999572754, "learning_rate": 3.634007015401452e-06, "loss": 3.2082, "step": 34485 }, { "epoch": 0.3508504231770833, "grad_norm": 13.342665672302246, "learning_rate": 3.6336506254169113e-06, "loss": 3.3017, "step": 34490 }, { "epoch": 0.3509012858072917, "grad_norm": 11.334712028503418, "learning_rate": 3.6332942064281874e-06, "loss": 3.3756, "step": 34495 }, { "epoch": 0.3509521484375, "grad_norm": 9.231757164001465, "learning_rate": 3.6329377584444004e-06, "loss": 3.0124, "step": 34500 }, { "epoch": 0.3510030110677083, "grad_norm": 11.232418060302734, "learning_rate": 3.6325812814746687e-06, "loss": 3.6996, "step": 34505 }, { "epoch": 0.3510538736979167, "grad_norm": 14.518206596374512, "learning_rate": 3.6322247755281144e-06, "loss": 3.1982, "step": 34510 }, { "epoch": 0.351104736328125, "grad_norm": 15.891706466674805, "learning_rate": 3.6318682406138565e-06, "loss": 3.0786, "step": 34515 }, { "epoch": 0.3511555989583333, "grad_norm": 8.141637802124023, "learning_rate": 3.631511676741018e-06, "loss": 3.4413, "step": 34520 }, { "epoch": 0.3512064615885417, "grad_norm": 13.049849510192871, "learning_rate": 3.6311550839187227e-06, "loss": 2.6128, "step": 34525 }, { "epoch": 0.35125732421875, "grad_norm": 10.91442584991455, "learning_rate": 3.6307984621560914e-06, "loss": 3.0481, "step": 34530 }, { "epoch": 0.3513081868489583, "grad_norm": 11.624651908874512, "learning_rate": 3.6304418114622498e-06, "loss": 3.4811, "step": 34535 }, { "epoch": 0.3513590494791667, "grad_norm": 11.577051162719727, "learning_rate": 3.6300851318463224e-06, "loss": 3.4565, "step": 34540 }, { "epoch": 0.351409912109375, "grad_norm": 13.190190315246582, "learning_rate": 3.629728423317435e-06, "loss": 3.0826, "step": 34545 }, { "epoch": 0.3514607747395833, "grad_norm": 10.728463172912598, "learning_rate": 3.629371685884714e-06, "loss": 3.6738, "step": 34550 }, { "epoch": 0.3515116373697917, "grad_norm": 9.551852226257324, "learning_rate": 3.6290149195572853e-06, "loss": 3.2391, "step": 34555 }, { "epoch": 0.3515625, "grad_norm": 10.663229942321777, "learning_rate": 3.6286581243442776e-06, "loss": 3.7829, "step": 34560 }, { "epoch": 0.3516133626302083, "grad_norm": 12.385886192321777, "learning_rate": 3.6283013002548202e-06, "loss": 3.2569, "step": 34565 }, { "epoch": 0.3516642252604167, "grad_norm": 9.58331298828125, "learning_rate": 3.62794444729804e-06, "loss": 3.1043, "step": 34570 }, { "epoch": 0.351715087890625, "grad_norm": 10.994060516357422, "learning_rate": 3.62758756548307e-06, "loss": 3.6247, "step": 34575 }, { "epoch": 0.3517659505208333, "grad_norm": 9.025533676147461, "learning_rate": 3.6272306548190383e-06, "loss": 3.1066, "step": 34580 }, { "epoch": 0.3518168131510417, "grad_norm": 13.23122501373291, "learning_rate": 3.626873715315078e-06, "loss": 3.2877, "step": 34585 }, { "epoch": 0.35186767578125, "grad_norm": 14.55508804321289, "learning_rate": 3.6265167469803207e-06, "loss": 3.6446, "step": 34590 }, { "epoch": 0.3519185384114583, "grad_norm": 14.5059232711792, "learning_rate": 3.6261597498238993e-06, "loss": 3.2737, "step": 34595 }, { "epoch": 0.3519694010416667, "grad_norm": 11.445781707763672, "learning_rate": 3.6258027238549475e-06, "loss": 3.0839, "step": 34600 }, { "epoch": 0.352020263671875, "grad_norm": 12.936619758605957, "learning_rate": 3.6254456690826005e-06, "loss": 3.4727, "step": 34605 }, { "epoch": 0.3520711263020833, "grad_norm": 14.624140739440918, "learning_rate": 3.6250885855159924e-06, "loss": 3.5822, "step": 34610 }, { "epoch": 0.3521219889322917, "grad_norm": 11.065165519714355, "learning_rate": 3.624731473164259e-06, "loss": 3.2801, "step": 34615 }, { "epoch": 0.3521728515625, "grad_norm": 13.35992431640625, "learning_rate": 3.624374332036539e-06, "loss": 3.3101, "step": 34620 }, { "epoch": 0.3522237141927083, "grad_norm": 12.415696144104004, "learning_rate": 3.6240171621419663e-06, "loss": 3.4841, "step": 34625 }, { "epoch": 0.3522745768229167, "grad_norm": 11.691892623901367, "learning_rate": 3.6236599634896823e-06, "loss": 3.3766, "step": 34630 }, { "epoch": 0.352325439453125, "grad_norm": 10.133980751037598, "learning_rate": 3.6233027360888236e-06, "loss": 3.3009, "step": 34635 }, { "epoch": 0.3523763020833333, "grad_norm": 11.231637001037598, "learning_rate": 3.6229454799485316e-06, "loss": 3.2301, "step": 34640 }, { "epoch": 0.3524271647135417, "grad_norm": 9.963400840759277, "learning_rate": 3.6225881950779447e-06, "loss": 3.2479, "step": 34645 }, { "epoch": 0.35247802734375, "grad_norm": 14.1806640625, "learning_rate": 3.622230881486205e-06, "loss": 3.3654, "step": 34650 }, { "epoch": 0.3525288899739583, "grad_norm": 20.314727783203125, "learning_rate": 3.6218735391824545e-06, "loss": 3.3579, "step": 34655 }, { "epoch": 0.3525797526041667, "grad_norm": 14.851888656616211, "learning_rate": 3.621516168175836e-06, "loss": 3.0346, "step": 34660 }, { "epoch": 0.352630615234375, "grad_norm": 11.343681335449219, "learning_rate": 3.6211587684754914e-06, "loss": 3.3709, "step": 34665 }, { "epoch": 0.3526814778645833, "grad_norm": 13.536211967468262, "learning_rate": 3.6208013400905664e-06, "loss": 3.5392, "step": 34670 }, { "epoch": 0.3527323404947917, "grad_norm": 10.644675254821777, "learning_rate": 3.620443883030203e-06, "loss": 3.5664, "step": 34675 }, { "epoch": 0.352783203125, "grad_norm": 13.698735237121582, "learning_rate": 3.62008639730355e-06, "loss": 3.1855, "step": 34680 }, { "epoch": 0.3528340657552083, "grad_norm": 11.285604476928711, "learning_rate": 3.6197288829197514e-06, "loss": 3.2029, "step": 34685 }, { "epoch": 0.3528849283854167, "grad_norm": 10.606297492980957, "learning_rate": 3.6193713398879547e-06, "loss": 3.5998, "step": 34690 }, { "epoch": 0.352935791015625, "grad_norm": 8.037817001342773, "learning_rate": 3.6190137682173075e-06, "loss": 3.5124, "step": 34695 }, { "epoch": 0.3529866536458333, "grad_norm": 16.26264762878418, "learning_rate": 3.6186561679169586e-06, "loss": 3.193, "step": 34700 }, { "epoch": 0.3530375162760417, "grad_norm": 8.826468467712402, "learning_rate": 3.6182985389960563e-06, "loss": 3.5367, "step": 34705 }, { "epoch": 0.35308837890625, "grad_norm": 12.868377685546875, "learning_rate": 3.617940881463751e-06, "loss": 3.3782, "step": 34710 }, { "epoch": 0.3531392415364583, "grad_norm": 9.595586776733398, "learning_rate": 3.617583195329194e-06, "loss": 3.557, "step": 34715 }, { "epoch": 0.3531901041666667, "grad_norm": 8.487686157226562, "learning_rate": 3.617225480601535e-06, "loss": 3.2387, "step": 34720 }, { "epoch": 0.353240966796875, "grad_norm": 15.628540992736816, "learning_rate": 3.6168677372899273e-06, "loss": 3.1546, "step": 34725 }, { "epoch": 0.3532918294270833, "grad_norm": 13.827096939086914, "learning_rate": 3.6165099654035227e-06, "loss": 3.466, "step": 34730 }, { "epoch": 0.3533426920572917, "grad_norm": 10.873969078063965, "learning_rate": 3.616152164951477e-06, "loss": 3.5061, "step": 34735 }, { "epoch": 0.3533935546875, "grad_norm": 14.796255111694336, "learning_rate": 3.615794335942941e-06, "loss": 3.7256, "step": 34740 }, { "epoch": 0.3534444173177083, "grad_norm": 7.992857933044434, "learning_rate": 3.615436478387072e-06, "loss": 3.1234, "step": 34745 }, { "epoch": 0.3534952799479167, "grad_norm": 8.480143547058105, "learning_rate": 3.6150785922930253e-06, "loss": 3.0514, "step": 34750 }, { "epoch": 0.353546142578125, "grad_norm": 15.358332633972168, "learning_rate": 3.6147206776699567e-06, "loss": 3.5486, "step": 34755 }, { "epoch": 0.3535970052083333, "grad_norm": 12.503549575805664, "learning_rate": 3.6143627345270237e-06, "loss": 3.1947, "step": 34760 }, { "epoch": 0.3536478678385417, "grad_norm": 14.133999824523926, "learning_rate": 3.6140047628733843e-06, "loss": 3.4198, "step": 34765 }, { "epoch": 0.35369873046875, "grad_norm": 13.71657657623291, "learning_rate": 3.613646762718197e-06, "loss": 3.4467, "step": 34770 }, { "epoch": 0.3537495930989583, "grad_norm": 12.718137741088867, "learning_rate": 3.6132887340706224e-06, "loss": 3.5934, "step": 34775 }, { "epoch": 0.3538004557291667, "grad_norm": 11.0115327835083, "learning_rate": 3.6129306769398174e-06, "loss": 3.4235, "step": 34780 }, { "epoch": 0.353851318359375, "grad_norm": 8.977180480957031, "learning_rate": 3.612572591334947e-06, "loss": 3.4629, "step": 34785 }, { "epoch": 0.3539021809895833, "grad_norm": 11.48985481262207, "learning_rate": 3.612214477265169e-06, "loss": 3.433, "step": 34790 }, { "epoch": 0.3539530436197917, "grad_norm": 9.702241897583008, "learning_rate": 3.611856334739648e-06, "loss": 3.5835, "step": 34795 }, { "epoch": 0.35400390625, "grad_norm": 15.769079208374023, "learning_rate": 3.6114981637675455e-06, "loss": 3.4196, "step": 34800 }, { "epoch": 0.3540547688802083, "grad_norm": 8.449577331542969, "learning_rate": 3.6111399643580266e-06, "loss": 3.2795, "step": 34805 }, { "epoch": 0.3541056315104167, "grad_norm": 14.473113059997559, "learning_rate": 3.6107817365202545e-06, "loss": 3.4543, "step": 34810 }, { "epoch": 0.354156494140625, "grad_norm": 13.105792045593262, "learning_rate": 3.6104234802633953e-06, "loss": 3.0334, "step": 34815 }, { "epoch": 0.3542073567708333, "grad_norm": 13.570204734802246, "learning_rate": 3.610065195596615e-06, "loss": 3.5513, "step": 34820 }, { "epoch": 0.3542582194010417, "grad_norm": 15.032855987548828, "learning_rate": 3.6097068825290793e-06, "loss": 3.3926, "step": 34825 }, { "epoch": 0.35430908203125, "grad_norm": 13.967044830322266, "learning_rate": 3.6093485410699557e-06, "loss": 3.1879, "step": 34830 }, { "epoch": 0.3543599446614583, "grad_norm": 13.504987716674805, "learning_rate": 3.608990171228413e-06, "loss": 3.6169, "step": 34835 }, { "epoch": 0.3544108072916667, "grad_norm": 14.612581253051758, "learning_rate": 3.60863177301362e-06, "loss": 3.257, "step": 34840 }, { "epoch": 0.354461669921875, "grad_norm": 14.804563522338867, "learning_rate": 3.608273346434745e-06, "loss": 3.4849, "step": 34845 }, { "epoch": 0.3545125325520833, "grad_norm": 13.429917335510254, "learning_rate": 3.60791489150096e-06, "loss": 3.4647, "step": 34850 }, { "epoch": 0.3545633951822917, "grad_norm": 11.944478034973145, "learning_rate": 3.607556408221435e-06, "loss": 3.3186, "step": 34855 }, { "epoch": 0.3546142578125, "grad_norm": 8.366698265075684, "learning_rate": 3.607197896605341e-06, "loss": 3.1635, "step": 34860 }, { "epoch": 0.3546651204427083, "grad_norm": 11.39499568939209, "learning_rate": 3.606839356661852e-06, "loss": 3.3217, "step": 34865 }, { "epoch": 0.3547159830729167, "grad_norm": 13.886192321777344, "learning_rate": 3.6064807884001404e-06, "loss": 3.5587, "step": 34870 }, { "epoch": 0.354766845703125, "grad_norm": 16.516136169433594, "learning_rate": 3.6061221918293797e-06, "loss": 3.5913, "step": 34875 }, { "epoch": 0.3548177083333333, "grad_norm": 10.966737747192383, "learning_rate": 3.6057635669587448e-06, "loss": 3.3713, "step": 34880 }, { "epoch": 0.3548685709635417, "grad_norm": 10.386698722839355, "learning_rate": 3.6054049137974114e-06, "loss": 3.091, "step": 34885 }, { "epoch": 0.35491943359375, "grad_norm": 8.499550819396973, "learning_rate": 3.605046232354556e-06, "loss": 3.4826, "step": 34890 }, { "epoch": 0.3549702962239583, "grad_norm": 11.241985321044922, "learning_rate": 3.604687522639353e-06, "loss": 3.2215, "step": 34895 }, { "epoch": 0.3550211588541667, "grad_norm": 12.003620147705078, "learning_rate": 3.604328784660983e-06, "loss": 3.7037, "step": 34900 }, { "epoch": 0.355072021484375, "grad_norm": 10.417730331420898, "learning_rate": 3.6039700184286224e-06, "loss": 3.714, "step": 34905 }, { "epoch": 0.3551228841145833, "grad_norm": 12.439254760742188, "learning_rate": 3.60361122395145e-06, "loss": 3.3445, "step": 34910 }, { "epoch": 0.3551737467447917, "grad_norm": 11.346061706542969, "learning_rate": 3.603252401238647e-06, "loss": 3.537, "step": 34915 }, { "epoch": 0.355224609375, "grad_norm": 12.715902328491211, "learning_rate": 3.6028935502993922e-06, "loss": 3.2552, "step": 34920 }, { "epoch": 0.3552754720052083, "grad_norm": 16.785329818725586, "learning_rate": 3.6025346711428677e-06, "loss": 3.4534, "step": 34925 }, { "epoch": 0.3553263346354167, "grad_norm": 9.780241012573242, "learning_rate": 3.6021757637782546e-06, "loss": 3.5138, "step": 34930 }, { "epoch": 0.355377197265625, "grad_norm": 12.479494094848633, "learning_rate": 3.601816828214736e-06, "loss": 3.2663, "step": 34935 }, { "epoch": 0.3554280598958333, "grad_norm": 13.093775749206543, "learning_rate": 3.601457864461495e-06, "loss": 3.3687, "step": 34940 }, { "epoch": 0.3554789225260417, "grad_norm": 15.283235549926758, "learning_rate": 3.6010988725277154e-06, "loss": 3.5688, "step": 34945 }, { "epoch": 0.35552978515625, "grad_norm": 17.717275619506836, "learning_rate": 3.6007398524225824e-06, "loss": 3.4344, "step": 34950 }, { "epoch": 0.3555806477864583, "grad_norm": 13.317954063415527, "learning_rate": 3.600380804155281e-06, "loss": 3.5325, "step": 34955 }, { "epoch": 0.3556315104166667, "grad_norm": 13.02704906463623, "learning_rate": 3.6000217277349976e-06, "loss": 3.2244, "step": 34960 }, { "epoch": 0.355682373046875, "grad_norm": 8.38286304473877, "learning_rate": 3.599662623170919e-06, "loss": 3.4214, "step": 34965 }, { "epoch": 0.3557332356770833, "grad_norm": 9.83238697052002, "learning_rate": 3.5993034904722333e-06, "loss": 3.072, "step": 34970 }, { "epoch": 0.3557840983072917, "grad_norm": 12.883691787719727, "learning_rate": 3.598944329648128e-06, "loss": 3.3078, "step": 34975 }, { "epoch": 0.3558349609375, "grad_norm": 9.63593578338623, "learning_rate": 3.598585140707792e-06, "loss": 3.6575, "step": 34980 }, { "epoch": 0.3558858235677083, "grad_norm": 14.492904663085938, "learning_rate": 3.598225923660416e-06, "loss": 3.0748, "step": 34985 }, { "epoch": 0.3559366861979167, "grad_norm": 13.749297142028809, "learning_rate": 3.5978666785151904e-06, "loss": 2.9942, "step": 34990 }, { "epoch": 0.355987548828125, "grad_norm": 14.713109970092773, "learning_rate": 3.5975074052813064e-06, "loss": 3.1924, "step": 34995 }, { "epoch": 0.3560384114583333, "grad_norm": 10.480639457702637, "learning_rate": 3.597148103967954e-06, "loss": 3.5809, "step": 35000 }, { "epoch": 0.3560892740885417, "grad_norm": 14.827054977416992, "learning_rate": 3.5967887745843284e-06, "loss": 5.4383, "step": 35005 }, { "epoch": 0.35614013671875, "grad_norm": 15.557093620300293, "learning_rate": 3.5964294171396227e-06, "loss": 3.301, "step": 35010 }, { "epoch": 0.3561909993489583, "grad_norm": 8.71878433227539, "learning_rate": 3.5960700316430284e-06, "loss": 3.3072, "step": 35015 }, { "epoch": 0.3562418619791667, "grad_norm": 8.105778694152832, "learning_rate": 3.5957106181037437e-06, "loss": 2.8905, "step": 35020 }, { "epoch": 0.356292724609375, "grad_norm": 13.239446640014648, "learning_rate": 3.595351176530961e-06, "loss": 3.4834, "step": 35025 }, { "epoch": 0.3563435872395833, "grad_norm": 12.645708084106445, "learning_rate": 3.59499170693388e-06, "loss": 3.3948, "step": 35030 }, { "epoch": 0.3563944498697917, "grad_norm": 11.654731750488281, "learning_rate": 3.5946322093216943e-06, "loss": 3.103, "step": 35035 }, { "epoch": 0.3564453125, "grad_norm": 10.604202270507812, "learning_rate": 3.594272683703604e-06, "loss": 3.252, "step": 35040 }, { "epoch": 0.3564961751302083, "grad_norm": 10.206571578979492, "learning_rate": 3.593913130088806e-06, "loss": 3.9125, "step": 35045 }, { "epoch": 0.3565470377604167, "grad_norm": 15.597684860229492, "learning_rate": 3.5935535484864996e-06, "loss": 3.2234, "step": 35050 }, { "epoch": 0.356597900390625, "grad_norm": 18.20827865600586, "learning_rate": 3.5931939389058852e-06, "loss": 3.2449, "step": 35055 }, { "epoch": 0.3566487630208333, "grad_norm": 14.010954856872559, "learning_rate": 3.5928343013561633e-06, "loss": 3.3907, "step": 35060 }, { "epoch": 0.3566996256510417, "grad_norm": 11.052334785461426, "learning_rate": 3.5924746358465335e-06, "loss": 3.5313, "step": 35065 }, { "epoch": 0.35675048828125, "grad_norm": 12.661726951599121, "learning_rate": 3.5921149423862005e-06, "loss": 3.582, "step": 35070 }, { "epoch": 0.3568013509114583, "grad_norm": 12.107625007629395, "learning_rate": 3.591755220984365e-06, "loss": 3.2842, "step": 35075 }, { "epoch": 0.3568522135416667, "grad_norm": 12.133563041687012, "learning_rate": 3.591395471650231e-06, "loss": 2.9995, "step": 35080 }, { "epoch": 0.356903076171875, "grad_norm": 13.665731430053711, "learning_rate": 3.5910356943930026e-06, "loss": 3.2549, "step": 35085 }, { "epoch": 0.3569539388020833, "grad_norm": 11.094895362854004, "learning_rate": 3.5906758892218844e-06, "loss": 3.2817, "step": 35090 }, { "epoch": 0.3570048014322917, "grad_norm": 11.758183479309082, "learning_rate": 3.5903160561460822e-06, "loss": 3.3395, "step": 35095 }, { "epoch": 0.3570556640625, "grad_norm": 11.913673400878906, "learning_rate": 3.5899561951748026e-06, "loss": 2.9786, "step": 35100 }, { "epoch": 0.3571065266927083, "grad_norm": 10.955697059631348, "learning_rate": 3.5895963063172514e-06, "loss": 3.8209, "step": 35105 }, { "epoch": 0.3571573893229167, "grad_norm": 12.346541404724121, "learning_rate": 3.5892363895826367e-06, "loss": 3.82, "step": 35110 }, { "epoch": 0.357208251953125, "grad_norm": 10.143900871276855, "learning_rate": 3.5888764449801676e-06, "loss": 3.3187, "step": 35115 }, { "epoch": 0.3572591145833333, "grad_norm": 10.030485153198242, "learning_rate": 3.588516472519053e-06, "loss": 3.4764, "step": 35120 }, { "epoch": 0.3573099772135417, "grad_norm": 14.733952522277832, "learning_rate": 3.5881564722085025e-06, "loss": 3.3969, "step": 35125 }, { "epoch": 0.35736083984375, "grad_norm": 14.487006187438965, "learning_rate": 3.5877964440577257e-06, "loss": 3.3901, "step": 35130 }, { "epoch": 0.3574117024739583, "grad_norm": 14.437053680419922, "learning_rate": 3.5874363880759357e-06, "loss": 3.0773, "step": 35135 }, { "epoch": 0.3574625651041667, "grad_norm": 8.986260414123535, "learning_rate": 3.5870763042723435e-06, "loss": 3.7938, "step": 35140 }, { "epoch": 0.357513427734375, "grad_norm": 13.2808256149292, "learning_rate": 3.586716192656161e-06, "loss": 3.2321, "step": 35145 }, { "epoch": 0.3575642903645833, "grad_norm": 11.628425598144531, "learning_rate": 3.586356053236603e-06, "loss": 3.4993, "step": 35150 }, { "epoch": 0.3576151529947917, "grad_norm": 13.323860168457031, "learning_rate": 3.5859958860228815e-06, "loss": 3.4119, "step": 35155 }, { "epoch": 0.357666015625, "grad_norm": 8.600274085998535, "learning_rate": 3.585635691024214e-06, "loss": 3.1944, "step": 35160 }, { "epoch": 0.3577168782552083, "grad_norm": 12.005955696105957, "learning_rate": 3.5852754682498147e-06, "loss": 3.3114, "step": 35165 }, { "epoch": 0.3577677408854167, "grad_norm": 11.952313423156738, "learning_rate": 3.5849152177088986e-06, "loss": 3.3038, "step": 35170 }, { "epoch": 0.357818603515625, "grad_norm": 15.262046813964844, "learning_rate": 3.5845549394106853e-06, "loss": 3.3386, "step": 35175 }, { "epoch": 0.3578694661458333, "grad_norm": 9.86393928527832, "learning_rate": 3.5841946333643896e-06, "loss": 3.655, "step": 35180 }, { "epoch": 0.3579203287760417, "grad_norm": 10.560075759887695, "learning_rate": 3.5838342995792324e-06, "loss": 3.1984, "step": 35185 }, { "epoch": 0.35797119140625, "grad_norm": 10.773649215698242, "learning_rate": 3.583473938064431e-06, "loss": 3.0001, "step": 35190 }, { "epoch": 0.3580220540364583, "grad_norm": 18.054447174072266, "learning_rate": 3.5831135488292055e-06, "loss": 3.3267, "step": 35195 }, { "epoch": 0.3580729166666667, "grad_norm": 14.0558500289917, "learning_rate": 3.582753131882777e-06, "loss": 3.0825, "step": 35200 }, { "epoch": 0.358123779296875, "grad_norm": 13.016155242919922, "learning_rate": 3.582392687234366e-06, "loss": 3.5745, "step": 35205 }, { "epoch": 0.3581746419270833, "grad_norm": 12.913225173950195, "learning_rate": 3.5820322148931945e-06, "loss": 3.4292, "step": 35210 }, { "epoch": 0.3582255045572917, "grad_norm": 9.55213737487793, "learning_rate": 3.5816717148684854e-06, "loss": 3.482, "step": 35215 }, { "epoch": 0.3582763671875, "grad_norm": 16.18498992919922, "learning_rate": 3.5813111871694616e-06, "loss": 3.4156, "step": 35220 }, { "epoch": 0.3583272298177083, "grad_norm": 9.790040969848633, "learning_rate": 3.5809506318053478e-06, "loss": 3.1728, "step": 35225 }, { "epoch": 0.3583780924479167, "grad_norm": 16.061195373535156, "learning_rate": 3.5805900487853685e-06, "loss": 3.1453, "step": 35230 }, { "epoch": 0.358428955078125, "grad_norm": 14.159446716308594, "learning_rate": 3.580229438118748e-06, "loss": 3.278, "step": 35235 }, { "epoch": 0.3584798177083333, "grad_norm": 14.750076293945312, "learning_rate": 3.5798687998147146e-06, "loss": 3.1484, "step": 35240 }, { "epoch": 0.3585306803385417, "grad_norm": 14.17802906036377, "learning_rate": 3.579508133882493e-06, "loss": 3.1719, "step": 35245 }, { "epoch": 0.35858154296875, "grad_norm": 13.584555625915527, "learning_rate": 3.579147440331312e-06, "loss": 3.2081, "step": 35250 }, { "epoch": 0.3586324055989583, "grad_norm": 10.397396087646484, "learning_rate": 3.5787867191703995e-06, "loss": 3.1229, "step": 35255 }, { "epoch": 0.3586832682291667, "grad_norm": 13.019648551940918, "learning_rate": 3.5784259704089842e-06, "loss": 3.3473, "step": 35260 }, { "epoch": 0.358734130859375, "grad_norm": 8.410650253295898, "learning_rate": 3.5780651940562966e-06, "loss": 3.0992, "step": 35265 }, { "epoch": 0.3587849934895833, "grad_norm": 10.615445137023926, "learning_rate": 3.577704390121566e-06, "loss": 3.5595, "step": 35270 }, { "epoch": 0.3588358561197917, "grad_norm": 9.943309783935547, "learning_rate": 3.577343558614024e-06, "loss": 3.3618, "step": 35275 }, { "epoch": 0.35888671875, "grad_norm": 14.800674438476562, "learning_rate": 3.5769826995429035e-06, "loss": 3.169, "step": 35280 }, { "epoch": 0.3589375813802083, "grad_norm": 10.84054183959961, "learning_rate": 3.5766218129174346e-06, "loss": 3.1432, "step": 35285 }, { "epoch": 0.3589884440104167, "grad_norm": 13.17013931274414, "learning_rate": 3.576260898746853e-06, "loss": 3.1751, "step": 35290 }, { "epoch": 0.359039306640625, "grad_norm": 13.68505573272705, "learning_rate": 3.57589995704039e-06, "loss": 3.4098, "step": 35295 }, { "epoch": 0.3590901692708333, "grad_norm": 9.305538177490234, "learning_rate": 3.575538987807282e-06, "loss": 3.655, "step": 35300 }, { "epoch": 0.3591410319010417, "grad_norm": 11.81442928314209, "learning_rate": 3.5751779910567647e-06, "loss": 3.0263, "step": 35305 }, { "epoch": 0.35919189453125, "grad_norm": 12.021374702453613, "learning_rate": 3.5748169667980727e-06, "loss": 3.1293, "step": 35310 }, { "epoch": 0.3592427571614583, "grad_norm": 13.424615859985352, "learning_rate": 3.5744559150404434e-06, "loss": 3.644, "step": 35315 }, { "epoch": 0.3592936197916667, "grad_norm": 17.6203670501709, "learning_rate": 3.574094835793115e-06, "loss": 3.3415, "step": 35320 }, { "epoch": 0.359344482421875, "grad_norm": 10.047724723815918, "learning_rate": 3.573733729065324e-06, "loss": 3.47, "step": 35325 }, { "epoch": 0.3593953450520833, "grad_norm": 14.883319854736328, "learning_rate": 3.57337259486631e-06, "loss": 3.4704, "step": 35330 }, { "epoch": 0.3594462076822917, "grad_norm": 9.46360969543457, "learning_rate": 3.573011433205312e-06, "loss": 3.1698, "step": 35335 }, { "epoch": 0.3594970703125, "grad_norm": 14.531940460205078, "learning_rate": 3.5726502440915724e-06, "loss": 3.2961, "step": 35340 }, { "epoch": 0.3595479329427083, "grad_norm": 10.947972297668457, "learning_rate": 3.57228902753433e-06, "loss": 3.3002, "step": 35345 }, { "epoch": 0.3595987955729167, "grad_norm": 15.049712181091309, "learning_rate": 3.5719277835428263e-06, "loss": 3.3406, "step": 35350 }, { "epoch": 0.359649658203125, "grad_norm": 8.383793830871582, "learning_rate": 3.5715665121263048e-06, "loss": 3.1684, "step": 35355 }, { "epoch": 0.3597005208333333, "grad_norm": 7.295913219451904, "learning_rate": 3.5712052132940085e-06, "loss": 3.0662, "step": 35360 }, { "epoch": 0.3597513834635417, "grad_norm": 11.55411434173584, "learning_rate": 3.5708438870551798e-06, "loss": 2.963, "step": 35365 }, { "epoch": 0.35980224609375, "grad_norm": 9.769346237182617, "learning_rate": 3.5704825334190645e-06, "loss": 3.092, "step": 35370 }, { "epoch": 0.3598531087239583, "grad_norm": 14.446013450622559, "learning_rate": 3.5701211523949075e-06, "loss": 3.2916, "step": 35375 }, { "epoch": 0.3599039713541667, "grad_norm": 15.183908462524414, "learning_rate": 3.5697597439919542e-06, "loss": 3.4919, "step": 35380 }, { "epoch": 0.359954833984375, "grad_norm": 11.906778335571289, "learning_rate": 3.5693983082194517e-06, "loss": 3.0823, "step": 35385 }, { "epoch": 0.3600056966145833, "grad_norm": 10.959227561950684, "learning_rate": 3.5690368450866466e-06, "loss": 3.2799, "step": 35390 }, { "epoch": 0.3600565592447917, "grad_norm": 9.536362648010254, "learning_rate": 3.5686753546027876e-06, "loss": 3.4638, "step": 35395 }, { "epoch": 0.360107421875, "grad_norm": 15.469267845153809, "learning_rate": 3.568313836777123e-06, "loss": 3.2376, "step": 35400 }, { "epoch": 0.3601582845052083, "grad_norm": 7.204216957092285, "learning_rate": 3.5679522916189014e-06, "loss": 3.501, "step": 35405 }, { "epoch": 0.3602091471354167, "grad_norm": 10.316786766052246, "learning_rate": 3.567590719137374e-06, "loss": 3.2892, "step": 35410 }, { "epoch": 0.360260009765625, "grad_norm": 14.605274200439453, "learning_rate": 3.5672291193417907e-06, "loss": 4.14, "step": 35415 }, { "epoch": 0.3603108723958333, "grad_norm": 13.843880653381348, "learning_rate": 3.5668674922414043e-06, "loss": 3.2044, "step": 35420 }, { "epoch": 0.3603617350260417, "grad_norm": 7.237750053405762, "learning_rate": 3.5665058378454655e-06, "loss": 3.8642, "step": 35425 }, { "epoch": 0.36041259765625, "grad_norm": 12.802541732788086, "learning_rate": 3.566144156163227e-06, "loss": 3.3252, "step": 35430 }, { "epoch": 0.3604634602864583, "grad_norm": 7.766964912414551, "learning_rate": 3.5657824472039437e-06, "loss": 3.6876, "step": 35435 }, { "epoch": 0.3605143229166667, "grad_norm": 16.223665237426758, "learning_rate": 3.565420710976869e-06, "loss": 3.157, "step": 35440 }, { "epoch": 0.360565185546875, "grad_norm": 14.279911994934082, "learning_rate": 3.5650589474912577e-06, "loss": 3.3762, "step": 35445 }, { "epoch": 0.3606160481770833, "grad_norm": 9.755158424377441, "learning_rate": 3.5646971567563663e-06, "loss": 3.6622, "step": 35450 }, { "epoch": 0.3606669108072917, "grad_norm": 10.42287540435791, "learning_rate": 3.5643353387814493e-06, "loss": 3.6163, "step": 35455 }, { "epoch": 0.3607177734375, "grad_norm": 13.92138671875, "learning_rate": 3.5639734935757664e-06, "loss": 3.197, "step": 35460 }, { "epoch": 0.3607686360677083, "grad_norm": 11.900999069213867, "learning_rate": 3.563611621148573e-06, "loss": 3.4892, "step": 35465 }, { "epoch": 0.3608194986979167, "grad_norm": 15.42886734008789, "learning_rate": 3.563249721509129e-06, "loss": 3.3542, "step": 35470 }, { "epoch": 0.360870361328125, "grad_norm": 14.31777286529541, "learning_rate": 3.562887794666692e-06, "loss": 3.5636, "step": 35475 }, { "epoch": 0.3609212239583333, "grad_norm": 14.096817016601562, "learning_rate": 3.562525840630523e-06, "loss": 3.1952, "step": 35480 }, { "epoch": 0.3609720865885417, "grad_norm": 12.328357696533203, "learning_rate": 3.5621638594098825e-06, "loss": 3.3161, "step": 35485 }, { "epoch": 0.36102294921875, "grad_norm": 7.772528648376465, "learning_rate": 3.5618018510140307e-06, "loss": 3.3572, "step": 35490 }, { "epoch": 0.3610738118489583, "grad_norm": 16.212244033813477, "learning_rate": 3.5614398154522313e-06, "loss": 3.2458, "step": 35495 }, { "epoch": 0.3611246744791667, "grad_norm": 11.501498222351074, "learning_rate": 3.5610777527337447e-06, "loss": 2.8993, "step": 35500 }, { "epoch": 0.361175537109375, "grad_norm": 10.752714157104492, "learning_rate": 3.5607156628678364e-06, "loss": 3.2463, "step": 35505 }, { "epoch": 0.3612263997395833, "grad_norm": 16.357065200805664, "learning_rate": 3.5603535458637684e-06, "loss": 3.2976, "step": 35510 }, { "epoch": 0.3612772623697917, "grad_norm": 9.25450325012207, "learning_rate": 3.5599914017308067e-06, "loss": 3.0116, "step": 35515 }, { "epoch": 0.361328125, "grad_norm": 12.183612823486328, "learning_rate": 3.559629230478216e-06, "loss": 3.4034, "step": 35520 }, { "epoch": 0.3613789876302083, "grad_norm": 10.711119651794434, "learning_rate": 3.559267032115263e-06, "loss": 3.1804, "step": 35525 }, { "epoch": 0.3614298502604167, "grad_norm": 10.811841011047363, "learning_rate": 3.558904806651214e-06, "loss": 3.385, "step": 35530 }, { "epoch": 0.361480712890625, "grad_norm": 11.532083511352539, "learning_rate": 3.558542554095336e-06, "loss": 3.3263, "step": 35535 }, { "epoch": 0.3615315755208333, "grad_norm": 15.292985916137695, "learning_rate": 3.5581802744568984e-06, "loss": 3.3674, "step": 35540 }, { "epoch": 0.3615824381510417, "grad_norm": 9.049236297607422, "learning_rate": 3.5578179677451685e-06, "loss": 3.825, "step": 35545 }, { "epoch": 0.36163330078125, "grad_norm": 12.779874801635742, "learning_rate": 3.5574556339694166e-06, "loss": 3.2203, "step": 35550 }, { "epoch": 0.3616841634114583, "grad_norm": 7.979409694671631, "learning_rate": 3.557093273138914e-06, "loss": 3.0655, "step": 35555 }, { "epoch": 0.3617350260416667, "grad_norm": 7.819886207580566, "learning_rate": 3.5567308852629295e-06, "loss": 3.2863, "step": 35560 }, { "epoch": 0.361785888671875, "grad_norm": 11.725508689880371, "learning_rate": 3.5563684703507366e-06, "loss": 3.6002, "step": 35565 }, { "epoch": 0.3618367513020833, "grad_norm": 11.572553634643555, "learning_rate": 3.556006028411606e-06, "loss": 3.2667, "step": 35570 }, { "epoch": 0.3618876139322917, "grad_norm": 11.396615028381348, "learning_rate": 3.5556435594548123e-06, "loss": 3.1082, "step": 35575 }, { "epoch": 0.3619384765625, "grad_norm": 13.791967391967773, "learning_rate": 3.5552810634896275e-06, "loss": 3.001, "step": 35580 }, { "epoch": 0.3619893391927083, "grad_norm": 11.551700592041016, "learning_rate": 3.5549185405253273e-06, "loss": 3.1784, "step": 35585 }, { "epoch": 0.3620402018229167, "grad_norm": 11.629484176635742, "learning_rate": 3.5545559905711857e-06, "loss": 3.2978, "step": 35590 }, { "epoch": 0.362091064453125, "grad_norm": 15.747698783874512, "learning_rate": 3.5541934136364795e-06, "loss": 3.2315, "step": 35595 }, { "epoch": 0.3621419270833333, "grad_norm": 11.128524780273438, "learning_rate": 3.5538308097304846e-06, "loss": 3.089, "step": 35600 }, { "epoch": 0.3621927897135417, "grad_norm": 11.763813018798828, "learning_rate": 3.5534681788624785e-06, "loss": 3.4707, "step": 35605 }, { "epoch": 0.36224365234375, "grad_norm": 13.028651237487793, "learning_rate": 3.5531055210417387e-06, "loss": 3.2072, "step": 35610 }, { "epoch": 0.3622945149739583, "grad_norm": 13.94176959991455, "learning_rate": 3.5527428362775433e-06, "loss": 3.2547, "step": 35615 }, { "epoch": 0.3623453776041667, "grad_norm": 8.804715156555176, "learning_rate": 3.5523801245791727e-06, "loss": 3.14, "step": 35620 }, { "epoch": 0.362396240234375, "grad_norm": 8.877320289611816, "learning_rate": 3.552017385955905e-06, "loss": 3.2741, "step": 35625 }, { "epoch": 0.3624471028645833, "grad_norm": 32.209754943847656, "learning_rate": 3.551654620417023e-06, "loss": 3.4959, "step": 35630 }, { "epoch": 0.3624979654947917, "grad_norm": 10.85776424407959, "learning_rate": 3.5512918279718054e-06, "loss": 3.2148, "step": 35635 }, { "epoch": 0.362548828125, "grad_norm": 11.025604248046875, "learning_rate": 3.5509290086295373e-06, "loss": 3.0602, "step": 35640 }, { "epoch": 0.3625996907552083, "grad_norm": 14.648173332214355, "learning_rate": 3.5505661623994987e-06, "loss": 3.3042, "step": 35645 }, { "epoch": 0.3626505533854167, "grad_norm": 8.177518844604492, "learning_rate": 3.550203289290974e-06, "loss": 3.5925, "step": 35650 }, { "epoch": 0.362701416015625, "grad_norm": 17.484180450439453, "learning_rate": 3.5498403893132465e-06, "loss": 3.1191, "step": 35655 }, { "epoch": 0.3627522786458333, "grad_norm": 12.216513633728027, "learning_rate": 3.5494774624756025e-06, "loss": 3.4687, "step": 35660 }, { "epoch": 0.3628031412760417, "grad_norm": 11.965054512023926, "learning_rate": 3.5491145087873253e-06, "loss": 3.1928, "step": 35665 }, { "epoch": 0.36285400390625, "grad_norm": 11.027030944824219, "learning_rate": 3.5487515282577027e-06, "loss": 3.4374, "step": 35670 }, { "epoch": 0.3629048665364583, "grad_norm": 12.054620742797852, "learning_rate": 3.5483885208960207e-06, "loss": 3.1135, "step": 35675 }, { "epoch": 0.3629557291666667, "grad_norm": 8.037942886352539, "learning_rate": 3.548025486711567e-06, "loss": 2.9982, "step": 35680 }, { "epoch": 0.363006591796875, "grad_norm": 9.676033973693848, "learning_rate": 3.5476624257136294e-06, "loss": 3.3297, "step": 35685 }, { "epoch": 0.3630574544270833, "grad_norm": 13.849072456359863, "learning_rate": 3.547299337911497e-06, "loss": 3.254, "step": 35690 }, { "epoch": 0.3631083170572917, "grad_norm": 13.027873992919922, "learning_rate": 3.5469362233144594e-06, "loss": 3.4734, "step": 35695 }, { "epoch": 0.3631591796875, "grad_norm": 13.047901153564453, "learning_rate": 3.5465730819318067e-06, "loss": 3.2893, "step": 35700 }, { "epoch": 0.3632100423177083, "grad_norm": 17.80720329284668, "learning_rate": 3.5462099137728294e-06, "loss": 3.3718, "step": 35705 }, { "epoch": 0.3632609049479167, "grad_norm": 9.708136558532715, "learning_rate": 3.5458467188468198e-06, "loss": 3.4184, "step": 35710 }, { "epoch": 0.363311767578125, "grad_norm": 16.222246170043945, "learning_rate": 3.54548349716307e-06, "loss": 3.3733, "step": 35715 }, { "epoch": 0.3633626302083333, "grad_norm": 12.150138854980469, "learning_rate": 3.545120248730872e-06, "loss": 3.3269, "step": 35720 }, { "epoch": 0.3634134928385417, "grad_norm": 13.112444877624512, "learning_rate": 3.5447569735595203e-06, "loss": 3.3737, "step": 35725 }, { "epoch": 0.36346435546875, "grad_norm": 16.09695816040039, "learning_rate": 3.5443936716583093e-06, "loss": 3.308, "step": 35730 }, { "epoch": 0.3635152180989583, "grad_norm": 14.034189224243164, "learning_rate": 3.5440303430365346e-06, "loss": 3.4142, "step": 35735 }, { "epoch": 0.3635660807291667, "grad_norm": 14.190086364746094, "learning_rate": 3.5436669877034903e-06, "loss": 3.5965, "step": 35740 }, { "epoch": 0.363616943359375, "grad_norm": 11.754796028137207, "learning_rate": 3.543303605668473e-06, "loss": 3.2113, "step": 35745 }, { "epoch": 0.3636678059895833, "grad_norm": 12.327696800231934, "learning_rate": 3.542940196940781e-06, "loss": 3.6533, "step": 35750 }, { "epoch": 0.3637186686197917, "grad_norm": 11.785676956176758, "learning_rate": 3.542576761529711e-06, "loss": 3.3057, "step": 35755 }, { "epoch": 0.36376953125, "grad_norm": 11.127363204956055, "learning_rate": 3.5422132994445617e-06, "loss": 3.2788, "step": 35760 }, { "epoch": 0.3638203938802083, "grad_norm": 14.203107833862305, "learning_rate": 3.541849810694632e-06, "loss": 3.0148, "step": 35765 }, { "epoch": 0.3638712565104167, "grad_norm": 9.956028938293457, "learning_rate": 3.541486295289222e-06, "loss": 3.6195, "step": 35770 }, { "epoch": 0.363922119140625, "grad_norm": 11.492575645446777, "learning_rate": 3.5411227532376323e-06, "loss": 3.2679, "step": 35775 }, { "epoch": 0.3639729817708333, "grad_norm": 7.851870536804199, "learning_rate": 3.540759184549163e-06, "loss": 3.0741, "step": 35780 }, { "epoch": 0.3640238444010417, "grad_norm": 13.882920265197754, "learning_rate": 3.540395589233117e-06, "loss": 3.476, "step": 35785 }, { "epoch": 0.36407470703125, "grad_norm": 7.528973579406738, "learning_rate": 3.540031967298796e-06, "loss": 3.3073, "step": 35790 }, { "epoch": 0.3641255696614583, "grad_norm": 7.640775680541992, "learning_rate": 3.539668318755504e-06, "loss": 3.2905, "step": 35795 }, { "epoch": 0.3641764322916667, "grad_norm": 10.673866271972656, "learning_rate": 3.5393046436125456e-06, "loss": 3.5734, "step": 35800 }, { "epoch": 0.364227294921875, "grad_norm": 13.123295783996582, "learning_rate": 3.5389409418792226e-06, "loss": 3.4012, "step": 35805 }, { "epoch": 0.3642781575520833, "grad_norm": 10.05083179473877, "learning_rate": 3.5385772135648428e-06, "loss": 3.2255, "step": 35810 }, { "epoch": 0.3643290201822917, "grad_norm": 9.180085182189941, "learning_rate": 3.5382134586787107e-06, "loss": 3.2786, "step": 35815 }, { "epoch": 0.3643798828125, "grad_norm": 10.960928916931152, "learning_rate": 3.5378496772301337e-06, "loss": 4.0712, "step": 35820 }, { "epoch": 0.3644307454427083, "grad_norm": 10.767245292663574, "learning_rate": 3.537485869228418e-06, "loss": 3.1526, "step": 35825 }, { "epoch": 0.3644816080729167, "grad_norm": 15.244921684265137, "learning_rate": 3.5371220346828725e-06, "loss": 3.1579, "step": 35830 }, { "epoch": 0.364532470703125, "grad_norm": 11.677315711975098, "learning_rate": 3.536758173602806e-06, "loss": 3.2464, "step": 35835 }, { "epoch": 0.3645833333333333, "grad_norm": 10.261982917785645, "learning_rate": 3.5363942859975275e-06, "loss": 3.2389, "step": 35840 }, { "epoch": 0.3646341959635417, "grad_norm": 10.926580429077148, "learning_rate": 3.536030371876346e-06, "loss": 3.6832, "step": 35845 }, { "epoch": 0.36468505859375, "grad_norm": 16.905780792236328, "learning_rate": 3.5356664312485743e-06, "loss": 3.5338, "step": 35850 }, { "epoch": 0.3647359212239583, "grad_norm": 9.730697631835938, "learning_rate": 3.535302464123522e-06, "loss": 3.182, "step": 35855 }, { "epoch": 0.3647867838541667, "grad_norm": 14.526654243469238, "learning_rate": 3.5349384705105006e-06, "loss": 3.247, "step": 35860 }, { "epoch": 0.364837646484375, "grad_norm": 15.030230522155762, "learning_rate": 3.5345744504188246e-06, "loss": 3.7499, "step": 35865 }, { "epoch": 0.3648885091145833, "grad_norm": 9.691508293151855, "learning_rate": 3.534210403857806e-06, "loss": 3.5489, "step": 35870 }, { "epoch": 0.3649393717447917, "grad_norm": 12.639229774475098, "learning_rate": 3.53384633083676e-06, "loss": 3.5291, "step": 35875 }, { "epoch": 0.364990234375, "grad_norm": 9.587438583374023, "learning_rate": 3.5334822313649998e-06, "loss": 3.1705, "step": 35880 }, { "epoch": 0.3650410970052083, "grad_norm": 13.8088960647583, "learning_rate": 3.533118105451842e-06, "loss": 3.8654, "step": 35885 }, { "epoch": 0.3650919596354167, "grad_norm": 15.760499000549316, "learning_rate": 3.532753953106603e-06, "loss": 3.3849, "step": 35890 }, { "epoch": 0.365142822265625, "grad_norm": 12.67516040802002, "learning_rate": 3.5323897743385983e-06, "loss": 3.447, "step": 35895 }, { "epoch": 0.3651936848958333, "grad_norm": 14.23686408996582, "learning_rate": 3.532025569157146e-06, "loss": 3.479, "step": 35900 }, { "epoch": 0.3652445475260417, "grad_norm": 14.446793556213379, "learning_rate": 3.531661337571564e-06, "loss": 3.3813, "step": 35905 }, { "epoch": 0.36529541015625, "grad_norm": 12.83717155456543, "learning_rate": 3.531297079591171e-06, "loss": 3.966, "step": 35910 }, { "epoch": 0.3653462727864583, "grad_norm": 8.24056625366211, "learning_rate": 3.5309327952252875e-06, "loss": 3.3361, "step": 35915 }, { "epoch": 0.3653971354166667, "grad_norm": 11.2170991897583, "learning_rate": 3.530568484483232e-06, "loss": 3.5179, "step": 35920 }, { "epoch": 0.365447998046875, "grad_norm": 12.390713691711426, "learning_rate": 3.5302041473743263e-06, "loss": 3.5603, "step": 35925 }, { "epoch": 0.3654988606770833, "grad_norm": 7.594927787780762, "learning_rate": 3.5298397839078923e-06, "loss": 3.2695, "step": 35930 }, { "epoch": 0.3655497233072917, "grad_norm": 15.015494346618652, "learning_rate": 3.529475394093251e-06, "loss": 3.2524, "step": 35935 }, { "epoch": 0.3656005859375, "grad_norm": 16.249225616455078, "learning_rate": 3.529110977939726e-06, "loss": 2.9257, "step": 35940 }, { "epoch": 0.3656514485677083, "grad_norm": 11.581992149353027, "learning_rate": 3.52874653545664e-06, "loss": 3.3796, "step": 35945 }, { "epoch": 0.3657023111979167, "grad_norm": 10.994409561157227, "learning_rate": 3.528382066653318e-06, "loss": 3.5111, "step": 35950 }, { "epoch": 0.365753173828125, "grad_norm": 12.003817558288574, "learning_rate": 3.528017571539085e-06, "loss": 3.6897, "step": 35955 }, { "epoch": 0.3658040364583333, "grad_norm": 9.575628280639648, "learning_rate": 3.527653050123265e-06, "loss": 3.7692, "step": 35960 }, { "epoch": 0.3658548990885417, "grad_norm": 9.068621635437012, "learning_rate": 3.5272885024151864e-06, "loss": 3.353, "step": 35965 }, { "epoch": 0.36590576171875, "grad_norm": 17.891878128051758, "learning_rate": 3.5269239284241747e-06, "loss": 3.2435, "step": 35970 }, { "epoch": 0.3659566243489583, "grad_norm": 11.083831787109375, "learning_rate": 3.526559328159558e-06, "loss": 3.449, "step": 35975 }, { "epoch": 0.3660074869791667, "grad_norm": 11.851883888244629, "learning_rate": 3.5261947016306634e-06, "loss": 3.6004, "step": 35980 }, { "epoch": 0.366058349609375, "grad_norm": 9.52242374420166, "learning_rate": 3.525830048846821e-06, "loss": 3.009, "step": 35985 }, { "epoch": 0.3661092122395833, "grad_norm": 10.086869239807129, "learning_rate": 3.52546536981736e-06, "loss": 3.0381, "step": 35990 }, { "epoch": 0.3661600748697917, "grad_norm": 15.746369361877441, "learning_rate": 3.5251006645516105e-06, "loss": 3.4968, "step": 35995 }, { "epoch": 0.3662109375, "grad_norm": 8.71462631225586, "learning_rate": 3.5247359330589032e-06, "loss": 3.6169, "step": 36000 }, { "epoch": 0.3662618001302083, "grad_norm": 16.58160400390625, "learning_rate": 3.5243711753485704e-06, "loss": 3.5387, "step": 36005 }, { "epoch": 0.3663126627604167, "grad_norm": 7.80565071105957, "learning_rate": 3.524006391429944e-06, "loss": 3.561, "step": 36010 }, { "epoch": 0.366363525390625, "grad_norm": 12.95910930633545, "learning_rate": 3.523641581312356e-06, "loss": 3.1926, "step": 36015 }, { "epoch": 0.3664143880208333, "grad_norm": 18.815176010131836, "learning_rate": 3.523276745005142e-06, "loss": 4.1247, "step": 36020 }, { "epoch": 0.3664652506510417, "grad_norm": 10.081045150756836, "learning_rate": 3.5229118825176336e-06, "loss": 3.4267, "step": 36025 }, { "epoch": 0.36651611328125, "grad_norm": 9.349115371704102, "learning_rate": 3.5225469938591682e-06, "loss": 3.5161, "step": 36030 }, { "epoch": 0.3665669759114583, "grad_norm": 12.156893730163574, "learning_rate": 3.52218207903908e-06, "loss": 3.151, "step": 36035 }, { "epoch": 0.3666178385416667, "grad_norm": 11.794998168945312, "learning_rate": 3.5218171380667056e-06, "loss": 3.6706, "step": 36040 }, { "epoch": 0.366668701171875, "grad_norm": 6.907103538513184, "learning_rate": 3.5214521709513815e-06, "loss": 3.1906, "step": 36045 }, { "epoch": 0.3667195638020833, "grad_norm": 8.981483459472656, "learning_rate": 3.5210871777024466e-06, "loss": 3.3278, "step": 36050 }, { "epoch": 0.3667704264322917, "grad_norm": 9.719010353088379, "learning_rate": 3.520722158329237e-06, "loss": 3.4919, "step": 36055 }, { "epoch": 0.3668212890625, "grad_norm": 13.264166831970215, "learning_rate": 3.5203571128410945e-06, "loss": 3.2168, "step": 36060 }, { "epoch": 0.3668721516927083, "grad_norm": 6.929196834564209, "learning_rate": 3.519992041247356e-06, "loss": 3.3111, "step": 36065 }, { "epoch": 0.3669230143229167, "grad_norm": 14.912343978881836, "learning_rate": 3.519626943557364e-06, "loss": 3.6861, "step": 36070 }, { "epoch": 0.366973876953125, "grad_norm": 11.711191177368164, "learning_rate": 3.5192618197804566e-06, "loss": 3.4719, "step": 36075 }, { "epoch": 0.3670247395833333, "grad_norm": 14.388081550598145, "learning_rate": 3.5188966699259785e-06, "loss": 3.6801, "step": 36080 }, { "epoch": 0.3670756022135417, "grad_norm": 13.36806869506836, "learning_rate": 3.51853149400327e-06, "loss": 3.3277, "step": 36085 }, { "epoch": 0.36712646484375, "grad_norm": 9.424427032470703, "learning_rate": 3.518166292021675e-06, "loss": 3.5005, "step": 36090 }, { "epoch": 0.3671773274739583, "grad_norm": 11.621115684509277, "learning_rate": 3.517801063990536e-06, "loss": 3.4082, "step": 36095 }, { "epoch": 0.3672281901041667, "grad_norm": 13.065186500549316, "learning_rate": 3.5174358099191986e-06, "loss": 3.0164, "step": 36100 }, { "epoch": 0.367279052734375, "grad_norm": 14.8209867477417, "learning_rate": 3.517070529817007e-06, "loss": 3.5625, "step": 36105 }, { "epoch": 0.3673299153645833, "grad_norm": 15.603599548339844, "learning_rate": 3.5167052236933076e-06, "loss": 3.2894, "step": 36110 }, { "epoch": 0.3673807779947917, "grad_norm": 13.455853462219238, "learning_rate": 3.516339891557445e-06, "loss": 3.205, "step": 36115 }, { "epoch": 0.367431640625, "grad_norm": 11.697784423828125, "learning_rate": 3.5159745334187677e-06, "loss": 3.3178, "step": 36120 }, { "epoch": 0.3674825032552083, "grad_norm": 14.168938636779785, "learning_rate": 3.5156091492866236e-06, "loss": 3.5605, "step": 36125 }, { "epoch": 0.3675333658854167, "grad_norm": 14.687520980834961, "learning_rate": 3.515243739170359e-06, "loss": 3.2411, "step": 36130 }, { "epoch": 0.367584228515625, "grad_norm": 14.495407104492188, "learning_rate": 3.5148783030793244e-06, "loss": 3.7752, "step": 36135 }, { "epoch": 0.3676350911458333, "grad_norm": 12.858672142028809, "learning_rate": 3.514512841022869e-06, "loss": 3.1062, "step": 36140 }, { "epoch": 0.3676859537760417, "grad_norm": 11.596325874328613, "learning_rate": 3.514147353010343e-06, "loss": 3.1808, "step": 36145 }, { "epoch": 0.36773681640625, "grad_norm": 11.595054626464844, "learning_rate": 3.513781839051097e-06, "loss": 3.3619, "step": 36150 }, { "epoch": 0.3677876790364583, "grad_norm": 8.274419784545898, "learning_rate": 3.5134162991544828e-06, "loss": 3.0254, "step": 36155 }, { "epoch": 0.3678385416666667, "grad_norm": 7.9185099601745605, "learning_rate": 3.5130507333298535e-06, "loss": 3.4626, "step": 36160 }, { "epoch": 0.367889404296875, "grad_norm": 9.295361518859863, "learning_rate": 3.5126851415865615e-06, "loss": 3.1589, "step": 36165 }, { "epoch": 0.3679402669270833, "grad_norm": 9.102407455444336, "learning_rate": 3.5123195239339593e-06, "loss": 3.3127, "step": 36170 }, { "epoch": 0.3679911295572917, "grad_norm": 12.359614372253418, "learning_rate": 3.511953880381403e-06, "loss": 3.3832, "step": 36175 }, { "epoch": 0.3680419921875, "grad_norm": 8.219758987426758, "learning_rate": 3.511588210938245e-06, "loss": 3.3632, "step": 36180 }, { "epoch": 0.3680928548177083, "grad_norm": 16.555068969726562, "learning_rate": 3.511222515613844e-06, "loss": 3.3802, "step": 36185 }, { "epoch": 0.3681437174479167, "grad_norm": 15.864944458007812, "learning_rate": 3.5108567944175537e-06, "loss": 3.4895, "step": 36190 }, { "epoch": 0.368194580078125, "grad_norm": 12.22673511505127, "learning_rate": 3.5104910473587315e-06, "loss": 3.0151, "step": 36195 }, { "epoch": 0.3682454427083333, "grad_norm": 8.683699607849121, "learning_rate": 3.5101252744467367e-06, "loss": 3.3945, "step": 36200 }, { "epoch": 0.3682963053385417, "grad_norm": 8.718663215637207, "learning_rate": 3.509759475690926e-06, "loss": 3.5381, "step": 36205 }, { "epoch": 0.36834716796875, "grad_norm": 10.717947006225586, "learning_rate": 3.5093936511006578e-06, "loss": 3.2846, "step": 36210 }, { "epoch": 0.3683980305989583, "grad_norm": 12.331782341003418, "learning_rate": 3.509027800685292e-06, "loss": 3.1712, "step": 36215 }, { "epoch": 0.3684488932291667, "grad_norm": 11.512397766113281, "learning_rate": 3.5086619244541896e-06, "loss": 3.4319, "step": 36220 }, { "epoch": 0.368499755859375, "grad_norm": 14.478857040405273, "learning_rate": 3.5082960224167107e-06, "loss": 2.9699, "step": 36225 }, { "epoch": 0.3685506184895833, "grad_norm": 12.586868286132812, "learning_rate": 3.5079300945822183e-06, "loss": 3.2637, "step": 36230 }, { "epoch": 0.3686014811197917, "grad_norm": 17.139347076416016, "learning_rate": 3.5075641409600712e-06, "loss": 3.1607, "step": 36235 }, { "epoch": 0.36865234375, "grad_norm": 14.53843879699707, "learning_rate": 3.5071981615596364e-06, "loss": 3.2149, "step": 36240 }, { "epoch": 0.3687032063802083, "grad_norm": 10.349335670471191, "learning_rate": 3.506832156390274e-06, "loss": 3.5323, "step": 36245 }, { "epoch": 0.3687540690104167, "grad_norm": 9.536778450012207, "learning_rate": 3.5064661254613497e-06, "loss": 3.1457, "step": 36250 }, { "epoch": 0.368804931640625, "grad_norm": 8.634055137634277, "learning_rate": 3.506100068782229e-06, "loss": 3.4729, "step": 36255 }, { "epoch": 0.3688557942708333, "grad_norm": 14.773881912231445, "learning_rate": 3.505733986362275e-06, "loss": 3.271, "step": 36260 }, { "epoch": 0.3689066569010417, "grad_norm": 15.675344467163086, "learning_rate": 3.505367878210857e-06, "loss": 3.2313, "step": 36265 }, { "epoch": 0.36895751953125, "grad_norm": 12.131467819213867, "learning_rate": 3.5050017443373392e-06, "loss": 3.429, "step": 36270 }, { "epoch": 0.3690083821614583, "grad_norm": 16.940414428710938, "learning_rate": 3.5046355847510897e-06, "loss": 3.3015, "step": 36275 }, { "epoch": 0.3690592447916667, "grad_norm": 12.1268310546875, "learning_rate": 3.504269399461478e-06, "loss": 3.0125, "step": 36280 }, { "epoch": 0.369110107421875, "grad_norm": 10.741314888000488, "learning_rate": 3.5039031884778707e-06, "loss": 3.0643, "step": 36285 }, { "epoch": 0.3691609700520833, "grad_norm": 17.02005386352539, "learning_rate": 3.5035369518096384e-06, "loss": 3.4271, "step": 36290 }, { "epoch": 0.3692118326822917, "grad_norm": 15.335061073303223, "learning_rate": 3.503170689466152e-06, "loss": 3.2227, "step": 36295 }, { "epoch": 0.3692626953125, "grad_norm": 8.688138008117676, "learning_rate": 3.50280440145678e-06, "loss": 3.0821, "step": 36300 }, { "epoch": 0.3693135579427083, "grad_norm": 11.173579216003418, "learning_rate": 3.5024380877908957e-06, "loss": 3.1695, "step": 36305 }, { "epoch": 0.3693644205729167, "grad_norm": 14.764756202697754, "learning_rate": 3.502071748477871e-06, "loss": 3.2324, "step": 36310 }, { "epoch": 0.369415283203125, "grad_norm": 16.455608367919922, "learning_rate": 3.5017053835270777e-06, "loss": 3.4476, "step": 36315 }, { "epoch": 0.3694661458333333, "grad_norm": 14.511435508728027, "learning_rate": 3.501338992947889e-06, "loss": 3.5649, "step": 36320 }, { "epoch": 0.3695170084635417, "grad_norm": 10.788686752319336, "learning_rate": 3.5009725767496806e-06, "loss": 3.3017, "step": 36325 }, { "epoch": 0.36956787109375, "grad_norm": 14.52601146697998, "learning_rate": 3.5006061349418253e-06, "loss": 3.6233, "step": 36330 }, { "epoch": 0.3696187337239583, "grad_norm": 10.335064888000488, "learning_rate": 3.5002396675336993e-06, "loss": 3.3017, "step": 36335 }, { "epoch": 0.3696695963541667, "grad_norm": 12.67693042755127, "learning_rate": 3.4998731745346786e-06, "loss": 3.4847, "step": 36340 }, { "epoch": 0.369720458984375, "grad_norm": 8.478259086608887, "learning_rate": 3.49950665595414e-06, "loss": 3.1835, "step": 36345 }, { "epoch": 0.3697713216145833, "grad_norm": 13.773451805114746, "learning_rate": 3.49914011180146e-06, "loss": 3.2737, "step": 36350 }, { "epoch": 0.3698221842447917, "grad_norm": 11.555225372314453, "learning_rate": 3.498773542086018e-06, "loss": 3.3203, "step": 36355 }, { "epoch": 0.369873046875, "grad_norm": 15.813162803649902, "learning_rate": 3.498406946817191e-06, "loss": 3.4779, "step": 36360 }, { "epoch": 0.3699239095052083, "grad_norm": 11.773980140686035, "learning_rate": 3.498040326004359e-06, "loss": 3.0863, "step": 36365 }, { "epoch": 0.3699747721354167, "grad_norm": 15.894186019897461, "learning_rate": 3.497673679656901e-06, "loss": 3.3233, "step": 36370 }, { "epoch": 0.370025634765625, "grad_norm": 10.541888236999512, "learning_rate": 3.4973070077841997e-06, "loss": 3.4923, "step": 36375 }, { "epoch": 0.3700764973958333, "grad_norm": 11.18358325958252, "learning_rate": 3.496940310395634e-06, "loss": 3.1329, "step": 36380 }, { "epoch": 0.3701273600260417, "grad_norm": 15.291526794433594, "learning_rate": 3.4965735875005873e-06, "loss": 3.4248, "step": 36385 }, { "epoch": 0.37017822265625, "grad_norm": 11.989048957824707, "learning_rate": 3.4962068391084414e-06, "loss": 3.3592, "step": 36390 }, { "epoch": 0.3702290852864583, "grad_norm": 10.62578010559082, "learning_rate": 3.4958400652285795e-06, "loss": 3.6871, "step": 36395 }, { "epoch": 0.3702799479166667, "grad_norm": 14.085732460021973, "learning_rate": 3.4954732658703862e-06, "loss": 2.7789, "step": 36400 }, { "epoch": 0.370330810546875, "grad_norm": 15.742152214050293, "learning_rate": 3.4951064410432446e-06, "loss": 3.7796, "step": 36405 }, { "epoch": 0.3703816731770833, "grad_norm": 14.037017822265625, "learning_rate": 3.494739590756541e-06, "loss": 3.049, "step": 36410 }, { "epoch": 0.3704325358072917, "grad_norm": 13.601982116699219, "learning_rate": 3.4943727150196607e-06, "loss": 3.3264, "step": 36415 }, { "epoch": 0.3704833984375, "grad_norm": 10.641363143920898, "learning_rate": 3.4940058138419903e-06, "loss": 3.3848, "step": 36420 }, { "epoch": 0.3705342610677083, "grad_norm": 12.306524276733398, "learning_rate": 3.4936388872329163e-06, "loss": 3.1746, "step": 36425 }, { "epoch": 0.3705851236979167, "grad_norm": 9.204092979431152, "learning_rate": 3.4932719352018272e-06, "loss": 3.4841, "step": 36430 }, { "epoch": 0.370635986328125, "grad_norm": 12.298256874084473, "learning_rate": 3.4929049577581107e-06, "loss": 3.1072, "step": 36435 }, { "epoch": 0.3706868489583333, "grad_norm": 10.112834930419922, "learning_rate": 3.492537954911157e-06, "loss": 3.3517, "step": 36440 }, { "epoch": 0.3707377115885417, "grad_norm": 9.618741989135742, "learning_rate": 3.492170926670354e-06, "loss": 3.3893, "step": 36445 }, { "epoch": 0.37078857421875, "grad_norm": 11.383841514587402, "learning_rate": 3.491803873045094e-06, "loss": 3.5973, "step": 36450 }, { "epoch": 0.3708394368489583, "grad_norm": 9.854310035705566, "learning_rate": 3.491436794044766e-06, "loss": 3.3188, "step": 36455 }, { "epoch": 0.3708902994791667, "grad_norm": 6.895596981048584, "learning_rate": 3.491069689678764e-06, "loss": 3.1569, "step": 36460 }, { "epoch": 0.370941162109375, "grad_norm": 7.189676761627197, "learning_rate": 3.490702559956478e-06, "loss": 3.1208, "step": 36465 }, { "epoch": 0.3709920247395833, "grad_norm": 12.943304061889648, "learning_rate": 3.4903354048873017e-06, "loss": 3.2073, "step": 36470 }, { "epoch": 0.3710428873697917, "grad_norm": 13.477482795715332, "learning_rate": 3.489968224480629e-06, "loss": 3.3182, "step": 36475 }, { "epoch": 0.37109375, "grad_norm": 12.34349250793457, "learning_rate": 3.489601018745854e-06, "loss": 3.531, "step": 36480 }, { "epoch": 0.3711446126302083, "grad_norm": 10.274271965026855, "learning_rate": 3.489233787692371e-06, "loss": 3.4869, "step": 36485 }, { "epoch": 0.3711954752604167, "grad_norm": 6.926421642303467, "learning_rate": 3.4888665313295767e-06, "loss": 3.6718, "step": 36490 }, { "epoch": 0.371246337890625, "grad_norm": 11.027121543884277, "learning_rate": 3.4884992496668655e-06, "loss": 3.0825, "step": 36495 }, { "epoch": 0.3712972005208333, "grad_norm": 14.85596752166748, "learning_rate": 3.488131942713636e-06, "loss": 3.6047, "step": 36500 }, { "epoch": 0.3713480631510417, "grad_norm": 16.440946578979492, "learning_rate": 3.487764610479285e-06, "loss": 3.0812, "step": 36505 }, { "epoch": 0.37139892578125, "grad_norm": 13.991331100463867, "learning_rate": 3.487397252973211e-06, "loss": 3.5671, "step": 36510 }, { "epoch": 0.3714497884114583, "grad_norm": 10.95915699005127, "learning_rate": 3.487029870204812e-06, "loss": 3.5479, "step": 36515 }, { "epoch": 0.3715006510416667, "grad_norm": 14.857094764709473, "learning_rate": 3.4866624621834877e-06, "loss": 3.2309, "step": 36520 }, { "epoch": 0.371551513671875, "grad_norm": 16.348299026489258, "learning_rate": 3.4862950289186383e-06, "loss": 3.8629, "step": 36525 }, { "epoch": 0.3716023763020833, "grad_norm": 12.496493339538574, "learning_rate": 3.4859275704196644e-06, "loss": 3.7878, "step": 36530 }, { "epoch": 0.3716532389322917, "grad_norm": 13.966819763183594, "learning_rate": 3.4855600866959667e-06, "loss": 2.906, "step": 36535 }, { "epoch": 0.3717041015625, "grad_norm": 8.030806541442871, "learning_rate": 3.485192577756948e-06, "loss": 3.4312, "step": 36540 }, { "epoch": 0.3717549641927083, "grad_norm": 11.806658744812012, "learning_rate": 3.4848250436120113e-06, "loss": 3.5966, "step": 36545 }, { "epoch": 0.3718058268229167, "grad_norm": 9.586294174194336, "learning_rate": 3.4844574842705586e-06, "loss": 3.2212, "step": 36550 }, { "epoch": 0.371856689453125, "grad_norm": 10.939385414123535, "learning_rate": 3.4840898997419955e-06, "loss": 3.9693, "step": 36555 }, { "epoch": 0.3719075520833333, "grad_norm": 14.730448722839355, "learning_rate": 3.4837222900357236e-06, "loss": 3.2931, "step": 36560 }, { "epoch": 0.3719584147135417, "grad_norm": 8.028823852539062, "learning_rate": 3.4833546551611517e-06, "loss": 3.288, "step": 36565 }, { "epoch": 0.37200927734375, "grad_norm": 8.313758850097656, "learning_rate": 3.4829869951276833e-06, "loss": 3.3429, "step": 36570 }, { "epoch": 0.3720601399739583, "grad_norm": 13.379473686218262, "learning_rate": 3.482619309944727e-06, "loss": 3.2575, "step": 36575 }, { "epoch": 0.3721110026041667, "grad_norm": 13.700751304626465, "learning_rate": 3.482251599621687e-06, "loss": 3.3155, "step": 36580 }, { "epoch": 0.372161865234375, "grad_norm": 7.261434555053711, "learning_rate": 3.481883864167973e-06, "loss": 3.2011, "step": 36585 }, { "epoch": 0.3722127278645833, "grad_norm": 13.945944786071777, "learning_rate": 3.4815161035929935e-06, "loss": 3.6079, "step": 36590 }, { "epoch": 0.3722635904947917, "grad_norm": 15.99990177154541, "learning_rate": 3.4811483179061565e-06, "loss": 3.5521, "step": 36595 }, { "epoch": 0.372314453125, "grad_norm": 10.341609001159668, "learning_rate": 3.480780507116873e-06, "loss": 3.4476, "step": 36600 }, { "epoch": 0.3723653157552083, "grad_norm": 10.77724552154541, "learning_rate": 3.480412671234552e-06, "loss": 3.3492, "step": 36605 }, { "epoch": 0.3724161783854167, "grad_norm": 13.822732925415039, "learning_rate": 3.480044810268606e-06, "loss": 3.2997, "step": 36610 }, { "epoch": 0.372467041015625, "grad_norm": 9.237661361694336, "learning_rate": 3.4796769242284457e-06, "loss": 3.246, "step": 36615 }, { "epoch": 0.3725179036458333, "grad_norm": 14.378121376037598, "learning_rate": 3.479309013123483e-06, "loss": 3.0785, "step": 36620 }, { "epoch": 0.3725687662760417, "grad_norm": 10.455818176269531, "learning_rate": 3.4789410769631317e-06, "loss": 3.5482, "step": 36625 }, { "epoch": 0.37261962890625, "grad_norm": 11.180048942565918, "learning_rate": 3.478573115756805e-06, "loss": 2.9193, "step": 36630 }, { "epoch": 0.3726704915364583, "grad_norm": 13.03493595123291, "learning_rate": 3.4782051295139164e-06, "loss": 3.1065, "step": 36635 }, { "epoch": 0.3727213541666667, "grad_norm": 10.241646766662598, "learning_rate": 3.4778371182438825e-06, "loss": 3.4099, "step": 36640 }, { "epoch": 0.372772216796875, "grad_norm": 12.54874038696289, "learning_rate": 3.477469081956117e-06, "loss": 3.4081, "step": 36645 }, { "epoch": 0.3728230794270833, "grad_norm": 13.122376441955566, "learning_rate": 3.4771010206600365e-06, "loss": 3.5846, "step": 36650 }, { "epoch": 0.3728739420572917, "grad_norm": 15.833231925964355, "learning_rate": 3.4767329343650584e-06, "loss": 3.4329, "step": 36655 }, { "epoch": 0.3729248046875, "grad_norm": 19.701417922973633, "learning_rate": 3.4763648230805997e-06, "loss": 3.8892, "step": 36660 }, { "epoch": 0.3729756673177083, "grad_norm": 11.933082580566406, "learning_rate": 3.4759966868160784e-06, "loss": 3.3443, "step": 36665 }, { "epoch": 0.3730265299479167, "grad_norm": 12.45585823059082, "learning_rate": 3.4756285255809137e-06, "loss": 3.1525, "step": 36670 }, { "epoch": 0.373077392578125, "grad_norm": 13.080124855041504, "learning_rate": 3.475260339384523e-06, "loss": 3.3393, "step": 36675 }, { "epoch": 0.3731282552083333, "grad_norm": 17.313114166259766, "learning_rate": 3.474892128236329e-06, "loss": 3.6527, "step": 36680 }, { "epoch": 0.3731791178385417, "grad_norm": 12.358736991882324, "learning_rate": 3.474523892145751e-06, "loss": 3.2541, "step": 36685 }, { "epoch": 0.37322998046875, "grad_norm": 14.195704460144043, "learning_rate": 3.4741556311222093e-06, "loss": 3.2213, "step": 36690 }, { "epoch": 0.3732808430989583, "grad_norm": 12.779534339904785, "learning_rate": 3.473787345175127e-06, "loss": 3.409, "step": 36695 }, { "epoch": 0.3733317057291667, "grad_norm": 12.559048652648926, "learning_rate": 3.4734190343139264e-06, "loss": 3.289, "step": 36700 }, { "epoch": 0.373382568359375, "grad_norm": 15.576386451721191, "learning_rate": 3.4730506985480307e-06, "loss": 3.7107, "step": 36705 }, { "epoch": 0.3734334309895833, "grad_norm": 8.136879920959473, "learning_rate": 3.4726823378868634e-06, "loss": 3.1032, "step": 36710 }, { "epoch": 0.3734842936197917, "grad_norm": 12.802074432373047, "learning_rate": 3.4723139523398487e-06, "loss": 3.2926, "step": 36715 }, { "epoch": 0.37353515625, "grad_norm": 16.977983474731445, "learning_rate": 3.4719455419164122e-06, "loss": 3.0812, "step": 36720 }, { "epoch": 0.3735860188802083, "grad_norm": 15.70494556427002, "learning_rate": 3.4715771066259795e-06, "loss": 3.2869, "step": 36725 }, { "epoch": 0.3736368815104167, "grad_norm": 9.459453582763672, "learning_rate": 3.471208646477977e-06, "loss": 3.335, "step": 36730 }, { "epoch": 0.373687744140625, "grad_norm": 14.831958770751953, "learning_rate": 3.4708401614818315e-06, "loss": 3.4771, "step": 36735 }, { "epoch": 0.3737386067708333, "grad_norm": 12.741683006286621, "learning_rate": 3.47047165164697e-06, "loss": 3.2332, "step": 36740 }, { "epoch": 0.3737894694010417, "grad_norm": 15.601336479187012, "learning_rate": 3.470103116982822e-06, "loss": 3.4188, "step": 36745 }, { "epoch": 0.37384033203125, "grad_norm": 11.928319931030273, "learning_rate": 3.469734557498815e-06, "loss": 3.5366, "step": 36750 }, { "epoch": 0.3738911946614583, "grad_norm": 13.991730690002441, "learning_rate": 3.46936597320438e-06, "loss": 3.5389, "step": 36755 }, { "epoch": 0.3739420572916667, "grad_norm": 12.347566604614258, "learning_rate": 3.4689973641089457e-06, "loss": 3.3752, "step": 36760 }, { "epoch": 0.373992919921875, "grad_norm": 16.722991943359375, "learning_rate": 3.4686287302219433e-06, "loss": 3.5947, "step": 36765 }, { "epoch": 0.3740437825520833, "grad_norm": 12.147093772888184, "learning_rate": 3.4682600715528055e-06, "loss": 3.3451, "step": 36770 }, { "epoch": 0.3740946451822917, "grad_norm": 11.044589042663574, "learning_rate": 3.4678913881109626e-06, "loss": 3.1253, "step": 36775 }, { "epoch": 0.3741455078125, "grad_norm": 7.72145938873291, "learning_rate": 3.4675226799058476e-06, "loss": 3.1175, "step": 36780 }, { "epoch": 0.3741963704427083, "grad_norm": 9.892139434814453, "learning_rate": 3.4671539469468944e-06, "loss": 3.5971, "step": 36785 }, { "epoch": 0.3742472330729167, "grad_norm": 12.230167388916016, "learning_rate": 3.4667851892435367e-06, "loss": 3.389, "step": 36790 }, { "epoch": 0.374298095703125, "grad_norm": 9.405281066894531, "learning_rate": 3.4664164068052094e-06, "loss": 2.8224, "step": 36795 }, { "epoch": 0.3743489583333333, "grad_norm": 11.115234375, "learning_rate": 3.466047599641348e-06, "loss": 3.2145, "step": 36800 }, { "epoch": 0.3743998209635417, "grad_norm": 10.069756507873535, "learning_rate": 3.4656787677613855e-06, "loss": 3.0822, "step": 36805 }, { "epoch": 0.37445068359375, "grad_norm": 8.526987075805664, "learning_rate": 3.465309911174763e-06, "loss": 3.2606, "step": 36810 }, { "epoch": 0.3745015462239583, "grad_norm": 10.341978073120117, "learning_rate": 3.464941029890914e-06, "loss": 3.1676, "step": 36815 }, { "epoch": 0.3745524088541667, "grad_norm": 10.42945671081543, "learning_rate": 3.464572123919278e-06, "loss": 3.0921, "step": 36820 }, { "epoch": 0.374603271484375, "grad_norm": 15.445566177368164, "learning_rate": 3.464203193269293e-06, "loss": 3.425, "step": 36825 }, { "epoch": 0.3746541341145833, "grad_norm": 8.392379760742188, "learning_rate": 3.463834237950397e-06, "loss": 3.0214, "step": 36830 }, { "epoch": 0.3747049967447917, "grad_norm": 14.550819396972656, "learning_rate": 3.463465257972031e-06, "loss": 3.2801, "step": 36835 }, { "epoch": 0.374755859375, "grad_norm": 11.48135757446289, "learning_rate": 3.463096253343635e-06, "loss": 3.2577, "step": 36840 }, { "epoch": 0.3748067220052083, "grad_norm": 10.536014556884766, "learning_rate": 3.462727224074649e-06, "loss": 3.4733, "step": 36845 }, { "epoch": 0.3748575846354167, "grad_norm": 15.377198219299316, "learning_rate": 3.4623581701745156e-06, "loss": 3.2647, "step": 36850 }, { "epoch": 0.374908447265625, "grad_norm": 9.998665809631348, "learning_rate": 3.4619890916526766e-06, "loss": 3.7961, "step": 36855 }, { "epoch": 0.3749593098958333, "grad_norm": 12.39189624786377, "learning_rate": 3.461619988518574e-06, "loss": 3.211, "step": 36860 }, { "epoch": 0.3750101725260417, "grad_norm": 11.28947639465332, "learning_rate": 3.4612508607816533e-06, "loss": 3.4307, "step": 36865 }, { "epoch": 0.37506103515625, "grad_norm": 13.22697925567627, "learning_rate": 3.460881708451356e-06, "loss": 3.2993, "step": 36870 }, { "epoch": 0.3751118977864583, "grad_norm": 11.20608901977539, "learning_rate": 3.460512531537128e-06, "loss": 3.534, "step": 36875 }, { "epoch": 0.3751627604166667, "grad_norm": 9.633331298828125, "learning_rate": 3.4601433300484143e-06, "loss": 3.2138, "step": 36880 }, { "epoch": 0.375213623046875, "grad_norm": 8.668368339538574, "learning_rate": 3.459774103994662e-06, "loss": 3.4622, "step": 36885 }, { "epoch": 0.3752644856770833, "grad_norm": 9.311735153198242, "learning_rate": 3.4594048533853166e-06, "loss": 3.3273, "step": 36890 }, { "epoch": 0.3753153483072917, "grad_norm": 12.04916000366211, "learning_rate": 3.4590355782298245e-06, "loss": 3.8702, "step": 36895 }, { "epoch": 0.3753662109375, "grad_norm": 10.879121780395508, "learning_rate": 3.458666278537635e-06, "loss": 3.2429, "step": 36900 }, { "epoch": 0.3754170735677083, "grad_norm": 8.562045097351074, "learning_rate": 3.4582969543181966e-06, "loss": 3.6933, "step": 36905 }, { "epoch": 0.3754679361979167, "grad_norm": 13.595769882202148, "learning_rate": 3.4579276055809566e-06, "loss": 3.4905, "step": 36910 }, { "epoch": 0.375518798828125, "grad_norm": 10.669339179992676, "learning_rate": 3.4575582323353673e-06, "loss": 3.0703, "step": 36915 }, { "epoch": 0.3755696614583333, "grad_norm": 12.172415733337402, "learning_rate": 3.4571888345908767e-06, "loss": 3.2722, "step": 36920 }, { "epoch": 0.3756205240885417, "grad_norm": 12.426019668579102, "learning_rate": 3.4568194123569365e-06, "loss": 2.9787, "step": 36925 }, { "epoch": 0.37567138671875, "grad_norm": 17.960912704467773, "learning_rate": 3.4564499656429983e-06, "loss": 3.682, "step": 36930 }, { "epoch": 0.3757222493489583, "grad_norm": 10.33563232421875, "learning_rate": 3.456080494458515e-06, "loss": 3.0111, "step": 36935 }, { "epoch": 0.3757731119791667, "grad_norm": 8.455516815185547, "learning_rate": 3.455710998812939e-06, "loss": 3.2332, "step": 36940 }, { "epoch": 0.375823974609375, "grad_norm": 12.977243423461914, "learning_rate": 3.4553414787157234e-06, "loss": 3.4772, "step": 36945 }, { "epoch": 0.3758748372395833, "grad_norm": 10.120162010192871, "learning_rate": 3.454971934176322e-06, "loss": 2.9089, "step": 36950 }, { "epoch": 0.3759256998697917, "grad_norm": 10.681510925292969, "learning_rate": 3.4546023652041903e-06, "loss": 3.2806, "step": 36955 }, { "epoch": 0.3759765625, "grad_norm": 12.329384803771973, "learning_rate": 3.454232771808783e-06, "loss": 3.3713, "step": 36960 }, { "epoch": 0.3760274251302083, "grad_norm": 13.539546966552734, "learning_rate": 3.4538631539995575e-06, "loss": 3.4736, "step": 36965 }, { "epoch": 0.3760782877604167, "grad_norm": 13.913695335388184, "learning_rate": 3.453493511785969e-06, "loss": 3.0884, "step": 36970 }, { "epoch": 0.376129150390625, "grad_norm": 13.61619758605957, "learning_rate": 3.4531238451774746e-06, "loss": 3.2207, "step": 36975 }, { "epoch": 0.3761800130208333, "grad_norm": 8.48856258392334, "learning_rate": 3.4527541541835323e-06, "loss": 3.3079, "step": 36980 }, { "epoch": 0.3762308756510417, "grad_norm": 14.161815643310547, "learning_rate": 3.4523844388136013e-06, "loss": 3.7884, "step": 36985 }, { "epoch": 0.37628173828125, "grad_norm": 17.034425735473633, "learning_rate": 3.4520146990771395e-06, "loss": 3.52, "step": 36990 }, { "epoch": 0.3763326009114583, "grad_norm": 8.844666481018066, "learning_rate": 3.451644934983608e-06, "loss": 3.49, "step": 36995 }, { "epoch": 0.3763834635416667, "grad_norm": 9.999996185302734, "learning_rate": 3.451275146542466e-06, "loss": 3.3897, "step": 37000 }, { "epoch": 0.376434326171875, "grad_norm": 12.458309173583984, "learning_rate": 3.450905333763175e-06, "loss": 3.9959, "step": 37005 }, { "epoch": 0.3764851888020833, "grad_norm": 12.006963729858398, "learning_rate": 3.4505354966551968e-06, "loss": 3.4137, "step": 37010 }, { "epoch": 0.3765360514322917, "grad_norm": 10.976564407348633, "learning_rate": 3.450165635227992e-06, "loss": 3.3991, "step": 37015 }, { "epoch": 0.3765869140625, "grad_norm": 9.879744529724121, "learning_rate": 3.449795749491026e-06, "loss": 3.1944, "step": 37020 }, { "epoch": 0.3766377766927083, "grad_norm": 8.062317848205566, "learning_rate": 3.4494258394537604e-06, "loss": 3.489, "step": 37025 }, { "epoch": 0.3766886393229167, "grad_norm": 8.62879753112793, "learning_rate": 3.44905590512566e-06, "loss": 3.1431, "step": 37030 }, { "epoch": 0.376739501953125, "grad_norm": 10.014484405517578, "learning_rate": 3.448685946516189e-06, "loss": 3.2168, "step": 37035 }, { "epoch": 0.3767903645833333, "grad_norm": 16.322481155395508, "learning_rate": 3.4483159636348127e-06, "loss": 3.0759, "step": 37040 }, { "epoch": 0.3768412272135417, "grad_norm": 14.804939270019531, "learning_rate": 3.4479459564909974e-06, "loss": 3.7576, "step": 37045 }, { "epoch": 0.37689208984375, "grad_norm": 10.91519546508789, "learning_rate": 3.447575925094209e-06, "loss": 3.4439, "step": 37050 }, { "epoch": 0.3769429524739583, "grad_norm": 10.703877449035645, "learning_rate": 3.447205869453916e-06, "loss": 4.0554, "step": 37055 }, { "epoch": 0.3769938151041667, "grad_norm": 15.431044578552246, "learning_rate": 3.4468357895795856e-06, "loss": 3.1134, "step": 37060 }, { "epoch": 0.377044677734375, "grad_norm": 10.122700691223145, "learning_rate": 3.4464656854806843e-06, "loss": 3.2573, "step": 37065 }, { "epoch": 0.3770955403645833, "grad_norm": 12.273554801940918, "learning_rate": 3.4460955571666844e-06, "loss": 3.44, "step": 37070 }, { "epoch": 0.3771464029947917, "grad_norm": 15.437214851379395, "learning_rate": 3.4457254046470533e-06, "loss": 3.9162, "step": 37075 }, { "epoch": 0.377197265625, "grad_norm": 14.604323387145996, "learning_rate": 3.4453552279312615e-06, "loss": 3.6954, "step": 37080 }, { "epoch": 0.3772481282552083, "grad_norm": 21.16621971130371, "learning_rate": 3.444985027028781e-06, "loss": 3.8536, "step": 37085 }, { "epoch": 0.3772989908854167, "grad_norm": 16.204978942871094, "learning_rate": 3.444614801949082e-06, "loss": 3.4781, "step": 37090 }, { "epoch": 0.377349853515625, "grad_norm": 13.42151927947998, "learning_rate": 3.444244552701638e-06, "loss": 3.5459, "step": 37095 }, { "epoch": 0.3774007161458333, "grad_norm": 8.625651359558105, "learning_rate": 3.44387427929592e-06, "loss": 3.0503, "step": 37100 }, { "epoch": 0.3774515787760417, "grad_norm": 12.579015731811523, "learning_rate": 3.4435039817414025e-06, "loss": 3.4166, "step": 37105 }, { "epoch": 0.37750244140625, "grad_norm": 10.912002563476562, "learning_rate": 3.4431336600475595e-06, "loss": 2.9924, "step": 37110 }, { "epoch": 0.3775533040364583, "grad_norm": 11.0430269241333, "learning_rate": 3.442763314223865e-06, "loss": 3.471, "step": 37115 }, { "epoch": 0.3776041666666667, "grad_norm": 12.075160026550293, "learning_rate": 3.442392944279795e-06, "loss": 3.1275, "step": 37120 }, { "epoch": 0.377655029296875, "grad_norm": 13.223108291625977, "learning_rate": 3.442022550224825e-06, "loss": 3.1128, "step": 37125 }, { "epoch": 0.3777058919270833, "grad_norm": 13.981732368469238, "learning_rate": 3.4416521320684305e-06, "loss": 3.4978, "step": 37130 }, { "epoch": 0.3777567545572917, "grad_norm": 10.219934463500977, "learning_rate": 3.441281689820091e-06, "loss": 3.1208, "step": 37135 }, { "epoch": 0.3778076171875, "grad_norm": 10.667137145996094, "learning_rate": 3.440911223489281e-06, "loss": 4.0081, "step": 37140 }, { "epoch": 0.3778584798177083, "grad_norm": 9.314359664916992, "learning_rate": 3.4405407330854806e-06, "loss": 3.142, "step": 37145 }, { "epoch": 0.3779093424479167, "grad_norm": 8.450724601745605, "learning_rate": 3.440170218618169e-06, "loss": 3.5646, "step": 37150 }, { "epoch": 0.377960205078125, "grad_norm": 12.683512687683105, "learning_rate": 3.439799680096825e-06, "loss": 3.3085, "step": 37155 }, { "epoch": 0.3780110677083333, "grad_norm": 15.177215576171875, "learning_rate": 3.4394291175309284e-06, "loss": 3.2349, "step": 37160 }, { "epoch": 0.3780619303385417, "grad_norm": 10.385830879211426, "learning_rate": 3.4390585309299617e-06, "loss": 3.7784, "step": 37165 }, { "epoch": 0.37811279296875, "grad_norm": 12.019537925720215, "learning_rate": 3.438687920303404e-06, "loss": 3.3907, "step": 37170 }, { "epoch": 0.3781636555989583, "grad_norm": 21.841266632080078, "learning_rate": 3.4383172856607383e-06, "loss": 3.4745, "step": 37175 }, { "epoch": 0.3782145182291667, "grad_norm": 15.879341125488281, "learning_rate": 3.437946627011448e-06, "loss": 3.584, "step": 37180 }, { "epoch": 0.378265380859375, "grad_norm": 14.873907089233398, "learning_rate": 3.4375759443650148e-06, "loss": 3.4617, "step": 37185 }, { "epoch": 0.3783162434895833, "grad_norm": 10.92282772064209, "learning_rate": 3.4372052377309244e-06, "loss": 3.6786, "step": 37190 }, { "epoch": 0.3783671061197917, "grad_norm": 16.87035369873047, "learning_rate": 3.4368345071186594e-06, "loss": 3.4028, "step": 37195 }, { "epoch": 0.37841796875, "grad_norm": 17.945995330810547, "learning_rate": 3.436463752537706e-06, "loss": 3.4505, "step": 37200 }, { "epoch": 0.3784688313802083, "grad_norm": 10.609006881713867, "learning_rate": 3.436092973997549e-06, "loss": 3.5485, "step": 37205 }, { "epoch": 0.3785196940104167, "grad_norm": 8.49173641204834, "learning_rate": 3.4357221715076748e-06, "loss": 3.4046, "step": 37210 }, { "epoch": 0.378570556640625, "grad_norm": 10.780359268188477, "learning_rate": 3.435351345077571e-06, "loss": 2.913, "step": 37215 }, { "epoch": 0.3786214192708333, "grad_norm": 9.702098846435547, "learning_rate": 3.4349804947167253e-06, "loss": 3.3036, "step": 37220 }, { "epoch": 0.3786722819010417, "grad_norm": 12.375340461730957, "learning_rate": 3.4346096204346247e-06, "loss": 3.3027, "step": 37225 }, { "epoch": 0.37872314453125, "grad_norm": 9.697147369384766, "learning_rate": 3.434238722240759e-06, "loss": 3.4894, "step": 37230 }, { "epoch": 0.3787740071614583, "grad_norm": 12.709754943847656, "learning_rate": 3.4338678001446156e-06, "loss": 3.2872, "step": 37235 }, { "epoch": 0.3788248697916667, "grad_norm": 9.060853958129883, "learning_rate": 3.433496854155688e-06, "loss": 3.6688, "step": 37240 }, { "epoch": 0.378875732421875, "grad_norm": 11.765962600708008, "learning_rate": 3.433125884283463e-06, "loss": 2.81, "step": 37245 }, { "epoch": 0.3789265950520833, "grad_norm": 11.612247467041016, "learning_rate": 3.432754890537434e-06, "loss": 3.3576, "step": 37250 }, { "epoch": 0.3789774576822917, "grad_norm": 11.189865112304688, "learning_rate": 3.432383872927092e-06, "loss": 3.1866, "step": 37255 }, { "epoch": 0.3790283203125, "grad_norm": 8.750208854675293, "learning_rate": 3.43201283146193e-06, "loss": 3.2166, "step": 37260 }, { "epoch": 0.3790791829427083, "grad_norm": 16.319190979003906, "learning_rate": 3.43164176615144e-06, "loss": 3.3616, "step": 37265 }, { "epoch": 0.3791300455729167, "grad_norm": 12.739177703857422, "learning_rate": 3.431270677005117e-06, "loss": 3.4428, "step": 37270 }, { "epoch": 0.379180908203125, "grad_norm": 12.700593948364258, "learning_rate": 3.430899564032454e-06, "loss": 3.8315, "step": 37275 }, { "epoch": 0.3792317708333333, "grad_norm": 10.68607234954834, "learning_rate": 3.4305284272429463e-06, "loss": 3.041, "step": 37280 }, { "epoch": 0.3792826334635417, "grad_norm": 10.991312980651855, "learning_rate": 3.430157266646089e-06, "loss": 3.2114, "step": 37285 }, { "epoch": 0.37933349609375, "grad_norm": 11.34199333190918, "learning_rate": 3.4297860822513794e-06, "loss": 3.1788, "step": 37290 }, { "epoch": 0.3793843587239583, "grad_norm": 15.527979850769043, "learning_rate": 3.429414874068313e-06, "loss": 3.0974, "step": 37295 }, { "epoch": 0.3794352213541667, "grad_norm": 15.220682144165039, "learning_rate": 3.4290436421063867e-06, "loss": 3.6317, "step": 37300 }, { "epoch": 0.379486083984375, "grad_norm": 11.738930702209473, "learning_rate": 3.4286723863751004e-06, "loss": 3.5017, "step": 37305 }, { "epoch": 0.3795369466145833, "grad_norm": 11.440299034118652, "learning_rate": 3.42830110688395e-06, "loss": 3.6732, "step": 37310 }, { "epoch": 0.3795878092447917, "grad_norm": 12.166158676147461, "learning_rate": 3.4279298036424356e-06, "loss": 2.5458, "step": 37315 }, { "epoch": 0.379638671875, "grad_norm": 9.943821907043457, "learning_rate": 3.4275584766600578e-06, "loss": 3.2652, "step": 37320 }, { "epoch": 0.3796895345052083, "grad_norm": 9.365017890930176, "learning_rate": 3.4271871259463164e-06, "loss": 3.2439, "step": 37325 }, { "epoch": 0.3797403971354167, "grad_norm": 12.738382339477539, "learning_rate": 3.4268157515107113e-06, "loss": 3.2796, "step": 37330 }, { "epoch": 0.379791259765625, "grad_norm": 11.039345741271973, "learning_rate": 3.4264443533627454e-06, "loss": 3.5859, "step": 37335 }, { "epoch": 0.3798421223958333, "grad_norm": 14.100454330444336, "learning_rate": 3.4260729315119207e-06, "loss": 3.0601, "step": 37340 }, { "epoch": 0.3798929850260417, "grad_norm": 15.386163711547852, "learning_rate": 3.4257014859677397e-06, "loss": 3.2344, "step": 37345 }, { "epoch": 0.37994384765625, "grad_norm": 8.511161804199219, "learning_rate": 3.4253300167397042e-06, "loss": 3.1838, "step": 37350 }, { "epoch": 0.3799947102864583, "grad_norm": 15.614014625549316, "learning_rate": 3.4249585238373207e-06, "loss": 3.9259, "step": 37355 }, { "epoch": 0.3800455729166667, "grad_norm": 13.464808464050293, "learning_rate": 3.4245870072700927e-06, "loss": 3.2666, "step": 37360 }, { "epoch": 0.380096435546875, "grad_norm": 8.618041038513184, "learning_rate": 3.4242154670475245e-06, "loss": 3.5037, "step": 37365 }, { "epoch": 0.3801472981770833, "grad_norm": 16.455469131469727, "learning_rate": 3.423843903179123e-06, "loss": 3.7024, "step": 37370 }, { "epoch": 0.3801981608072917, "grad_norm": 7.997560501098633, "learning_rate": 3.4234723156743945e-06, "loss": 2.9579, "step": 37375 }, { "epoch": 0.3802490234375, "grad_norm": 8.839055061340332, "learning_rate": 3.423100704542845e-06, "loss": 3.616, "step": 37380 }, { "epoch": 0.3802998860677083, "grad_norm": 17.621545791625977, "learning_rate": 3.422729069793983e-06, "loss": 3.3201, "step": 37385 }, { "epoch": 0.3803507486979167, "grad_norm": 8.127358436584473, "learning_rate": 3.422357411437317e-06, "loss": 3.215, "step": 37390 }, { "epoch": 0.380401611328125, "grad_norm": 11.943425178527832, "learning_rate": 3.4219857294823546e-06, "loss": 3.4149, "step": 37395 }, { "epoch": 0.3804524739583333, "grad_norm": 10.405409812927246, "learning_rate": 3.421614023938607e-06, "loss": 2.9753, "step": 37400 }, { "epoch": 0.3805033365885417, "grad_norm": 8.548178672790527, "learning_rate": 3.4212422948155816e-06, "loss": 3.5656, "step": 37405 }, { "epoch": 0.38055419921875, "grad_norm": 12.904085159301758, "learning_rate": 3.420870542122791e-06, "loss": 3.1404, "step": 37410 }, { "epoch": 0.3806050618489583, "grad_norm": 11.220914840698242, "learning_rate": 3.420498765869746e-06, "loss": 3.2931, "step": 37415 }, { "epoch": 0.3806559244791667, "grad_norm": 9.857573509216309, "learning_rate": 3.420126966065958e-06, "loss": 3.2885, "step": 37420 }, { "epoch": 0.380706787109375, "grad_norm": 10.07679557800293, "learning_rate": 3.41975514272094e-06, "loss": 3.2386, "step": 37425 }, { "epoch": 0.3807576497395833, "grad_norm": 14.047000885009766, "learning_rate": 3.419383295844204e-06, "loss": 3.236, "step": 37430 }, { "epoch": 0.3808085123697917, "grad_norm": 9.781455039978027, "learning_rate": 3.4190114254452654e-06, "loss": 3.4674, "step": 37435 }, { "epoch": 0.380859375, "grad_norm": 17.191829681396484, "learning_rate": 3.4186395315336364e-06, "loss": 3.4247, "step": 37440 }, { "epoch": 0.3809102376302083, "grad_norm": 8.731918334960938, "learning_rate": 3.4182676141188332e-06, "loss": 3.1671, "step": 37445 }, { "epoch": 0.3809611002604167, "grad_norm": 12.978816032409668, "learning_rate": 3.4178956732103706e-06, "loss": 3.4202, "step": 37450 }, { "epoch": 0.381011962890625, "grad_norm": 12.776073455810547, "learning_rate": 3.4175237088177645e-06, "loss": 3.2907, "step": 37455 }, { "epoch": 0.3810628255208333, "grad_norm": 17.695144653320312, "learning_rate": 3.417151720950532e-06, "loss": 3.5898, "step": 37460 }, { "epoch": 0.3811136881510417, "grad_norm": 10.285888671875, "learning_rate": 3.4167797096181905e-06, "loss": 3.3763, "step": 37465 }, { "epoch": 0.38116455078125, "grad_norm": 10.863819122314453, "learning_rate": 3.4164076748302567e-06, "loss": 3.5098, "step": 37470 }, { "epoch": 0.3812154134114583, "grad_norm": 11.196011543273926, "learning_rate": 3.4160356165962506e-06, "loss": 3.2739, "step": 37475 }, { "epoch": 0.3812662760416667, "grad_norm": 11.469472885131836, "learning_rate": 3.4156635349256895e-06, "loss": 2.9974, "step": 37480 }, { "epoch": 0.381317138671875, "grad_norm": 10.920792579650879, "learning_rate": 3.4152914298280944e-06, "loss": 3.9813, "step": 37485 }, { "epoch": 0.3813680013020833, "grad_norm": 10.986191749572754, "learning_rate": 3.414919301312985e-06, "loss": 3.4946, "step": 37490 }, { "epoch": 0.3814188639322917, "grad_norm": 17.591659545898438, "learning_rate": 3.414547149389882e-06, "loss": 3.3492, "step": 37495 }, { "epoch": 0.3814697265625, "grad_norm": 9.149885177612305, "learning_rate": 3.4141749740683068e-06, "loss": 3.3342, "step": 37500 }, { "epoch": 0.3815205891927083, "grad_norm": 8.332978248596191, "learning_rate": 3.413802775357782e-06, "loss": 3.405, "step": 37505 }, { "epoch": 0.3815714518229167, "grad_norm": 7.789477825164795, "learning_rate": 3.4134305532678293e-06, "loss": 3.8016, "step": 37510 }, { "epoch": 0.381622314453125, "grad_norm": 9.647196769714355, "learning_rate": 3.4130583078079725e-06, "loss": 2.9892, "step": 37515 }, { "epoch": 0.3816731770833333, "grad_norm": 9.158622741699219, "learning_rate": 3.412686038987735e-06, "loss": 3.3382, "step": 37520 }, { "epoch": 0.3817240397135417, "grad_norm": 8.998952865600586, "learning_rate": 3.4123137468166427e-06, "loss": 3.0108, "step": 37525 }, { "epoch": 0.38177490234375, "grad_norm": 13.929523468017578, "learning_rate": 3.4119414313042186e-06, "loss": 3.0273, "step": 37530 }, { "epoch": 0.3818257649739583, "grad_norm": 10.859432220458984, "learning_rate": 3.411569092459989e-06, "loss": 3.6175, "step": 37535 }, { "epoch": 0.3818766276041667, "grad_norm": 8.946311950683594, "learning_rate": 3.4111967302934807e-06, "loss": 3.3618, "step": 37540 }, { "epoch": 0.381927490234375, "grad_norm": 14.10853385925293, "learning_rate": 3.4108243448142196e-06, "loss": 3.1185, "step": 37545 }, { "epoch": 0.3819783528645833, "grad_norm": 11.832138061523438, "learning_rate": 3.410451936031734e-06, "loss": 3.5672, "step": 37550 }, { "epoch": 0.3820292154947917, "grad_norm": 12.631633758544922, "learning_rate": 3.410079503955552e-06, "loss": 3.3899, "step": 37555 }, { "epoch": 0.382080078125, "grad_norm": 9.619515419006348, "learning_rate": 3.4097070485951998e-06, "loss": 3.1777, "step": 37560 }, { "epoch": 0.3821309407552083, "grad_norm": 11.570428848266602, "learning_rate": 3.4093345699602097e-06, "loss": 3.4874, "step": 37565 }, { "epoch": 0.3821818033854167, "grad_norm": 15.203789710998535, "learning_rate": 3.408962068060109e-06, "loss": 3.653, "step": 37570 }, { "epoch": 0.382232666015625, "grad_norm": 14.011837005615234, "learning_rate": 3.4085895429044303e-06, "loss": 3.3553, "step": 37575 }, { "epoch": 0.3822835286458333, "grad_norm": 15.710648536682129, "learning_rate": 3.4082169945027037e-06, "loss": 3.8274, "step": 37580 }, { "epoch": 0.3823343912760417, "grad_norm": 13.160861015319824, "learning_rate": 3.4078444228644596e-06, "loss": 3.4303, "step": 37585 }, { "epoch": 0.38238525390625, "grad_norm": 12.528409957885742, "learning_rate": 3.4074718279992324e-06, "loss": 3.1339, "step": 37590 }, { "epoch": 0.3824361165364583, "grad_norm": 15.585745811462402, "learning_rate": 3.4070992099165523e-06, "loss": 3.173, "step": 37595 }, { "epoch": 0.3824869791666667, "grad_norm": 14.537068367004395, "learning_rate": 3.406726568625955e-06, "loss": 3.5707, "step": 37600 }, { "epoch": 0.382537841796875, "grad_norm": 14.80423641204834, "learning_rate": 3.4063539041369726e-06, "loss": 3.3544, "step": 37605 }, { "epoch": 0.3825887044270833, "grad_norm": 14.486916542053223, "learning_rate": 3.4059812164591407e-06, "loss": 3.2886, "step": 37610 }, { "epoch": 0.3826395670572917, "grad_norm": 10.272647857666016, "learning_rate": 3.405608505601994e-06, "loss": 3.2059, "step": 37615 }, { "epoch": 0.3826904296875, "grad_norm": 14.196993827819824, "learning_rate": 3.4052357715750683e-06, "loss": 3.7612, "step": 37620 }, { "epoch": 0.3827412923177083, "grad_norm": 11.729535102844238, "learning_rate": 3.4048630143878997e-06, "loss": 3.5948, "step": 37625 }, { "epoch": 0.3827921549479167, "grad_norm": 12.782881736755371, "learning_rate": 3.4044902340500263e-06, "loss": 3.5323, "step": 37630 }, { "epoch": 0.382843017578125, "grad_norm": 13.800341606140137, "learning_rate": 3.4041174305709833e-06, "loss": 3.7534, "step": 37635 }, { "epoch": 0.3828938802083333, "grad_norm": 9.593366622924805, "learning_rate": 3.403744603960311e-06, "loss": 3.3979, "step": 37640 }, { "epoch": 0.3829447428385417, "grad_norm": 13.039984703063965, "learning_rate": 3.4033717542275466e-06, "loss": 3.4147, "step": 37645 }, { "epoch": 0.38299560546875, "grad_norm": 11.21808910369873, "learning_rate": 3.402998881382231e-06, "loss": 3.0617, "step": 37650 }, { "epoch": 0.3830464680989583, "grad_norm": 14.062593460083008, "learning_rate": 3.4026259854339023e-06, "loss": 3.0885, "step": 37655 }, { "epoch": 0.3830973307291667, "grad_norm": 8.29125690460205, "learning_rate": 3.4022530663921015e-06, "loss": 3.4393, "step": 37660 }, { "epoch": 0.383148193359375, "grad_norm": 14.169401168823242, "learning_rate": 3.4018801242663706e-06, "loss": 3.2393, "step": 37665 }, { "epoch": 0.3831990559895833, "grad_norm": 8.234171867370605, "learning_rate": 3.40150715906625e-06, "loss": 3.3952, "step": 37670 }, { "epoch": 0.3832499186197917, "grad_norm": 11.920029640197754, "learning_rate": 3.4011341708012826e-06, "loss": 3.3447, "step": 37675 }, { "epoch": 0.38330078125, "grad_norm": 12.649847984313965, "learning_rate": 3.400761159481011e-06, "loss": 3.7396, "step": 37680 }, { "epoch": 0.3833516438802083, "grad_norm": 12.721214294433594, "learning_rate": 3.400388125114979e-06, "loss": 3.185, "step": 37685 }, { "epoch": 0.3834025065104167, "grad_norm": 13.129537582397461, "learning_rate": 3.4000150677127297e-06, "loss": 3.4912, "step": 37690 }, { "epoch": 0.383453369140625, "grad_norm": 15.017121315002441, "learning_rate": 3.399641987283808e-06, "loss": 3.5474, "step": 37695 }, { "epoch": 0.3835042317708333, "grad_norm": 7.627927303314209, "learning_rate": 3.39926888383776e-06, "loss": 2.946, "step": 37700 }, { "epoch": 0.3835550944010417, "grad_norm": 10.144737243652344, "learning_rate": 3.3988957573841306e-06, "loss": 3.4099, "step": 37705 }, { "epoch": 0.38360595703125, "grad_norm": 15.387688636779785, "learning_rate": 3.398522607932466e-06, "loss": 3.6402, "step": 37710 }, { "epoch": 0.3836568196614583, "grad_norm": 11.039338111877441, "learning_rate": 3.3981494354923137e-06, "loss": 3.7447, "step": 37715 }, { "epoch": 0.3837076822916667, "grad_norm": 11.357863426208496, "learning_rate": 3.3977762400732206e-06, "loss": 3.3448, "step": 37720 }, { "epoch": 0.383758544921875, "grad_norm": 13.65638542175293, "learning_rate": 3.397403021684735e-06, "loss": 2.9614, "step": 37725 }, { "epoch": 0.3838094075520833, "grad_norm": 10.85644245147705, "learning_rate": 3.3970297803364066e-06, "loss": 3.4445, "step": 37730 }, { "epoch": 0.3838602701822917, "grad_norm": 8.943359375, "learning_rate": 3.396656516037784e-06, "loss": 3.6558, "step": 37735 }, { "epoch": 0.3839111328125, "grad_norm": 10.290979385375977, "learning_rate": 3.396283228798416e-06, "loss": 3.4049, "step": 37740 }, { "epoch": 0.3839619954427083, "grad_norm": 14.245318412780762, "learning_rate": 3.3959099186278553e-06, "loss": 3.4897, "step": 37745 }, { "epoch": 0.3840128580729167, "grad_norm": 12.544392585754395, "learning_rate": 3.395536585535651e-06, "loss": 3.0121, "step": 37750 }, { "epoch": 0.384063720703125, "grad_norm": 8.912129402160645, "learning_rate": 3.3951632295313552e-06, "loss": 2.9532, "step": 37755 }, { "epoch": 0.3841145833333333, "grad_norm": 11.526346206665039, "learning_rate": 3.3947898506245203e-06, "loss": 3.4343, "step": 37760 }, { "epoch": 0.3841654459635417, "grad_norm": 8.159797668457031, "learning_rate": 3.3944164488246992e-06, "loss": 3.1769, "step": 37765 }, { "epoch": 0.38421630859375, "grad_norm": 13.55785083770752, "learning_rate": 3.394043024141446e-06, "loss": 3.3025, "step": 37770 }, { "epoch": 0.3842671712239583, "grad_norm": 14.68910026550293, "learning_rate": 3.393669576584313e-06, "loss": 3.4664, "step": 37775 }, { "epoch": 0.3843180338541667, "grad_norm": 16.233421325683594, "learning_rate": 3.393296106162856e-06, "loss": 3.3517, "step": 37780 }, { "epoch": 0.384368896484375, "grad_norm": 10.414290428161621, "learning_rate": 3.39292261288663e-06, "loss": 3.6864, "step": 37785 }, { "epoch": 0.3844197591145833, "grad_norm": 8.684490203857422, "learning_rate": 3.3925490967651904e-06, "loss": 3.3804, "step": 37790 }, { "epoch": 0.3844706217447917, "grad_norm": 14.901695251464844, "learning_rate": 3.392175557808094e-06, "loss": 3.2678, "step": 37795 }, { "epoch": 0.384521484375, "grad_norm": 8.012540817260742, "learning_rate": 3.3918019960248976e-06, "loss": 3.3453, "step": 37800 }, { "epoch": 0.3845723470052083, "grad_norm": 14.347160339355469, "learning_rate": 3.3914284114251573e-06, "loss": 3.6749, "step": 37805 }, { "epoch": 0.3846232096354167, "grad_norm": 11.914584159851074, "learning_rate": 3.391054804018434e-06, "loss": 3.6538, "step": 37810 }, { "epoch": 0.384674072265625, "grad_norm": 12.325974464416504, "learning_rate": 3.3906811738142843e-06, "loss": 3.7795, "step": 37815 }, { "epoch": 0.3847249348958333, "grad_norm": 11.537654876708984, "learning_rate": 3.3903075208222667e-06, "loss": 3.1268, "step": 37820 }, { "epoch": 0.3847757975260417, "grad_norm": 13.890557289123535, "learning_rate": 3.3899338450519437e-06, "loss": 3.4425, "step": 37825 }, { "epoch": 0.38482666015625, "grad_norm": 22.127164840698242, "learning_rate": 3.3895601465128736e-06, "loss": 3.7079, "step": 37830 }, { "epoch": 0.3848775227864583, "grad_norm": 11.531108856201172, "learning_rate": 3.3891864252146177e-06, "loss": 3.5737, "step": 37835 }, { "epoch": 0.3849283854166667, "grad_norm": 14.390570640563965, "learning_rate": 3.3888126811667387e-06, "loss": 3.6822, "step": 37840 }, { "epoch": 0.384979248046875, "grad_norm": 9.560125350952148, "learning_rate": 3.3884389143787965e-06, "loss": 3.6045, "step": 37845 }, { "epoch": 0.3850301106770833, "grad_norm": 15.163629531860352, "learning_rate": 3.3880651248603567e-06, "loss": 3.3025, "step": 37850 }, { "epoch": 0.3850809733072917, "grad_norm": 15.606112480163574, "learning_rate": 3.387691312620981e-06, "loss": 3.3638, "step": 37855 }, { "epoch": 0.3851318359375, "grad_norm": 13.837167739868164, "learning_rate": 3.387317477670233e-06, "loss": 3.6738, "step": 37860 }, { "epoch": 0.3851826985677083, "grad_norm": 9.533172607421875, "learning_rate": 3.3869436200176775e-06, "loss": 3.3599, "step": 37865 }, { "epoch": 0.3852335611979167, "grad_norm": 13.04584789276123, "learning_rate": 3.3865697396728795e-06, "loss": 3.492, "step": 37870 }, { "epoch": 0.385284423828125, "grad_norm": 10.076404571533203, "learning_rate": 3.386195836645406e-06, "loss": 3.3765, "step": 37875 }, { "epoch": 0.3853352864583333, "grad_norm": 12.413851737976074, "learning_rate": 3.385821910944821e-06, "loss": 3.2374, "step": 37880 }, { "epoch": 0.3853861490885417, "grad_norm": 8.019109725952148, "learning_rate": 3.385447962580693e-06, "loss": 3.2756, "step": 37885 }, { "epoch": 0.38543701171875, "grad_norm": 7.4843645095825195, "learning_rate": 3.3850739915625885e-06, "loss": 3.2692, "step": 37890 }, { "epoch": 0.3854878743489583, "grad_norm": 14.0797119140625, "learning_rate": 3.3846999979000757e-06, "loss": 3.5322, "step": 37895 }, { "epoch": 0.3855387369791667, "grad_norm": 11.294448852539062, "learning_rate": 3.384325981602723e-06, "loss": 3.4381, "step": 37900 }, { "epoch": 0.385589599609375, "grad_norm": 17.284252166748047, "learning_rate": 3.3839519426801006e-06, "loss": 3.3795, "step": 37905 }, { "epoch": 0.3856404622395833, "grad_norm": 11.139725685119629, "learning_rate": 3.3835778811417757e-06, "loss": 3.2291, "step": 37910 }, { "epoch": 0.3856913248697917, "grad_norm": 14.060380935668945, "learning_rate": 3.3832037969973214e-06, "loss": 3.5328, "step": 37915 }, { "epoch": 0.3857421875, "grad_norm": 16.855520248413086, "learning_rate": 3.3828296902563066e-06, "loss": 3.7078, "step": 37920 }, { "epoch": 0.3857930501302083, "grad_norm": 9.740677833557129, "learning_rate": 3.382455560928304e-06, "loss": 3.6188, "step": 37925 }, { "epoch": 0.3858439127604167, "grad_norm": 11.402836799621582, "learning_rate": 3.3820814090228847e-06, "loss": 3.5231, "step": 37930 }, { "epoch": 0.385894775390625, "grad_norm": 20.03525733947754, "learning_rate": 3.3817072345496214e-06, "loss": 3.5392, "step": 37935 }, { "epoch": 0.3859456380208333, "grad_norm": 10.353108406066895, "learning_rate": 3.3813330375180873e-06, "loss": 3.2455, "step": 37940 }, { "epoch": 0.3859965006510417, "grad_norm": 14.113561630249023, "learning_rate": 3.3809588179378577e-06, "loss": 3.0992, "step": 37945 }, { "epoch": 0.38604736328125, "grad_norm": 9.764810562133789, "learning_rate": 3.380584575818504e-06, "loss": 3.2733, "step": 37950 }, { "epoch": 0.3860982259114583, "grad_norm": 9.683736801147461, "learning_rate": 3.380210311169604e-06, "loss": 3.3936, "step": 37955 }, { "epoch": 0.3861490885416667, "grad_norm": 7.924754619598389, "learning_rate": 3.3798360240007306e-06, "loss": 3.3648, "step": 37960 }, { "epoch": 0.386199951171875, "grad_norm": 11.664175987243652, "learning_rate": 3.3794617143214615e-06, "loss": 3.2296, "step": 37965 }, { "epoch": 0.3862508138020833, "grad_norm": 8.792074203491211, "learning_rate": 3.3790873821413733e-06, "loss": 3.8841, "step": 37970 }, { "epoch": 0.3863016764322917, "grad_norm": 14.745179176330566, "learning_rate": 3.3787130274700417e-06, "loss": 3.5543, "step": 37975 }, { "epoch": 0.3863525390625, "grad_norm": 10.410104751586914, "learning_rate": 3.3783386503170467e-06, "loss": 3.6948, "step": 37980 }, { "epoch": 0.3864034016927083, "grad_norm": 10.549935340881348, "learning_rate": 3.377964250691965e-06, "loss": 3.4288, "step": 37985 }, { "epoch": 0.3864542643229167, "grad_norm": 10.103965759277344, "learning_rate": 3.3775898286043762e-06, "loss": 3.451, "step": 37990 }, { "epoch": 0.386505126953125, "grad_norm": 12.432733535766602, "learning_rate": 3.37721538406386e-06, "loss": 3.3372, "step": 37995 }, { "epoch": 0.3865559895833333, "grad_norm": 10.565135955810547, "learning_rate": 3.3768409170799956e-06, "loss": 3.3918, "step": 38000 }, { "epoch": 0.3866068522135417, "grad_norm": 10.372321128845215, "learning_rate": 3.376466427662364e-06, "loss": 3.3803, "step": 38005 }, { "epoch": 0.38665771484375, "grad_norm": 11.90112590789795, "learning_rate": 3.3760919158205472e-06, "loss": 3.3195, "step": 38010 }, { "epoch": 0.3867085774739583, "grad_norm": 8.72597885131836, "learning_rate": 3.375717381564126e-06, "loss": 3.4056, "step": 38015 }, { "epoch": 0.3867594401041667, "grad_norm": 12.27939510345459, "learning_rate": 3.375342824902684e-06, "loss": 3.9223, "step": 38020 }, { "epoch": 0.386810302734375, "grad_norm": 13.502927780151367, "learning_rate": 3.3749682458458016e-06, "loss": 2.9973, "step": 38025 }, { "epoch": 0.3868611653645833, "grad_norm": 14.647866249084473, "learning_rate": 3.374593644403066e-06, "loss": 3.3111, "step": 38030 }, { "epoch": 0.3869120279947917, "grad_norm": 8.057086944580078, "learning_rate": 3.3742190205840574e-06, "loss": 3.7146, "step": 38035 }, { "epoch": 0.386962890625, "grad_norm": 14.464299201965332, "learning_rate": 3.373844374398364e-06, "loss": 3.6537, "step": 38040 }, { "epoch": 0.3870137532552083, "grad_norm": 8.134767532348633, "learning_rate": 3.373469705855568e-06, "loss": 3.5302, "step": 38045 }, { "epoch": 0.3870646158854167, "grad_norm": 12.267831802368164, "learning_rate": 3.3730950149652574e-06, "loss": 3.4322, "step": 38050 }, { "epoch": 0.387115478515625, "grad_norm": 14.318890571594238, "learning_rate": 3.372720301737018e-06, "loss": 3.4095, "step": 38055 }, { "epoch": 0.3871663411458333, "grad_norm": 11.825246810913086, "learning_rate": 3.3723455661804364e-06, "loss": 3.3964, "step": 38060 }, { "epoch": 0.3872172037760417, "grad_norm": 10.063560485839844, "learning_rate": 3.3719708083050996e-06, "loss": 3.2893, "step": 38065 }, { "epoch": 0.38726806640625, "grad_norm": 12.584623336791992, "learning_rate": 3.3715960281205968e-06, "loss": 3.6519, "step": 38070 }, { "epoch": 0.3873189290364583, "grad_norm": 9.536698341369629, "learning_rate": 3.3712212256365167e-06, "loss": 3.3757, "step": 38075 }, { "epoch": 0.3873697916666667, "grad_norm": 15.417890548706055, "learning_rate": 3.370846400862447e-06, "loss": 3.301, "step": 38080 }, { "epoch": 0.387420654296875, "grad_norm": 10.475431442260742, "learning_rate": 3.3704715538079794e-06, "loss": 3.2111, "step": 38085 }, { "epoch": 0.3874715169270833, "grad_norm": 2325.23974609375, "learning_rate": 3.3700966844827033e-06, "loss": 5.4233, "step": 38090 }, { "epoch": 0.3875223795572917, "grad_norm": 11.278244018554688, "learning_rate": 3.3697217928962095e-06, "loss": 3.4104, "step": 38095 }, { "epoch": 0.3875732421875, "grad_norm": 12.78177261352539, "learning_rate": 3.3693468790580895e-06, "loss": 3.5219, "step": 38100 }, { "epoch": 0.3876241048177083, "grad_norm": 12.451904296875, "learning_rate": 3.3689719429779367e-06, "loss": 3.0766, "step": 38105 }, { "epoch": 0.3876749674479167, "grad_norm": 13.55923843383789, "learning_rate": 3.3685969846653417e-06, "loss": 3.3094, "step": 38110 }, { "epoch": 0.387725830078125, "grad_norm": 15.302332878112793, "learning_rate": 3.3682220041298993e-06, "loss": 3.0365, "step": 38115 }, { "epoch": 0.3877766927083333, "grad_norm": 8.71083927154541, "learning_rate": 3.367847001381202e-06, "loss": 3.2149, "step": 38120 }, { "epoch": 0.3878275553385417, "grad_norm": 11.540759086608887, "learning_rate": 3.367471976428846e-06, "loss": 3.2459, "step": 38125 }, { "epoch": 0.38787841796875, "grad_norm": 12.014065742492676, "learning_rate": 3.367096929282424e-06, "loss": 3.3404, "step": 38130 }, { "epoch": 0.3879292805989583, "grad_norm": 14.558552742004395, "learning_rate": 3.366721859951533e-06, "loss": 3.2887, "step": 38135 }, { "epoch": 0.3879801432291667, "grad_norm": 10.725698471069336, "learning_rate": 3.366346768445769e-06, "loss": 3.5935, "step": 38140 }, { "epoch": 0.388031005859375, "grad_norm": 10.478405952453613, "learning_rate": 3.365971654774728e-06, "loss": 3.1538, "step": 38145 }, { "epoch": 0.3880818684895833, "grad_norm": 9.388585090637207, "learning_rate": 3.365596518948007e-06, "loss": 3.4627, "step": 38150 }, { "epoch": 0.3881327311197917, "grad_norm": 9.345610618591309, "learning_rate": 3.365221360975205e-06, "loss": 3.3876, "step": 38155 }, { "epoch": 0.38818359375, "grad_norm": 15.602376937866211, "learning_rate": 3.3648461808659194e-06, "loss": 3.1752, "step": 38160 }, { "epoch": 0.3882344563802083, "grad_norm": 8.027948379516602, "learning_rate": 3.364470978629749e-06, "loss": 3.1229, "step": 38165 }, { "epoch": 0.3882853190104167, "grad_norm": 16.314498901367188, "learning_rate": 3.364095754276293e-06, "loss": 3.73, "step": 38170 }, { "epoch": 0.388336181640625, "grad_norm": 8.504803657531738, "learning_rate": 3.363720507815153e-06, "loss": 3.3806, "step": 38175 }, { "epoch": 0.3883870442708333, "grad_norm": 16.09746551513672, "learning_rate": 3.363345239255928e-06, "loss": 3.6046, "step": 38180 }, { "epoch": 0.3884379069010417, "grad_norm": 10.618088722229004, "learning_rate": 3.3629699486082197e-06, "loss": 3.0786, "step": 38185 }, { "epoch": 0.38848876953125, "grad_norm": 10.036343574523926, "learning_rate": 3.3625946358816303e-06, "loss": 2.9537, "step": 38190 }, { "epoch": 0.3885396321614583, "grad_norm": 10.493426322937012, "learning_rate": 3.3622193010857606e-06, "loss": 3.3826, "step": 38195 }, { "epoch": 0.3885904947916667, "grad_norm": 13.192093849182129, "learning_rate": 3.3618439442302154e-06, "loss": 3.2187, "step": 38200 }, { "epoch": 0.388641357421875, "grad_norm": 15.833111763000488, "learning_rate": 3.361468565324597e-06, "loss": 3.2154, "step": 38205 }, { "epoch": 0.3886922200520833, "grad_norm": 12.631217002868652, "learning_rate": 3.3610931643785095e-06, "loss": 3.4353, "step": 38210 }, { "epoch": 0.3887430826822917, "grad_norm": 13.263562202453613, "learning_rate": 3.360717741401557e-06, "loss": 3.3342, "step": 38215 }, { "epoch": 0.3887939453125, "grad_norm": 9.171459197998047, "learning_rate": 3.3603422964033456e-06, "loss": 3.9951, "step": 38220 }, { "epoch": 0.3888448079427083, "grad_norm": 11.791070938110352, "learning_rate": 3.35996682939348e-06, "loss": 3.0921, "step": 38225 }, { "epoch": 0.3888956705729167, "grad_norm": 13.00941276550293, "learning_rate": 3.359591340381568e-06, "loss": 3.9208, "step": 38230 }, { "epoch": 0.388946533203125, "grad_norm": 8.12760066986084, "learning_rate": 3.3592158293772135e-06, "loss": 3.1266, "step": 38235 }, { "epoch": 0.3889973958333333, "grad_norm": 9.810503005981445, "learning_rate": 3.3588402963900273e-06, "loss": 3.2701, "step": 38240 }, { "epoch": 0.3890482584635417, "grad_norm": 9.685894012451172, "learning_rate": 3.358464741429615e-06, "loss": 3.4047, "step": 38245 }, { "epoch": 0.38909912109375, "grad_norm": 9.390931129455566, "learning_rate": 3.3580891645055857e-06, "loss": 3.1132, "step": 38250 }, { "epoch": 0.3891499837239583, "grad_norm": 13.093478202819824, "learning_rate": 3.3577135656275485e-06, "loss": 3.3899, "step": 38255 }, { "epoch": 0.3892008463541667, "grad_norm": 10.473499298095703, "learning_rate": 3.3573379448051125e-06, "loss": 3.2168, "step": 38260 }, { "epoch": 0.389251708984375, "grad_norm": 15.70926570892334, "learning_rate": 3.3569623020478896e-06, "loss": 3.515, "step": 38265 }, { "epoch": 0.3893025716145833, "grad_norm": 14.697281837463379, "learning_rate": 3.3565866373654887e-06, "loss": 3.3272, "step": 38270 }, { "epoch": 0.3893534342447917, "grad_norm": 9.951302528381348, "learning_rate": 3.356210950767522e-06, "loss": 3.1649, "step": 38275 }, { "epoch": 0.389404296875, "grad_norm": 9.453201293945312, "learning_rate": 3.3558352422636007e-06, "loss": 3.5697, "step": 38280 }, { "epoch": 0.3894551595052083, "grad_norm": 14.826644897460938, "learning_rate": 3.3554595118633376e-06, "loss": 3.1434, "step": 38285 }, { "epoch": 0.3895060221354167, "grad_norm": 14.816740036010742, "learning_rate": 3.3550837595763457e-06, "loss": 3.0265, "step": 38290 }, { "epoch": 0.389556884765625, "grad_norm": 9.352396965026855, "learning_rate": 3.354707985412239e-06, "loss": 3.5541, "step": 38295 }, { "epoch": 0.3896077473958333, "grad_norm": 14.234426498413086, "learning_rate": 3.3543321893806303e-06, "loss": 3.2591, "step": 38300 }, { "epoch": 0.3896586100260417, "grad_norm": 13.746682167053223, "learning_rate": 3.353956371491136e-06, "loss": 3.3658, "step": 38305 }, { "epoch": 0.38970947265625, "grad_norm": 13.630725860595703, "learning_rate": 3.35358053175337e-06, "loss": 3.3378, "step": 38310 }, { "epoch": 0.3897603352864583, "grad_norm": 9.117375373840332, "learning_rate": 3.353204670176948e-06, "loss": 3.5115, "step": 38315 }, { "epoch": 0.3898111979166667, "grad_norm": 13.624043464660645, "learning_rate": 3.3528287867714864e-06, "loss": 3.3602, "step": 38320 }, { "epoch": 0.389862060546875, "grad_norm": 8.452689170837402, "learning_rate": 3.352452881546603e-06, "loss": 3.6228, "step": 38325 }, { "epoch": 0.3899129231770833, "grad_norm": 11.07059097290039, "learning_rate": 3.3520769545119146e-06, "loss": 3.1787, "step": 38330 }, { "epoch": 0.3899637858072917, "grad_norm": 14.2334623336792, "learning_rate": 3.3517010056770395e-06, "loss": 3.2317, "step": 38335 }, { "epoch": 0.3900146484375, "grad_norm": 11.949200630187988, "learning_rate": 3.3513250350515956e-06, "loss": 3.1173, "step": 38340 }, { "epoch": 0.3900655110677083, "grad_norm": 40.111392974853516, "learning_rate": 3.350949042645203e-06, "loss": 3.4921, "step": 38345 }, { "epoch": 0.3901163736979167, "grad_norm": 14.802889823913574, "learning_rate": 3.35057302846748e-06, "loss": 3.3277, "step": 38350 }, { "epoch": 0.390167236328125, "grad_norm": 9.277702331542969, "learning_rate": 3.3501969925280486e-06, "loss": 3.4958, "step": 38355 }, { "epoch": 0.3902180989583333, "grad_norm": 15.449729919433594, "learning_rate": 3.3498209348365285e-06, "loss": 3.4463, "step": 38360 }, { "epoch": 0.3902689615885417, "grad_norm": 15.135710716247559, "learning_rate": 3.3494448554025404e-06, "loss": 3.4774, "step": 38365 }, { "epoch": 0.39031982421875, "grad_norm": 9.1742582321167, "learning_rate": 3.3490687542357083e-06, "loss": 3.5518, "step": 38370 }, { "epoch": 0.3903706868489583, "grad_norm": 14.67941665649414, "learning_rate": 3.3486926313456524e-06, "loss": 3.5348, "step": 38375 }, { "epoch": 0.3904215494791667, "grad_norm": 9.221536636352539, "learning_rate": 3.348316486741997e-06, "loss": 3.2008, "step": 38380 }, { "epoch": 0.390472412109375, "grad_norm": 10.60205078125, "learning_rate": 3.347940320434365e-06, "loss": 3.1944, "step": 38385 }, { "epoch": 0.3905232747395833, "grad_norm": 8.800256729125977, "learning_rate": 3.3475641324323814e-06, "loss": 3.3865, "step": 38390 }, { "epoch": 0.3905741373697917, "grad_norm": 7.432771682739258, "learning_rate": 3.3471879227456706e-06, "loss": 3.5595, "step": 38395 }, { "epoch": 0.390625, "grad_norm": 14.499105453491211, "learning_rate": 3.346811691383857e-06, "loss": 3.3196, "step": 38400 }, { "epoch": 0.3906758626302083, "grad_norm": 12.427467346191406, "learning_rate": 3.346435438356567e-06, "loss": 3.555, "step": 38405 }, { "epoch": 0.3907267252604167, "grad_norm": 13.272066116333008, "learning_rate": 3.3460591636734274e-06, "loss": 3.0785, "step": 38410 }, { "epoch": 0.390777587890625, "grad_norm": 10.83601188659668, "learning_rate": 3.3456828673440637e-06, "loss": 3.5042, "step": 38415 }, { "epoch": 0.3908284505208333, "grad_norm": 9.56989860534668, "learning_rate": 3.3453065493781057e-06, "loss": 3.2477, "step": 38420 }, { "epoch": 0.3908793131510417, "grad_norm": 9.048479080200195, "learning_rate": 3.344930209785179e-06, "loss": 3.6054, "step": 38425 }, { "epoch": 0.39093017578125, "grad_norm": 9.921485900878906, "learning_rate": 3.3445538485749135e-06, "loss": 3.2071, "step": 38430 }, { "epoch": 0.3909810384114583, "grad_norm": 13.206910133361816, "learning_rate": 3.3441774657569374e-06, "loss": 3.3644, "step": 38435 }, { "epoch": 0.3910319010416667, "grad_norm": 10.711992263793945, "learning_rate": 3.3438010613408816e-06, "loss": 3.0718, "step": 38440 }, { "epoch": 0.391082763671875, "grad_norm": 8.891669273376465, "learning_rate": 3.343424635336375e-06, "loss": 3.4491, "step": 38445 }, { "epoch": 0.3911336263020833, "grad_norm": 16.935821533203125, "learning_rate": 3.343048187753051e-06, "loss": 3.6171, "step": 38450 }, { "epoch": 0.3911844889322917, "grad_norm": 13.522112846374512, "learning_rate": 3.3426717186005365e-06, "loss": 3.6182, "step": 38455 }, { "epoch": 0.3912353515625, "grad_norm": 12.0574369430542, "learning_rate": 3.3422952278884663e-06, "loss": 3.3017, "step": 38460 }, { "epoch": 0.3912862141927083, "grad_norm": 10.464619636535645, "learning_rate": 3.341918715626473e-06, "loss": 3.3591, "step": 38465 }, { "epoch": 0.3913370768229167, "grad_norm": 9.480717658996582, "learning_rate": 3.341542181824188e-06, "loss": 3.2459, "step": 38470 }, { "epoch": 0.391387939453125, "grad_norm": 8.027229309082031, "learning_rate": 3.3411656264912473e-06, "loss": 3.4763, "step": 38475 }, { "epoch": 0.3914388020833333, "grad_norm": 12.631207466125488, "learning_rate": 3.340789049637282e-06, "loss": 3.5192, "step": 38480 }, { "epoch": 0.3914896647135417, "grad_norm": 8.793301582336426, "learning_rate": 3.3404124512719295e-06, "loss": 3.0144, "step": 38485 }, { "epoch": 0.39154052734375, "grad_norm": 12.1557035446167, "learning_rate": 3.3400358314048227e-06, "loss": 3.5435, "step": 38490 }, { "epoch": 0.3915913899739583, "grad_norm": 14.589640617370605, "learning_rate": 3.3396591900455983e-06, "loss": 3.5327, "step": 38495 }, { "epoch": 0.3916422526041667, "grad_norm": 9.701825141906738, "learning_rate": 3.3392825272038925e-06, "loss": 2.9686, "step": 38500 }, { "epoch": 0.391693115234375, "grad_norm": 15.59730052947998, "learning_rate": 3.3389058428893424e-06, "loss": 3.3883, "step": 38505 }, { "epoch": 0.3917439778645833, "grad_norm": 9.84763240814209, "learning_rate": 3.3385291371115847e-06, "loss": 3.1096, "step": 38510 }, { "epoch": 0.3917948404947917, "grad_norm": 11.552181243896484, "learning_rate": 3.3381524098802588e-06, "loss": 3.4595, "step": 38515 }, { "epoch": 0.391845703125, "grad_norm": 12.711458206176758, "learning_rate": 3.3377756612050003e-06, "loss": 3.2141, "step": 38520 }, { "epoch": 0.3918965657552083, "grad_norm": 11.764037132263184, "learning_rate": 3.3373988910954525e-06, "loss": 3.6333, "step": 38525 }, { "epoch": 0.3919474283854167, "grad_norm": 13.153989791870117, "learning_rate": 3.337022099561251e-06, "loss": 3.3329, "step": 38530 }, { "epoch": 0.391998291015625, "grad_norm": 13.215889930725098, "learning_rate": 3.336645286612038e-06, "loss": 3.3511, "step": 38535 }, { "epoch": 0.3920491536458333, "grad_norm": 10.890106201171875, "learning_rate": 3.3362684522574536e-06, "loss": 3.1375, "step": 38540 }, { "epoch": 0.3921000162760417, "grad_norm": 12.711638450622559, "learning_rate": 3.3358915965071387e-06, "loss": 3.1453, "step": 38545 }, { "epoch": 0.39215087890625, "grad_norm": 13.96977424621582, "learning_rate": 3.3355147193707355e-06, "loss": 3.4909, "step": 38550 }, { "epoch": 0.3922017415364583, "grad_norm": 8.59538745880127, "learning_rate": 3.3351378208578864e-06, "loss": 3.1961, "step": 38555 }, { "epoch": 0.3922526041666667, "grad_norm": 14.170539855957031, "learning_rate": 3.334760900978234e-06, "loss": 3.1656, "step": 38560 }, { "epoch": 0.392303466796875, "grad_norm": 16.233667373657227, "learning_rate": 3.3343839597414216e-06, "loss": 3.0222, "step": 38565 }, { "epoch": 0.3923543294270833, "grad_norm": 13.056116104125977, "learning_rate": 3.3340069971570937e-06, "loss": 3.082, "step": 38570 }, { "epoch": 0.3924051920572917, "grad_norm": 10.929540634155273, "learning_rate": 3.333630013234894e-06, "loss": 3.2129, "step": 38575 }, { "epoch": 0.3924560546875, "grad_norm": 13.191286087036133, "learning_rate": 3.3332530079844694e-06, "loss": 3.2389, "step": 38580 }, { "epoch": 0.3925069173177083, "grad_norm": 12.56567096710205, "learning_rate": 3.3328759814154628e-06, "loss": 3.3224, "step": 38585 }, { "epoch": 0.3925577799479167, "grad_norm": 9.680841445922852, "learning_rate": 3.3324989335375226e-06, "loss": 3.4063, "step": 38590 }, { "epoch": 0.392608642578125, "grad_norm": 14.00575065612793, "learning_rate": 3.332121864360294e-06, "loss": 3.1624, "step": 38595 }, { "epoch": 0.3926595052083333, "grad_norm": 10.88829517364502, "learning_rate": 3.331744773893424e-06, "loss": 2.9704, "step": 38600 }, { "epoch": 0.3927103678385417, "grad_norm": 9.82624626159668, "learning_rate": 3.331367662146562e-06, "loss": 3.2943, "step": 38605 }, { "epoch": 0.39276123046875, "grad_norm": 9.934083938598633, "learning_rate": 3.330990529129355e-06, "loss": 3.341, "step": 38610 }, { "epoch": 0.3928120930989583, "grad_norm": 9.311182022094727, "learning_rate": 3.3306133748514523e-06, "loss": 3.3888, "step": 38615 }, { "epoch": 0.3928629557291667, "grad_norm": 12.890077590942383, "learning_rate": 3.3302361993225036e-06, "loss": 3.4668, "step": 38620 }, { "epoch": 0.392913818359375, "grad_norm": 11.402129173278809, "learning_rate": 3.3298590025521577e-06, "loss": 3.4034, "step": 38625 }, { "epoch": 0.3929646809895833, "grad_norm": 12.979676246643066, "learning_rate": 3.329481784550067e-06, "loss": 3.4632, "step": 38630 }, { "epoch": 0.3930155436197917, "grad_norm": 11.994227409362793, "learning_rate": 3.329104545325881e-06, "loss": 3.5591, "step": 38635 }, { "epoch": 0.39306640625, "grad_norm": 8.088932037353516, "learning_rate": 3.3287272848892515e-06, "loss": 3.4373, "step": 38640 }, { "epoch": 0.3931172688802083, "grad_norm": 9.991019248962402, "learning_rate": 3.3283500032498313e-06, "loss": 3.1668, "step": 38645 }, { "epoch": 0.3931681315104167, "grad_norm": 8.292196273803711, "learning_rate": 3.3279727004172724e-06, "loss": 3.2774, "step": 38650 }, { "epoch": 0.393218994140625, "grad_norm": 14.125115394592285, "learning_rate": 3.327595376401228e-06, "loss": 3.1344, "step": 38655 }, { "epoch": 0.3932698567708333, "grad_norm": 16.854019165039062, "learning_rate": 3.327218031211352e-06, "loss": 4.2892, "step": 38660 }, { "epoch": 0.3933207194010417, "grad_norm": 16.030033111572266, "learning_rate": 3.326840664857299e-06, "loss": 3.4319, "step": 38665 }, { "epoch": 0.39337158203125, "grad_norm": 11.797090530395508, "learning_rate": 3.326463277348724e-06, "loss": 3.4787, "step": 38670 }, { "epoch": 0.3934224446614583, "grad_norm": 15.557062149047852, "learning_rate": 3.3260858686952813e-06, "loss": 3.2515, "step": 38675 }, { "epoch": 0.3934733072916667, "grad_norm": 11.602935791015625, "learning_rate": 3.3257084389066273e-06, "loss": 3.1548, "step": 38680 }, { "epoch": 0.393524169921875, "grad_norm": 11.777176856994629, "learning_rate": 3.3253309879924194e-06, "loss": 3.424, "step": 38685 }, { "epoch": 0.3935750325520833, "grad_norm": 11.584993362426758, "learning_rate": 3.3249535159623133e-06, "loss": 3.413, "step": 38690 }, { "epoch": 0.3936258951822917, "grad_norm": 13.79917049407959, "learning_rate": 3.324576022825967e-06, "loss": 3.3521, "step": 38695 }, { "epoch": 0.3936767578125, "grad_norm": 10.56005573272705, "learning_rate": 3.324198508593039e-06, "loss": 3.0865, "step": 38700 }, { "epoch": 0.3937276204427083, "grad_norm": 17.659931182861328, "learning_rate": 3.3238209732731868e-06, "loss": 3.2109, "step": 38705 }, { "epoch": 0.3937784830729167, "grad_norm": 11.138640403747559, "learning_rate": 3.323443416876071e-06, "loss": 3.1011, "step": 38710 }, { "epoch": 0.393829345703125, "grad_norm": 9.119121551513672, "learning_rate": 3.3230658394113504e-06, "loss": 3.2088, "step": 38715 }, { "epoch": 0.3938802083333333, "grad_norm": 10.155940055847168, "learning_rate": 3.3226882408886846e-06, "loss": 3.3344, "step": 38720 }, { "epoch": 0.3939310709635417, "grad_norm": 11.100317001342773, "learning_rate": 3.322310621317736e-06, "loss": 3.4379, "step": 38725 }, { "epoch": 0.39398193359375, "grad_norm": 11.67934799194336, "learning_rate": 3.3219329807081645e-06, "loss": 3.1253, "step": 38730 }, { "epoch": 0.3940327962239583, "grad_norm": 11.354928970336914, "learning_rate": 3.3215553190696336e-06, "loss": 2.928, "step": 38735 }, { "epoch": 0.3940836588541667, "grad_norm": 9.212844848632812, "learning_rate": 3.321177636411803e-06, "loss": 3.7323, "step": 38740 }, { "epoch": 0.394134521484375, "grad_norm": 14.865660667419434, "learning_rate": 3.3207999327443385e-06, "loss": 3.1197, "step": 38745 }, { "epoch": 0.3941853841145833, "grad_norm": 15.50028133392334, "learning_rate": 3.320422208076902e-06, "loss": 3.1172, "step": 38750 }, { "epoch": 0.3942362467447917, "grad_norm": 10.393682479858398, "learning_rate": 3.320044462419157e-06, "loss": 3.2642, "step": 38755 }, { "epoch": 0.394287109375, "grad_norm": 12.6064453125, "learning_rate": 3.31966669578077e-06, "loss": 3.602, "step": 38760 }, { "epoch": 0.3943379720052083, "grad_norm": 15.318078994750977, "learning_rate": 3.319288908171404e-06, "loss": 3.5894, "step": 38765 }, { "epoch": 0.3943888346354167, "grad_norm": 15.770649909973145, "learning_rate": 3.3189110996007257e-06, "loss": 3.8763, "step": 38770 }, { "epoch": 0.394439697265625, "grad_norm": 8.988329887390137, "learning_rate": 3.318533270078401e-06, "loss": 3.4983, "step": 38775 }, { "epoch": 0.3944905598958333, "grad_norm": 18.160795211791992, "learning_rate": 3.3181554196140964e-06, "loss": 3.348, "step": 38780 }, { "epoch": 0.3945414225260417, "grad_norm": 13.910605430603027, "learning_rate": 3.3177775482174796e-06, "loss": 3.4063, "step": 38785 }, { "epoch": 0.39459228515625, "grad_norm": 14.50535774230957, "learning_rate": 3.3173996558982186e-06, "loss": 3.5568, "step": 38790 }, { "epoch": 0.3946431477864583, "grad_norm": 10.57275676727295, "learning_rate": 3.31702174266598e-06, "loss": 3.222, "step": 38795 }, { "epoch": 0.3946940104166667, "grad_norm": 9.792989730834961, "learning_rate": 3.3166438085304347e-06, "loss": 3.3537, "step": 38800 }, { "epoch": 0.394744873046875, "grad_norm": 15.056622505187988, "learning_rate": 3.31626585350125e-06, "loss": 3.378, "step": 38805 }, { "epoch": 0.3947957356770833, "grad_norm": 10.857216835021973, "learning_rate": 3.315887877588098e-06, "loss": 3.4722, "step": 38810 }, { "epoch": 0.3948465983072917, "grad_norm": 11.502994537353516, "learning_rate": 3.3155098808006483e-06, "loss": 3.6144, "step": 38815 }, { "epoch": 0.3948974609375, "grad_norm": 14.918729782104492, "learning_rate": 3.3151318631485707e-06, "loss": 3.3524, "step": 38820 }, { "epoch": 0.3949483235677083, "grad_norm": 11.880370140075684, "learning_rate": 3.314753824641538e-06, "loss": 3.5212, "step": 38825 }, { "epoch": 0.3949991861979167, "grad_norm": 12.545670509338379, "learning_rate": 3.3143757652892216e-06, "loss": 3.5019, "step": 38830 }, { "epoch": 0.395050048828125, "grad_norm": 13.899107933044434, "learning_rate": 3.313997685101294e-06, "loss": 3.0761, "step": 38835 }, { "epoch": 0.3951009114583333, "grad_norm": 14.468853950500488, "learning_rate": 3.3136195840874297e-06, "loss": 3.463, "step": 38840 }, { "epoch": 0.3951517740885417, "grad_norm": 9.901703834533691, "learning_rate": 3.3132414622572995e-06, "loss": 3.1865, "step": 38845 }, { "epoch": 0.39520263671875, "grad_norm": 14.128046035766602, "learning_rate": 3.3128633196205805e-06, "loss": 3.4865, "step": 38850 }, { "epoch": 0.3952534993489583, "grad_norm": 8.942483901977539, "learning_rate": 3.3124851561869465e-06, "loss": 3.3604, "step": 38855 }, { "epoch": 0.3953043619791667, "grad_norm": 12.734345436096191, "learning_rate": 3.3121069719660707e-06, "loss": 3.6297, "step": 38860 }, { "epoch": 0.395355224609375, "grad_norm": 12.513358116149902, "learning_rate": 3.3117287669676317e-06, "loss": 3.4542, "step": 38865 }, { "epoch": 0.3954060872395833, "grad_norm": 8.752370834350586, "learning_rate": 3.311350541201304e-06, "loss": 3.3082, "step": 38870 }, { "epoch": 0.3954569498697917, "grad_norm": 9.757655143737793, "learning_rate": 3.310972294676766e-06, "loss": 3.2355, "step": 38875 }, { "epoch": 0.3955078125, "grad_norm": 10.152702331542969, "learning_rate": 3.3105940274036935e-06, "loss": 3.3853, "step": 38880 }, { "epoch": 0.3955586751302083, "grad_norm": 8.71654224395752, "learning_rate": 3.310215739391765e-06, "loss": 3.4702, "step": 38885 }, { "epoch": 0.3956095377604167, "grad_norm": 11.565936088562012, "learning_rate": 3.3098374306506586e-06, "loss": 3.125, "step": 38890 }, { "epoch": 0.395660400390625, "grad_norm": 13.354435920715332, "learning_rate": 3.3094591011900535e-06, "loss": 3.4371, "step": 38895 }, { "epoch": 0.3957112630208333, "grad_norm": 8.289600372314453, "learning_rate": 3.3090807510196295e-06, "loss": 3.2972, "step": 38900 }, { "epoch": 0.3957621256510417, "grad_norm": 12.343297958374023, "learning_rate": 3.3087023801490663e-06, "loss": 3.1386, "step": 38905 }, { "epoch": 0.39581298828125, "grad_norm": 16.656522750854492, "learning_rate": 3.3083239885880437e-06, "loss": 3.5522, "step": 38910 }, { "epoch": 0.3958638509114583, "grad_norm": 16.437965393066406, "learning_rate": 3.307945576346244e-06, "loss": 3.3625, "step": 38915 }, { "epoch": 0.3959147135416667, "grad_norm": 12.688868522644043, "learning_rate": 3.3075671434333482e-06, "loss": 3.1965, "step": 38920 }, { "epoch": 0.395965576171875, "grad_norm": 13.657536506652832, "learning_rate": 3.307188689859038e-06, "loss": 3.2702, "step": 38925 }, { "epoch": 0.3960164388020833, "grad_norm": 13.055109024047852, "learning_rate": 3.306810215632997e-06, "loss": 3.6169, "step": 38930 }, { "epoch": 0.3960673014322917, "grad_norm": 18.273826599121094, "learning_rate": 3.3064317207649077e-06, "loss": 3.5969, "step": 38935 }, { "epoch": 0.3961181640625, "grad_norm": 15.294970512390137, "learning_rate": 3.3060532052644547e-06, "loss": 3.4694, "step": 38940 }, { "epoch": 0.3961690266927083, "grad_norm": 12.5772705078125, "learning_rate": 3.305674669141321e-06, "loss": 3.4484, "step": 38945 }, { "epoch": 0.3962198893229167, "grad_norm": 9.832720756530762, "learning_rate": 3.3052961124051907e-06, "loss": 3.0731, "step": 38950 }, { "epoch": 0.396270751953125, "grad_norm": 12.782217979431152, "learning_rate": 3.3049175350657514e-06, "loss": 3.1778, "step": 38955 }, { "epoch": 0.3963216145833333, "grad_norm": 12.149210929870605, "learning_rate": 3.3045389371326873e-06, "loss": 3.3827, "step": 38960 }, { "epoch": 0.3963724772135417, "grad_norm": 12.495834350585938, "learning_rate": 3.3041603186156857e-06, "loss": 3.4491, "step": 38965 }, { "epoch": 0.39642333984375, "grad_norm": 13.705297470092773, "learning_rate": 3.303781679524433e-06, "loss": 3.9191, "step": 38970 }, { "epoch": 0.3964742024739583, "grad_norm": 12.02988052368164, "learning_rate": 3.303403019868616e-06, "loss": 3.2583, "step": 38975 }, { "epoch": 0.3965250651041667, "grad_norm": 12.529438018798828, "learning_rate": 3.303024339657924e-06, "loss": 3.1721, "step": 38980 }, { "epoch": 0.396575927734375, "grad_norm": 15.870917320251465, "learning_rate": 3.302645638902043e-06, "loss": 3.3295, "step": 38985 }, { "epoch": 0.3966267903645833, "grad_norm": 13.028214454650879, "learning_rate": 3.3022669176106653e-06, "loss": 3.4199, "step": 38990 }, { "epoch": 0.3966776529947917, "grad_norm": 10.765739440917969, "learning_rate": 3.3018881757934777e-06, "loss": 3.3773, "step": 38995 }, { "epoch": 0.396728515625, "grad_norm": 9.587928771972656, "learning_rate": 3.3015094134601716e-06, "loss": 3.2503, "step": 39000 }, { "epoch": 0.3967793782552083, "grad_norm": 14.970799446105957, "learning_rate": 3.301130630620437e-06, "loss": 3.3718, "step": 39005 }, { "epoch": 0.3968302408854167, "grad_norm": 11.802124977111816, "learning_rate": 3.3007518272839658e-06, "loss": 3.3806, "step": 39010 }, { "epoch": 0.396881103515625, "grad_norm": 11.562552452087402, "learning_rate": 3.3003730034604477e-06, "loss": 3.2105, "step": 39015 }, { "epoch": 0.3969319661458333, "grad_norm": 13.692420959472656, "learning_rate": 3.299994159159576e-06, "loss": 3.4629, "step": 39020 }, { "epoch": 0.3969828287760417, "grad_norm": 13.56282901763916, "learning_rate": 3.299615294391044e-06, "loss": 3.5262, "step": 39025 }, { "epoch": 0.39703369140625, "grad_norm": 17.018476486206055, "learning_rate": 3.2992364091645446e-06, "loss": 3.5014, "step": 39030 }, { "epoch": 0.3970845540364583, "grad_norm": 12.96693229675293, "learning_rate": 3.298857503489771e-06, "loss": 3.194, "step": 39035 }, { "epoch": 0.3971354166666667, "grad_norm": 13.554923057556152, "learning_rate": 3.2984785773764167e-06, "loss": 3.2144, "step": 39040 }, { "epoch": 0.397186279296875, "grad_norm": 10.257043838500977, "learning_rate": 3.2980996308341775e-06, "loss": 3.3742, "step": 39045 }, { "epoch": 0.3972371419270833, "grad_norm": 9.593994140625, "learning_rate": 3.2977206638727487e-06, "loss": 3.6419, "step": 39050 }, { "epoch": 0.3972880045572917, "grad_norm": 6.843266487121582, "learning_rate": 3.2973416765018257e-06, "loss": 3.053, "step": 39055 }, { "epoch": 0.3973388671875, "grad_norm": 7.151871204376221, "learning_rate": 3.2969626687311047e-06, "loss": 3.7464, "step": 39060 }, { "epoch": 0.3973897298177083, "grad_norm": 9.020061492919922, "learning_rate": 3.296583640570283e-06, "loss": 3.4251, "step": 39065 }, { "epoch": 0.3974405924479167, "grad_norm": 10.482080459594727, "learning_rate": 3.2962045920290575e-06, "loss": 3.5422, "step": 39070 }, { "epoch": 0.397491455078125, "grad_norm": 10.591201782226562, "learning_rate": 3.2958255231171267e-06, "loss": 3.4915, "step": 39075 }, { "epoch": 0.3975423177083333, "grad_norm": 15.000960350036621, "learning_rate": 3.2954464338441874e-06, "loss": 3.0752, "step": 39080 }, { "epoch": 0.3975931803385417, "grad_norm": 9.86584186553955, "learning_rate": 3.295067324219941e-06, "loss": 3.3665, "step": 39085 }, { "epoch": 0.39764404296875, "grad_norm": 15.093664169311523, "learning_rate": 3.2946881942540846e-06, "loss": 3.1153, "step": 39090 }, { "epoch": 0.3976949055989583, "grad_norm": 13.898468971252441, "learning_rate": 3.2943090439563195e-06, "loss": 3.3152, "step": 39095 }, { "epoch": 0.3977457682291667, "grad_norm": 9.757120132446289, "learning_rate": 3.2939298733363455e-06, "loss": 3.2708, "step": 39100 }, { "epoch": 0.397796630859375, "grad_norm": 7.900053977966309, "learning_rate": 3.293550682403864e-06, "loss": 3.2929, "step": 39105 }, { "epoch": 0.3978474934895833, "grad_norm": 11.337242126464844, "learning_rate": 3.293171471168577e-06, "loss": 3.3876, "step": 39110 }, { "epoch": 0.3978983561197917, "grad_norm": 12.0416259765625, "learning_rate": 3.2927922396401853e-06, "loss": 3.1044, "step": 39115 }, { "epoch": 0.39794921875, "grad_norm": 7.893319606781006, "learning_rate": 3.2924129878283917e-06, "loss": 3.9901, "step": 39120 }, { "epoch": 0.3980000813802083, "grad_norm": 15.375621795654297, "learning_rate": 3.2920337157429004e-06, "loss": 3.1633, "step": 39125 }, { "epoch": 0.3980509440104167, "grad_norm": 7.960724830627441, "learning_rate": 3.291654423393414e-06, "loss": 3.4319, "step": 39130 }, { "epoch": 0.398101806640625, "grad_norm": 15.423291206359863, "learning_rate": 3.291275110789637e-06, "loss": 3.1555, "step": 39135 }, { "epoch": 0.3981526692708333, "grad_norm": 14.006745338439941, "learning_rate": 3.290895777941273e-06, "loss": 3.1331, "step": 39140 }, { "epoch": 0.3982035319010417, "grad_norm": 13.815712928771973, "learning_rate": 3.2905164248580287e-06, "loss": 3.5096, "step": 39145 }, { "epoch": 0.39825439453125, "grad_norm": 12.66348934173584, "learning_rate": 3.290137051549609e-06, "loss": 2.9672, "step": 39150 }, { "epoch": 0.3983052571614583, "grad_norm": 8.546638488769531, "learning_rate": 3.28975765802572e-06, "loss": 3.4663, "step": 39155 }, { "epoch": 0.3983561197916667, "grad_norm": 13.081283569335938, "learning_rate": 3.2893782442960686e-06, "loss": 3.091, "step": 39160 }, { "epoch": 0.398406982421875, "grad_norm": 9.0906343460083, "learning_rate": 3.2889988103703617e-06, "loss": 3.0275, "step": 39165 }, { "epoch": 0.3984578450520833, "grad_norm": 9.618579864501953, "learning_rate": 3.2886193562583074e-06, "loss": 3.4742, "step": 39170 }, { "epoch": 0.3985087076822917, "grad_norm": 11.26836109161377, "learning_rate": 3.2882398819696137e-06, "loss": 3.328, "step": 39175 }, { "epoch": 0.3985595703125, "grad_norm": 12.071304321289062, "learning_rate": 3.2878603875139896e-06, "loss": 3.5502, "step": 39180 }, { "epoch": 0.3986104329427083, "grad_norm": 11.873283386230469, "learning_rate": 3.2874808729011443e-06, "loss": 3.2419, "step": 39185 }, { "epoch": 0.3986612955729167, "grad_norm": 12.063312530517578, "learning_rate": 3.2871013381407877e-06, "loss": 3.1923, "step": 39190 }, { "epoch": 0.398712158203125, "grad_norm": 15.63031005859375, "learning_rate": 3.286721783242629e-06, "loss": 3.3086, "step": 39195 }, { "epoch": 0.3987630208333333, "grad_norm": 15.412569046020508, "learning_rate": 3.286342208216381e-06, "loss": 3.2832, "step": 39200 }, { "epoch": 0.3988138834635417, "grad_norm": 10.493462562561035, "learning_rate": 3.2859626130717536e-06, "loss": 3.7159, "step": 39205 }, { "epoch": 0.39886474609375, "grad_norm": 11.053747177124023, "learning_rate": 3.2855829978184587e-06, "loss": 3.5785, "step": 39210 }, { "epoch": 0.3989156087239583, "grad_norm": 12.584538459777832, "learning_rate": 3.2852033624662095e-06, "loss": 3.4814, "step": 39215 }, { "epoch": 0.3989664713541667, "grad_norm": 13.197242736816406, "learning_rate": 3.284823707024718e-06, "loss": 3.036, "step": 39220 }, { "epoch": 0.399017333984375, "grad_norm": 11.060696601867676, "learning_rate": 3.284444031503698e-06, "loss": 3.4832, "step": 39225 }, { "epoch": 0.3990681966145833, "grad_norm": 11.496437072753906, "learning_rate": 3.2840643359128643e-06, "loss": 3.4278, "step": 39230 }, { "epoch": 0.3991190592447917, "grad_norm": 15.989913940429688, "learning_rate": 3.283684620261929e-06, "loss": 3.6831, "step": 39235 }, { "epoch": 0.399169921875, "grad_norm": 12.756202697753906, "learning_rate": 3.2833048845606095e-06, "loss": 3.4123, "step": 39240 }, { "epoch": 0.3992207845052083, "grad_norm": 12.794174194335938, "learning_rate": 3.2829251288186197e-06, "loss": 3.0151, "step": 39245 }, { "epoch": 0.3992716471354167, "grad_norm": 9.944336891174316, "learning_rate": 3.282545353045676e-06, "loss": 3.6058, "step": 39250 }, { "epoch": 0.399322509765625, "grad_norm": 12.509191513061523, "learning_rate": 3.2821655572514955e-06, "loss": 3.5381, "step": 39255 }, { "epoch": 0.3993733723958333, "grad_norm": 7.6840009689331055, "learning_rate": 3.2817857414457934e-06, "loss": 3.1112, "step": 39260 }, { "epoch": 0.3994242350260417, "grad_norm": 12.166910171508789, "learning_rate": 3.2814059056382895e-06, "loss": 3.7905, "step": 39265 }, { "epoch": 0.39947509765625, "grad_norm": 11.381367683410645, "learning_rate": 3.2810260498387002e-06, "loss": 3.2597, "step": 39270 }, { "epoch": 0.3995259602864583, "grad_norm": 9.968490600585938, "learning_rate": 3.2806461740567453e-06, "loss": 3.4514, "step": 39275 }, { "epoch": 0.3995768229166667, "grad_norm": 14.34542179107666, "learning_rate": 3.280266278302142e-06, "loss": 3.2793, "step": 39280 }, { "epoch": 0.399627685546875, "grad_norm": 11.94149398803711, "learning_rate": 3.2798863625846116e-06, "loss": 3.2478, "step": 39285 }, { "epoch": 0.3996785481770833, "grad_norm": 15.4173583984375, "learning_rate": 3.279506426913873e-06, "loss": 3.3845, "step": 39290 }, { "epoch": 0.3997294108072917, "grad_norm": 11.536433219909668, "learning_rate": 3.2791264712996486e-06, "loss": 3.4988, "step": 39295 }, { "epoch": 0.3997802734375, "grad_norm": 15.47469425201416, "learning_rate": 3.2787464957516556e-06, "loss": 3.614, "step": 39300 }, { "epoch": 0.3998311360677083, "grad_norm": 13.07068920135498, "learning_rate": 3.2783665002796207e-06, "loss": 3.2102, "step": 39305 }, { "epoch": 0.3998819986979167, "grad_norm": 11.186180114746094, "learning_rate": 3.2779864848932618e-06, "loss": 3.178, "step": 39310 }, { "epoch": 0.399932861328125, "grad_norm": 14.144645690917969, "learning_rate": 3.2776064496023037e-06, "loss": 3.5366, "step": 39315 }, { "epoch": 0.3999837239583333, "grad_norm": 13.992406845092773, "learning_rate": 3.2772263944164683e-06, "loss": 3.2436, "step": 39320 }, { "epoch": 0.4000345865885417, "grad_norm": 17.365379333496094, "learning_rate": 3.2768463193454803e-06, "loss": 3.3642, "step": 39325 }, { "epoch": 0.40008544921875, "grad_norm": 9.210827827453613, "learning_rate": 3.2764662243990632e-06, "loss": 3.5888, "step": 39330 }, { "epoch": 0.4001363118489583, "grad_norm": 14.269044876098633, "learning_rate": 3.276086109586942e-06, "loss": 3.056, "step": 39335 }, { "epoch": 0.4001871744791667, "grad_norm": 13.896188735961914, "learning_rate": 3.2757059749188415e-06, "loss": 3.7036, "step": 39340 }, { "epoch": 0.400238037109375, "grad_norm": 12.715867042541504, "learning_rate": 3.2753258204044873e-06, "loss": 3.6218, "step": 39345 }, { "epoch": 0.4002888997395833, "grad_norm": 8.956153869628906, "learning_rate": 3.2749456460536057e-06, "loss": 3.3922, "step": 39350 }, { "epoch": 0.4003397623697917, "grad_norm": 14.240581512451172, "learning_rate": 3.274565451875924e-06, "loss": 3.332, "step": 39355 }, { "epoch": 0.400390625, "grad_norm": 15.196061134338379, "learning_rate": 3.274185237881169e-06, "loss": 3.2174, "step": 39360 }, { "epoch": 0.4004414876302083, "grad_norm": 17.01357650756836, "learning_rate": 3.273805004079067e-06, "loss": 3.5295, "step": 39365 }, { "epoch": 0.4004923502604167, "grad_norm": 11.21593189239502, "learning_rate": 3.2734247504793483e-06, "loss": 3.1045, "step": 39370 }, { "epoch": 0.400543212890625, "grad_norm": 9.288373947143555, "learning_rate": 3.27304447709174e-06, "loss": 3.4651, "step": 39375 }, { "epoch": 0.4005940755208333, "grad_norm": 7.687575340270996, "learning_rate": 3.272664183925973e-06, "loss": 3.4994, "step": 39380 }, { "epoch": 0.4006449381510417, "grad_norm": 13.036829948425293, "learning_rate": 3.2722838709917753e-06, "loss": 3.1886, "step": 39385 }, { "epoch": 0.40069580078125, "grad_norm": 8.436564445495605, "learning_rate": 3.271903538298878e-06, "loss": 3.1957, "step": 39390 }, { "epoch": 0.4007466634114583, "grad_norm": 11.310314178466797, "learning_rate": 3.271523185857011e-06, "loss": 3.4303, "step": 39395 }, { "epoch": 0.4007975260416667, "grad_norm": 14.260161399841309, "learning_rate": 3.271142813675908e-06, "loss": 3.6256, "step": 39400 }, { "epoch": 0.400848388671875, "grad_norm": 9.424277305603027, "learning_rate": 3.270762421765297e-06, "loss": 3.5836, "step": 39405 }, { "epoch": 0.4008992513020833, "grad_norm": 6.394208908081055, "learning_rate": 3.2703820101349127e-06, "loss": 3.3019, "step": 39410 }, { "epoch": 0.4009501139322917, "grad_norm": 14.603628158569336, "learning_rate": 3.2700015787944865e-06, "loss": 2.9622, "step": 39415 }, { "epoch": 0.4010009765625, "grad_norm": 10.907510757446289, "learning_rate": 3.2696211277537535e-06, "loss": 3.8089, "step": 39420 }, { "epoch": 0.4010518391927083, "grad_norm": 14.512213706970215, "learning_rate": 3.2692406570224468e-06, "loss": 3.1383, "step": 39425 }, { "epoch": 0.4011027018229167, "grad_norm": 13.748353958129883, "learning_rate": 3.2688601666102994e-06, "loss": 3.3665, "step": 39430 }, { "epoch": 0.401153564453125, "grad_norm": 14.942314147949219, "learning_rate": 3.2684796565270464e-06, "loss": 3.3018, "step": 39435 }, { "epoch": 0.4012044270833333, "grad_norm": 15.140890121459961, "learning_rate": 3.2680991267824248e-06, "loss": 3.581, "step": 39440 }, { "epoch": 0.4012552897135417, "grad_norm": 17.58057975769043, "learning_rate": 3.267718577386168e-06, "loss": 3.0269, "step": 39445 }, { "epoch": 0.40130615234375, "grad_norm": 10.288013458251953, "learning_rate": 3.267338008348014e-06, "loss": 3.4707, "step": 39450 }, { "epoch": 0.4013570149739583, "grad_norm": 15.400893211364746, "learning_rate": 3.266957419677699e-06, "loss": 3.4214, "step": 39455 }, { "epoch": 0.4014078776041667, "grad_norm": 13.282986640930176, "learning_rate": 3.2665768113849596e-06, "loss": 3.3755, "step": 39460 }, { "epoch": 0.401458740234375, "grad_norm": 15.696666717529297, "learning_rate": 3.2661961834795346e-06, "loss": 3.48, "step": 39465 }, { "epoch": 0.4015096028645833, "grad_norm": 9.888688087463379, "learning_rate": 3.2658155359711612e-06, "loss": 3.4776, "step": 39470 }, { "epoch": 0.4015604654947917, "grad_norm": 13.198280334472656, "learning_rate": 3.26543486886958e-06, "loss": 3.1052, "step": 39475 }, { "epoch": 0.401611328125, "grad_norm": 8.186661720275879, "learning_rate": 3.265054182184528e-06, "loss": 3.5331, "step": 39480 }, { "epoch": 0.4016621907552083, "grad_norm": 10.0850191116333, "learning_rate": 3.264673475925747e-06, "loss": 3.302, "step": 39485 }, { "epoch": 0.4017130533854167, "grad_norm": 9.62065601348877, "learning_rate": 3.264292750102976e-06, "loss": 2.9363, "step": 39490 }, { "epoch": 0.401763916015625, "grad_norm": 12.95029354095459, "learning_rate": 3.263912004725956e-06, "loss": 3.5954, "step": 39495 }, { "epoch": 0.4018147786458333, "grad_norm": 14.583548545837402, "learning_rate": 3.263531239804428e-06, "loss": 3.3027, "step": 39500 }, { "epoch": 0.4018656412760417, "grad_norm": 9.015926361083984, "learning_rate": 3.263150455348135e-06, "loss": 3.0706, "step": 39505 }, { "epoch": 0.40191650390625, "grad_norm": 14.476874351501465, "learning_rate": 3.2627696513668182e-06, "loss": 3.4707, "step": 39510 }, { "epoch": 0.4019673665364583, "grad_norm": 9.141477584838867, "learning_rate": 3.2623888278702214e-06, "loss": 3.8334, "step": 39515 }, { "epoch": 0.4020182291666667, "grad_norm": 11.498889923095703, "learning_rate": 3.262007984868086e-06, "loss": 3.3327, "step": 39520 }, { "epoch": 0.402069091796875, "grad_norm": 10.279153823852539, "learning_rate": 3.261627122370158e-06, "loss": 3.6671, "step": 39525 }, { "epoch": 0.4021199544270833, "grad_norm": 6.8709211349487305, "learning_rate": 3.26124624038618e-06, "loss": 3.0967, "step": 39530 }, { "epoch": 0.4021708170572917, "grad_norm": 10.445243835449219, "learning_rate": 3.260865338925897e-06, "loss": 3.2211, "step": 39535 }, { "epoch": 0.4022216796875, "grad_norm": 11.800230979919434, "learning_rate": 3.2604844179990547e-06, "loss": 3.1659, "step": 39540 }, { "epoch": 0.4022725423177083, "grad_norm": 8.571924209594727, "learning_rate": 3.2601034776153997e-06, "loss": 2.9351, "step": 39545 }, { "epoch": 0.4023234049479167, "grad_norm": 10.99386215209961, "learning_rate": 3.2597225177846762e-06, "loss": 3.0938, "step": 39550 }, { "epoch": 0.402374267578125, "grad_norm": 14.340719223022461, "learning_rate": 3.259341538516633e-06, "loss": 3.4287, "step": 39555 }, { "epoch": 0.4024251302083333, "grad_norm": 15.162818908691406, "learning_rate": 3.258960539821016e-06, "loss": 3.4843, "step": 39560 }, { "epoch": 0.4024759928385417, "grad_norm": 10.90530014038086, "learning_rate": 3.258579521707574e-06, "loss": 3.1177, "step": 39565 }, { "epoch": 0.40252685546875, "grad_norm": 15.255401611328125, "learning_rate": 3.258198484186054e-06, "loss": 3.0077, "step": 39570 }, { "epoch": 0.4025777180989583, "grad_norm": 15.258075714111328, "learning_rate": 3.257817427266206e-06, "loss": 3.1873, "step": 39575 }, { "epoch": 0.4026285807291667, "grad_norm": 11.011824607849121, "learning_rate": 3.2574363509577794e-06, "loss": 3.7077, "step": 39580 }, { "epoch": 0.402679443359375, "grad_norm": 12.97339153289795, "learning_rate": 3.257055255270522e-06, "loss": 3.1655, "step": 39585 }, { "epoch": 0.4027303059895833, "grad_norm": 10.90396499633789, "learning_rate": 3.2566741402141866e-06, "loss": 3.3544, "step": 39590 }, { "epoch": 0.4027811686197917, "grad_norm": 16.23613929748535, "learning_rate": 3.256293005798522e-06, "loss": 3.1684, "step": 39595 }, { "epoch": 0.40283203125, "grad_norm": 13.79626750946045, "learning_rate": 3.25591185203328e-06, "loss": 3.4085, "step": 39600 }, { "epoch": 0.4028828938802083, "grad_norm": 15.524015426635742, "learning_rate": 3.2555306789282127e-06, "loss": 3.5288, "step": 39605 }, { "epoch": 0.4029337565104167, "grad_norm": 12.625886917114258, "learning_rate": 3.2551494864930716e-06, "loss": 3.1274, "step": 39610 }, { "epoch": 0.402984619140625, "grad_norm": 10.071491241455078, "learning_rate": 3.2547682747376103e-06, "loss": 3.1743, "step": 39615 }, { "epoch": 0.4030354817708333, "grad_norm": 8.225013732910156, "learning_rate": 3.2543870436715818e-06, "loss": 3.1853, "step": 39620 }, { "epoch": 0.4030863444010417, "grad_norm": 14.447437286376953, "learning_rate": 3.2540057933047386e-06, "loss": 4.0105, "step": 39625 }, { "epoch": 0.40313720703125, "grad_norm": 12.400070190429688, "learning_rate": 3.2536245236468373e-06, "loss": 3.5334, "step": 39630 }, { "epoch": 0.4031880696614583, "grad_norm": 9.871600151062012, "learning_rate": 3.2532432347076303e-06, "loss": 3.0529, "step": 39635 }, { "epoch": 0.4032389322916667, "grad_norm": 10.247000694274902, "learning_rate": 3.2528619264968737e-06, "loss": 3.4275, "step": 39640 }, { "epoch": 0.403289794921875, "grad_norm": 15.366523742675781, "learning_rate": 3.2524805990243236e-06, "loss": 3.1253, "step": 39645 }, { "epoch": 0.4033406575520833, "grad_norm": 8.520768165588379, "learning_rate": 3.2520992522997353e-06, "loss": 3.5719, "step": 39650 }, { "epoch": 0.4033915201822917, "grad_norm": 10.238525390625, "learning_rate": 3.251717886332866e-06, "loss": 3.2692, "step": 39655 }, { "epoch": 0.4034423828125, "grad_norm": 16.172805786132812, "learning_rate": 3.251336501133473e-06, "loss": 3.3253, "step": 39660 }, { "epoch": 0.4034932454427083, "grad_norm": 14.514467239379883, "learning_rate": 3.2509550967113133e-06, "loss": 3.1995, "step": 39665 }, { "epoch": 0.4035441080729167, "grad_norm": 17.597890853881836, "learning_rate": 3.2505736730761457e-06, "loss": 3.3171, "step": 39670 }, { "epoch": 0.403594970703125, "grad_norm": 14.871464729309082, "learning_rate": 3.2501922302377285e-06, "loss": 3.131, "step": 39675 }, { "epoch": 0.4036458333333333, "grad_norm": 14.677927017211914, "learning_rate": 3.2498107682058213e-06, "loss": 3.705, "step": 39680 }, { "epoch": 0.4036966959635417, "grad_norm": 11.570945739746094, "learning_rate": 3.2494292869901835e-06, "loss": 3.2704, "step": 39685 }, { "epoch": 0.40374755859375, "grad_norm": 14.676580429077148, "learning_rate": 3.249047786600574e-06, "loss": 3.4076, "step": 39690 }, { "epoch": 0.4037984212239583, "grad_norm": 8.944365501403809, "learning_rate": 3.248666267046755e-06, "loss": 3.3502, "step": 39695 }, { "epoch": 0.4038492838541667, "grad_norm": 7.9353790283203125, "learning_rate": 3.2482847283384866e-06, "loss": 3.3113, "step": 39700 }, { "epoch": 0.403900146484375, "grad_norm": 16.29496955871582, "learning_rate": 3.2479031704855313e-06, "loss": 3.3095, "step": 39705 }, { "epoch": 0.4039510091145833, "grad_norm": 12.825359344482422, "learning_rate": 3.2475215934976506e-06, "loss": 3.4118, "step": 39710 }, { "epoch": 0.4040018717447917, "grad_norm": 8.6707124710083, "learning_rate": 3.2471399973846067e-06, "loss": 3.4168, "step": 39715 }, { "epoch": 0.404052734375, "grad_norm": 16.991374969482422, "learning_rate": 3.2467583821561632e-06, "loss": 3.3508, "step": 39720 }, { "epoch": 0.4041035970052083, "grad_norm": 10.363889694213867, "learning_rate": 3.2463767478220836e-06, "loss": 3.5395, "step": 39725 }, { "epoch": 0.4041544596354167, "grad_norm": 13.849224090576172, "learning_rate": 3.245995094392132e-06, "loss": 3.5952, "step": 39730 }, { "epoch": 0.404205322265625, "grad_norm": 11.806497573852539, "learning_rate": 3.2456134218760732e-06, "loss": 3.2266, "step": 39735 }, { "epoch": 0.4042561848958333, "grad_norm": 8.696374893188477, "learning_rate": 3.2452317302836705e-06, "loss": 3.0952, "step": 39740 }, { "epoch": 0.4043070475260417, "grad_norm": 8.538705825805664, "learning_rate": 3.244850019624691e-06, "loss": 3.3628, "step": 39745 }, { "epoch": 0.40435791015625, "grad_norm": 11.07223129272461, "learning_rate": 3.2444682899089007e-06, "loss": 3.0328, "step": 39750 }, { "epoch": 0.4044087727864583, "grad_norm": 7.9295125007629395, "learning_rate": 3.2440865411460655e-06, "loss": 3.476, "step": 39755 }, { "epoch": 0.4044596354166667, "grad_norm": 15.09688663482666, "learning_rate": 3.2437047733459526e-06, "loss": 3.7012, "step": 39760 }, { "epoch": 0.404510498046875, "grad_norm": 16.56294822692871, "learning_rate": 3.2433229865183296e-06, "loss": 3.7268, "step": 39765 }, { "epoch": 0.4045613606770833, "grad_norm": 15.655258178710938, "learning_rate": 3.2429411806729636e-06, "loss": 3.1404, "step": 39770 }, { "epoch": 0.4046122233072917, "grad_norm": 8.41606330871582, "learning_rate": 3.242559355819624e-06, "loss": 3.4145, "step": 39775 }, { "epoch": 0.4046630859375, "grad_norm": 13.415966987609863, "learning_rate": 3.2421775119680785e-06, "loss": 2.911, "step": 39780 }, { "epoch": 0.4047139485677083, "grad_norm": 16.03108024597168, "learning_rate": 3.2417956491280983e-06, "loss": 3.3638, "step": 39785 }, { "epoch": 0.4047648111979167, "grad_norm": 9.713606834411621, "learning_rate": 3.2414137673094525e-06, "loss": 3.5204, "step": 39790 }, { "epoch": 0.404815673828125, "grad_norm": 9.479934692382812, "learning_rate": 3.2410318665219097e-06, "loss": 3.909, "step": 39795 }, { "epoch": 0.4048665364583333, "grad_norm": 13.595467567443848, "learning_rate": 3.240649946775243e-06, "loss": 3.5359, "step": 39800 }, { "epoch": 0.4049173990885417, "grad_norm": 8.232876777648926, "learning_rate": 3.240268008079223e-06, "loss": 3.3631, "step": 39805 }, { "epoch": 0.40496826171875, "grad_norm": 14.731582641601562, "learning_rate": 3.2398860504436218e-06, "loss": 3.2081, "step": 39810 }, { "epoch": 0.4050191243489583, "grad_norm": 10.484471321105957, "learning_rate": 3.2395040738782113e-06, "loss": 3.629, "step": 39815 }, { "epoch": 0.4050699869791667, "grad_norm": 8.36217975616455, "learning_rate": 3.239122078392764e-06, "loss": 3.1228, "step": 39820 }, { "epoch": 0.405120849609375, "grad_norm": 11.761137962341309, "learning_rate": 3.2387400639970532e-06, "loss": 3.4016, "step": 39825 }, { "epoch": 0.4051717122395833, "grad_norm": 9.876433372497559, "learning_rate": 3.238358030700854e-06, "loss": 3.5512, "step": 39830 }, { "epoch": 0.4052225748697917, "grad_norm": 7.70481538772583, "learning_rate": 3.2379759785139386e-06, "loss": 3.2944, "step": 39835 }, { "epoch": 0.4052734375, "grad_norm": 12.468230247497559, "learning_rate": 3.2375939074460833e-06, "loss": 3.0154, "step": 39840 }, { "epoch": 0.4053243001302083, "grad_norm": 11.805959701538086, "learning_rate": 3.237211817507062e-06, "loss": 3.4327, "step": 39845 }, { "epoch": 0.4053751627604167, "grad_norm": 7.966749668121338, "learning_rate": 3.236829708706652e-06, "loss": 3.295, "step": 39850 }, { "epoch": 0.405426025390625, "grad_norm": 13.538468360900879, "learning_rate": 3.2364475810546287e-06, "loss": 3.0303, "step": 39855 }, { "epoch": 0.4054768880208333, "grad_norm": 11.909079551696777, "learning_rate": 3.236065434560767e-06, "loss": 3.2183, "step": 39860 }, { "epoch": 0.4055277506510417, "grad_norm": 12.326981544494629, "learning_rate": 3.235683269234847e-06, "loss": 3.4753, "step": 39865 }, { "epoch": 0.40557861328125, "grad_norm": 13.924688339233398, "learning_rate": 3.2353010850866445e-06, "loss": 3.3513, "step": 39870 }, { "epoch": 0.4056294759114583, "grad_norm": 7.7965087890625, "learning_rate": 3.234918882125939e-06, "loss": 3.1596, "step": 39875 }, { "epoch": 0.4056803385416667, "grad_norm": 13.77583122253418, "learning_rate": 3.2345366603625065e-06, "loss": 3.2544, "step": 39880 }, { "epoch": 0.405731201171875, "grad_norm": 8.31397819519043, "learning_rate": 3.2341544198061287e-06, "loss": 3.0558, "step": 39885 }, { "epoch": 0.4057820638020833, "grad_norm": 8.37823486328125, "learning_rate": 3.233772160466584e-06, "loss": 3.1776, "step": 39890 }, { "epoch": 0.4058329264322917, "grad_norm": 8.468817710876465, "learning_rate": 3.233389882353652e-06, "loss": 3.1737, "step": 39895 }, { "epoch": 0.4058837890625, "grad_norm": 12.608991622924805, "learning_rate": 3.233007585477114e-06, "loss": 3.1122, "step": 39900 }, { "epoch": 0.4059346516927083, "grad_norm": 15.017359733581543, "learning_rate": 3.2326252698467507e-06, "loss": 3.3513, "step": 39905 }, { "epoch": 0.4059855143229167, "grad_norm": 14.726430892944336, "learning_rate": 3.232242935472343e-06, "loss": 3.3759, "step": 39910 }, { "epoch": 0.406036376953125, "grad_norm": 11.463208198547363, "learning_rate": 3.2318605823636743e-06, "loss": 3.0833, "step": 39915 }, { "epoch": 0.4060872395833333, "grad_norm": 10.70964241027832, "learning_rate": 3.2314782105305254e-06, "loss": 3.7387, "step": 39920 }, { "epoch": 0.4061381022135417, "grad_norm": 11.310379028320312, "learning_rate": 3.2310958199826802e-06, "loss": 3.3598, "step": 39925 }, { "epoch": 0.40618896484375, "grad_norm": 11.360577583312988, "learning_rate": 3.2307134107299213e-06, "loss": 3.1917, "step": 39930 }, { "epoch": 0.4062398274739583, "grad_norm": 16.626344680786133, "learning_rate": 3.2303309827820335e-06, "loss": 3.6492, "step": 39935 }, { "epoch": 0.4062906901041667, "grad_norm": 12.946182250976562, "learning_rate": 3.2299485361488005e-06, "loss": 3.2241, "step": 39940 }, { "epoch": 0.406341552734375, "grad_norm": 12.617770195007324, "learning_rate": 3.2295660708400067e-06, "loss": 3.1731, "step": 39945 }, { "epoch": 0.4063924153645833, "grad_norm": 13.231428146362305, "learning_rate": 3.229183586865438e-06, "loss": 3.4243, "step": 39950 }, { "epoch": 0.4064432779947917, "grad_norm": 13.319751739501953, "learning_rate": 3.2288010842348806e-06, "loss": 3.5972, "step": 39955 }, { "epoch": 0.406494140625, "grad_norm": 16.305463790893555, "learning_rate": 3.22841856295812e-06, "loss": 3.3722, "step": 39960 }, { "epoch": 0.4065450032552083, "grad_norm": 13.032581329345703, "learning_rate": 3.2280360230449425e-06, "loss": 3.3068, "step": 39965 }, { "epoch": 0.4065958658854167, "grad_norm": 12.078779220581055, "learning_rate": 3.227653464505137e-06, "loss": 3.1496, "step": 39970 }, { "epoch": 0.406646728515625, "grad_norm": 8.394103050231934, "learning_rate": 3.2272708873484892e-06, "loss": 3.4932, "step": 39975 }, { "epoch": 0.4066975911458333, "grad_norm": 15.861308097839355, "learning_rate": 3.226888291584789e-06, "loss": 3.4615, "step": 39980 }, { "epoch": 0.4067484537760417, "grad_norm": 12.765704154968262, "learning_rate": 3.2265056772238235e-06, "loss": 3.5515, "step": 39985 }, { "epoch": 0.40679931640625, "grad_norm": 11.849873542785645, "learning_rate": 3.2261230442753828e-06, "loss": 3.1232, "step": 39990 }, { "epoch": 0.4068501790364583, "grad_norm": 8.603911399841309, "learning_rate": 3.225740392749256e-06, "loss": 3.1466, "step": 39995 }, { "epoch": 0.4069010416666667, "grad_norm": 15.368163108825684, "learning_rate": 3.225357722655233e-06, "loss": 3.3987, "step": 40000 }, { "epoch": 0.406951904296875, "grad_norm": 17.930904388427734, "learning_rate": 3.224975034003105e-06, "loss": 3.541, "step": 40005 }, { "epoch": 0.4070027669270833, "grad_norm": 11.624622344970703, "learning_rate": 3.2245923268026623e-06, "loss": 3.4463, "step": 40010 }, { "epoch": 0.4070536295572917, "grad_norm": 9.872394561767578, "learning_rate": 3.2242096010636964e-06, "loss": 3.3589, "step": 40015 }, { "epoch": 0.4071044921875, "grad_norm": 10.430153846740723, "learning_rate": 3.223826856796001e-06, "loss": 3.1212, "step": 40020 }, { "epoch": 0.4071553548177083, "grad_norm": 16.75178337097168, "learning_rate": 3.223444094009365e-06, "loss": 3.5168, "step": 40025 }, { "epoch": 0.4072062174479167, "grad_norm": 8.424890518188477, "learning_rate": 3.2230613127135846e-06, "loss": 3.3789, "step": 40030 }, { "epoch": 0.407257080078125, "grad_norm": 13.780265808105469, "learning_rate": 3.2226785129184514e-06, "loss": 3.329, "step": 40035 }, { "epoch": 0.4073079427083333, "grad_norm": 7.752483367919922, "learning_rate": 3.2222956946337593e-06, "loss": 3.1299, "step": 40040 }, { "epoch": 0.4073588053385417, "grad_norm": 13.269911766052246, "learning_rate": 3.221912857869304e-06, "loss": 3.4392, "step": 40045 }, { "epoch": 0.40740966796875, "grad_norm": 12.497602462768555, "learning_rate": 3.221530002634879e-06, "loss": 3.4714, "step": 40050 }, { "epoch": 0.4074605305989583, "grad_norm": 19.447315216064453, "learning_rate": 3.2211471289402797e-06, "loss": 3.0852, "step": 40055 }, { "epoch": 0.4075113932291667, "grad_norm": 9.918488502502441, "learning_rate": 3.2207642367953025e-06, "loss": 3.1742, "step": 40060 }, { "epoch": 0.407562255859375, "grad_norm": 9.899670600891113, "learning_rate": 3.2203813262097427e-06, "loss": 3.4072, "step": 40065 }, { "epoch": 0.4076131184895833, "grad_norm": 13.883380889892578, "learning_rate": 3.219998397193397e-06, "loss": 3.3758, "step": 40070 }, { "epoch": 0.4076639811197917, "grad_norm": 16.322729110717773, "learning_rate": 3.2196154497560642e-06, "loss": 3.201, "step": 40075 }, { "epoch": 0.40771484375, "grad_norm": 7.2647600173950195, "learning_rate": 3.2192324839075397e-06, "loss": 3.3452, "step": 40080 }, { "epoch": 0.4077657063802083, "grad_norm": 10.059800148010254, "learning_rate": 3.218849499657623e-06, "loss": 3.4234, "step": 40085 }, { "epoch": 0.4078165690104167, "grad_norm": 14.233746528625488, "learning_rate": 3.2184664970161124e-06, "loss": 3.5071, "step": 40090 }, { "epoch": 0.407867431640625, "grad_norm": 9.250040054321289, "learning_rate": 3.2180834759928065e-06, "loss": 3.467, "step": 40095 }, { "epoch": 0.4079182942708333, "grad_norm": 13.586669921875, "learning_rate": 3.217700436597505e-06, "loss": 3.0783, "step": 40100 }, { "epoch": 0.4079691569010417, "grad_norm": 15.873119354248047, "learning_rate": 3.217317378840008e-06, "loss": 3.3066, "step": 40105 }, { "epoch": 0.40802001953125, "grad_norm": 12.343993186950684, "learning_rate": 3.216934302730116e-06, "loss": 3.0871, "step": 40110 }, { "epoch": 0.4080708821614583, "grad_norm": 12.203359603881836, "learning_rate": 3.2165512082776297e-06, "loss": 3.1915, "step": 40115 }, { "epoch": 0.4081217447916667, "grad_norm": 9.524660110473633, "learning_rate": 3.2161680954923504e-06, "loss": 3.3897, "step": 40120 }, { "epoch": 0.408172607421875, "grad_norm": 11.293547630310059, "learning_rate": 3.2157849643840806e-06, "loss": 3.573, "step": 40125 }, { "epoch": 0.4082234700520833, "grad_norm": 10.416081428527832, "learning_rate": 3.215401814962621e-06, "loss": 3.3696, "step": 40130 }, { "epoch": 0.4082743326822917, "grad_norm": 9.743280410766602, "learning_rate": 3.2150186472377767e-06, "loss": 3.4491, "step": 40135 }, { "epoch": 0.4083251953125, "grad_norm": 9.836259841918945, "learning_rate": 3.2146354612193497e-06, "loss": 3.2513, "step": 40140 }, { "epoch": 0.4083760579427083, "grad_norm": 8.281194686889648, "learning_rate": 3.214252256917143e-06, "loss": 3.3309, "step": 40145 }, { "epoch": 0.4084269205729167, "grad_norm": 12.972834587097168, "learning_rate": 3.213869034340963e-06, "loss": 3.2908, "step": 40150 }, { "epoch": 0.408477783203125, "grad_norm": 9.326537132263184, "learning_rate": 3.213485793500612e-06, "loss": 3.3215, "step": 40155 }, { "epoch": 0.4085286458333333, "grad_norm": 14.250112533569336, "learning_rate": 3.213102534405896e-06, "loss": 3.3848, "step": 40160 }, { "epoch": 0.4085795084635417, "grad_norm": 14.024307250976562, "learning_rate": 3.212719257066621e-06, "loss": 3.2285, "step": 40165 }, { "epoch": 0.40863037109375, "grad_norm": 12.0462007522583, "learning_rate": 3.2123359614925925e-06, "loss": 3.5627, "step": 40170 }, { "epoch": 0.4086812337239583, "grad_norm": 13.510028839111328, "learning_rate": 3.2119526476936177e-06, "loss": 3.0223, "step": 40175 }, { "epoch": 0.4087320963541667, "grad_norm": 11.582918167114258, "learning_rate": 3.2115693156795026e-06, "loss": 3.3018, "step": 40180 }, { "epoch": 0.408782958984375, "grad_norm": 8.617096900939941, "learning_rate": 3.2111859654600554e-06, "loss": 3.4995, "step": 40185 }, { "epoch": 0.4088338216145833, "grad_norm": 16.85012435913086, "learning_rate": 3.210802597045084e-06, "loss": 3.5344, "step": 40190 }, { "epoch": 0.4088846842447917, "grad_norm": 12.56917667388916, "learning_rate": 3.2104192104443965e-06, "loss": 3.2028, "step": 40195 }, { "epoch": 0.408935546875, "grad_norm": 13.219779968261719, "learning_rate": 3.210035805667802e-06, "loss": 3.5665, "step": 40200 }, { "epoch": 0.4089864095052083, "grad_norm": 9.362250328063965, "learning_rate": 3.2096523827251096e-06, "loss": 3.5273, "step": 40205 }, { "epoch": 0.4090372721354167, "grad_norm": 12.411036491394043, "learning_rate": 3.2092689416261295e-06, "loss": 3.619, "step": 40210 }, { "epoch": 0.409088134765625, "grad_norm": 13.231313705444336, "learning_rate": 3.2088854823806714e-06, "loss": 3.682, "step": 40215 }, { "epoch": 0.4091389973958333, "grad_norm": 15.368950843811035, "learning_rate": 3.208502004998546e-06, "loss": 3.4148, "step": 40220 }, { "epoch": 0.4091898600260417, "grad_norm": 8.858798027038574, "learning_rate": 3.208118509489565e-06, "loss": 3.5826, "step": 40225 }, { "epoch": 0.40924072265625, "grad_norm": 12.872635841369629, "learning_rate": 3.20773499586354e-06, "loss": 3.4896, "step": 40230 }, { "epoch": 0.4092915852864583, "grad_norm": 12.837394714355469, "learning_rate": 3.2073514641302827e-06, "loss": 3.192, "step": 40235 }, { "epoch": 0.4093424479166667, "grad_norm": 11.67916202545166, "learning_rate": 3.2069679142996055e-06, "loss": 3.3476, "step": 40240 }, { "epoch": 0.409393310546875, "grad_norm": 9.539045333862305, "learning_rate": 3.206584346381323e-06, "loss": 3.2381, "step": 40245 }, { "epoch": 0.4094441731770833, "grad_norm": 9.679408073425293, "learning_rate": 3.206200760385246e-06, "loss": 3.7806, "step": 40250 }, { "epoch": 0.4094950358072917, "grad_norm": 8.230401992797852, "learning_rate": 3.2058171563211905e-06, "loss": 3.5604, "step": 40255 }, { "epoch": 0.4095458984375, "grad_norm": 13.984374046325684, "learning_rate": 3.20543353419897e-06, "loss": 3.1788, "step": 40260 }, { "epoch": 0.4095967610677083, "grad_norm": 14.798341751098633, "learning_rate": 3.2050498940284003e-06, "loss": 4.2144, "step": 40265 }, { "epoch": 0.4096476236979167, "grad_norm": 10.024030685424805, "learning_rate": 3.204666235819296e-06, "loss": 3.4174, "step": 40270 }, { "epoch": 0.409698486328125, "grad_norm": 19.80939483642578, "learning_rate": 3.204282559581473e-06, "loss": 3.6143, "step": 40275 }, { "epoch": 0.4097493489583333, "grad_norm": 12.339203834533691, "learning_rate": 3.2038988653247477e-06, "loss": 3.3237, "step": 40280 }, { "epoch": 0.4098002115885417, "grad_norm": 13.307379722595215, "learning_rate": 3.203515153058937e-06, "loss": 3.0846, "step": 40285 }, { "epoch": 0.40985107421875, "grad_norm": 13.441474914550781, "learning_rate": 3.203131422793857e-06, "loss": 2.8081, "step": 40290 }, { "epoch": 0.4099019368489583, "grad_norm": 9.035524368286133, "learning_rate": 3.2027476745393277e-06, "loss": 3.4538, "step": 40295 }, { "epoch": 0.4099527994791667, "grad_norm": 9.35716438293457, "learning_rate": 3.2023639083051644e-06, "loss": 3.2194, "step": 40300 }, { "epoch": 0.410003662109375, "grad_norm": 15.0332612991333, "learning_rate": 3.2019801241011877e-06, "loss": 3.0303, "step": 40305 }, { "epoch": 0.4100545247395833, "grad_norm": 12.640445709228516, "learning_rate": 3.2015963219372158e-06, "loss": 3.6029, "step": 40310 }, { "epoch": 0.4101053873697917, "grad_norm": 9.166719436645508, "learning_rate": 3.2012125018230677e-06, "loss": 2.9824, "step": 40315 }, { "epoch": 0.41015625, "grad_norm": 13.075416564941406, "learning_rate": 3.200828663768565e-06, "loss": 3.0641, "step": 40320 }, { "epoch": 0.4102071126302083, "grad_norm": 9.390490531921387, "learning_rate": 3.200444807783526e-06, "loss": 3.2462, "step": 40325 }, { "epoch": 0.4102579752604167, "grad_norm": 13.261961936950684, "learning_rate": 3.200060933877773e-06, "loss": 3.1896, "step": 40330 }, { "epoch": 0.410308837890625, "grad_norm": 9.776216506958008, "learning_rate": 3.1996770420611265e-06, "loss": 3.715, "step": 40335 }, { "epoch": 0.4103597005208333, "grad_norm": 8.422574996948242, "learning_rate": 3.1992931323434097e-06, "loss": 3.3871, "step": 40340 }, { "epoch": 0.4104105631510417, "grad_norm": 10.582122802734375, "learning_rate": 3.198909204734443e-06, "loss": 3.4996, "step": 40345 }, { "epoch": 0.41046142578125, "grad_norm": 11.832892417907715, "learning_rate": 3.1985252592440507e-06, "loss": 3.3974, "step": 40350 }, { "epoch": 0.4105122884114583, "grad_norm": 15.56053352355957, "learning_rate": 3.1981412958820547e-06, "loss": 3.265, "step": 40355 }, { "epoch": 0.4105631510416667, "grad_norm": 17.74793243408203, "learning_rate": 3.1977573146582798e-06, "loss": 3.6518, "step": 40360 }, { "epoch": 0.410614013671875, "grad_norm": 14.441006660461426, "learning_rate": 3.197373315582548e-06, "loss": 3.4867, "step": 40365 }, { "epoch": 0.4106648763020833, "grad_norm": 12.163776397705078, "learning_rate": 3.1969892986646866e-06, "loss": 3.4167, "step": 40370 }, { "epoch": 0.4107157389322917, "grad_norm": 7.191044807434082, "learning_rate": 3.1966052639145188e-06, "loss": 3.267, "step": 40375 }, { "epoch": 0.4107666015625, "grad_norm": 14.789758682250977, "learning_rate": 3.19622121134187e-06, "loss": 3.2197, "step": 40380 }, { "epoch": 0.4108174641927083, "grad_norm": 10.832036018371582, "learning_rate": 3.195837140956567e-06, "loss": 3.1452, "step": 40385 }, { "epoch": 0.4108683268229167, "grad_norm": 12.134127616882324, "learning_rate": 3.1954530527684356e-06, "loss": 4.023, "step": 40390 }, { "epoch": 0.410919189453125, "grad_norm": 15.02275276184082, "learning_rate": 3.1950689467873026e-06, "loss": 2.8569, "step": 40395 }, { "epoch": 0.4109700520833333, "grad_norm": 11.308838844299316, "learning_rate": 3.1946848230229955e-06, "loss": 3.1812, "step": 40400 }, { "epoch": 0.4110209147135417, "grad_norm": 12.479435920715332, "learning_rate": 3.1943006814853417e-06, "loss": 3.8656, "step": 40405 }, { "epoch": 0.41107177734375, "grad_norm": 16.549497604370117, "learning_rate": 3.1939165221841696e-06, "loss": 3.2209, "step": 40410 }, { "epoch": 0.4111226399739583, "grad_norm": 10.488978385925293, "learning_rate": 3.193532345129307e-06, "loss": 4.3753, "step": 40415 }, { "epoch": 0.4111735026041667, "grad_norm": 17.199155807495117, "learning_rate": 3.193148150330585e-06, "loss": 3.2604, "step": 40420 }, { "epoch": 0.411224365234375, "grad_norm": 11.074233055114746, "learning_rate": 3.1927639377978314e-06, "loss": 3.5254, "step": 40425 }, { "epoch": 0.4112752278645833, "grad_norm": 14.462937355041504, "learning_rate": 3.1923797075408757e-06, "loss": 3.1973, "step": 40430 }, { "epoch": 0.4113260904947917, "grad_norm": 14.890127182006836, "learning_rate": 3.191995459569551e-06, "loss": 3.4347, "step": 40435 }, { "epoch": 0.411376953125, "grad_norm": 14.60438346862793, "learning_rate": 3.1916111938936857e-06, "loss": 3.4392, "step": 40440 }, { "epoch": 0.4114278157552083, "grad_norm": 10.742544174194336, "learning_rate": 3.191226910523112e-06, "loss": 3.4331, "step": 40445 }, { "epoch": 0.4114786783854167, "grad_norm": 14.729475021362305, "learning_rate": 3.1908426094676616e-06, "loss": 3.1607, "step": 40450 }, { "epoch": 0.411529541015625, "grad_norm": 7.225653648376465, "learning_rate": 3.1904582907371663e-06, "loss": 3.2739, "step": 40455 }, { "epoch": 0.4115804036458333, "grad_norm": 16.015857696533203, "learning_rate": 3.1900739543414592e-06, "loss": 3.3931, "step": 40460 }, { "epoch": 0.4116312662760417, "grad_norm": 11.942667961120605, "learning_rate": 3.189689600290375e-06, "loss": 3.2618, "step": 40465 }, { "epoch": 0.41168212890625, "grad_norm": 10.380414962768555, "learning_rate": 3.1893052285937443e-06, "loss": 3.5267, "step": 40470 }, { "epoch": 0.4117329915364583, "grad_norm": 15.762372016906738, "learning_rate": 3.1889208392614037e-06, "loss": 3.4845, "step": 40475 }, { "epoch": 0.4117838541666667, "grad_norm": 11.058128356933594, "learning_rate": 3.1885364323031853e-06, "loss": 3.2676, "step": 40480 }, { "epoch": 0.411834716796875, "grad_norm": 12.959691047668457, "learning_rate": 3.1881520077289272e-06, "loss": 3.8364, "step": 40485 }, { "epoch": 0.4118855794270833, "grad_norm": 9.470345497131348, "learning_rate": 3.1877675655484625e-06, "loss": 3.337, "step": 40490 }, { "epoch": 0.4119364420572917, "grad_norm": 10.67660903930664, "learning_rate": 3.1873831057716277e-06, "loss": 3.1722, "step": 40495 }, { "epoch": 0.4119873046875, "grad_norm": 14.760432243347168, "learning_rate": 3.1869986284082587e-06, "loss": 3.3537, "step": 40500 }, { "epoch": 0.4120381673177083, "grad_norm": 11.180314064025879, "learning_rate": 3.186614133468193e-06, "loss": 3.3887, "step": 40505 }, { "epoch": 0.4120890299479167, "grad_norm": 17.601219177246094, "learning_rate": 3.186229620961267e-06, "loss": 3.3066, "step": 40510 }, { "epoch": 0.412139892578125, "grad_norm": 11.670747756958008, "learning_rate": 3.185845090897319e-06, "loss": 3.4507, "step": 40515 }, { "epoch": 0.4121907552083333, "grad_norm": 11.915066719055176, "learning_rate": 3.185460543286187e-06, "loss": 3.4408, "step": 40520 }, { "epoch": 0.4122416178385417, "grad_norm": 10.542513847351074, "learning_rate": 3.18507597813771e-06, "loss": 3.3083, "step": 40525 }, { "epoch": 0.41229248046875, "grad_norm": 9.632444381713867, "learning_rate": 3.1846913954617257e-06, "loss": 3.736, "step": 40530 }, { "epoch": 0.4123433430989583, "grad_norm": 11.756135940551758, "learning_rate": 3.184306795268074e-06, "loss": 3.3021, "step": 40535 }, { "epoch": 0.4123942057291667, "grad_norm": 11.835874557495117, "learning_rate": 3.1839221775665964e-06, "loss": 3.2396, "step": 40540 }, { "epoch": 0.412445068359375, "grad_norm": 16.399227142333984, "learning_rate": 3.183537542367131e-06, "loss": 3.3441, "step": 40545 }, { "epoch": 0.4124959309895833, "grad_norm": 13.708842277526855, "learning_rate": 3.18315288967952e-06, "loss": 3.224, "step": 40550 }, { "epoch": 0.4125467936197917, "grad_norm": 11.074498176574707, "learning_rate": 3.1827682195136038e-06, "loss": 3.8189, "step": 40555 }, { "epoch": 0.41259765625, "grad_norm": 12.426509857177734, "learning_rate": 3.182383531879225e-06, "loss": 4.0286, "step": 40560 }, { "epoch": 0.4126485188802083, "grad_norm": 15.233750343322754, "learning_rate": 3.1819988267862253e-06, "loss": 3.5985, "step": 40565 }, { "epoch": 0.4126993815104167, "grad_norm": 12.714953422546387, "learning_rate": 3.181614104244447e-06, "loss": 3.2174, "step": 40570 }, { "epoch": 0.412750244140625, "grad_norm": 13.589609146118164, "learning_rate": 3.181229364263734e-06, "loss": 3.2432, "step": 40575 }, { "epoch": 0.4128011067708333, "grad_norm": 14.362196922302246, "learning_rate": 3.180844606853929e-06, "loss": 3.0738, "step": 40580 }, { "epoch": 0.4128519694010417, "grad_norm": 14.002069473266602, "learning_rate": 3.1804598320248747e-06, "loss": 3.1727, "step": 40585 }, { "epoch": 0.41290283203125, "grad_norm": 11.42420482635498, "learning_rate": 3.180075039786419e-06, "loss": 3.5389, "step": 40590 }, { "epoch": 0.4129536946614583, "grad_norm": 8.944576263427734, "learning_rate": 3.1796902301484033e-06, "loss": 3.7406, "step": 40595 }, { "epoch": 0.4130045572916667, "grad_norm": 11.496153831481934, "learning_rate": 3.1793054031206742e-06, "loss": 3.1414, "step": 40600 }, { "epoch": 0.413055419921875, "grad_norm": 11.331368446350098, "learning_rate": 3.1789205587130777e-06, "loss": 3.2902, "step": 40605 }, { "epoch": 0.4131062825520833, "grad_norm": 15.670097351074219, "learning_rate": 3.17853569693546e-06, "loss": 3.5692, "step": 40610 }, { "epoch": 0.4131571451822917, "grad_norm": 12.869950294494629, "learning_rate": 3.1781508177976667e-06, "loss": 3.3101, "step": 40615 }, { "epoch": 0.4132080078125, "grad_norm": 17.0570125579834, "learning_rate": 3.177765921309546e-06, "loss": 3.3191, "step": 40620 }, { "epoch": 0.4132588704427083, "grad_norm": 9.5101318359375, "learning_rate": 3.177381007480944e-06, "loss": 3.1341, "step": 40625 }, { "epoch": 0.4133097330729167, "grad_norm": 9.91584300994873, "learning_rate": 3.17699607632171e-06, "loss": 3.5979, "step": 40630 }, { "epoch": 0.413360595703125, "grad_norm": 13.129979133605957, "learning_rate": 3.1766111278416916e-06, "loss": 3.4869, "step": 40635 }, { "epoch": 0.4134114583333333, "grad_norm": 10.525177001953125, "learning_rate": 3.1762261620507386e-06, "loss": 3.0418, "step": 40640 }, { "epoch": 0.4134623209635417, "grad_norm": 10.457929611206055, "learning_rate": 3.1758411789586992e-06, "loss": 3.219, "step": 40645 }, { "epoch": 0.41351318359375, "grad_norm": 9.514257431030273, "learning_rate": 3.1754561785754225e-06, "loss": 3.1455, "step": 40650 }, { "epoch": 0.4135640462239583, "grad_norm": 16.182477951049805, "learning_rate": 3.175071160910761e-06, "loss": 3.3036, "step": 40655 }, { "epoch": 0.4136149088541667, "grad_norm": 15.760019302368164, "learning_rate": 3.1746861259745633e-06, "loss": 3.3682, "step": 40660 }, { "epoch": 0.413665771484375, "grad_norm": 10.646612167358398, "learning_rate": 3.1743010737766803e-06, "loss": 3.5076, "step": 40665 }, { "epoch": 0.4137166341145833, "grad_norm": 12.429669380187988, "learning_rate": 3.173916004326965e-06, "loss": 2.9539, "step": 40670 }, { "epoch": 0.4137674967447917, "grad_norm": 9.560185432434082, "learning_rate": 3.173530917635268e-06, "loss": 3.1944, "step": 40675 }, { "epoch": 0.413818359375, "grad_norm": 10.479032516479492, "learning_rate": 3.1731458137114423e-06, "loss": 3.4629, "step": 40680 }, { "epoch": 0.4138692220052083, "grad_norm": 14.869799613952637, "learning_rate": 3.1727606925653416e-06, "loss": 3.681, "step": 40685 }, { "epoch": 0.4139200846354167, "grad_norm": 14.663705825805664, "learning_rate": 3.1723755542068157e-06, "loss": 3.5367, "step": 40690 }, { "epoch": 0.413970947265625, "grad_norm": 8.11724853515625, "learning_rate": 3.171990398645723e-06, "loss": 3.2612, "step": 40695 }, { "epoch": 0.4140218098958333, "grad_norm": 8.949166297912598, "learning_rate": 3.171605225891914e-06, "loss": 3.157, "step": 40700 }, { "epoch": 0.4140726725260417, "grad_norm": 9.309784889221191, "learning_rate": 3.1712200359552445e-06, "loss": 2.7949, "step": 40705 }, { "epoch": 0.41412353515625, "grad_norm": 9.50880241394043, "learning_rate": 3.170834828845569e-06, "loss": 3.4913, "step": 40710 }, { "epoch": 0.4141743977864583, "grad_norm": 10.189599990844727, "learning_rate": 3.1704496045727447e-06, "loss": 3.4456, "step": 40715 }, { "epoch": 0.4142252604166667, "grad_norm": 16.509906768798828, "learning_rate": 3.1700643631466256e-06, "loss": 3.2289, "step": 40720 }, { "epoch": 0.414276123046875, "grad_norm": 9.536114692687988, "learning_rate": 3.1696791045770682e-06, "loss": 3.3998, "step": 40725 }, { "epoch": 0.4143269856770833, "grad_norm": 14.481447219848633, "learning_rate": 3.1692938288739296e-06, "loss": 3.6835, "step": 40730 }, { "epoch": 0.4143778483072917, "grad_norm": 12.22691535949707, "learning_rate": 3.1689085360470674e-06, "loss": 3.6889, "step": 40735 }, { "epoch": 0.4144287109375, "grad_norm": 16.401357650756836, "learning_rate": 3.1685232261063386e-06, "loss": 3.0401, "step": 40740 }, { "epoch": 0.4144795735677083, "grad_norm": 7.340702056884766, "learning_rate": 3.168137899061602e-06, "loss": 3.0076, "step": 40745 }, { "epoch": 0.4145304361979167, "grad_norm": 9.923087120056152, "learning_rate": 3.167752554922716e-06, "loss": 3.1689, "step": 40750 }, { "epoch": 0.414581298828125, "grad_norm": 15.847943305969238, "learning_rate": 3.1673671936995377e-06, "loss": 3.2989, "step": 40755 }, { "epoch": 0.4146321614583333, "grad_norm": 10.91378116607666, "learning_rate": 3.1669818154019287e-06, "loss": 3.1673, "step": 40760 }, { "epoch": 0.4146830240885417, "grad_norm": 9.859515190124512, "learning_rate": 3.166596420039748e-06, "loss": 3.3117, "step": 40765 }, { "epoch": 0.41473388671875, "grad_norm": 15.599798202514648, "learning_rate": 3.1662110076228564e-06, "loss": 3.3584, "step": 40770 }, { "epoch": 0.4147847493489583, "grad_norm": 14.321131706237793, "learning_rate": 3.1658255781611138e-06, "loss": 3.8719, "step": 40775 }, { "epoch": 0.4148356119791667, "grad_norm": 13.262664794921875, "learning_rate": 3.165440131664381e-06, "loss": 3.406, "step": 40780 }, { "epoch": 0.414886474609375, "grad_norm": 14.643454551696777, "learning_rate": 3.1650546681425204e-06, "loss": 3.4258, "step": 40785 }, { "epoch": 0.4149373372395833, "grad_norm": 9.192747116088867, "learning_rate": 3.164669187605395e-06, "loss": 3.7027, "step": 40790 }, { "epoch": 0.4149881998697917, "grad_norm": 12.051810264587402, "learning_rate": 3.1642836900628637e-06, "loss": 3.4306, "step": 40795 }, { "epoch": 0.4150390625, "grad_norm": 9.823638916015625, "learning_rate": 3.1638981755247934e-06, "loss": 3.5007, "step": 40800 }, { "epoch": 0.4150899251302083, "grad_norm": 11.998472213745117, "learning_rate": 3.1635126440010444e-06, "loss": 3.477, "step": 40805 }, { "epoch": 0.4151407877604167, "grad_norm": 14.473235130310059, "learning_rate": 3.1631270955014833e-06, "loss": 3.4691, "step": 40810 }, { "epoch": 0.415191650390625, "grad_norm": 8.37810230255127, "learning_rate": 3.1627415300359717e-06, "loss": 3.4367, "step": 40815 }, { "epoch": 0.4152425130208333, "grad_norm": 19.481773376464844, "learning_rate": 3.162355947614375e-06, "loss": 3.3668, "step": 40820 }, { "epoch": 0.4152933756510417, "grad_norm": 16.640199661254883, "learning_rate": 3.1619703482465586e-06, "loss": 3.0726, "step": 40825 }, { "epoch": 0.41534423828125, "grad_norm": 12.760043144226074, "learning_rate": 3.1615847319423874e-06, "loss": 3.6446, "step": 40830 }, { "epoch": 0.4153951009114583, "grad_norm": 15.554396629333496, "learning_rate": 3.161199098711728e-06, "loss": 3.2753, "step": 40835 }, { "epoch": 0.4154459635416667, "grad_norm": 9.984983444213867, "learning_rate": 3.1608134485644458e-06, "loss": 2.8998, "step": 40840 }, { "epoch": 0.415496826171875, "grad_norm": 12.402283668518066, "learning_rate": 3.160427781510409e-06, "loss": 3.2864, "step": 40845 }, { "epoch": 0.4155476888020833, "grad_norm": 11.073039054870605, "learning_rate": 3.160042097559484e-06, "loss": 2.866, "step": 40850 }, { "epoch": 0.4155985514322917, "grad_norm": 10.235372543334961, "learning_rate": 3.159656396721538e-06, "loss": 3.1743, "step": 40855 }, { "epoch": 0.4156494140625, "grad_norm": 11.603734970092773, "learning_rate": 3.1592706790064387e-06, "loss": 3.4963, "step": 40860 }, { "epoch": 0.4157002766927083, "grad_norm": 9.706622123718262, "learning_rate": 3.1588849444240564e-06, "loss": 3.244, "step": 40865 }, { "epoch": 0.4157511393229167, "grad_norm": 10.390192985534668, "learning_rate": 3.1584991929842586e-06, "loss": 3.0856, "step": 40870 }, { "epoch": 0.415802001953125, "grad_norm": 12.130228996276855, "learning_rate": 3.1581134246969153e-06, "loss": 3.2899, "step": 40875 }, { "epoch": 0.4158528645833333, "grad_norm": 9.266514778137207, "learning_rate": 3.1577276395718957e-06, "loss": 3.3391, "step": 40880 }, { "epoch": 0.4159037272135417, "grad_norm": 10.627102851867676, "learning_rate": 3.157341837619071e-06, "loss": 3.7669, "step": 40885 }, { "epoch": 0.41595458984375, "grad_norm": 10.077190399169922, "learning_rate": 3.15695601884831e-06, "loss": 3.2736, "step": 40890 }, { "epoch": 0.4160054524739583, "grad_norm": 14.39554500579834, "learning_rate": 3.1565701832694856e-06, "loss": 3.5625, "step": 40895 }, { "epoch": 0.4160563151041667, "grad_norm": 11.000900268554688, "learning_rate": 3.1561843308924687e-06, "loss": 3.4582, "step": 40900 }, { "epoch": 0.416107177734375, "grad_norm": 7.12355375289917, "learning_rate": 3.155798461727132e-06, "loss": 3.2976, "step": 40905 }, { "epoch": 0.4161580403645833, "grad_norm": 7.608798503875732, "learning_rate": 3.1554125757833453e-06, "loss": 3.3805, "step": 40910 }, { "epoch": 0.4162089029947917, "grad_norm": 17.018798828125, "learning_rate": 3.155026673070985e-06, "loss": 3.5029, "step": 40915 }, { "epoch": 0.416259765625, "grad_norm": 10.469268798828125, "learning_rate": 3.1546407535999214e-06, "loss": 3.0716, "step": 40920 }, { "epoch": 0.4163106282552083, "grad_norm": 7.395883083343506, "learning_rate": 3.1542548173800293e-06, "loss": 3.6238, "step": 40925 }, { "epoch": 0.4163614908854167, "grad_norm": 14.05190372467041, "learning_rate": 3.1538688644211834e-06, "loss": 3.0576, "step": 40930 }, { "epoch": 0.416412353515625, "grad_norm": 10.219439506530762, "learning_rate": 3.1534828947332573e-06, "loss": 3.6053, "step": 40935 }, { "epoch": 0.4164632161458333, "grad_norm": 11.207366943359375, "learning_rate": 3.1530969083261265e-06, "loss": 3.9513, "step": 40940 }, { "epoch": 0.4165140787760417, "grad_norm": 10.072528839111328, "learning_rate": 3.1527109052096656e-06, "loss": 3.1873, "step": 40945 }, { "epoch": 0.41656494140625, "grad_norm": 7.561704635620117, "learning_rate": 3.152324885393751e-06, "loss": 3.3976, "step": 40950 }, { "epoch": 0.4166158040364583, "grad_norm": 13.165698051452637, "learning_rate": 3.1519388488882586e-06, "loss": 3.2867, "step": 40955 }, { "epoch": 0.4166666666666667, "grad_norm": 10.214173316955566, "learning_rate": 3.1515527957030657e-06, "loss": 3.0653, "step": 40960 }, { "epoch": 0.416717529296875, "grad_norm": 12.931204795837402, "learning_rate": 3.151166725848049e-06, "loss": 3.2135, "step": 40965 }, { "epoch": 0.4167683919270833, "grad_norm": 8.744141578674316, "learning_rate": 3.1507806393330865e-06, "loss": 3.33, "step": 40970 }, { "epoch": 0.4168192545572917, "grad_norm": 13.387395858764648, "learning_rate": 3.1503945361680543e-06, "loss": 3.1974, "step": 40975 }, { "epoch": 0.4168701171875, "grad_norm": 8.401640892028809, "learning_rate": 3.150008416362833e-06, "loss": 3.1772, "step": 40980 }, { "epoch": 0.4169209798177083, "grad_norm": 13.557087898254395, "learning_rate": 3.1496222799273002e-06, "loss": 3.2859, "step": 40985 }, { "epoch": 0.4169718424479167, "grad_norm": 12.962182998657227, "learning_rate": 3.149236126871335e-06, "loss": 3.4554, "step": 40990 }, { "epoch": 0.417022705078125, "grad_norm": 13.028253555297852, "learning_rate": 3.1488499572048177e-06, "loss": 3.6858, "step": 40995 }, { "epoch": 0.4170735677083333, "grad_norm": 10.180688858032227, "learning_rate": 3.1484637709376274e-06, "loss": 3.274, "step": 41000 }, { "epoch": 0.4171244303385417, "grad_norm": 18.28401756286621, "learning_rate": 3.148077568079646e-06, "loss": 3.2267, "step": 41005 }, { "epoch": 0.41717529296875, "grad_norm": 13.654256820678711, "learning_rate": 3.147691348640754e-06, "loss": 3.538, "step": 41010 }, { "epoch": 0.4172261555989583, "grad_norm": 13.836880683898926, "learning_rate": 3.1473051126308314e-06, "loss": 3.0475, "step": 41015 }, { "epoch": 0.4172770182291667, "grad_norm": 11.592706680297852, "learning_rate": 3.1469188600597613e-06, "loss": 4.1781, "step": 41020 }, { "epoch": 0.417327880859375, "grad_norm": 12.247729301452637, "learning_rate": 3.1465325909374255e-06, "loss": 3.3115, "step": 41025 }, { "epoch": 0.4173787434895833, "grad_norm": 11.236956596374512, "learning_rate": 3.1461463052737064e-06, "loss": 3.3245, "step": 41030 }, { "epoch": 0.4174296061197917, "grad_norm": 12.398927688598633, "learning_rate": 3.145760003078488e-06, "loss": 3.2676, "step": 41035 }, { "epoch": 0.41748046875, "grad_norm": 9.001666069030762, "learning_rate": 3.1453736843616515e-06, "loss": 3.1629, "step": 41040 }, { "epoch": 0.4175313313802083, "grad_norm": 15.180867195129395, "learning_rate": 3.144987349133084e-06, "loss": 3.2056, "step": 41045 }, { "epoch": 0.4175821940104167, "grad_norm": 14.018913269042969, "learning_rate": 3.144600997402667e-06, "loss": 3.4004, "step": 41050 }, { "epoch": 0.417633056640625, "grad_norm": 10.41834545135498, "learning_rate": 3.1442146291802866e-06, "loss": 3.9112, "step": 41055 }, { "epoch": 0.4176839192708333, "grad_norm": 22.930706024169922, "learning_rate": 3.143828244475828e-06, "loss": 3.8011, "step": 41060 }, { "epoch": 0.4177347819010417, "grad_norm": 8.728473663330078, "learning_rate": 3.1434418432991755e-06, "loss": 3.4481, "step": 41065 }, { "epoch": 0.41778564453125, "grad_norm": 13.227070808410645, "learning_rate": 3.1430554256602165e-06, "loss": 3.3979, "step": 41070 }, { "epoch": 0.4178365071614583, "grad_norm": 13.314940452575684, "learning_rate": 3.1426689915688367e-06, "loss": 3.5534, "step": 41075 }, { "epoch": 0.4178873697916667, "grad_norm": 16.44963836669922, "learning_rate": 3.142282541034923e-06, "loss": 3.0206, "step": 41080 }, { "epoch": 0.417938232421875, "grad_norm": 8.901022911071777, "learning_rate": 3.141896074068363e-06, "loss": 3.0547, "step": 41085 }, { "epoch": 0.4179890950520833, "grad_norm": 12.60720157623291, "learning_rate": 3.1415095906790436e-06, "loss": 3.4113, "step": 41090 }, { "epoch": 0.4180399576822917, "grad_norm": 12.682599067687988, "learning_rate": 3.141123090876854e-06, "loss": 3.7857, "step": 41095 }, { "epoch": 0.4180908203125, "grad_norm": 8.320108413696289, "learning_rate": 3.1407365746716816e-06, "loss": 3.243, "step": 41100 }, { "epoch": 0.4181416829427083, "grad_norm": 13.9808931350708, "learning_rate": 3.140350042073416e-06, "loss": 4.0655, "step": 41105 }, { "epoch": 0.4181925455729167, "grad_norm": 9.053630828857422, "learning_rate": 3.1399634930919463e-06, "loss": 3.3757, "step": 41110 }, { "epoch": 0.418243408203125, "grad_norm": 12.047245979309082, "learning_rate": 3.1395769277371624e-06, "loss": 3.209, "step": 41115 }, { "epoch": 0.4182942708333333, "grad_norm": 15.260944366455078, "learning_rate": 3.1391903460189543e-06, "loss": 3.4581, "step": 41120 }, { "epoch": 0.4183451334635417, "grad_norm": 8.774908065795898, "learning_rate": 3.138803747947213e-06, "loss": 3.1256, "step": 41125 }, { "epoch": 0.41839599609375, "grad_norm": 8.514405250549316, "learning_rate": 3.138417133531829e-06, "loss": 3.3727, "step": 41130 }, { "epoch": 0.4184468587239583, "grad_norm": 11.792862892150879, "learning_rate": 3.1380305027826933e-06, "loss": 3.5753, "step": 41135 }, { "epoch": 0.4184977213541667, "grad_norm": 15.699068069458008, "learning_rate": 3.1376438557096995e-06, "loss": 3.6065, "step": 41140 }, { "epoch": 0.418548583984375, "grad_norm": 11.407424926757812, "learning_rate": 3.137257192322738e-06, "loss": 3.1444, "step": 41145 }, { "epoch": 0.4185994466145833, "grad_norm": 10.706039428710938, "learning_rate": 3.136870512631703e-06, "loss": 3.0158, "step": 41150 }, { "epoch": 0.4186503092447917, "grad_norm": 8.059556007385254, "learning_rate": 3.136483816646486e-06, "loss": 3.3934, "step": 41155 }, { "epoch": 0.418701171875, "grad_norm": 14.3755521774292, "learning_rate": 3.136097104376982e-06, "loss": 3.1492, "step": 41160 }, { "epoch": 0.4187520345052083, "grad_norm": 12.719627380371094, "learning_rate": 3.1357103758330844e-06, "loss": 3.523, "step": 41165 }, { "epoch": 0.4188028971354167, "grad_norm": 10.903666496276855, "learning_rate": 3.1353236310246878e-06, "loss": 3.41, "step": 41170 }, { "epoch": 0.418853759765625, "grad_norm": 14.709659576416016, "learning_rate": 3.1349368699616857e-06, "loss": 3.1697, "step": 41175 }, { "epoch": 0.4189046223958333, "grad_norm": 9.065529823303223, "learning_rate": 3.1345500926539753e-06, "loss": 3.2718, "step": 41180 }, { "epoch": 0.4189554850260417, "grad_norm": 14.227614402770996, "learning_rate": 3.13416329911145e-06, "loss": 3.3014, "step": 41185 }, { "epoch": 0.41900634765625, "grad_norm": 12.289112091064453, "learning_rate": 3.1337764893440086e-06, "loss": 3.155, "step": 41190 }, { "epoch": 0.4190572102864583, "grad_norm": 13.675725936889648, "learning_rate": 3.1333896633615444e-06, "loss": 3.2811, "step": 41195 }, { "epoch": 0.4191080729166667, "grad_norm": 15.298088073730469, "learning_rate": 3.133002821173957e-06, "loss": 3.3435, "step": 41200 }, { "epoch": 0.419158935546875, "grad_norm": 10.695393562316895, "learning_rate": 3.132615962791142e-06, "loss": 3.4197, "step": 41205 }, { "epoch": 0.4192097981770833, "grad_norm": 9.276285171508789, "learning_rate": 3.132229088222997e-06, "loss": 3.8276, "step": 41210 }, { "epoch": 0.4192606608072917, "grad_norm": 8.871838569641113, "learning_rate": 3.131842197479421e-06, "loss": 3.3686, "step": 41215 }, { "epoch": 0.4193115234375, "grad_norm": 11.672993659973145, "learning_rate": 3.1314552905703126e-06, "loss": 3.384, "step": 41220 }, { "epoch": 0.4193623860677083, "grad_norm": 9.316916465759277, "learning_rate": 3.13106836750557e-06, "loss": 3.2806, "step": 41225 }, { "epoch": 0.4194132486979167, "grad_norm": 10.380117416381836, "learning_rate": 3.1306814282950926e-06, "loss": 3.4212, "step": 41230 }, { "epoch": 0.419464111328125, "grad_norm": 12.29304313659668, "learning_rate": 3.1302944729487804e-06, "loss": 3.5514, "step": 41235 }, { "epoch": 0.4195149739583333, "grad_norm": 16.73832130432129, "learning_rate": 3.1299075014765334e-06, "loss": 3.4344, "step": 41240 }, { "epoch": 0.4195658365885417, "grad_norm": 15.393379211425781, "learning_rate": 3.129520513888253e-06, "loss": 3.0878, "step": 41245 }, { "epoch": 0.41961669921875, "grad_norm": 8.426596641540527, "learning_rate": 3.129133510193838e-06, "loss": 3.2919, "step": 41250 }, { "epoch": 0.4196675618489583, "grad_norm": 14.54990291595459, "learning_rate": 3.128746490403193e-06, "loss": 3.2858, "step": 41255 }, { "epoch": 0.4197184244791667, "grad_norm": 11.128558158874512, "learning_rate": 3.1283594545262163e-06, "loss": 3.3531, "step": 41260 }, { "epoch": 0.419769287109375, "grad_norm": 7.920388698577881, "learning_rate": 3.1279724025728137e-06, "loss": 3.1741, "step": 41265 }, { "epoch": 0.4198201497395833, "grad_norm": 14.856907844543457, "learning_rate": 3.127585334552885e-06, "loss": 4.0395, "step": 41270 }, { "epoch": 0.4198710123697917, "grad_norm": 12.031919479370117, "learning_rate": 3.127198250476334e-06, "loss": 3.2902, "step": 41275 }, { "epoch": 0.419921875, "grad_norm": 8.964862823486328, "learning_rate": 3.1268111503530652e-06, "loss": 3.4011, "step": 41280 }, { "epoch": 0.4199727376302083, "grad_norm": 15.2599458694458, "learning_rate": 3.126424034192981e-06, "loss": 3.0933, "step": 41285 }, { "epoch": 0.4200236002604167, "grad_norm": 10.90890884399414, "learning_rate": 3.1260369020059864e-06, "loss": 3.1269, "step": 41290 }, { "epoch": 0.420074462890625, "grad_norm": 16.50959587097168, "learning_rate": 3.1256497538019875e-06, "loss": 3.8762, "step": 41295 }, { "epoch": 0.4201253255208333, "grad_norm": 15.000621795654297, "learning_rate": 3.125262589590886e-06, "loss": 3.6052, "step": 41300 }, { "epoch": 0.4201761881510417, "grad_norm": 15.3053560256958, "learning_rate": 3.124875409382591e-06, "loss": 3.4111, "step": 41305 }, { "epoch": 0.42022705078125, "grad_norm": 14.705530166625977, "learning_rate": 3.1244882131870056e-06, "loss": 3.3694, "step": 41310 }, { "epoch": 0.4202779134114583, "grad_norm": 14.04732894897461, "learning_rate": 3.124101001014037e-06, "loss": 3.4689, "step": 41315 }, { "epoch": 0.4203287760416667, "grad_norm": 10.53316879272461, "learning_rate": 3.123713772873593e-06, "loss": 3.1923, "step": 41320 }, { "epoch": 0.420379638671875, "grad_norm": 13.080552101135254, "learning_rate": 3.1233265287755793e-06, "loss": 2.9996, "step": 41325 }, { "epoch": 0.4204305013020833, "grad_norm": 10.696488380432129, "learning_rate": 3.1229392687299056e-06, "loss": 3.5193, "step": 41330 }, { "epoch": 0.4204813639322917, "grad_norm": 14.827099800109863, "learning_rate": 3.122551992746477e-06, "loss": 2.8518, "step": 41335 }, { "epoch": 0.4205322265625, "grad_norm": 12.05504322052002, "learning_rate": 3.1221647008352035e-06, "loss": 3.7252, "step": 41340 }, { "epoch": 0.4205830891927083, "grad_norm": 12.480534553527832, "learning_rate": 3.1217773930059936e-06, "loss": 3.0831, "step": 41345 }, { "epoch": 0.4206339518229167, "grad_norm": 9.91734790802002, "learning_rate": 3.1213900692687565e-06, "loss": 2.9163, "step": 41350 }, { "epoch": 0.420684814453125, "grad_norm": 16.006969451904297, "learning_rate": 3.121002729633402e-06, "loss": 3.2898, "step": 41355 }, { "epoch": 0.4207356770833333, "grad_norm": 9.468695640563965, "learning_rate": 3.1206153741098397e-06, "loss": 3.2785, "step": 41360 }, { "epoch": 0.4207865397135417, "grad_norm": 13.025923728942871, "learning_rate": 3.1202280027079797e-06, "loss": 3.3407, "step": 41365 }, { "epoch": 0.42083740234375, "grad_norm": 8.222331047058105, "learning_rate": 3.119840615437734e-06, "loss": 3.6244, "step": 41370 }, { "epoch": 0.4208882649739583, "grad_norm": 8.104004859924316, "learning_rate": 3.1194532123090125e-06, "loss": 3.0128, "step": 41375 }, { "epoch": 0.4209391276041667, "grad_norm": 15.87883186340332, "learning_rate": 3.119065793331728e-06, "loss": 3.5668, "step": 41380 }, { "epoch": 0.420989990234375, "grad_norm": 9.6399564743042, "learning_rate": 3.1186783585157913e-06, "loss": 3.4511, "step": 41385 }, { "epoch": 0.4210408528645833, "grad_norm": 14.242037773132324, "learning_rate": 3.1182909078711154e-06, "loss": 3.3657, "step": 41390 }, { "epoch": 0.4210917154947917, "grad_norm": 15.923033714294434, "learning_rate": 3.1179034414076138e-06, "loss": 3.1755, "step": 41395 }, { "epoch": 0.421142578125, "grad_norm": 10.9227876663208, "learning_rate": 3.117515959135199e-06, "loss": 3.2076, "step": 41400 }, { "epoch": 0.4211934407552083, "grad_norm": 13.24682331085205, "learning_rate": 3.117128461063784e-06, "loss": 3.177, "step": 41405 }, { "epoch": 0.4212443033854167, "grad_norm": 14.045320510864258, "learning_rate": 3.116740947203285e-06, "loss": 3.5047, "step": 41410 }, { "epoch": 0.421295166015625, "grad_norm": 12.409698486328125, "learning_rate": 3.1163534175636134e-06, "loss": 3.2689, "step": 41415 }, { "epoch": 0.4213460286458333, "grad_norm": 8.52938461303711, "learning_rate": 3.1159658721546866e-06, "loss": 3.4171, "step": 41420 }, { "epoch": 0.4213968912760417, "grad_norm": 7.385495185852051, "learning_rate": 3.115578310986419e-06, "loss": 3.3524, "step": 41425 }, { "epoch": 0.42144775390625, "grad_norm": 17.02286720275879, "learning_rate": 3.1151907340687256e-06, "loss": 3.314, "step": 41430 }, { "epoch": 0.4214986165364583, "grad_norm": 14.409526824951172, "learning_rate": 3.114803141411524e-06, "loss": 2.9813, "step": 41435 }, { "epoch": 0.4215494791666667, "grad_norm": 9.918559074401855, "learning_rate": 3.1144155330247292e-06, "loss": 3.5645, "step": 41440 }, { "epoch": 0.421600341796875, "grad_norm": 14.866680145263672, "learning_rate": 3.1140279089182584e-06, "loss": 3.3423, "step": 41445 }, { "epoch": 0.4216512044270833, "grad_norm": 13.22377872467041, "learning_rate": 3.1136402691020294e-06, "loss": 3.5402, "step": 41450 }, { "epoch": 0.4217020670572917, "grad_norm": 12.47979736328125, "learning_rate": 3.11325261358596e-06, "loss": 2.97, "step": 41455 }, { "epoch": 0.4217529296875, "grad_norm": 8.604597091674805, "learning_rate": 3.112864942379967e-06, "loss": 3.2102, "step": 41460 }, { "epoch": 0.4218037923177083, "grad_norm": 12.656006813049316, "learning_rate": 3.112477255493971e-06, "loss": 4.0634, "step": 41465 }, { "epoch": 0.4218546549479167, "grad_norm": 16.776290893554688, "learning_rate": 3.112089552937888e-06, "loss": 3.5199, "step": 41470 }, { "epoch": 0.421905517578125, "grad_norm": 17.225765228271484, "learning_rate": 3.1117018347216398e-06, "loss": 3.5333, "step": 41475 }, { "epoch": 0.4219563802083333, "grad_norm": 13.472655296325684, "learning_rate": 3.1113141008551446e-06, "loss": 3.4921, "step": 41480 }, { "epoch": 0.4220072428385417, "grad_norm": 12.355141639709473, "learning_rate": 3.1109263513483234e-06, "loss": 3.3573, "step": 41485 }, { "epoch": 0.42205810546875, "grad_norm": 12.142220497131348, "learning_rate": 3.1105385862110964e-06, "loss": 3.428, "step": 41490 }, { "epoch": 0.4221089680989583, "grad_norm": 8.77768611907959, "learning_rate": 3.110150805453384e-06, "loss": 3.4354, "step": 41495 }, { "epoch": 0.4221598307291667, "grad_norm": 19.088306427001953, "learning_rate": 3.1097630090851084e-06, "loss": 3.127, "step": 41500 }, { "epoch": 0.422210693359375, "grad_norm": 13.503829002380371, "learning_rate": 3.10937519711619e-06, "loss": 3.7707, "step": 41505 }, { "epoch": 0.4222615559895833, "grad_norm": 11.96296215057373, "learning_rate": 3.1089873695565523e-06, "loss": 3.2588, "step": 41510 }, { "epoch": 0.4223124186197917, "grad_norm": 13.315999984741211, "learning_rate": 3.1085995264161167e-06, "loss": 3.3339, "step": 41515 }, { "epoch": 0.42236328125, "grad_norm": 14.427928924560547, "learning_rate": 3.108211667704807e-06, "loss": 3.5766, "step": 41520 }, { "epoch": 0.4224141438802083, "grad_norm": 14.34740924835205, "learning_rate": 3.107823793432545e-06, "loss": 3.1351, "step": 41525 }, { "epoch": 0.4224650065104167, "grad_norm": 11.97638988494873, "learning_rate": 3.107435903609257e-06, "loss": 3.3518, "step": 41530 }, { "epoch": 0.422515869140625, "grad_norm": 14.582337379455566, "learning_rate": 3.107047998244863e-06, "loss": 3.1531, "step": 41535 }, { "epoch": 0.4225667317708333, "grad_norm": 12.777178764343262, "learning_rate": 3.106660077349292e-06, "loss": 3.6059, "step": 41540 }, { "epoch": 0.4226175944010417, "grad_norm": 15.88293170928955, "learning_rate": 3.106272140932466e-06, "loss": 3.6732, "step": 41545 }, { "epoch": 0.42266845703125, "grad_norm": 10.44229507446289, "learning_rate": 3.1058841890043105e-06, "loss": 3.8964, "step": 41550 }, { "epoch": 0.4227193196614583, "grad_norm": 11.0337553024292, "learning_rate": 3.105496221574752e-06, "loss": 3.2802, "step": 41555 }, { "epoch": 0.4227701822916667, "grad_norm": 10.296064376831055, "learning_rate": 3.105108238653716e-06, "loss": 3.2045, "step": 41560 }, { "epoch": 0.422821044921875, "grad_norm": 14.51941204071045, "learning_rate": 3.1047202402511294e-06, "loss": 3.7077, "step": 41565 }, { "epoch": 0.4228719075520833, "grad_norm": 12.24843978881836, "learning_rate": 3.104332226376919e-06, "loss": 3.1883, "step": 41570 }, { "epoch": 0.4229227701822917, "grad_norm": 11.264163970947266, "learning_rate": 3.103944197041011e-06, "loss": 3.4847, "step": 41575 }, { "epoch": 0.4229736328125, "grad_norm": 11.658236503601074, "learning_rate": 3.103556152253335e-06, "loss": 3.2653, "step": 41580 }, { "epoch": 0.4230244954427083, "grad_norm": 15.09562873840332, "learning_rate": 3.1031680920238165e-06, "loss": 3.1765, "step": 41585 }, { "epoch": 0.4230753580729167, "grad_norm": 14.389519691467285, "learning_rate": 3.1027800163623866e-06, "loss": 3.8784, "step": 41590 }, { "epoch": 0.423126220703125, "grad_norm": 13.224161148071289, "learning_rate": 3.1023919252789724e-06, "loss": 3.4739, "step": 41595 }, { "epoch": 0.4231770833333333, "grad_norm": 11.407111167907715, "learning_rate": 3.102003818783503e-06, "loss": 3.22, "step": 41600 }, { "epoch": 0.4232279459635417, "grad_norm": 12.572507858276367, "learning_rate": 3.1016156968859095e-06, "loss": 3.3877, "step": 41605 }, { "epoch": 0.42327880859375, "grad_norm": 9.610457420349121, "learning_rate": 3.1012275595961204e-06, "loss": 3.2691, "step": 41610 }, { "epoch": 0.4233296712239583, "grad_norm": 12.857911109924316, "learning_rate": 3.1008394069240672e-06, "loss": 3.4903, "step": 41615 }, { "epoch": 0.4233805338541667, "grad_norm": 10.919528007507324, "learning_rate": 3.10045123887968e-06, "loss": 3.2011, "step": 41620 }, { "epoch": 0.423431396484375, "grad_norm": 16.585697174072266, "learning_rate": 3.10006305547289e-06, "loss": 3.4325, "step": 41625 }, { "epoch": 0.4234822591145833, "grad_norm": 18.34198570251465, "learning_rate": 3.0996748567136286e-06, "loss": 3.4349, "step": 41630 }, { "epoch": 0.4235331217447917, "grad_norm": 16.982179641723633, "learning_rate": 3.0992866426118294e-06, "loss": 3.3223, "step": 41635 }, { "epoch": 0.423583984375, "grad_norm": 14.218949317932129, "learning_rate": 3.098898413177422e-06, "loss": 3.4303, "step": 41640 }, { "epoch": 0.4236348470052083, "grad_norm": 7.693777561187744, "learning_rate": 3.098510168420342e-06, "loss": 3.1307, "step": 41645 }, { "epoch": 0.4236857096354167, "grad_norm": 11.543193817138672, "learning_rate": 3.09812190835052e-06, "loss": 2.9734, "step": 41650 }, { "epoch": 0.423736572265625, "grad_norm": 14.944109916687012, "learning_rate": 3.097733632977891e-06, "loss": 3.3276, "step": 41655 }, { "epoch": 0.4237874348958333, "grad_norm": 10.503029823303223, "learning_rate": 3.0973453423123895e-06, "loss": 3.3436, "step": 41660 }, { "epoch": 0.4238382975260417, "grad_norm": 12.030123710632324, "learning_rate": 3.096957036363948e-06, "loss": 3.5391, "step": 41665 }, { "epoch": 0.42388916015625, "grad_norm": 14.35947036743164, "learning_rate": 3.096568715142503e-06, "loss": 3.2357, "step": 41670 }, { "epoch": 0.4239400227864583, "grad_norm": 18.176090240478516, "learning_rate": 3.096180378657988e-06, "loss": 3.8218, "step": 41675 }, { "epoch": 0.4239908854166667, "grad_norm": 14.452081680297852, "learning_rate": 3.09579202692034e-06, "loss": 3.3728, "step": 41680 }, { "epoch": 0.424041748046875, "grad_norm": 8.979386329650879, "learning_rate": 3.0954036599394945e-06, "loss": 3.3105, "step": 41685 }, { "epoch": 0.4240926106770833, "grad_norm": 9.725926399230957, "learning_rate": 3.0950152777253866e-06, "loss": 3.2534, "step": 41690 }, { "epoch": 0.4241434733072917, "grad_norm": 10.265632629394531, "learning_rate": 3.094626880287955e-06, "loss": 3.2297, "step": 41695 }, { "epoch": 0.4241943359375, "grad_norm": 12.90949821472168, "learning_rate": 3.0942384676371346e-06, "loss": 3.2365, "step": 41700 }, { "epoch": 0.4242451985677083, "grad_norm": 16.56032943725586, "learning_rate": 3.093850039782864e-06, "loss": 3.3633, "step": 41705 }, { "epoch": 0.4242960611979167, "grad_norm": 9.831174850463867, "learning_rate": 3.093461596735081e-06, "loss": 3.3728, "step": 41710 }, { "epoch": 0.424346923828125, "grad_norm": 11.747597694396973, "learning_rate": 3.093073138503723e-06, "loss": 3.2889, "step": 41715 }, { "epoch": 0.4243977864583333, "grad_norm": 15.865684509277344, "learning_rate": 3.092684665098731e-06, "loss": 3.9162, "step": 41720 }, { "epoch": 0.4244486490885417, "grad_norm": 14.615534782409668, "learning_rate": 3.0922961765300407e-06, "loss": 3.1447, "step": 41725 }, { "epoch": 0.42449951171875, "grad_norm": 15.222784996032715, "learning_rate": 3.091907672807594e-06, "loss": 3.5148, "step": 41730 }, { "epoch": 0.4245503743489583, "grad_norm": 25.343584060668945, "learning_rate": 3.091519153941329e-06, "loss": 3.6415, "step": 41735 }, { "epoch": 0.4246012369791667, "grad_norm": 13.214790344238281, "learning_rate": 3.0911306199411876e-06, "loss": 3.4832, "step": 41740 }, { "epoch": 0.424652099609375, "grad_norm": 12.833844184875488, "learning_rate": 3.090742070817109e-06, "loss": 3.3397, "step": 41745 }, { "epoch": 0.4247029622395833, "grad_norm": 12.496012687683105, "learning_rate": 3.0903535065790353e-06, "loss": 3.5337, "step": 41750 }, { "epoch": 0.4247538248697917, "grad_norm": 8.92209243774414, "learning_rate": 3.0899649272369054e-06, "loss": 3.48, "step": 41755 }, { "epoch": 0.4248046875, "grad_norm": 9.917067527770996, "learning_rate": 3.0895763328006644e-06, "loss": 3.2993, "step": 41760 }, { "epoch": 0.4248555501302083, "grad_norm": 8.58231258392334, "learning_rate": 3.0891877232802515e-06, "loss": 3.6489, "step": 41765 }, { "epoch": 0.4249064127604167, "grad_norm": 10.70272445678711, "learning_rate": 3.088799098685611e-06, "loss": 2.9813, "step": 41770 }, { "epoch": 0.424957275390625, "grad_norm": 10.071898460388184, "learning_rate": 3.0884104590266844e-06, "loss": 3.4546, "step": 41775 }, { "epoch": 0.4250081380208333, "grad_norm": 14.641159057617188, "learning_rate": 3.0880218043134164e-06, "loss": 3.164, "step": 41780 }, { "epoch": 0.4250590006510417, "grad_norm": 15.49095344543457, "learning_rate": 3.0876331345557498e-06, "loss": 3.4119, "step": 41785 }, { "epoch": 0.42510986328125, "grad_norm": 9.982519149780273, "learning_rate": 3.087244449763629e-06, "loss": 3.6413, "step": 41790 }, { "epoch": 0.4251607259114583, "grad_norm": 10.193666458129883, "learning_rate": 3.086855749946997e-06, "loss": 3.2032, "step": 41795 }, { "epoch": 0.4252115885416667, "grad_norm": 15.675253868103027, "learning_rate": 3.0864670351158012e-06, "loss": 3.5083, "step": 41800 }, { "epoch": 0.425262451171875, "grad_norm": 11.914649963378906, "learning_rate": 3.0860783052799837e-06, "loss": 3.4752, "step": 41805 }, { "epoch": 0.4253133138020833, "grad_norm": 11.520695686340332, "learning_rate": 3.085689560449492e-06, "loss": 3.3944, "step": 41810 }, { "epoch": 0.4253641764322917, "grad_norm": 12.948492050170898, "learning_rate": 3.0853008006342734e-06, "loss": 3.4788, "step": 41815 }, { "epoch": 0.4254150390625, "grad_norm": 14.214773178100586, "learning_rate": 3.0849120258442706e-06, "loss": 2.8699, "step": 41820 }, { "epoch": 0.4254659016927083, "grad_norm": 13.516385078430176, "learning_rate": 3.084523236089434e-06, "loss": 3.3412, "step": 41825 }, { "epoch": 0.4255167643229167, "grad_norm": 15.694234848022461, "learning_rate": 3.084134431379708e-06, "loss": 2.9498, "step": 41830 }, { "epoch": 0.425567626953125, "grad_norm": 10.564866065979004, "learning_rate": 3.083745611725041e-06, "loss": 3.2879, "step": 41835 }, { "epoch": 0.4256184895833333, "grad_norm": 9.610162734985352, "learning_rate": 3.0833567771353806e-06, "loss": 3.6513, "step": 41840 }, { "epoch": 0.4256693522135417, "grad_norm": 17.4929141998291, "learning_rate": 3.082967927620676e-06, "loss": 3.3903, "step": 41845 }, { "epoch": 0.42572021484375, "grad_norm": 12.404966354370117, "learning_rate": 3.082579063190874e-06, "loss": 3.2674, "step": 41850 }, { "epoch": 0.4257710774739583, "grad_norm": 8.494036674499512, "learning_rate": 3.0821901838559263e-06, "loss": 3.5712, "step": 41855 }, { "epoch": 0.4258219401041667, "grad_norm": 12.155200958251953, "learning_rate": 3.0818012896257797e-06, "loss": 3.2202, "step": 41860 }, { "epoch": 0.425872802734375, "grad_norm": 9.61408805847168, "learning_rate": 3.081412380510386e-06, "loss": 3.0132, "step": 41865 }, { "epoch": 0.4259236653645833, "grad_norm": 10.762458801269531, "learning_rate": 3.081023456519693e-06, "loss": 3.2979, "step": 41870 }, { "epoch": 0.4259745279947917, "grad_norm": 9.656315803527832, "learning_rate": 3.0806345176636536e-06, "loss": 3.2092, "step": 41875 }, { "epoch": 0.426025390625, "grad_norm": 15.16588306427002, "learning_rate": 3.080245563952217e-06, "loss": 3.336, "step": 41880 }, { "epoch": 0.4260762532552083, "grad_norm": 13.997447967529297, "learning_rate": 3.079856595395336e-06, "loss": 3.2055, "step": 41885 }, { "epoch": 0.4261271158854167, "grad_norm": 9.956498146057129, "learning_rate": 3.0794676120029605e-06, "loss": 3.5072, "step": 41890 }, { "epoch": 0.426177978515625, "grad_norm": 11.688020706176758, "learning_rate": 3.0790786137850442e-06, "loss": 3.2936, "step": 41895 }, { "epoch": 0.4262288411458333, "grad_norm": 10.722555160522461, "learning_rate": 3.0786896007515383e-06, "loss": 3.178, "step": 41900 }, { "epoch": 0.4262797037760417, "grad_norm": 12.866437911987305, "learning_rate": 3.078300572912397e-06, "loss": 3.2504, "step": 41905 }, { "epoch": 0.42633056640625, "grad_norm": 11.225545883178711, "learning_rate": 3.0779115302775713e-06, "loss": 3.1698, "step": 41910 }, { "epoch": 0.4263814290364583, "grad_norm": 9.123448371887207, "learning_rate": 3.077522472857017e-06, "loss": 3.5688, "step": 41915 }, { "epoch": 0.4264322916666667, "grad_norm": 17.776391983032227, "learning_rate": 3.077133400660687e-06, "loss": 3.4547, "step": 41920 }, { "epoch": 0.426483154296875, "grad_norm": 7.346116542816162, "learning_rate": 3.076744313698535e-06, "loss": 3.1451, "step": 41925 }, { "epoch": 0.4265340169270833, "grad_norm": 20.024919509887695, "learning_rate": 3.076355211980518e-06, "loss": 3.3667, "step": 41930 }, { "epoch": 0.4265848795572917, "grad_norm": 14.997565269470215, "learning_rate": 3.0759660955165887e-06, "loss": 3.0529, "step": 41935 }, { "epoch": 0.4266357421875, "grad_norm": 13.311877250671387, "learning_rate": 3.075576964316703e-06, "loss": 3.4168, "step": 41940 }, { "epoch": 0.4266866048177083, "grad_norm": 14.687515258789062, "learning_rate": 3.0751878183908175e-06, "loss": 2.9444, "step": 41945 }, { "epoch": 0.4267374674479167, "grad_norm": 15.228010177612305, "learning_rate": 3.074798657748888e-06, "loss": 3.3023, "step": 41950 }, { "epoch": 0.426788330078125, "grad_norm": 15.780599594116211, "learning_rate": 3.0744094824008707e-06, "loss": 3.3186, "step": 41955 }, { "epoch": 0.4268391927083333, "grad_norm": 11.762367248535156, "learning_rate": 3.074020292356723e-06, "loss": 3.5871, "step": 41960 }, { "epoch": 0.4268900553385417, "grad_norm": 9.062051773071289, "learning_rate": 3.0736310876264024e-06, "loss": 3.0746, "step": 41965 }, { "epoch": 0.42694091796875, "grad_norm": 11.719466209411621, "learning_rate": 3.073241868219867e-06, "loss": 3.362, "step": 41970 }, { "epoch": 0.4269917805989583, "grad_norm": 16.24999237060547, "learning_rate": 3.072852634147073e-06, "loss": 3.2346, "step": 41975 }, { "epoch": 0.4270426432291667, "grad_norm": 12.696512222290039, "learning_rate": 3.0724633854179814e-06, "loss": 3.3545, "step": 41980 }, { "epoch": 0.427093505859375, "grad_norm": 9.804299354553223, "learning_rate": 3.072074122042549e-06, "loss": 2.9765, "step": 41985 }, { "epoch": 0.4271443684895833, "grad_norm": 14.234360694885254, "learning_rate": 3.071684844030736e-06, "loss": 3.2353, "step": 41990 }, { "epoch": 0.4271952311197917, "grad_norm": 9.232892990112305, "learning_rate": 3.0712955513925017e-06, "loss": 3.9413, "step": 41995 }, { "epoch": 0.42724609375, "grad_norm": 10.719136238098145, "learning_rate": 3.0709062441378062e-06, "loss": 3.2093, "step": 42000 }, { "epoch": 0.4272969563802083, "grad_norm": 11.004392623901367, "learning_rate": 3.0705169222766095e-06, "loss": 3.4891, "step": 42005 }, { "epoch": 0.4273478190104167, "grad_norm": 8.278154373168945, "learning_rate": 3.070127585818873e-06, "loss": 3.3757, "step": 42010 }, { "epoch": 0.427398681640625, "grad_norm": 13.0377836227417, "learning_rate": 3.069738234774557e-06, "loss": 3.4463, "step": 42015 }, { "epoch": 0.4274495442708333, "grad_norm": 11.712611198425293, "learning_rate": 3.069348869153623e-06, "loss": 3.3497, "step": 42020 }, { "epoch": 0.4275004069010417, "grad_norm": 10.128121376037598, "learning_rate": 3.068959488966034e-06, "loss": 3.2071, "step": 42025 }, { "epoch": 0.42755126953125, "grad_norm": 16.13844108581543, "learning_rate": 3.0685700942217506e-06, "loss": 3.2666, "step": 42030 }, { "epoch": 0.4276021321614583, "grad_norm": 11.657336235046387, "learning_rate": 3.068180684930737e-06, "loss": 3.5323, "step": 42035 }, { "epoch": 0.4276529947916667, "grad_norm": 10.846715927124023, "learning_rate": 3.0677912611029544e-06, "loss": 3.0523, "step": 42040 }, { "epoch": 0.427703857421875, "grad_norm": 15.682669639587402, "learning_rate": 3.0674018227483673e-06, "loss": 3.2705, "step": 42045 }, { "epoch": 0.4277547200520833, "grad_norm": 9.073358535766602, "learning_rate": 3.067012369876939e-06, "loss": 3.3751, "step": 42050 }, { "epoch": 0.4278055826822917, "grad_norm": 10.69783878326416, "learning_rate": 3.0666229024986337e-06, "loss": 3.6506, "step": 42055 }, { "epoch": 0.4278564453125, "grad_norm": 15.259477615356445, "learning_rate": 3.0662334206234155e-06, "loss": 3.3437, "step": 42060 }, { "epoch": 0.4279073079427083, "grad_norm": 10.186830520629883, "learning_rate": 3.065843924261249e-06, "loss": 3.4981, "step": 42065 }, { "epoch": 0.4279581705729167, "grad_norm": 9.871879577636719, "learning_rate": 3.065454413422101e-06, "loss": 3.3813, "step": 42070 }, { "epoch": 0.428009033203125, "grad_norm": 14.064395904541016, "learning_rate": 3.0650648881159357e-06, "loss": 3.571, "step": 42075 }, { "epoch": 0.4280598958333333, "grad_norm": 11.894659996032715, "learning_rate": 3.064675348352718e-06, "loss": 3.1771, "step": 42080 }, { "epoch": 0.4281107584635417, "grad_norm": 9.00032901763916, "learning_rate": 3.0642857941424163e-06, "loss": 3.429, "step": 42085 }, { "epoch": 0.42816162109375, "grad_norm": 14.137030601501465, "learning_rate": 3.063896225494996e-06, "loss": 2.908, "step": 42090 }, { "epoch": 0.4282124837239583, "grad_norm": 11.593270301818848, "learning_rate": 3.0635066424204245e-06, "loss": 3.2668, "step": 42095 }, { "epoch": 0.4282633463541667, "grad_norm": 13.249083518981934, "learning_rate": 3.0631170449286695e-06, "loss": 3.1671, "step": 42100 }, { "epoch": 0.428314208984375, "grad_norm": 13.262222290039062, "learning_rate": 3.0627274330296976e-06, "loss": 3.1571, "step": 42105 }, { "epoch": 0.4283650716145833, "grad_norm": 15.089553833007812, "learning_rate": 3.0623378067334785e-06, "loss": 3.2122, "step": 42110 }, { "epoch": 0.4284159342447917, "grad_norm": 7.551955223083496, "learning_rate": 3.0619481660499795e-06, "loss": 2.9784, "step": 42115 }, { "epoch": 0.428466796875, "grad_norm": 10.404687881469727, "learning_rate": 3.06155851098917e-06, "loss": 3.5793, "step": 42120 }, { "epoch": 0.4285176595052083, "grad_norm": 16.388019561767578, "learning_rate": 3.0611688415610187e-06, "loss": 3.2954, "step": 42125 }, { "epoch": 0.4285685221354167, "grad_norm": 8.546836853027344, "learning_rate": 3.060779157775496e-06, "loss": 3.3968, "step": 42130 }, { "epoch": 0.428619384765625, "grad_norm": 14.341412544250488, "learning_rate": 3.060389459642571e-06, "loss": 3.6612, "step": 42135 }, { "epoch": 0.4286702473958333, "grad_norm": 10.097040176391602, "learning_rate": 3.0599997471722153e-06, "loss": 3.036, "step": 42140 }, { "epoch": 0.4287211100260417, "grad_norm": 11.047060012817383, "learning_rate": 3.0596100203743977e-06, "loss": 3.4227, "step": 42145 }, { "epoch": 0.42877197265625, "grad_norm": 11.28085708618164, "learning_rate": 3.059220279259092e-06, "loss": 3.1427, "step": 42150 }, { "epoch": 0.4288228352864583, "grad_norm": 8.427857398986816, "learning_rate": 3.058830523836267e-06, "loss": 3.4633, "step": 42155 }, { "epoch": 0.4288736979166667, "grad_norm": 12.277863502502441, "learning_rate": 3.0584407541158958e-06, "loss": 3.2633, "step": 42160 }, { "epoch": 0.428924560546875, "grad_norm": 10.43952465057373, "learning_rate": 3.05805097010795e-06, "loss": 3.2075, "step": 42165 }, { "epoch": 0.4289754231770833, "grad_norm": 12.587766647338867, "learning_rate": 3.057661171822402e-06, "loss": 3.3297, "step": 42170 }, { "epoch": 0.4290262858072917, "grad_norm": 13.791691780090332, "learning_rate": 3.0572713592692255e-06, "loss": 3.4883, "step": 42175 }, { "epoch": 0.4290771484375, "grad_norm": 9.079309463500977, "learning_rate": 3.0568815324583933e-06, "loss": 3.4208, "step": 42180 }, { "epoch": 0.4291280110677083, "grad_norm": 13.565601348876953, "learning_rate": 3.0564916913998787e-06, "loss": 3.6558, "step": 42185 }, { "epoch": 0.4291788736979167, "grad_norm": 12.191876411437988, "learning_rate": 3.0561018361036575e-06, "loss": 3.8942, "step": 42190 }, { "epoch": 0.429229736328125, "grad_norm": 10.371522903442383, "learning_rate": 3.0557119665797007e-06, "loss": 3.0518, "step": 42195 }, { "epoch": 0.4292805989583333, "grad_norm": 12.896306037902832, "learning_rate": 3.0553220828379867e-06, "loss": 3.5945, "step": 42200 }, { "epoch": 0.4293314615885417, "grad_norm": 16.03690528869629, "learning_rate": 3.0549321848884876e-06, "loss": 3.1975, "step": 42205 }, { "epoch": 0.42938232421875, "grad_norm": 13.459425926208496, "learning_rate": 3.0545422727411807e-06, "loss": 3.1506, "step": 42210 }, { "epoch": 0.4294331868489583, "grad_norm": 15.138161659240723, "learning_rate": 3.0541523464060414e-06, "loss": 3.5121, "step": 42215 }, { "epoch": 0.4294840494791667, "grad_norm": 15.504406929016113, "learning_rate": 3.053762405893045e-06, "loss": 3.0507, "step": 42220 }, { "epoch": 0.429534912109375, "grad_norm": 14.8836030960083, "learning_rate": 3.0533724512121686e-06, "loss": 3.2129, "step": 42225 }, { "epoch": 0.4295857747395833, "grad_norm": 12.824821472167969, "learning_rate": 3.05298248237339e-06, "loss": 3.2611, "step": 42230 }, { "epoch": 0.4296366373697917, "grad_norm": 14.585503578186035, "learning_rate": 3.052592499386685e-06, "loss": 3.4047, "step": 42235 }, { "epoch": 0.4296875, "grad_norm": 9.65004825592041, "learning_rate": 3.0522025022620323e-06, "loss": 3.4409, "step": 42240 }, { "epoch": 0.4297383626302083, "grad_norm": 10.6661376953125, "learning_rate": 3.05181249100941e-06, "loss": 3.4552, "step": 42245 }, { "epoch": 0.4297892252604167, "grad_norm": 13.470560073852539, "learning_rate": 3.0514224656387948e-06, "loss": 3.5744, "step": 42250 }, { "epoch": 0.429840087890625, "grad_norm": 9.623108863830566, "learning_rate": 3.051032426160167e-06, "loss": 3.9649, "step": 42255 }, { "epoch": 0.4298909505208333, "grad_norm": 15.441165924072266, "learning_rate": 3.050642372583505e-06, "loss": 3.4889, "step": 42260 }, { "epoch": 0.4299418131510417, "grad_norm": 10.242965698242188, "learning_rate": 3.050252304918789e-06, "loss": 3.4703, "step": 42265 }, { "epoch": 0.42999267578125, "grad_norm": 14.811580657958984, "learning_rate": 3.049862223175998e-06, "loss": 3.6813, "step": 42270 }, { "epoch": 0.4300435384114583, "grad_norm": 8.836296081542969, "learning_rate": 3.049472127365112e-06, "loss": 3.0087, "step": 42275 }, { "epoch": 0.4300944010416667, "grad_norm": 8.60825252532959, "learning_rate": 3.0490820174961115e-06, "loss": 3.1833, "step": 42280 }, { "epoch": 0.430145263671875, "grad_norm": 13.509065628051758, "learning_rate": 3.048691893578978e-06, "loss": 3.6716, "step": 42285 }, { "epoch": 0.4301961263020833, "grad_norm": 12.435196876525879, "learning_rate": 3.0483017556236926e-06, "loss": 3.7522, "step": 42290 }, { "epoch": 0.4302469889322917, "grad_norm": 11.747030258178711, "learning_rate": 3.0479116036402374e-06, "loss": 3.435, "step": 42295 }, { "epoch": 0.4302978515625, "grad_norm": 17.41107749938965, "learning_rate": 3.0475214376385927e-06, "loss": 3.3643, "step": 42300 }, { "epoch": 0.4303487141927083, "grad_norm": 15.284570693969727, "learning_rate": 3.047131257628742e-06, "loss": 3.0763, "step": 42305 }, { "epoch": 0.4303995768229167, "grad_norm": 12.56916332244873, "learning_rate": 3.0467410636206684e-06, "loss": 3.1925, "step": 42310 }, { "epoch": 0.430450439453125, "grad_norm": 11.555366516113281, "learning_rate": 3.0463508556243534e-06, "loss": 3.6569, "step": 42315 }, { "epoch": 0.4305013020833333, "grad_norm": 8.468916893005371, "learning_rate": 3.0459606336497822e-06, "loss": 3.2736, "step": 42320 }, { "epoch": 0.4305521647135417, "grad_norm": 15.363468170166016, "learning_rate": 3.045570397706936e-06, "loss": 3.1511, "step": 42325 }, { "epoch": 0.43060302734375, "grad_norm": 14.744574546813965, "learning_rate": 3.0451801478058025e-06, "loss": 3.0805, "step": 42330 }, { "epoch": 0.4306538899739583, "grad_norm": 8.937968254089355, "learning_rate": 3.0447898839563628e-06, "loss": 3.3218, "step": 42335 }, { "epoch": 0.4307047526041667, "grad_norm": 12.4723482131958, "learning_rate": 3.0443996061686037e-06, "loss": 3.6424, "step": 42340 }, { "epoch": 0.430755615234375, "grad_norm": 10.444912910461426, "learning_rate": 3.0440093144525097e-06, "loss": 3.2465, "step": 42345 }, { "epoch": 0.4308064778645833, "grad_norm": 12.38646411895752, "learning_rate": 3.0436190088180656e-06, "loss": 3.0429, "step": 42350 }, { "epoch": 0.4308573404947917, "grad_norm": 14.403653144836426, "learning_rate": 3.0432286892752583e-06, "loss": 3.3918, "step": 42355 }, { "epoch": 0.430908203125, "grad_norm": 9.09655475616455, "learning_rate": 3.0428383558340744e-06, "loss": 3.1717, "step": 42360 }, { "epoch": 0.4309590657552083, "grad_norm": 13.981171607971191, "learning_rate": 3.0424480085044987e-06, "loss": 3.3384, "step": 42365 }, { "epoch": 0.4310099283854167, "grad_norm": 11.393648147583008, "learning_rate": 3.0420576472965207e-06, "loss": 3.3822, "step": 42370 }, { "epoch": 0.431060791015625, "grad_norm": 9.865949630737305, "learning_rate": 3.0416672722201255e-06, "loss": 3.3485, "step": 42375 }, { "epoch": 0.4311116536458333, "grad_norm": 13.398794174194336, "learning_rate": 3.041276883285302e-06, "loss": 3.4622, "step": 42380 }, { "epoch": 0.4311625162760417, "grad_norm": 12.496500015258789, "learning_rate": 3.0408864805020365e-06, "loss": 3.3111, "step": 42385 }, { "epoch": 0.43121337890625, "grad_norm": 8.879119873046875, "learning_rate": 3.04049606388032e-06, "loss": 3.2794, "step": 42390 }, { "epoch": 0.4312642415364583, "grad_norm": 11.061419486999512, "learning_rate": 3.040105633430139e-06, "loss": 3.489, "step": 42395 }, { "epoch": 0.4313151041666667, "grad_norm": 12.778908729553223, "learning_rate": 3.0397151891614833e-06, "loss": 3.0799, "step": 42400 }, { "epoch": 0.431365966796875, "grad_norm": 7.622555732727051, "learning_rate": 3.0393247310843428e-06, "loss": 3.2384, "step": 42405 }, { "epoch": 0.4314168294270833, "grad_norm": 13.910135269165039, "learning_rate": 3.038934259208707e-06, "loss": 3.5974, "step": 42410 }, { "epoch": 0.4314676920572917, "grad_norm": 9.1984224319458, "learning_rate": 3.0385437735445656e-06, "loss": 3.8423, "step": 42415 }, { "epoch": 0.4315185546875, "grad_norm": 11.517208099365234, "learning_rate": 3.038153274101909e-06, "loss": 3.668, "step": 42420 }, { "epoch": 0.4315694173177083, "grad_norm": 11.862286567687988, "learning_rate": 3.03776276089073e-06, "loss": 3.171, "step": 42425 }, { "epoch": 0.4316202799479167, "grad_norm": 13.053690910339355, "learning_rate": 3.0373722339210165e-06, "loss": 3.58, "step": 42430 }, { "epoch": 0.431671142578125, "grad_norm": 16.717992782592773, "learning_rate": 3.036981693202763e-06, "loss": 3.3191, "step": 42435 }, { "epoch": 0.4317220052083333, "grad_norm": 15.545528411865234, "learning_rate": 3.0365911387459596e-06, "loss": 3.0347, "step": 42440 }, { "epoch": 0.4317728678385417, "grad_norm": 8.379535675048828, "learning_rate": 3.036200570560599e-06, "loss": 3.1425, "step": 42445 }, { "epoch": 0.43182373046875, "grad_norm": 10.65440845489502, "learning_rate": 3.0358099886566738e-06, "loss": 3.2935, "step": 42450 }, { "epoch": 0.4318745930989583, "grad_norm": 16.401159286499023, "learning_rate": 3.035419393044177e-06, "loss": 3.4272, "step": 42455 }, { "epoch": 0.4319254557291667, "grad_norm": 11.619854927062988, "learning_rate": 3.035028783733103e-06, "loss": 4.0466, "step": 42460 }, { "epoch": 0.431976318359375, "grad_norm": 15.325679779052734, "learning_rate": 3.0346381607334434e-06, "loss": 3.5697, "step": 42465 }, { "epoch": 0.4320271809895833, "grad_norm": 13.767721176147461, "learning_rate": 3.034247524055193e-06, "loss": 2.9997, "step": 42470 }, { "epoch": 0.4320780436197917, "grad_norm": 8.000898361206055, "learning_rate": 3.0338568737083474e-06, "loss": 3.1968, "step": 42475 }, { "epoch": 0.43212890625, "grad_norm": 13.607340812683105, "learning_rate": 3.033466209702899e-06, "loss": 3.4118, "step": 42480 }, { "epoch": 0.4321797688802083, "grad_norm": 14.804567337036133, "learning_rate": 3.0330755320488446e-06, "loss": 3.3317, "step": 42485 }, { "epoch": 0.4322306315104167, "grad_norm": 10.511212348937988, "learning_rate": 3.032684840756179e-06, "loss": 3.5529, "step": 42490 }, { "epoch": 0.432281494140625, "grad_norm": 9.441598892211914, "learning_rate": 3.0322941358348985e-06, "loss": 3.2548, "step": 42495 }, { "epoch": 0.4323323567708333, "grad_norm": 8.868534088134766, "learning_rate": 3.031903417294998e-06, "loss": 3.397, "step": 42500 }, { "epoch": 0.4323832194010417, "grad_norm": 8.431057929992676, "learning_rate": 3.0315126851464747e-06, "loss": 3.4448, "step": 42505 }, { "epoch": 0.43243408203125, "grad_norm": 9.394766807556152, "learning_rate": 3.0311219393993258e-06, "loss": 3.4651, "step": 42510 }, { "epoch": 0.4324849446614583, "grad_norm": 8.319314956665039, "learning_rate": 3.0307311800635475e-06, "loss": 2.8608, "step": 42515 }, { "epoch": 0.4325358072916667, "grad_norm": 14.183341026306152, "learning_rate": 3.0303404071491377e-06, "loss": 3.1964, "step": 42520 }, { "epoch": 0.432586669921875, "grad_norm": 8.481855392456055, "learning_rate": 3.0299496206660944e-06, "loss": 3.454, "step": 42525 }, { "epoch": 0.4326375325520833, "grad_norm": 15.066344261169434, "learning_rate": 3.0295588206244165e-06, "loss": 3.036, "step": 42530 }, { "epoch": 0.4326883951822917, "grad_norm": 8.034235000610352, "learning_rate": 3.0291680070341005e-06, "loss": 2.9867, "step": 42535 }, { "epoch": 0.4327392578125, "grad_norm": 15.006841659545898, "learning_rate": 3.0287771799051475e-06, "loss": 3.4276, "step": 42540 }, { "epoch": 0.4327901204427083, "grad_norm": 7.801065921783447, "learning_rate": 3.028386339247555e-06, "loss": 3.1465, "step": 42545 }, { "epoch": 0.4328409830729167, "grad_norm": 14.672794342041016, "learning_rate": 3.0279954850713232e-06, "loss": 3.2184, "step": 42550 }, { "epoch": 0.432891845703125, "grad_norm": 10.196673393249512, "learning_rate": 3.0276046173864526e-06, "loss": 3.3737, "step": 42555 }, { "epoch": 0.4329427083333333, "grad_norm": 7.468010425567627, "learning_rate": 3.0272137362029426e-06, "loss": 3.1381, "step": 42560 }, { "epoch": 0.4329935709635417, "grad_norm": 14.29434871673584, "learning_rate": 3.0268228415307938e-06, "loss": 3.0089, "step": 42565 }, { "epoch": 0.43304443359375, "grad_norm": 8.408454895019531, "learning_rate": 3.026431933380008e-06, "loss": 3.0385, "step": 42570 }, { "epoch": 0.4330952962239583, "grad_norm": 8.719644546508789, "learning_rate": 3.0260410117605853e-06, "loss": 3.4599, "step": 42575 }, { "epoch": 0.4331461588541667, "grad_norm": 13.945448875427246, "learning_rate": 3.0256500766825293e-06, "loss": 3.1144, "step": 42580 }, { "epoch": 0.433197021484375, "grad_norm": 17.763442993164062, "learning_rate": 3.0252591281558393e-06, "loss": 3.2801, "step": 42585 }, { "epoch": 0.4332478841145833, "grad_norm": 9.635198593139648, "learning_rate": 3.02486816619052e-06, "loss": 3.1231, "step": 42590 }, { "epoch": 0.4332987467447917, "grad_norm": 11.811942100524902, "learning_rate": 3.0244771907965727e-06, "loss": 3.1192, "step": 42595 }, { "epoch": 0.433349609375, "grad_norm": 14.521533966064453, "learning_rate": 3.024086201984e-06, "loss": 3.1643, "step": 42600 }, { "epoch": 0.4334004720052083, "grad_norm": 8.677797317504883, "learning_rate": 3.0236951997628074e-06, "loss": 3.4529, "step": 42605 }, { "epoch": 0.4334513346354167, "grad_norm": 12.783221244812012, "learning_rate": 3.023304184142996e-06, "loss": 3.2235, "step": 42610 }, { "epoch": 0.433502197265625, "grad_norm": 8.39991283416748, "learning_rate": 3.022913155134572e-06, "loss": 3.0379, "step": 42615 }, { "epoch": 0.4335530598958333, "grad_norm": 7.332389831542969, "learning_rate": 3.0225221127475384e-06, "loss": 3.3022, "step": 42620 }, { "epoch": 0.4336039225260417, "grad_norm": 7.816256999969482, "learning_rate": 3.0221310569919004e-06, "loss": 3.1951, "step": 42625 }, { "epoch": 0.43365478515625, "grad_norm": 8.470682144165039, "learning_rate": 3.021739987877663e-06, "loss": 3.258, "step": 42630 }, { "epoch": 0.4337056477864583, "grad_norm": 7.367519855499268, "learning_rate": 3.0213489054148327e-06, "loss": 3.6178, "step": 42635 }, { "epoch": 0.4337565104166667, "grad_norm": 11.68221378326416, "learning_rate": 3.020957809613413e-06, "loss": 3.4607, "step": 42640 }, { "epoch": 0.433807373046875, "grad_norm": 10.065248489379883, "learning_rate": 3.0205667004834117e-06, "loss": 3.4678, "step": 42645 }, { "epoch": 0.4338582356770833, "grad_norm": 9.878422737121582, "learning_rate": 3.0201755780348343e-06, "loss": 3.3738, "step": 42650 }, { "epoch": 0.4339090983072917, "grad_norm": 9.238396644592285, "learning_rate": 3.019784442277689e-06, "loss": 3.4269, "step": 42655 }, { "epoch": 0.4339599609375, "grad_norm": 13.242209434509277, "learning_rate": 3.019393293221981e-06, "loss": 4.0495, "step": 42660 }, { "epoch": 0.4340108235677083, "grad_norm": 9.85168170928955, "learning_rate": 3.0190021308777194e-06, "loss": 3.3997, "step": 42665 }, { "epoch": 0.4340616861979167, "grad_norm": 10.898030281066895, "learning_rate": 3.018610955254911e-06, "loss": 3.8838, "step": 42670 }, { "epoch": 0.434112548828125, "grad_norm": 7.273516654968262, "learning_rate": 3.0182197663635636e-06, "loss": 3.5127, "step": 42675 }, { "epoch": 0.4341634114583333, "grad_norm": 13.67503833770752, "learning_rate": 3.0178285642136874e-06, "loss": 3.2358, "step": 42680 }, { "epoch": 0.4342142740885417, "grad_norm": 11.212492942810059, "learning_rate": 3.0174373488152896e-06, "loss": 3.4223, "step": 42685 }, { "epoch": 0.43426513671875, "grad_norm": 12.348718643188477, "learning_rate": 3.0170461201783797e-06, "loss": 3.4572, "step": 42690 }, { "epoch": 0.4343159993489583, "grad_norm": 12.13748836517334, "learning_rate": 3.0166548783129675e-06, "loss": 3.4449, "step": 42695 }, { "epoch": 0.4343668619791667, "grad_norm": 15.374476432800293, "learning_rate": 3.0162636232290632e-06, "loss": 3.6061, "step": 42700 }, { "epoch": 0.434417724609375, "grad_norm": 11.082989692687988, "learning_rate": 3.0158723549366754e-06, "loss": 3.1109, "step": 42705 }, { "epoch": 0.4344685872395833, "grad_norm": 15.245978355407715, "learning_rate": 3.0154810734458168e-06, "loss": 3.7396, "step": 42710 }, { "epoch": 0.4345194498697917, "grad_norm": 12.285462379455566, "learning_rate": 3.0150897787664963e-06, "loss": 3.3428, "step": 42715 }, { "epoch": 0.4345703125, "grad_norm": 13.291040420532227, "learning_rate": 3.014698470908727e-06, "loss": 3.1849, "step": 42720 }, { "epoch": 0.4346211751302083, "grad_norm": 15.192420959472656, "learning_rate": 3.014307149882518e-06, "loss": 3.4254, "step": 42725 }, { "epoch": 0.4346720377604167, "grad_norm": 11.007890701293945, "learning_rate": 3.013915815697883e-06, "loss": 3.0592, "step": 42730 }, { "epoch": 0.434722900390625, "grad_norm": 16.328813552856445, "learning_rate": 3.0135244683648335e-06, "loss": 3.4706, "step": 42735 }, { "epoch": 0.4347737630208333, "grad_norm": 14.131364822387695, "learning_rate": 3.0131331078933825e-06, "loss": 3.5952, "step": 42740 }, { "epoch": 0.4348246256510417, "grad_norm": 8.084444999694824, "learning_rate": 3.0127417342935426e-06, "loss": 3.2432, "step": 42745 }, { "epoch": 0.43487548828125, "grad_norm": 14.293861389160156, "learning_rate": 3.0123503475753274e-06, "loss": 3.7382, "step": 42750 }, { "epoch": 0.4349263509114583, "grad_norm": 11.47707748413086, "learning_rate": 3.0119589477487487e-06, "loss": 3.5475, "step": 42755 }, { "epoch": 0.4349772135416667, "grad_norm": 8.84982681274414, "learning_rate": 3.011567534823823e-06, "loss": 3.0028, "step": 42760 }, { "epoch": 0.435028076171875, "grad_norm": 10.389779090881348, "learning_rate": 3.0111761088105625e-06, "loss": 3.1776, "step": 42765 }, { "epoch": 0.4350789388020833, "grad_norm": 10.124707221984863, "learning_rate": 3.0107846697189825e-06, "loss": 3.2998, "step": 42770 }, { "epoch": 0.4351298014322917, "grad_norm": 17.50204086303711, "learning_rate": 3.010393217559098e-06, "loss": 3.3225, "step": 42775 }, { "epoch": 0.4351806640625, "grad_norm": 9.7677640914917, "learning_rate": 3.010001752340923e-06, "loss": 3.9175, "step": 42780 }, { "epoch": 0.4352315266927083, "grad_norm": 15.209006309509277, "learning_rate": 3.0096102740744747e-06, "loss": 3.5655, "step": 42785 }, { "epoch": 0.4352823893229167, "grad_norm": 14.229117393493652, "learning_rate": 3.0092187827697692e-06, "loss": 3.6215, "step": 42790 }, { "epoch": 0.435333251953125, "grad_norm": 10.619292259216309, "learning_rate": 3.0088272784368203e-06, "loss": 3.4509, "step": 42795 }, { "epoch": 0.4353841145833333, "grad_norm": 14.222017288208008, "learning_rate": 3.008435761085647e-06, "loss": 3.321, "step": 42800 }, { "epoch": 0.4354349772135417, "grad_norm": 13.797867774963379, "learning_rate": 3.008044230726265e-06, "loss": 3.5503, "step": 42805 }, { "epoch": 0.43548583984375, "grad_norm": 14.982044219970703, "learning_rate": 3.0076526873686923e-06, "loss": 3.3824, "step": 42810 }, { "epoch": 0.4355367024739583, "grad_norm": 13.971278190612793, "learning_rate": 3.007261131022946e-06, "loss": 3.5267, "step": 42815 }, { "epoch": 0.4355875651041667, "grad_norm": 15.691052436828613, "learning_rate": 3.0068695616990427e-06, "loss": 3.2466, "step": 42820 }, { "epoch": 0.435638427734375, "grad_norm": 8.944365501403809, "learning_rate": 3.0064779794070033e-06, "loss": 3.4923, "step": 42825 }, { "epoch": 0.4356892903645833, "grad_norm": 6.83592414855957, "learning_rate": 3.006086384156844e-06, "loss": 3.2261, "step": 42830 }, { "epoch": 0.4357401529947917, "grad_norm": 9.382534980773926, "learning_rate": 3.0056947759585843e-06, "loss": 3.6441, "step": 42835 }, { "epoch": 0.435791015625, "grad_norm": 10.255620956420898, "learning_rate": 3.0053031548222437e-06, "loss": 2.9907, "step": 42840 }, { "epoch": 0.4358418782552083, "grad_norm": 12.222493171691895, "learning_rate": 3.004911520757842e-06, "loss": 3.3177, "step": 42845 }, { "epoch": 0.4358927408854167, "grad_norm": 10.061985969543457, "learning_rate": 3.0045198737753987e-06, "loss": 3.1202, "step": 42850 }, { "epoch": 0.435943603515625, "grad_norm": 15.395977973937988, "learning_rate": 3.004128213884934e-06, "loss": 3.6209, "step": 42855 }, { "epoch": 0.4359944661458333, "grad_norm": 8.896660804748535, "learning_rate": 3.003736541096468e-06, "loss": 3.1989, "step": 42860 }, { "epoch": 0.4360453287760417, "grad_norm": 12.098740577697754, "learning_rate": 3.0033448554200228e-06, "loss": 3.3545, "step": 42865 }, { "epoch": 0.43609619140625, "grad_norm": 9.284294128417969, "learning_rate": 3.002953156865618e-06, "loss": 2.994, "step": 42870 }, { "epoch": 0.4361470540364583, "grad_norm": 13.549686431884766, "learning_rate": 3.0025614454432765e-06, "loss": 3.2236, "step": 42875 }, { "epoch": 0.4361979166666667, "grad_norm": 13.168487548828125, "learning_rate": 3.0021697211630195e-06, "loss": 3.0275, "step": 42880 }, { "epoch": 0.436248779296875, "grad_norm": 9.46147346496582, "learning_rate": 3.001777984034869e-06, "loss": 3.3652, "step": 42885 }, { "epoch": 0.4362996419270833, "grad_norm": 8.875849723815918, "learning_rate": 3.0013862340688473e-06, "loss": 3.3035, "step": 42890 }, { "epoch": 0.4363505045572917, "grad_norm": 13.512296676635742, "learning_rate": 3.0009944712749783e-06, "loss": 3.5612, "step": 42895 }, { "epoch": 0.4364013671875, "grad_norm": 8.877745628356934, "learning_rate": 3.000602695663284e-06, "loss": 3.4272, "step": 42900 }, { "epoch": 0.4364522298177083, "grad_norm": 16.58599853515625, "learning_rate": 3.0002109072437883e-06, "loss": 3.4123, "step": 42905 }, { "epoch": 0.4365030924479167, "grad_norm": 12.493461608886719, "learning_rate": 2.9998191060265156e-06, "loss": 3.4447, "step": 42910 }, { "epoch": 0.436553955078125, "grad_norm": 12.325043678283691, "learning_rate": 2.9994272920214896e-06, "loss": 3.0123, "step": 42915 }, { "epoch": 0.4366048177083333, "grad_norm": 9.454997062683105, "learning_rate": 2.9990354652387348e-06, "loss": 3.2529, "step": 42920 }, { "epoch": 0.4366556803385417, "grad_norm": 14.23027229309082, "learning_rate": 2.998643625688275e-06, "loss": 3.8447, "step": 42925 }, { "epoch": 0.43670654296875, "grad_norm": 12.489675521850586, "learning_rate": 2.998251773380137e-06, "loss": 2.9214, "step": 42930 }, { "epoch": 0.4367574055989583, "grad_norm": 10.686521530151367, "learning_rate": 2.9978599083243454e-06, "loss": 3.5369, "step": 42935 }, { "epoch": 0.4368082682291667, "grad_norm": 12.095287322998047, "learning_rate": 2.9974680305309257e-06, "loss": 3.582, "step": 42940 }, { "epoch": 0.436859130859375, "grad_norm": 13.138361930847168, "learning_rate": 2.9970761400099047e-06, "loss": 3.0216, "step": 42945 }, { "epoch": 0.4369099934895833, "grad_norm": 14.297666549682617, "learning_rate": 2.9966842367713082e-06, "loss": 3.444, "step": 42950 }, { "epoch": 0.4369608561197917, "grad_norm": 15.614699363708496, "learning_rate": 2.996292320825164e-06, "loss": 3.2882, "step": 42955 }, { "epoch": 0.43701171875, "grad_norm": 10.250551223754883, "learning_rate": 2.9959003921814973e-06, "loss": 2.9559, "step": 42960 }, { "epoch": 0.4370625813802083, "grad_norm": 13.618687629699707, "learning_rate": 2.9955084508503367e-06, "loss": 3.5666, "step": 42965 }, { "epoch": 0.4371134440104167, "grad_norm": 8.38098430633545, "learning_rate": 2.995116496841711e-06, "loss": 3.1937, "step": 42970 }, { "epoch": 0.437164306640625, "grad_norm": 18.40715980529785, "learning_rate": 2.994724530165646e-06, "loss": 3.6116, "step": 42975 }, { "epoch": 0.4372151692708333, "grad_norm": 16.524473190307617, "learning_rate": 2.9943325508321717e-06, "loss": 3.3483, "step": 42980 }, { "epoch": 0.4372660319010417, "grad_norm": 13.106752395629883, "learning_rate": 2.9939405588513158e-06, "loss": 3.5727, "step": 42985 }, { "epoch": 0.43731689453125, "grad_norm": 15.565990447998047, "learning_rate": 2.9935485542331085e-06, "loss": 3.8515, "step": 42990 }, { "epoch": 0.4373677571614583, "grad_norm": 7.531777858734131, "learning_rate": 2.9931565369875776e-06, "loss": 3.044, "step": 42995 }, { "epoch": 0.4374186197916667, "grad_norm": 10.903145790100098, "learning_rate": 2.992764507124754e-06, "loss": 3.4493, "step": 43000 }, { "epoch": 0.437469482421875, "grad_norm": 11.029446601867676, "learning_rate": 2.992372464654667e-06, "loss": 3.3143, "step": 43005 }, { "epoch": 0.4375203450520833, "grad_norm": 14.928308486938477, "learning_rate": 2.9919804095873473e-06, "loss": 3.2214, "step": 43010 }, { "epoch": 0.4375712076822917, "grad_norm": 9.891948699951172, "learning_rate": 2.9915883419328258e-06, "loss": 3.4254, "step": 43015 }, { "epoch": 0.4376220703125, "grad_norm": 12.621122360229492, "learning_rate": 2.9911962617011325e-06, "loss": 3.2019, "step": 43020 }, { "epoch": 0.4376729329427083, "grad_norm": 10.894399642944336, "learning_rate": 2.9908041689023e-06, "loss": 3.2527, "step": 43025 }, { "epoch": 0.4377237955729167, "grad_norm": 10.197098731994629, "learning_rate": 2.9904120635463583e-06, "loss": 3.3771, "step": 43030 }, { "epoch": 0.437774658203125, "grad_norm": 15.673666954040527, "learning_rate": 2.9900199456433414e-06, "loss": 3.1911, "step": 43035 }, { "epoch": 0.4378255208333333, "grad_norm": 10.539162635803223, "learning_rate": 2.9896278152032785e-06, "loss": 3.1443, "step": 43040 }, { "epoch": 0.4378763834635417, "grad_norm": 12.590764999389648, "learning_rate": 2.989235672236206e-06, "loss": 3.3408, "step": 43045 }, { "epoch": 0.43792724609375, "grad_norm": 10.05128002166748, "learning_rate": 2.988843516752154e-06, "loss": 3.5955, "step": 43050 }, { "epoch": 0.4379781087239583, "grad_norm": 10.43685531616211, "learning_rate": 2.988451348761156e-06, "loss": 3.3872, "step": 43055 }, { "epoch": 0.4380289713541667, "grad_norm": 15.180144309997559, "learning_rate": 2.988059168273247e-06, "loss": 3.3636, "step": 43060 }, { "epoch": 0.438079833984375, "grad_norm": 11.525684356689453, "learning_rate": 2.9876669752984588e-06, "loss": 3.4391, "step": 43065 }, { "epoch": 0.4381306966145833, "grad_norm": 7.944387435913086, "learning_rate": 2.9872747698468267e-06, "loss": 3.6003, "step": 43070 }, { "epoch": 0.4381815592447917, "grad_norm": 8.787629127502441, "learning_rate": 2.9868825519283863e-06, "loss": 2.9129, "step": 43075 }, { "epoch": 0.438232421875, "grad_norm": 14.393413543701172, "learning_rate": 2.9864903215531697e-06, "loss": 3.3476, "step": 43080 }, { "epoch": 0.4382832845052083, "grad_norm": 15.774198532104492, "learning_rate": 2.986098078731215e-06, "loss": 3.4395, "step": 43085 }, { "epoch": 0.4383341471354167, "grad_norm": 8.885433197021484, "learning_rate": 2.9857058234725556e-06, "loss": 3.651, "step": 43090 }, { "epoch": 0.438385009765625, "grad_norm": 10.791901588439941, "learning_rate": 2.985313555787227e-06, "loss": 3.1684, "step": 43095 }, { "epoch": 0.4384358723958333, "grad_norm": 6.774324893951416, "learning_rate": 2.984921275685267e-06, "loss": 3.4454, "step": 43100 }, { "epoch": 0.4384867350260417, "grad_norm": 9.706006050109863, "learning_rate": 2.9845289831767103e-06, "loss": 3.289, "step": 43105 }, { "epoch": 0.43853759765625, "grad_norm": 10.4579496383667, "learning_rate": 2.984136678271596e-06, "loss": 3.3626, "step": 43110 }, { "epoch": 0.4385884602864583, "grad_norm": 14.540216445922852, "learning_rate": 2.983744360979958e-06, "loss": 3.5328, "step": 43115 }, { "epoch": 0.4386393229166667, "grad_norm": 14.207021713256836, "learning_rate": 2.983352031311836e-06, "loss": 3.4168, "step": 43120 }, { "epoch": 0.438690185546875, "grad_norm": 13.922112464904785, "learning_rate": 2.982959689277266e-06, "loss": 3.4668, "step": 43125 }, { "epoch": 0.4387410481770833, "grad_norm": 7.798769950866699, "learning_rate": 2.9825673348862878e-06, "loss": 3.6244, "step": 43130 }, { "epoch": 0.4387919108072917, "grad_norm": 13.192652702331543, "learning_rate": 2.9821749681489383e-06, "loss": 3.0737, "step": 43135 }, { "epoch": 0.4388427734375, "grad_norm": 13.703279495239258, "learning_rate": 2.9817825890752566e-06, "loss": 3.1973, "step": 43140 }, { "epoch": 0.4388936360677083, "grad_norm": 9.974124908447266, "learning_rate": 2.981390197675281e-06, "loss": 3.1817, "step": 43145 }, { "epoch": 0.4389444986979167, "grad_norm": 9.606637954711914, "learning_rate": 2.9809977939590525e-06, "loss": 3.3443, "step": 43150 }, { "epoch": 0.438995361328125, "grad_norm": 10.829172134399414, "learning_rate": 2.9806053779366086e-06, "loss": 3.2096, "step": 43155 }, { "epoch": 0.4390462239583333, "grad_norm": 8.69129753112793, "learning_rate": 2.98021294961799e-06, "loss": 3.435, "step": 43160 }, { "epoch": 0.4390970865885417, "grad_norm": 10.557202339172363, "learning_rate": 2.979820509013237e-06, "loss": 3.2752, "step": 43165 }, { "epoch": 0.43914794921875, "grad_norm": 16.040302276611328, "learning_rate": 2.9794280561323895e-06, "loss": 3.3947, "step": 43170 }, { "epoch": 0.4391988118489583, "grad_norm": 14.875178337097168, "learning_rate": 2.9790355909854897e-06, "loss": 3.3824, "step": 43175 }, { "epoch": 0.4392496744791667, "grad_norm": 12.481240272521973, "learning_rate": 2.9786431135825776e-06, "loss": 3.5868, "step": 43180 }, { "epoch": 0.439300537109375, "grad_norm": 10.882767677307129, "learning_rate": 2.9782506239336946e-06, "loss": 3.3987, "step": 43185 }, { "epoch": 0.4393513997395833, "grad_norm": 10.94859790802002, "learning_rate": 2.977858122048883e-06, "loss": 3.5864, "step": 43190 }, { "epoch": 0.4394022623697917, "grad_norm": 15.125771522521973, "learning_rate": 2.977465607938185e-06, "loss": 3.6607, "step": 43195 }, { "epoch": 0.439453125, "grad_norm": 10.725627899169922, "learning_rate": 2.9770730816116416e-06, "loss": 3.6029, "step": 43200 }, { "epoch": 0.4395039876302083, "grad_norm": 12.071203231811523, "learning_rate": 2.9766805430792976e-06, "loss": 3.2604, "step": 43205 }, { "epoch": 0.4395548502604167, "grad_norm": 13.576530456542969, "learning_rate": 2.976287992351194e-06, "loss": 3.9804, "step": 43210 }, { "epoch": 0.439605712890625, "grad_norm": 14.199870109558105, "learning_rate": 2.975895429437375e-06, "loss": 3.7286, "step": 43215 }, { "epoch": 0.4396565755208333, "grad_norm": 12.568558692932129, "learning_rate": 2.9755028543478854e-06, "loss": 3.2988, "step": 43220 }, { "epoch": 0.4397074381510417, "grad_norm": 13.958497047424316, "learning_rate": 2.975110267092767e-06, "loss": 3.5233, "step": 43225 }, { "epoch": 0.43975830078125, "grad_norm": 13.36304759979248, "learning_rate": 2.974717667682065e-06, "loss": 3.6748, "step": 43230 }, { "epoch": 0.4398091634114583, "grad_norm": 11.973743438720703, "learning_rate": 2.974325056125824e-06, "loss": 3.2443, "step": 43235 }, { "epoch": 0.4398600260416667, "grad_norm": 10.666085243225098, "learning_rate": 2.973932432434089e-06, "loss": 3.4837, "step": 43240 }, { "epoch": 0.439910888671875, "grad_norm": 14.517359733581543, "learning_rate": 2.9735397966169055e-06, "loss": 3.4415, "step": 43245 }, { "epoch": 0.4399617513020833, "grad_norm": 10.464876174926758, "learning_rate": 2.973147148684318e-06, "loss": 3.5669, "step": 43250 }, { "epoch": 0.4400126139322917, "grad_norm": 11.049302101135254, "learning_rate": 2.972754488646373e-06, "loss": 3.245, "step": 43255 }, { "epoch": 0.4400634765625, "grad_norm": 9.860051155090332, "learning_rate": 2.972361816513116e-06, "loss": 3.3552, "step": 43260 }, { "epoch": 0.4401143391927083, "grad_norm": 10.621298789978027, "learning_rate": 2.971969132294595e-06, "loss": 3.2837, "step": 43265 }, { "epoch": 0.4401652018229167, "grad_norm": 14.433876991271973, "learning_rate": 2.9715764360008548e-06, "loss": 3.3214, "step": 43270 }, { "epoch": 0.440216064453125, "grad_norm": 10.880840301513672, "learning_rate": 2.971183727641944e-06, "loss": 3.3316, "step": 43275 }, { "epoch": 0.4402669270833333, "grad_norm": 13.201940536499023, "learning_rate": 2.970791007227908e-06, "loss": 3.2394, "step": 43280 }, { "epoch": 0.4403177897135417, "grad_norm": 11.534923553466797, "learning_rate": 2.9703982747687966e-06, "loss": 3.3618, "step": 43285 }, { "epoch": 0.44036865234375, "grad_norm": 13.809548377990723, "learning_rate": 2.9700055302746562e-06, "loss": 3.3902, "step": 43290 }, { "epoch": 0.4404195149739583, "grad_norm": 8.454058647155762, "learning_rate": 2.969612773755536e-06, "loss": 3.1477, "step": 43295 }, { "epoch": 0.4404703776041667, "grad_norm": 9.063993453979492, "learning_rate": 2.9692200052214842e-06, "loss": 3.4497, "step": 43300 }, { "epoch": 0.440521240234375, "grad_norm": 11.819409370422363, "learning_rate": 2.96882722468255e-06, "loss": 2.8236, "step": 43305 }, { "epoch": 0.4405721028645833, "grad_norm": 11.365645408630371, "learning_rate": 2.968434432148783e-06, "loss": 3.6733, "step": 43310 }, { "epoch": 0.4406229654947917, "grad_norm": 15.08333969116211, "learning_rate": 2.9680416276302303e-06, "loss": 3.3216, "step": 43315 }, { "epoch": 0.440673828125, "grad_norm": 13.111920356750488, "learning_rate": 2.9676488111369456e-06, "loss": 3.5055, "step": 43320 }, { "epoch": 0.4407246907552083, "grad_norm": 11.382067680358887, "learning_rate": 2.967255982678975e-06, "loss": 3.5201, "step": 43325 }, { "epoch": 0.4407755533854167, "grad_norm": 13.540173530578613, "learning_rate": 2.9668631422663722e-06, "loss": 3.6338, "step": 43330 }, { "epoch": 0.440826416015625, "grad_norm": 8.033857345581055, "learning_rate": 2.966470289909186e-06, "loss": 3.2214, "step": 43335 }, { "epoch": 0.4408772786458333, "grad_norm": 8.666276931762695, "learning_rate": 2.9660774256174684e-06, "loss": 3.4007, "step": 43340 }, { "epoch": 0.4409281412760417, "grad_norm": 8.476204872131348, "learning_rate": 2.96568454940127e-06, "loss": 3.4108, "step": 43345 }, { "epoch": 0.44097900390625, "grad_norm": 15.811210632324219, "learning_rate": 2.9652916612706426e-06, "loss": 3.3204, "step": 43350 }, { "epoch": 0.4410298665364583, "grad_norm": 14.496684074401855, "learning_rate": 2.9648987612356385e-06, "loss": 3.3657, "step": 43355 }, { "epoch": 0.4410807291666667, "grad_norm": 9.801753044128418, "learning_rate": 2.9645058493063105e-06, "loss": 3.8043, "step": 43360 }, { "epoch": 0.441131591796875, "grad_norm": 8.788553237915039, "learning_rate": 2.9641129254927092e-06, "loss": 3.6548, "step": 43365 }, { "epoch": 0.4411824544270833, "grad_norm": 7.200490951538086, "learning_rate": 2.9637199898048898e-06, "loss": 3.1184, "step": 43370 }, { "epoch": 0.4412333170572917, "grad_norm": 10.760024070739746, "learning_rate": 2.963327042252904e-06, "loss": 3.1018, "step": 43375 }, { "epoch": 0.4412841796875, "grad_norm": 16.04799461364746, "learning_rate": 2.962934082846805e-06, "loss": 3.262, "step": 43380 }, { "epoch": 0.4413350423177083, "grad_norm": 11.933404922485352, "learning_rate": 2.962541111596648e-06, "loss": 3.0244, "step": 43385 }, { "epoch": 0.4413859049479167, "grad_norm": 11.039929389953613, "learning_rate": 2.962148128512486e-06, "loss": 3.1555, "step": 43390 }, { "epoch": 0.441436767578125, "grad_norm": 13.250151634216309, "learning_rate": 2.9617551336043736e-06, "loss": 2.922, "step": 43395 }, { "epoch": 0.4414876302083333, "grad_norm": 8.479625701904297, "learning_rate": 2.9613621268823654e-06, "loss": 3.2628, "step": 43400 }, { "epoch": 0.4415384928385417, "grad_norm": 7.887979984283447, "learning_rate": 2.960969108356517e-06, "loss": 3.4097, "step": 43405 }, { "epoch": 0.44158935546875, "grad_norm": 14.438159942626953, "learning_rate": 2.960576078036883e-06, "loss": 2.8774, "step": 43410 }, { "epoch": 0.4416402180989583, "grad_norm": 10.12606430053711, "learning_rate": 2.9601830359335193e-06, "loss": 3.2402, "step": 43415 }, { "epoch": 0.4416910807291667, "grad_norm": 18.690998077392578, "learning_rate": 2.9597899820564813e-06, "loss": 3.9748, "step": 43420 }, { "epoch": 0.441741943359375, "grad_norm": 11.186441421508789, "learning_rate": 2.9593969164158265e-06, "loss": 3.7492, "step": 43425 }, { "epoch": 0.4417928059895833, "grad_norm": 13.399593353271484, "learning_rate": 2.959003839021609e-06, "loss": 3.3452, "step": 43430 }, { "epoch": 0.4418436686197917, "grad_norm": 10.064865112304688, "learning_rate": 2.958610749883889e-06, "loss": 2.8705, "step": 43435 }, { "epoch": 0.44189453125, "grad_norm": 11.90968132019043, "learning_rate": 2.95821764901272e-06, "loss": 3.2716, "step": 43440 }, { "epoch": 0.4419453938802083, "grad_norm": 12.014603614807129, "learning_rate": 2.957824536418162e-06, "loss": 3.3077, "step": 43445 }, { "epoch": 0.4419962565104167, "grad_norm": 7.141475200653076, "learning_rate": 2.9574314121102717e-06, "loss": 3.0187, "step": 43450 }, { "epoch": 0.442047119140625, "grad_norm": 11.256348609924316, "learning_rate": 2.957038276099107e-06, "loss": 3.0342, "step": 43455 }, { "epoch": 0.4420979817708333, "grad_norm": 13.187239646911621, "learning_rate": 2.956645128394726e-06, "loss": 3.8987, "step": 43460 }, { "epoch": 0.4421488444010417, "grad_norm": 11.204666137695312, "learning_rate": 2.9562519690071876e-06, "loss": 3.4287, "step": 43465 }, { "epoch": 0.44219970703125, "grad_norm": 8.557195663452148, "learning_rate": 2.9558587979465507e-06, "loss": 3.4322, "step": 43470 }, { "epoch": 0.4422505696614583, "grad_norm": 13.133142471313477, "learning_rate": 2.9554656152228758e-06, "loss": 3.3743, "step": 43475 }, { "epoch": 0.4423014322916667, "grad_norm": 14.486852645874023, "learning_rate": 2.9550724208462187e-06, "loss": 3.3238, "step": 43480 }, { "epoch": 0.442352294921875, "grad_norm": 14.670029640197754, "learning_rate": 2.9546792148266436e-06, "loss": 3.0607, "step": 43485 }, { "epoch": 0.4424031575520833, "grad_norm": 15.148486137390137, "learning_rate": 2.954285997174208e-06, "loss": 3.3221, "step": 43490 }, { "epoch": 0.4424540201822917, "grad_norm": 9.987661361694336, "learning_rate": 2.9538927678989725e-06, "loss": 3.327, "step": 43495 }, { "epoch": 0.4425048828125, "grad_norm": 14.121315002441406, "learning_rate": 2.953499527010999e-06, "loss": 3.6905, "step": 43500 }, { "epoch": 0.4425557454427083, "grad_norm": 10.06773853302002, "learning_rate": 2.9531062745203466e-06, "loss": 3.3083, "step": 43505 }, { "epoch": 0.4426066080729167, "grad_norm": 15.57811450958252, "learning_rate": 2.9527130104370776e-06, "loss": 4.0533, "step": 43510 }, { "epoch": 0.442657470703125, "grad_norm": 12.95801830291748, "learning_rate": 2.952319734771254e-06, "loss": 3.5316, "step": 43515 }, { "epoch": 0.4427083333333333, "grad_norm": 7.576696395874023, "learning_rate": 2.9519264475329374e-06, "loss": 3.393, "step": 43520 }, { "epoch": 0.4427591959635417, "grad_norm": 12.71930980682373, "learning_rate": 2.9515331487321893e-06, "loss": 3.3971, "step": 43525 }, { "epoch": 0.44281005859375, "grad_norm": 10.263254165649414, "learning_rate": 2.951139838379073e-06, "loss": 3.5974, "step": 43530 }, { "epoch": 0.4428609212239583, "grad_norm": 15.159732818603516, "learning_rate": 2.95074651648365e-06, "loss": 3.3649, "step": 43535 }, { "epoch": 0.4429117838541667, "grad_norm": 15.83638858795166, "learning_rate": 2.950353183055985e-06, "loss": 3.3533, "step": 43540 }, { "epoch": 0.442962646484375, "grad_norm": 13.222535133361816, "learning_rate": 2.9499598381061408e-06, "loss": 3.4962, "step": 43545 }, { "epoch": 0.4430135091145833, "grad_norm": 9.307706832885742, "learning_rate": 2.94956648164418e-06, "loss": 3.1602, "step": 43550 }, { "epoch": 0.4430643717447917, "grad_norm": 15.66658878326416, "learning_rate": 2.9491731136801677e-06, "loss": 3.4844, "step": 43555 }, { "epoch": 0.443115234375, "grad_norm": 10.596369743347168, "learning_rate": 2.9487797342241677e-06, "loss": 2.9099, "step": 43560 }, { "epoch": 0.4431660970052083, "grad_norm": 10.095293998718262, "learning_rate": 2.9483863432862447e-06, "loss": 3.0744, "step": 43565 }, { "epoch": 0.4432169596354167, "grad_norm": 14.364975929260254, "learning_rate": 2.947992940876463e-06, "loss": 3.2898, "step": 43570 }, { "epoch": 0.443267822265625, "grad_norm": 8.403112411499023, "learning_rate": 2.9475995270048887e-06, "loss": 3.217, "step": 43575 }, { "epoch": 0.4433186848958333, "grad_norm": 13.855816841125488, "learning_rate": 2.9472061016815866e-06, "loss": 3.3371, "step": 43580 }, { "epoch": 0.4433695475260417, "grad_norm": 15.479269981384277, "learning_rate": 2.946812664916622e-06, "loss": 3.5384, "step": 43585 }, { "epoch": 0.44342041015625, "grad_norm": 9.135083198547363, "learning_rate": 2.9464192167200607e-06, "loss": 3.6018, "step": 43590 }, { "epoch": 0.4434712727864583, "grad_norm": 13.801974296569824, "learning_rate": 2.946025757101971e-06, "loss": 3.2447, "step": 43595 }, { "epoch": 0.4435221354166667, "grad_norm": 13.949684143066406, "learning_rate": 2.945632286072416e-06, "loss": 3.3698, "step": 43600 }, { "epoch": 0.443572998046875, "grad_norm": 16.448776245117188, "learning_rate": 2.945238803641467e-06, "loss": 3.3822, "step": 43605 }, { "epoch": 0.4436238606770833, "grad_norm": 10.418899536132812, "learning_rate": 2.944845309819187e-06, "loss": 3.3236, "step": 43610 }, { "epoch": 0.4436747233072917, "grad_norm": 8.672096252441406, "learning_rate": 2.944451804615646e-06, "loss": 3.4957, "step": 43615 }, { "epoch": 0.4437255859375, "grad_norm": 11.790623664855957, "learning_rate": 2.9440582880409106e-06, "loss": 3.5351, "step": 43620 }, { "epoch": 0.4437764485677083, "grad_norm": 17.244104385375977, "learning_rate": 2.9436647601050494e-06, "loss": 3.5512, "step": 43625 }, { "epoch": 0.4438273111979167, "grad_norm": 12.104947090148926, "learning_rate": 2.94327122081813e-06, "loss": 2.9641, "step": 43630 }, { "epoch": 0.443878173828125, "grad_norm": 7.7502336502075195, "learning_rate": 2.9428776701902223e-06, "loss": 3.3353, "step": 43635 }, { "epoch": 0.4439290364583333, "grad_norm": 11.081683158874512, "learning_rate": 2.942484108231393e-06, "loss": 3.1159, "step": 43640 }, { "epoch": 0.4439798990885417, "grad_norm": 8.795500755310059, "learning_rate": 2.9420905349517143e-06, "loss": 3.3727, "step": 43645 }, { "epoch": 0.44403076171875, "grad_norm": 9.463774681091309, "learning_rate": 2.9416969503612527e-06, "loss": 3.4369, "step": 43650 }, { "epoch": 0.4440816243489583, "grad_norm": 15.080618858337402, "learning_rate": 2.94130335447008e-06, "loss": 3.4072, "step": 43655 }, { "epoch": 0.4441324869791667, "grad_norm": 15.829692840576172, "learning_rate": 2.940909747288266e-06, "loss": 3.1416, "step": 43660 }, { "epoch": 0.444183349609375, "grad_norm": 14.829861640930176, "learning_rate": 2.9405161288258797e-06, "loss": 3.2442, "step": 43665 }, { "epoch": 0.4442342122395833, "grad_norm": 12.270400047302246, "learning_rate": 2.940122499092993e-06, "loss": 3.2945, "step": 43670 }, { "epoch": 0.4442850748697917, "grad_norm": 9.87391185760498, "learning_rate": 2.939728858099676e-06, "loss": 3.652, "step": 43675 }, { "epoch": 0.4443359375, "grad_norm": 8.332228660583496, "learning_rate": 2.9393352058560014e-06, "loss": 3.4948, "step": 43680 }, { "epoch": 0.4443868001302083, "grad_norm": 11.852291107177734, "learning_rate": 2.938941542372039e-06, "loss": 3.4039, "step": 43685 }, { "epoch": 0.4444376627604167, "grad_norm": 9.319053649902344, "learning_rate": 2.9385478676578617e-06, "loss": 3.3997, "step": 43690 }, { "epoch": 0.444488525390625, "grad_norm": 15.921639442443848, "learning_rate": 2.938154181723541e-06, "loss": 3.6342, "step": 43695 }, { "epoch": 0.4445393880208333, "grad_norm": 14.104935646057129, "learning_rate": 2.9377604845791498e-06, "loss": 3.7262, "step": 43700 }, { "epoch": 0.4445902506510417, "grad_norm": 11.885713577270508, "learning_rate": 2.937366776234759e-06, "loss": 3.49, "step": 43705 }, { "epoch": 0.44464111328125, "grad_norm": 13.739585876464844, "learning_rate": 2.9369730567004447e-06, "loss": 3.5327, "step": 43710 }, { "epoch": 0.4446919759114583, "grad_norm": 14.605637550354004, "learning_rate": 2.936579325986277e-06, "loss": 3.1572, "step": 43715 }, { "epoch": 0.4447428385416667, "grad_norm": 14.697052001953125, "learning_rate": 2.936185584102331e-06, "loss": 3.511, "step": 43720 }, { "epoch": 0.444793701171875, "grad_norm": 10.544771194458008, "learning_rate": 2.9357918310586808e-06, "loss": 3.4963, "step": 43725 }, { "epoch": 0.4448445638020833, "grad_norm": 8.620457649230957, "learning_rate": 2.9353980668654e-06, "loss": 3.2496, "step": 43730 }, { "epoch": 0.4448954264322917, "grad_norm": 15.771838188171387, "learning_rate": 2.9350042915325617e-06, "loss": 3.4466, "step": 43735 }, { "epoch": 0.4449462890625, "grad_norm": 9.760074615478516, "learning_rate": 2.9346105050702425e-06, "loss": 3.1918, "step": 43740 }, { "epoch": 0.4449971516927083, "grad_norm": 13.089041709899902, "learning_rate": 2.9342167074885165e-06, "loss": 3.3699, "step": 43745 }, { "epoch": 0.4450480143229167, "grad_norm": 17.783554077148438, "learning_rate": 2.933822898797459e-06, "loss": 3.2082, "step": 43750 }, { "epoch": 0.445098876953125, "grad_norm": 16.995149612426758, "learning_rate": 2.933429079007145e-06, "loss": 3.0578, "step": 43755 }, { "epoch": 0.4451497395833333, "grad_norm": 9.41482925415039, "learning_rate": 2.9330352481276513e-06, "loss": 3.4183, "step": 43760 }, { "epoch": 0.4452006022135417, "grad_norm": 13.689038276672363, "learning_rate": 2.9326414061690535e-06, "loss": 3.4102, "step": 43765 }, { "epoch": 0.44525146484375, "grad_norm": 17.326047897338867, "learning_rate": 2.9322475531414275e-06, "loss": 3.6235, "step": 43770 }, { "epoch": 0.4453023274739583, "grad_norm": 12.240701675415039, "learning_rate": 2.93185368905485e-06, "loss": 3.7562, "step": 43775 }, { "epoch": 0.4453531901041667, "grad_norm": 13.802271842956543, "learning_rate": 2.9314598139193984e-06, "loss": 3.4392, "step": 43780 }, { "epoch": 0.445404052734375, "grad_norm": 10.350001335144043, "learning_rate": 2.9310659277451505e-06, "loss": 3.0039, "step": 43785 }, { "epoch": 0.4454549153645833, "grad_norm": 15.589079856872559, "learning_rate": 2.930672030542182e-06, "loss": 3.3543, "step": 43790 }, { "epoch": 0.4455057779947917, "grad_norm": 14.357402801513672, "learning_rate": 2.9302781223205725e-06, "loss": 3.5534, "step": 43795 }, { "epoch": 0.445556640625, "grad_norm": 10.52519702911377, "learning_rate": 2.9298842030903984e-06, "loss": 3.1371, "step": 43800 }, { "epoch": 0.4456075032552083, "grad_norm": 14.252992630004883, "learning_rate": 2.929490272861739e-06, "loss": 3.437, "step": 43805 }, { "epoch": 0.4456583658854167, "grad_norm": 9.607766151428223, "learning_rate": 2.929096331644673e-06, "loss": 3.1274, "step": 43810 }, { "epoch": 0.445709228515625, "grad_norm": 14.653151512145996, "learning_rate": 2.9287023794492797e-06, "loss": 3.2154, "step": 43815 }, { "epoch": 0.4457600911458333, "grad_norm": 12.342843055725098, "learning_rate": 2.928308416285637e-06, "loss": 3.8825, "step": 43820 }, { "epoch": 0.4458109537760417, "grad_norm": 15.029866218566895, "learning_rate": 2.927914442163825e-06, "loss": 3.5363, "step": 43825 }, { "epoch": 0.44586181640625, "grad_norm": 9.904187202453613, "learning_rate": 2.9275204570939236e-06, "loss": 3.4545, "step": 43830 }, { "epoch": 0.4459126790364583, "grad_norm": 14.366094589233398, "learning_rate": 2.9271264610860124e-06, "loss": 3.3427, "step": 43835 }, { "epoch": 0.4459635416666667, "grad_norm": 13.560625076293945, "learning_rate": 2.926732454150172e-06, "loss": 3.8012, "step": 43840 }, { "epoch": 0.446014404296875, "grad_norm": 16.008623123168945, "learning_rate": 2.926338436296483e-06, "loss": 3.2344, "step": 43845 }, { "epoch": 0.4460652669270833, "grad_norm": 13.258788108825684, "learning_rate": 2.925944407535026e-06, "loss": 3.5272, "step": 43850 }, { "epoch": 0.4461161295572917, "grad_norm": 10.633359909057617, "learning_rate": 2.925550367875883e-06, "loss": 3.9106, "step": 43855 }, { "epoch": 0.4461669921875, "grad_norm": 13.368167877197266, "learning_rate": 2.925156317329133e-06, "loss": 3.4759, "step": 43860 }, { "epoch": 0.4462178548177083, "grad_norm": 10.804916381835938, "learning_rate": 2.9247622559048615e-06, "loss": 3.3424, "step": 43865 }, { "epoch": 0.4462687174479167, "grad_norm": 10.019235610961914, "learning_rate": 2.9243681836131466e-06, "loss": 3.8252, "step": 43870 }, { "epoch": 0.446319580078125, "grad_norm": 11.168046951293945, "learning_rate": 2.9239741004640736e-06, "loss": 3.6156, "step": 43875 }, { "epoch": 0.4463704427083333, "grad_norm": 13.641862869262695, "learning_rate": 2.9235800064677235e-06, "loss": 3.3427, "step": 43880 }, { "epoch": 0.4464213053385417, "grad_norm": 11.824005126953125, "learning_rate": 2.923185901634178e-06, "loss": 3.1257, "step": 43885 }, { "epoch": 0.44647216796875, "grad_norm": 9.489578247070312, "learning_rate": 2.9227917859735233e-06, "loss": 3.1383, "step": 43890 }, { "epoch": 0.4465230305989583, "grad_norm": 12.910775184631348, "learning_rate": 2.9223976594958402e-06, "loss": 3.29, "step": 43895 }, { "epoch": 0.4465738932291667, "grad_norm": 8.678617477416992, "learning_rate": 2.922003522211213e-06, "loss": 3.1416, "step": 43900 }, { "epoch": 0.446624755859375, "grad_norm": 14.237542152404785, "learning_rate": 2.9216093741297257e-06, "loss": 3.3661, "step": 43905 }, { "epoch": 0.4466756184895833, "grad_norm": 13.427087783813477, "learning_rate": 2.9212152152614625e-06, "loss": 3.5646, "step": 43910 }, { "epoch": 0.4467264811197917, "grad_norm": 17.48465347290039, "learning_rate": 2.9208210456165083e-06, "loss": 3.2427, "step": 43915 }, { "epoch": 0.44677734375, "grad_norm": 10.90257740020752, "learning_rate": 2.920426865204947e-06, "loss": 3.5392, "step": 43920 }, { "epoch": 0.4468282063802083, "grad_norm": 9.019089698791504, "learning_rate": 2.920032674036864e-06, "loss": 3.1428, "step": 43925 }, { "epoch": 0.4468790690104167, "grad_norm": 12.710570335388184, "learning_rate": 2.9196384721223448e-06, "loss": 3.3098, "step": 43930 }, { "epoch": 0.446929931640625, "grad_norm": 10.439959526062012, "learning_rate": 2.9192442594714747e-06, "loss": 3.0495, "step": 43935 }, { "epoch": 0.4469807942708333, "grad_norm": 10.362679481506348, "learning_rate": 2.9188500360943393e-06, "loss": 3.5898, "step": 43940 }, { "epoch": 0.4470316569010417, "grad_norm": 10.04350471496582, "learning_rate": 2.918455802001026e-06, "loss": 3.1113, "step": 43945 }, { "epoch": 0.44708251953125, "grad_norm": 10.389842987060547, "learning_rate": 2.9180615572016187e-06, "loss": 3.0661, "step": 43950 }, { "epoch": 0.4471333821614583, "grad_norm": 7.375126361846924, "learning_rate": 2.9176673017062064e-06, "loss": 3.5667, "step": 43955 }, { "epoch": 0.4471842447916667, "grad_norm": 11.38521957397461, "learning_rate": 2.9172730355248747e-06, "loss": 3.1038, "step": 43960 }, { "epoch": 0.447235107421875, "grad_norm": 11.45709228515625, "learning_rate": 2.9168787586677116e-06, "loss": 3.347, "step": 43965 }, { "epoch": 0.4472859700520833, "grad_norm": 14.73982048034668, "learning_rate": 2.916484471144805e-06, "loss": 3.2477, "step": 43970 }, { "epoch": 0.4473368326822917, "grad_norm": 13.484076499938965, "learning_rate": 2.9160901729662404e-06, "loss": 3.463, "step": 43975 }, { "epoch": 0.4473876953125, "grad_norm": 14.229711532592773, "learning_rate": 2.9156958641421085e-06, "loss": 3.2316, "step": 43980 }, { "epoch": 0.4474385579427083, "grad_norm": 12.872627258300781, "learning_rate": 2.9153015446824967e-06, "loss": 3.1471, "step": 43985 }, { "epoch": 0.4474894205729167, "grad_norm": 11.86507797241211, "learning_rate": 2.9149072145974923e-06, "loss": 3.5043, "step": 43990 }, { "epoch": 0.447540283203125, "grad_norm": 12.148114204406738, "learning_rate": 2.9145128738971856e-06, "loss": 3.3233, "step": 43995 }, { "epoch": 0.4475911458333333, "grad_norm": 16.555404663085938, "learning_rate": 2.914118522591665e-06, "loss": 3.6835, "step": 44000 }, { "epoch": 0.4476420084635417, "grad_norm": 10.267139434814453, "learning_rate": 2.9137241606910206e-06, "loss": 3.4503, "step": 44005 }, { "epoch": 0.44769287109375, "grad_norm": 7.522677898406982, "learning_rate": 2.9133297882053413e-06, "loss": 3.1649, "step": 44010 }, { "epoch": 0.4477437337239583, "grad_norm": 14.890913009643555, "learning_rate": 2.912935405144718e-06, "loss": 3.2125, "step": 44015 }, { "epoch": 0.4477945963541667, "grad_norm": 15.445168495178223, "learning_rate": 2.912541011519239e-06, "loss": 3.6791, "step": 44020 }, { "epoch": 0.447845458984375, "grad_norm": 11.479595184326172, "learning_rate": 2.912146607338997e-06, "loss": 3.3403, "step": 44025 }, { "epoch": 0.4478963216145833, "grad_norm": 9.104013442993164, "learning_rate": 2.911752192614081e-06, "loss": 3.2455, "step": 44030 }, { "epoch": 0.4479471842447917, "grad_norm": 12.49300479888916, "learning_rate": 2.9113577673545845e-06, "loss": 3.3308, "step": 44035 }, { "epoch": 0.447998046875, "grad_norm": 11.121861457824707, "learning_rate": 2.9109633315705947e-06, "loss": 3.3387, "step": 44040 }, { "epoch": 0.4480489095052083, "grad_norm": 14.24190902709961, "learning_rate": 2.910568885272207e-06, "loss": 3.2661, "step": 44045 }, { "epoch": 0.4480997721354167, "grad_norm": 9.73490047454834, "learning_rate": 2.910174428469511e-06, "loss": 3.3985, "step": 44050 }, { "epoch": 0.448150634765625, "grad_norm": 7.493213653564453, "learning_rate": 2.9097799611726e-06, "loss": 3.3632, "step": 44055 }, { "epoch": 0.4482014973958333, "grad_norm": 14.473955154418945, "learning_rate": 2.909385483391565e-06, "loss": 3.1905, "step": 44060 }, { "epoch": 0.4482523600260417, "grad_norm": 13.767448425292969, "learning_rate": 2.9089909951365003e-06, "loss": 3.6482, "step": 44065 }, { "epoch": 0.44830322265625, "grad_norm": 15.65981388092041, "learning_rate": 2.9085964964174974e-06, "loss": 3.3085, "step": 44070 }, { "epoch": 0.4483540852864583, "grad_norm": 11.123335838317871, "learning_rate": 2.9082019872446503e-06, "loss": 3.3357, "step": 44075 }, { "epoch": 0.4484049479166667, "grad_norm": 11.893462181091309, "learning_rate": 2.907807467628052e-06, "loss": 3.3115, "step": 44080 }, { "epoch": 0.448455810546875, "grad_norm": 11.333732604980469, "learning_rate": 2.907412937577796e-06, "loss": 2.977, "step": 44085 }, { "epoch": 0.4485066731770833, "grad_norm": 12.191730499267578, "learning_rate": 2.907018397103977e-06, "loss": 3.4391, "step": 44090 }, { "epoch": 0.4485575358072917, "grad_norm": 10.514629364013672, "learning_rate": 2.9066238462166885e-06, "loss": 3.3427, "step": 44095 }, { "epoch": 0.4486083984375, "grad_norm": 15.09231948852539, "learning_rate": 2.906229284926026e-06, "loss": 3.558, "step": 44100 }, { "epoch": 0.4486592610677083, "grad_norm": 7.452277183532715, "learning_rate": 2.9058347132420822e-06, "loss": 3.3224, "step": 44105 }, { "epoch": 0.4487101236979167, "grad_norm": 11.329075813293457, "learning_rate": 2.905440131174955e-06, "loss": 3.7236, "step": 44110 }, { "epoch": 0.448760986328125, "grad_norm": 9.755703926086426, "learning_rate": 2.9050455387347367e-06, "loss": 3.1724, "step": 44115 }, { "epoch": 0.4488118489583333, "grad_norm": 15.50869083404541, "learning_rate": 2.9046509359315252e-06, "loss": 3.201, "step": 44120 }, { "epoch": 0.4488627115885417, "grad_norm": 15.588220596313477, "learning_rate": 2.904256322775415e-06, "loss": 3.4182, "step": 44125 }, { "epoch": 0.44891357421875, "grad_norm": 6.770971298217773, "learning_rate": 2.9038616992765024e-06, "loss": 3.1646, "step": 44130 }, { "epoch": 0.4489644368489583, "grad_norm": 13.444995880126953, "learning_rate": 2.9034670654448842e-06, "loss": 3.4471, "step": 44135 }, { "epoch": 0.4490152994791667, "grad_norm": 10.779013633728027, "learning_rate": 2.9030724212906577e-06, "loss": 3.3747, "step": 44140 }, { "epoch": 0.449066162109375, "grad_norm": 11.873066902160645, "learning_rate": 2.902677766823917e-06, "loss": 3.4175, "step": 44145 }, { "epoch": 0.4491170247395833, "grad_norm": 16.036754608154297, "learning_rate": 2.9022831020547624e-06, "loss": 3.3016, "step": 44150 }, { "epoch": 0.4491678873697917, "grad_norm": 10.282904624938965, "learning_rate": 2.90188842699329e-06, "loss": 3.101, "step": 44155 }, { "epoch": 0.44921875, "grad_norm": 14.96367073059082, "learning_rate": 2.901493741649597e-06, "loss": 3.606, "step": 44160 }, { "epoch": 0.4492696126302083, "grad_norm": 9.411566734313965, "learning_rate": 2.901099046033782e-06, "loss": 3.7507, "step": 44165 }, { "epoch": 0.4493204752604167, "grad_norm": 9.179765701293945, "learning_rate": 2.9007043401559427e-06, "loss": 3.7467, "step": 44170 }, { "epoch": 0.449371337890625, "grad_norm": 11.785852432250977, "learning_rate": 2.900309624026178e-06, "loss": 3.5494, "step": 44175 }, { "epoch": 0.4494222005208333, "grad_norm": 14.475744247436523, "learning_rate": 2.8999148976545868e-06, "loss": 3.4811, "step": 44180 }, { "epoch": 0.4494730631510417, "grad_norm": 10.330108642578125, "learning_rate": 2.899520161051267e-06, "loss": 3.1912, "step": 44185 }, { "epoch": 0.44952392578125, "grad_norm": 10.2774019241333, "learning_rate": 2.8991254142263187e-06, "loss": 3.1315, "step": 44190 }, { "epoch": 0.4495747884114583, "grad_norm": 8.573261260986328, "learning_rate": 2.8987306571898417e-06, "loss": 3.4502, "step": 44195 }, { "epoch": 0.4496256510416667, "grad_norm": 7.999054908752441, "learning_rate": 2.898335889951935e-06, "loss": 4.0442, "step": 44200 }, { "epoch": 0.449676513671875, "grad_norm": 20.809791564941406, "learning_rate": 2.8979411125226997e-06, "loss": 3.6222, "step": 44205 }, { "epoch": 0.4497273763020833, "grad_norm": 8.303959846496582, "learning_rate": 2.8975463249122343e-06, "loss": 3.0456, "step": 44210 }, { "epoch": 0.4497782389322917, "grad_norm": 12.522747039794922, "learning_rate": 2.897151527130641e-06, "loss": 3.1759, "step": 44215 }, { "epoch": 0.4498291015625, "grad_norm": 15.157700538635254, "learning_rate": 2.8967567191880203e-06, "loss": 3.3152, "step": 44220 }, { "epoch": 0.4498799641927083, "grad_norm": 13.41623306274414, "learning_rate": 2.8963619010944722e-06, "loss": 3.3587, "step": 44225 }, { "epoch": 0.4499308268229167, "grad_norm": 8.851814270019531, "learning_rate": 2.895967072860099e-06, "loss": 3.3081, "step": 44230 }, { "epoch": 0.449981689453125, "grad_norm": 13.496138572692871, "learning_rate": 2.895572234495003e-06, "loss": 3.3986, "step": 44235 }, { "epoch": 0.4500325520833333, "grad_norm": 7.6826629638671875, "learning_rate": 2.895177386009284e-06, "loss": 3.2947, "step": 44240 }, { "epoch": 0.4500834147135417, "grad_norm": 9.343023300170898, "learning_rate": 2.8947825274130463e-06, "loss": 3.2867, "step": 44245 }, { "epoch": 0.45013427734375, "grad_norm": 8.761979103088379, "learning_rate": 2.89438765871639e-06, "loss": 3.3941, "step": 44250 }, { "epoch": 0.4501851399739583, "grad_norm": 8.454582214355469, "learning_rate": 2.89399277992942e-06, "loss": 3.3526, "step": 44255 }, { "epoch": 0.4502360026041667, "grad_norm": 15.36679458618164, "learning_rate": 2.893597891062237e-06, "loss": 3.0334, "step": 44260 }, { "epoch": 0.450286865234375, "grad_norm": 8.259591102600098, "learning_rate": 2.8932029921249466e-06, "loss": 3.1613, "step": 44265 }, { "epoch": 0.4503377278645833, "grad_norm": 11.846033096313477, "learning_rate": 2.8928080831276503e-06, "loss": 3.1133, "step": 44270 }, { "epoch": 0.4503885904947917, "grad_norm": 14.526901245117188, "learning_rate": 2.8924131640804515e-06, "loss": 3.0744, "step": 44275 }, { "epoch": 0.450439453125, "grad_norm": 10.386990547180176, "learning_rate": 2.8920182349934556e-06, "loss": 3.8038, "step": 44280 }, { "epoch": 0.4504903157552083, "grad_norm": 12.09192180633545, "learning_rate": 2.8916232958767667e-06, "loss": 3.1284, "step": 44285 }, { "epoch": 0.4505411783854167, "grad_norm": 7.6954569816589355, "learning_rate": 2.8912283467404873e-06, "loss": 3.095, "step": 44290 }, { "epoch": 0.450592041015625, "grad_norm": 10.720759391784668, "learning_rate": 2.8908333875947243e-06, "loss": 3.0396, "step": 44295 }, { "epoch": 0.4506429036458333, "grad_norm": 8.4306001663208, "learning_rate": 2.890438418449581e-06, "loss": 3.2896, "step": 44300 }, { "epoch": 0.4506937662760417, "grad_norm": 11.261151313781738, "learning_rate": 2.890043439315164e-06, "loss": 3.3463, "step": 44305 }, { "epoch": 0.45074462890625, "grad_norm": 10.319689750671387, "learning_rate": 2.889648450201578e-06, "loss": 3.2779, "step": 44310 }, { "epoch": 0.4507954915364583, "grad_norm": 11.34036636352539, "learning_rate": 2.889253451118928e-06, "loss": 3.4178, "step": 44315 }, { "epoch": 0.4508463541666667, "grad_norm": 8.487945556640625, "learning_rate": 2.888858442077322e-06, "loss": 3.0309, "step": 44320 }, { "epoch": 0.450897216796875, "grad_norm": 10.793403625488281, "learning_rate": 2.8884634230868636e-06, "loss": 3.4121, "step": 44325 }, { "epoch": 0.4509480794270833, "grad_norm": 11.224672317504883, "learning_rate": 2.8880683941576615e-06, "loss": 3.4405, "step": 44330 }, { "epoch": 0.4509989420572917, "grad_norm": 11.25199031829834, "learning_rate": 2.8876733552998216e-06, "loss": 3.7493, "step": 44335 }, { "epoch": 0.4510498046875, "grad_norm": 9.063387870788574, "learning_rate": 2.88727830652345e-06, "loss": 3.3024, "step": 44340 }, { "epoch": 0.4511006673177083, "grad_norm": 10.825544357299805, "learning_rate": 2.886883247838655e-06, "loss": 3.8387, "step": 44345 }, { "epoch": 0.4511515299479167, "grad_norm": 9.08434772491455, "learning_rate": 2.8864881792555437e-06, "loss": 3.6101, "step": 44350 }, { "epoch": 0.451202392578125, "grad_norm": 12.246991157531738, "learning_rate": 2.8860931007842248e-06, "loss": 3.5041, "step": 44355 }, { "epoch": 0.4512532552083333, "grad_norm": 7.686623573303223, "learning_rate": 2.885698012434805e-06, "loss": 3.4835, "step": 44360 }, { "epoch": 0.4513041178385417, "grad_norm": 7.685125827789307, "learning_rate": 2.885302914217392e-06, "loss": 3.4892, "step": 44365 }, { "epoch": 0.45135498046875, "grad_norm": 8.283895492553711, "learning_rate": 2.8849078061420964e-06, "loss": 3.0764, "step": 44370 }, { "epoch": 0.4514058430989583, "grad_norm": 14.107020378112793, "learning_rate": 2.8845126882190257e-06, "loss": 3.5087, "step": 44375 }, { "epoch": 0.4514567057291667, "grad_norm": 14.426298141479492, "learning_rate": 2.8841175604582882e-06, "loss": 3.343, "step": 44380 }, { "epoch": 0.451507568359375, "grad_norm": 9.314127922058105, "learning_rate": 2.883722422869995e-06, "loss": 3.2457, "step": 44385 }, { "epoch": 0.4515584309895833, "grad_norm": 14.12999439239502, "learning_rate": 2.883327275464254e-06, "loss": 3.0399, "step": 44390 }, { "epoch": 0.4516092936197917, "grad_norm": 14.253714561462402, "learning_rate": 2.8829321182511756e-06, "loss": 3.6243, "step": 44395 }, { "epoch": 0.45166015625, "grad_norm": 15.71931266784668, "learning_rate": 2.8825369512408697e-06, "loss": 3.4012, "step": 44400 }, { "epoch": 0.4517110188802083, "grad_norm": 8.17529582977295, "learning_rate": 2.8821417744434468e-06, "loss": 2.9436, "step": 44405 }, { "epoch": 0.4517618815104167, "grad_norm": 9.646404266357422, "learning_rate": 2.8817465878690164e-06, "loss": 3.4106, "step": 44410 }, { "epoch": 0.451812744140625, "grad_norm": 8.477702140808105, "learning_rate": 2.881351391527691e-06, "loss": 2.9446, "step": 44415 }, { "epoch": 0.4518636067708333, "grad_norm": 9.89619255065918, "learning_rate": 2.8809561854295798e-06, "loss": 3.3598, "step": 44420 }, { "epoch": 0.4519144694010417, "grad_norm": 12.336702346801758, "learning_rate": 2.8805609695847964e-06, "loss": 3.8555, "step": 44425 }, { "epoch": 0.45196533203125, "grad_norm": 10.78642463684082, "learning_rate": 2.8801657440034493e-06, "loss": 3.5572, "step": 44430 }, { "epoch": 0.4520161946614583, "grad_norm": 16.197214126586914, "learning_rate": 2.879770508695652e-06, "loss": 3.1144, "step": 44435 }, { "epoch": 0.4520670572916667, "grad_norm": 15.63759708404541, "learning_rate": 2.8793752636715168e-06, "loss": 3.0399, "step": 44440 }, { "epoch": 0.452117919921875, "grad_norm": 9.543905258178711, "learning_rate": 2.8789800089411546e-06, "loss": 3.7195, "step": 44445 }, { "epoch": 0.4521687825520833, "grad_norm": 9.484859466552734, "learning_rate": 2.8785847445146792e-06, "loss": 3.5228, "step": 44450 }, { "epoch": 0.4522196451822917, "grad_norm": 11.978971481323242, "learning_rate": 2.878189470402203e-06, "loss": 3.3213, "step": 44455 }, { "epoch": 0.4522705078125, "grad_norm": 16.76762580871582, "learning_rate": 2.8777941866138385e-06, "loss": 3.2711, "step": 44460 }, { "epoch": 0.4523213704427083, "grad_norm": 17.06298065185547, "learning_rate": 2.8773988931596995e-06, "loss": 3.1585, "step": 44465 }, { "epoch": 0.4523722330729167, "grad_norm": 10.328750610351562, "learning_rate": 2.877003590049899e-06, "loss": 3.1892, "step": 44470 }, { "epoch": 0.452423095703125, "grad_norm": 10.768985748291016, "learning_rate": 2.8766082772945514e-06, "loss": 3.5259, "step": 44475 }, { "epoch": 0.4524739583333333, "grad_norm": 9.1839599609375, "learning_rate": 2.8762129549037705e-06, "loss": 3.726, "step": 44480 }, { "epoch": 0.4525248209635417, "grad_norm": 8.133028984069824, "learning_rate": 2.8758176228876688e-06, "loss": 3.1541, "step": 44485 }, { "epoch": 0.45257568359375, "grad_norm": 11.028250694274902, "learning_rate": 2.875422281256364e-06, "loss": 3.9127, "step": 44490 }, { "epoch": 0.4526265462239583, "grad_norm": 14.899149894714355, "learning_rate": 2.8750269300199684e-06, "loss": 3.6109, "step": 44495 }, { "epoch": 0.4526774088541667, "grad_norm": 13.18986701965332, "learning_rate": 2.874631569188598e-06, "loss": 3.4452, "step": 44500 }, { "epoch": 0.452728271484375, "grad_norm": 9.838666915893555, "learning_rate": 2.8742361987723676e-06, "loss": 3.1275, "step": 44505 }, { "epoch": 0.4527791341145833, "grad_norm": 9.220782279968262, "learning_rate": 2.8738408187813925e-06, "loss": 3.2301, "step": 44510 }, { "epoch": 0.4528299967447917, "grad_norm": 15.787191390991211, "learning_rate": 2.8734454292257883e-06, "loss": 3.3359, "step": 44515 }, { "epoch": 0.452880859375, "grad_norm": 10.859371185302734, "learning_rate": 2.8730500301156724e-06, "loss": 3.1415, "step": 44520 }, { "epoch": 0.4529317220052083, "grad_norm": 16.449567794799805, "learning_rate": 2.8726546214611595e-06, "loss": 3.4536, "step": 44525 }, { "epoch": 0.4529825846354167, "grad_norm": 14.785994529724121, "learning_rate": 2.8722592032723663e-06, "loss": 3.393, "step": 44530 }, { "epoch": 0.453033447265625, "grad_norm": 14.163969039916992, "learning_rate": 2.8718637755594093e-06, "loss": 3.3349, "step": 44535 }, { "epoch": 0.4530843098958333, "grad_norm": 10.803580284118652, "learning_rate": 2.8714683383324067e-06, "loss": 3.0007, "step": 44540 }, { "epoch": 0.4531351725260417, "grad_norm": 8.182945251464844, "learning_rate": 2.8710728916014734e-06, "loss": 3.4462, "step": 44545 }, { "epoch": 0.45318603515625, "grad_norm": 11.769540786743164, "learning_rate": 2.8706774353767292e-06, "loss": 3.1659, "step": 44550 }, { "epoch": 0.4532368977864583, "grad_norm": 12.93024730682373, "learning_rate": 2.87028196966829e-06, "loss": 3.49, "step": 44555 }, { "epoch": 0.4532877604166667, "grad_norm": 12.073081970214844, "learning_rate": 2.8698864944862746e-06, "loss": 3.4769, "step": 44560 }, { "epoch": 0.453338623046875, "grad_norm": 8.057173728942871, "learning_rate": 2.8694910098408006e-06, "loss": 3.081, "step": 44565 }, { "epoch": 0.4533894856770833, "grad_norm": 15.351044654846191, "learning_rate": 2.8690955157419874e-06, "loss": 3.0802, "step": 44570 }, { "epoch": 0.4534403483072917, "grad_norm": 8.478313446044922, "learning_rate": 2.8687000121999525e-06, "loss": 3.3409, "step": 44575 }, { "epoch": 0.4534912109375, "grad_norm": 10.503058433532715, "learning_rate": 2.868304499224815e-06, "loss": 3.1832, "step": 44580 }, { "epoch": 0.4535420735677083, "grad_norm": 14.508081436157227, "learning_rate": 2.8679089768266945e-06, "loss": 3.077, "step": 44585 }, { "epoch": 0.4535929361979167, "grad_norm": 12.231405258178711, "learning_rate": 2.86751344501571e-06, "loss": 3.0866, "step": 44590 }, { "epoch": 0.453643798828125, "grad_norm": 10.745758056640625, "learning_rate": 2.8671179038019813e-06, "loss": 3.1604, "step": 44595 }, { "epoch": 0.4536946614583333, "grad_norm": 13.465664863586426, "learning_rate": 2.8667223531956274e-06, "loss": 3.2526, "step": 44600 }, { "epoch": 0.4537455240885417, "grad_norm": 11.453778266906738, "learning_rate": 2.8663267932067697e-06, "loss": 3.3491, "step": 44605 }, { "epoch": 0.45379638671875, "grad_norm": 11.788174629211426, "learning_rate": 2.8659312238455276e-06, "loss": 3.5024, "step": 44610 }, { "epoch": 0.4538472493489583, "grad_norm": 14.00549602508545, "learning_rate": 2.865535645122022e-06, "loss": 3.0471, "step": 44615 }, { "epoch": 0.4538981119791667, "grad_norm": 10.149195671081543, "learning_rate": 2.8651400570463728e-06, "loss": 3.3712, "step": 44620 }, { "epoch": 0.453948974609375, "grad_norm": 7.4141411781311035, "learning_rate": 2.864744459628703e-06, "loss": 3.2301, "step": 44625 }, { "epoch": 0.4539998372395833, "grad_norm": 11.498921394348145, "learning_rate": 2.8643488528791313e-06, "loss": 3.1663, "step": 44630 }, { "epoch": 0.4540506998697917, "grad_norm": 9.980927467346191, "learning_rate": 2.863953236807782e-06, "loss": 3.5264, "step": 44635 }, { "epoch": 0.4541015625, "grad_norm": 11.899552345275879, "learning_rate": 2.8635576114247744e-06, "loss": 3.1135, "step": 44640 }, { "epoch": 0.4541524251302083, "grad_norm": 12.848044395446777, "learning_rate": 2.863161976740232e-06, "loss": 3.4724, "step": 44645 }, { "epoch": 0.4542032877604167, "grad_norm": 14.635205268859863, "learning_rate": 2.862766332764276e-06, "loss": 3.5827, "step": 44650 }, { "epoch": 0.454254150390625, "grad_norm": 13.011972427368164, "learning_rate": 2.86237067950703e-06, "loss": 3.2725, "step": 44655 }, { "epoch": 0.4543050130208333, "grad_norm": 14.444993019104004, "learning_rate": 2.861975016978615e-06, "loss": 3.9499, "step": 44660 }, { "epoch": 0.4543558756510417, "grad_norm": 12.825224876403809, "learning_rate": 2.861579345189156e-06, "loss": 3.3978, "step": 44665 }, { "epoch": 0.45440673828125, "grad_norm": 11.300124168395996, "learning_rate": 2.861183664148775e-06, "loss": 3.2149, "step": 44670 }, { "epoch": 0.4544576009114583, "grad_norm": 16.177522659301758, "learning_rate": 2.860787973867595e-06, "loss": 3.1184, "step": 44675 }, { "epoch": 0.4545084635416667, "grad_norm": 7.653833866119385, "learning_rate": 2.860392274355741e-06, "loss": 3.395, "step": 44680 }, { "epoch": 0.454559326171875, "grad_norm": 13.640836715698242, "learning_rate": 2.8599965656233352e-06, "loss": 3.1572, "step": 44685 }, { "epoch": 0.4546101888020833, "grad_norm": 8.581427574157715, "learning_rate": 2.859600847680503e-06, "loss": 3.2961, "step": 44690 }, { "epoch": 0.4546610514322917, "grad_norm": 13.232842445373535, "learning_rate": 2.8592051205373683e-06, "loss": 2.9906, "step": 44695 }, { "epoch": 0.4547119140625, "grad_norm": 10.621664047241211, "learning_rate": 2.8588093842040564e-06, "loss": 3.5622, "step": 44700 }, { "epoch": 0.4547627766927083, "grad_norm": 11.978408813476562, "learning_rate": 2.8584136386906907e-06, "loss": 3.232, "step": 44705 }, { "epoch": 0.4548136393229167, "grad_norm": 8.554805755615234, "learning_rate": 2.8580178840073976e-06, "loss": 3.084, "step": 44710 }, { "epoch": 0.454864501953125, "grad_norm": 11.701556205749512, "learning_rate": 2.857622120164301e-06, "loss": 3.4473, "step": 44715 }, { "epoch": 0.4549153645833333, "grad_norm": 11.200251579284668, "learning_rate": 2.857226347171528e-06, "loss": 3.3229, "step": 44720 }, { "epoch": 0.4549662272135417, "grad_norm": 15.928792953491211, "learning_rate": 2.856830565039203e-06, "loss": 3.4841, "step": 44725 }, { "epoch": 0.45501708984375, "grad_norm": 8.158794403076172, "learning_rate": 2.8564347737774527e-06, "loss": 3.5875, "step": 44730 }, { "epoch": 0.4550679524739583, "grad_norm": 14.221781730651855, "learning_rate": 2.8560389733964035e-06, "loss": 3.7044, "step": 44735 }, { "epoch": 0.4551188151041667, "grad_norm": 12.200833320617676, "learning_rate": 2.855643163906181e-06, "loss": 3.1577, "step": 44740 }, { "epoch": 0.455169677734375, "grad_norm": 12.440650939941406, "learning_rate": 2.8552473453169126e-06, "loss": 3.7456, "step": 44745 }, { "epoch": 0.4552205403645833, "grad_norm": 12.854925155639648, "learning_rate": 2.854851517638726e-06, "loss": 3.5147, "step": 44750 }, { "epoch": 0.4552714029947917, "grad_norm": 12.155538558959961, "learning_rate": 2.8544556808817466e-06, "loss": 3.8193, "step": 44755 }, { "epoch": 0.455322265625, "grad_norm": 8.608756065368652, "learning_rate": 2.854059835056103e-06, "loss": 3.0574, "step": 44760 }, { "epoch": 0.4553731282552083, "grad_norm": 12.807647705078125, "learning_rate": 2.853663980171922e-06, "loss": 3.3311, "step": 44765 }, { "epoch": 0.4554239908854167, "grad_norm": 14.807835578918457, "learning_rate": 2.8532681162393315e-06, "loss": 3.1986, "step": 44770 }, { "epoch": 0.455474853515625, "grad_norm": 8.61264705657959, "learning_rate": 2.8528722432684608e-06, "loss": 3.3935, "step": 44775 }, { "epoch": 0.4555257161458333, "grad_norm": 14.185766220092773, "learning_rate": 2.8524763612694373e-06, "loss": 3.2794, "step": 44780 }, { "epoch": 0.4555765787760417, "grad_norm": 13.053300857543945, "learning_rate": 2.852080470252389e-06, "loss": 3.2844, "step": 44785 }, { "epoch": 0.45562744140625, "grad_norm": 6.562133312225342, "learning_rate": 2.851684570227446e-06, "loss": 3.4165, "step": 44790 }, { "epoch": 0.4556783040364583, "grad_norm": 13.429403305053711, "learning_rate": 2.8512886612047363e-06, "loss": 3.4167, "step": 44795 }, { "epoch": 0.4557291666666667, "grad_norm": 14.063451766967773, "learning_rate": 2.8508927431943895e-06, "loss": 3.5461, "step": 44800 }, { "epoch": 0.455780029296875, "grad_norm": 10.274933815002441, "learning_rate": 2.850496816206535e-06, "loss": 3.4237, "step": 44805 }, { "epoch": 0.4558308919270833, "grad_norm": 12.584627151489258, "learning_rate": 2.850100880251303e-06, "loss": 3.5103, "step": 44810 }, { "epoch": 0.4558817545572917, "grad_norm": 8.542360305786133, "learning_rate": 2.8497049353388227e-06, "loss": 3.4182, "step": 44815 }, { "epoch": 0.4559326171875, "grad_norm": 14.560110092163086, "learning_rate": 2.849308981479224e-06, "loss": 3.2972, "step": 44820 }, { "epoch": 0.4559834798177083, "grad_norm": 13.858331680297852, "learning_rate": 2.8489130186826386e-06, "loss": 3.4506, "step": 44825 }, { "epoch": 0.4560343424479167, "grad_norm": 9.340231895446777, "learning_rate": 2.848517046959196e-06, "loss": 2.8409, "step": 44830 }, { "epoch": 0.456085205078125, "grad_norm": 16.869752883911133, "learning_rate": 2.848121066319027e-06, "loss": 3.3378, "step": 44835 }, { "epoch": 0.4561360677083333, "grad_norm": 11.588399887084961, "learning_rate": 2.8477250767722634e-06, "loss": 3.4205, "step": 44840 }, { "epoch": 0.4561869303385417, "grad_norm": 8.269610404968262, "learning_rate": 2.8473290783290364e-06, "loss": 3.361, "step": 44845 }, { "epoch": 0.45623779296875, "grad_norm": 12.79600715637207, "learning_rate": 2.846933070999477e-06, "loss": 3.2668, "step": 44850 }, { "epoch": 0.4562886555989583, "grad_norm": 7.245375633239746, "learning_rate": 2.846537054793718e-06, "loss": 3.6122, "step": 44855 }, { "epoch": 0.4563395182291667, "grad_norm": 7.1210150718688965, "learning_rate": 2.8461410297218887e-06, "loss": 3.1442, "step": 44860 }, { "epoch": 0.456390380859375, "grad_norm": 9.796218872070312, "learning_rate": 2.8457449957941243e-06, "loss": 3.1287, "step": 44865 }, { "epoch": 0.4564412434895833, "grad_norm": 16.342761993408203, "learning_rate": 2.845348953020556e-06, "loss": 3.0487, "step": 44870 }, { "epoch": 0.4564921061197917, "grad_norm": 14.852348327636719, "learning_rate": 2.8449529014113162e-06, "loss": 3.7072, "step": 44875 }, { "epoch": 0.45654296875, "grad_norm": 8.907597541809082, "learning_rate": 2.8445568409765396e-06, "loss": 3.408, "step": 44880 }, { "epoch": 0.4565938313802083, "grad_norm": 16.051666259765625, "learning_rate": 2.8441607717263565e-06, "loss": 3.314, "step": 44885 }, { "epoch": 0.4566446940104167, "grad_norm": 13.219680786132812, "learning_rate": 2.8437646936709025e-06, "loss": 3.0836, "step": 44890 }, { "epoch": 0.456695556640625, "grad_norm": 10.055333137512207, "learning_rate": 2.84336860682031e-06, "loss": 3.2187, "step": 44895 }, { "epoch": 0.4567464192708333, "grad_norm": 11.605323791503906, "learning_rate": 2.842972511184712e-06, "loss": 3.2613, "step": 44900 }, { "epoch": 0.4567972819010417, "grad_norm": 10.680103302001953, "learning_rate": 2.842576406774245e-06, "loss": 3.2029, "step": 44905 }, { "epoch": 0.45684814453125, "grad_norm": 15.260845184326172, "learning_rate": 2.8421802935990412e-06, "loss": 3.2767, "step": 44910 }, { "epoch": 0.4568990071614583, "grad_norm": 10.041279792785645, "learning_rate": 2.8417841716692356e-06, "loss": 3.2855, "step": 44915 }, { "epoch": 0.4569498697916667, "grad_norm": 11.605247497558594, "learning_rate": 2.841388040994964e-06, "loss": 3.3748, "step": 44920 }, { "epoch": 0.457000732421875, "grad_norm": 12.599408149719238, "learning_rate": 2.840991901586359e-06, "loss": 3.1399, "step": 44925 }, { "epoch": 0.4570515950520833, "grad_norm": 8.706480979919434, "learning_rate": 2.8405957534535583e-06, "loss": 3.1279, "step": 44930 }, { "epoch": 0.4571024576822917, "grad_norm": 16.365188598632812, "learning_rate": 2.8401995966066947e-06, "loss": 3.966, "step": 44935 }, { "epoch": 0.4571533203125, "grad_norm": 14.899900436401367, "learning_rate": 2.839803431055906e-06, "loss": 3.3223, "step": 44940 }, { "epoch": 0.4572041829427083, "grad_norm": 13.398468971252441, "learning_rate": 2.839407256811326e-06, "loss": 2.9851, "step": 44945 }, { "epoch": 0.4572550455729167, "grad_norm": 11.414717674255371, "learning_rate": 2.839011073883093e-06, "loss": 3.5403, "step": 44950 }, { "epoch": 0.457305908203125, "grad_norm": 16.504152297973633, "learning_rate": 2.8386148822813414e-06, "loss": 2.9973, "step": 44955 }, { "epoch": 0.4573567708333333, "grad_norm": 13.877124786376953, "learning_rate": 2.8382186820162084e-06, "loss": 3.3215, "step": 44960 }, { "epoch": 0.4574076334635417, "grad_norm": 9.994048118591309, "learning_rate": 2.8378224730978305e-06, "loss": 3.4388, "step": 44965 }, { "epoch": 0.45745849609375, "grad_norm": 9.283815383911133, "learning_rate": 2.837426255536345e-06, "loss": 2.9213, "step": 44970 }, { "epoch": 0.4575093587239583, "grad_norm": 12.28955078125, "learning_rate": 2.837030029341888e-06, "loss": 3.1653, "step": 44975 }, { "epoch": 0.4575602213541667, "grad_norm": 18.627973556518555, "learning_rate": 2.836633794524598e-06, "loss": 3.3746, "step": 44980 }, { "epoch": 0.457611083984375, "grad_norm": 8.607205390930176, "learning_rate": 2.8362375510946127e-06, "loss": 3.2645, "step": 44985 }, { "epoch": 0.4576619466145833, "grad_norm": 9.861372947692871, "learning_rate": 2.835841299062068e-06, "loss": 3.2815, "step": 44990 }, { "epoch": 0.4577128092447917, "grad_norm": 14.02267074584961, "learning_rate": 2.8354450384371047e-06, "loss": 3.3776, "step": 44995 }, { "epoch": 0.457763671875, "grad_norm": 11.054256439208984, "learning_rate": 2.835048769229859e-06, "loss": 3.2593, "step": 45000 }, { "epoch": 0.4578145345052083, "grad_norm": 16.410503387451172, "learning_rate": 2.83465249145047e-06, "loss": 3.0917, "step": 45005 }, { "epoch": 0.4578653971354167, "grad_norm": 11.378438949584961, "learning_rate": 2.8342562051090762e-06, "loss": 3.4537, "step": 45010 }, { "epoch": 0.457916259765625, "grad_norm": 12.916954040527344, "learning_rate": 2.833859910215817e-06, "loss": 3.3718, "step": 45015 }, { "epoch": 0.4579671223958333, "grad_norm": 15.579991340637207, "learning_rate": 2.833463606780831e-06, "loss": 3.5082, "step": 45020 }, { "epoch": 0.4580179850260417, "grad_norm": 15.45205020904541, "learning_rate": 2.8330672948142568e-06, "loss": 3.216, "step": 45025 }, { "epoch": 0.45806884765625, "grad_norm": 12.469983100891113, "learning_rate": 2.832670974326236e-06, "loss": 3.6482, "step": 45030 }, { "epoch": 0.4581197102864583, "grad_norm": 13.32430362701416, "learning_rate": 2.8322746453269067e-06, "loss": 3.1862, "step": 45035 }, { "epoch": 0.4581705729166667, "grad_norm": 13.674798965454102, "learning_rate": 2.831878307826409e-06, "loss": 3.4776, "step": 45040 }, { "epoch": 0.458221435546875, "grad_norm": 9.960290908813477, "learning_rate": 2.8314819618348844e-06, "loss": 3.5553, "step": 45045 }, { "epoch": 0.4582722981770833, "grad_norm": 8.078351020812988, "learning_rate": 2.8310856073624715e-06, "loss": 3.9305, "step": 45050 }, { "epoch": 0.4583231608072917, "grad_norm": 11.871135711669922, "learning_rate": 2.8306892444193124e-06, "loss": 3.1103, "step": 45055 }, { "epoch": 0.4583740234375, "grad_norm": 14.729536056518555, "learning_rate": 2.830292873015547e-06, "loss": 3.2753, "step": 45060 }, { "epoch": 0.4584248860677083, "grad_norm": 11.23469352722168, "learning_rate": 2.8298964931613167e-06, "loss": 3.1144, "step": 45065 }, { "epoch": 0.4584757486979167, "grad_norm": 14.00146770477295, "learning_rate": 2.829500104866763e-06, "loss": 3.3374, "step": 45070 }, { "epoch": 0.458526611328125, "grad_norm": 8.97723388671875, "learning_rate": 2.829103708142027e-06, "loss": 3.3267, "step": 45075 }, { "epoch": 0.4585774739583333, "grad_norm": 11.814627647399902, "learning_rate": 2.8287073029972506e-06, "loss": 2.9089, "step": 45080 }, { "epoch": 0.4586283365885417, "grad_norm": 8.553272247314453, "learning_rate": 2.828310889442576e-06, "loss": 3.1574, "step": 45085 }, { "epoch": 0.45867919921875, "grad_norm": 7.917248725891113, "learning_rate": 2.8279144674881452e-06, "loss": 3.3913, "step": 45090 }, { "epoch": 0.4587300618489583, "grad_norm": 12.840087890625, "learning_rate": 2.8275180371441003e-06, "loss": 3.2862, "step": 45095 }, { "epoch": 0.4587809244791667, "grad_norm": 13.9429349899292, "learning_rate": 2.8271215984205847e-06, "loss": 3.7799, "step": 45100 }, { "epoch": 0.458831787109375, "grad_norm": 13.825484275817871, "learning_rate": 2.826725151327739e-06, "loss": 3.5117, "step": 45105 }, { "epoch": 0.4588826497395833, "grad_norm": 13.990893363952637, "learning_rate": 2.826328695875709e-06, "loss": 3.4283, "step": 45110 }, { "epoch": 0.4589335123697917, "grad_norm": 10.44043254852295, "learning_rate": 2.8259322320746363e-06, "loss": 3.5508, "step": 45115 }, { "epoch": 0.458984375, "grad_norm": 14.889739990234375, "learning_rate": 2.825535759934665e-06, "loss": 3.2424, "step": 45120 }, { "epoch": 0.4590352376302083, "grad_norm": 9.356514930725098, "learning_rate": 2.825139279465938e-06, "loss": 3.2146, "step": 45125 }, { "epoch": 0.4590861002604167, "grad_norm": 14.960698127746582, "learning_rate": 2.8247427906785994e-06, "loss": 3.2932, "step": 45130 }, { "epoch": 0.459136962890625, "grad_norm": 14.232577323913574, "learning_rate": 2.824346293582794e-06, "loss": 3.4178, "step": 45135 }, { "epoch": 0.4591878255208333, "grad_norm": 12.84840202331543, "learning_rate": 2.8239497881886655e-06, "loss": 3.3215, "step": 45140 }, { "epoch": 0.4592386881510417, "grad_norm": 11.659974098205566, "learning_rate": 2.823553274506358e-06, "loss": 3.2795, "step": 45145 }, { "epoch": 0.45928955078125, "grad_norm": 13.640230178833008, "learning_rate": 2.823156752546018e-06, "loss": 3.3699, "step": 45150 }, { "epoch": 0.4593404134114583, "grad_norm": 15.071539878845215, "learning_rate": 2.822760222317788e-06, "loss": 3.9149, "step": 45155 }, { "epoch": 0.4593912760416667, "grad_norm": 14.014788627624512, "learning_rate": 2.8223636838318134e-06, "loss": 3.1896, "step": 45160 }, { "epoch": 0.459442138671875, "grad_norm": 8.82933235168457, "learning_rate": 2.8219671370982422e-06, "loss": 3.1882, "step": 45165 }, { "epoch": 0.4594930013020833, "grad_norm": 11.878766059875488, "learning_rate": 2.821570582127216e-06, "loss": 3.6614, "step": 45170 }, { "epoch": 0.4595438639322917, "grad_norm": 13.7857084274292, "learning_rate": 2.8211740189288845e-06, "loss": 3.1367, "step": 45175 }, { "epoch": 0.4595947265625, "grad_norm": 12.247088432312012, "learning_rate": 2.8207774475133915e-06, "loss": 3.5135, "step": 45180 }, { "epoch": 0.4596455891927083, "grad_norm": 14.643757820129395, "learning_rate": 2.820380867890883e-06, "loss": 3.532, "step": 45185 }, { "epoch": 0.4596964518229167, "grad_norm": 16.834749221801758, "learning_rate": 2.819984280071506e-06, "loss": 3.4057, "step": 45190 }, { "epoch": 0.459747314453125, "grad_norm": 11.419657707214355, "learning_rate": 2.8195876840654073e-06, "loss": 3.3614, "step": 45195 }, { "epoch": 0.4597981770833333, "grad_norm": 15.169285774230957, "learning_rate": 2.8191910798827332e-06, "loss": 3.3418, "step": 45200 }, { "epoch": 0.4598490397135417, "grad_norm": 17.71880531311035, "learning_rate": 2.818794467533632e-06, "loss": 3.8517, "step": 45205 }, { "epoch": 0.45989990234375, "grad_norm": 15.081419944763184, "learning_rate": 2.818397847028248e-06, "loss": 3.199, "step": 45210 }, { "epoch": 0.4599507649739583, "grad_norm": 8.779073715209961, "learning_rate": 2.8180012183767327e-06, "loss": 3.139, "step": 45215 }, { "epoch": 0.4600016276041667, "grad_norm": 13.603039741516113, "learning_rate": 2.81760458158923e-06, "loss": 2.875, "step": 45220 }, { "epoch": 0.460052490234375, "grad_norm": 13.92172622680664, "learning_rate": 2.81720793667589e-06, "loss": 3.1506, "step": 45225 }, { "epoch": 0.4601033528645833, "grad_norm": 11.852885246276855, "learning_rate": 2.81681128364686e-06, "loss": 3.4814, "step": 45230 }, { "epoch": 0.4601542154947917, "grad_norm": 14.622265815734863, "learning_rate": 2.8164146225122886e-06, "loss": 3.6421, "step": 45235 }, { "epoch": 0.460205078125, "grad_norm": 7.7051496505737305, "learning_rate": 2.8160179532823233e-06, "loss": 3.9309, "step": 45240 }, { "epoch": 0.4602559407552083, "grad_norm": 11.519131660461426, "learning_rate": 2.8156212759671147e-06, "loss": 3.3426, "step": 45245 }, { "epoch": 0.4603068033854167, "grad_norm": 6.467058181762695, "learning_rate": 2.8152245905768093e-06, "loss": 3.3662, "step": 45250 }, { "epoch": 0.460357666015625, "grad_norm": 12.460644721984863, "learning_rate": 2.8148278971215594e-06, "loss": 3.3251, "step": 45255 }, { "epoch": 0.4604085286458333, "grad_norm": 12.776589393615723, "learning_rate": 2.8144311956115102e-06, "loss": 3.2917, "step": 45260 }, { "epoch": 0.4604593912760417, "grad_norm": 11.396586418151855, "learning_rate": 2.8140344860568143e-06, "loss": 3.509, "step": 45265 }, { "epoch": 0.46051025390625, "grad_norm": 8.867974281311035, "learning_rate": 2.813637768467621e-06, "loss": 3.2805, "step": 45270 }, { "epoch": 0.4605611165364583, "grad_norm": 12.355083465576172, "learning_rate": 2.8132410428540786e-06, "loss": 3.0796, "step": 45275 }, { "epoch": 0.4606119791666667, "grad_norm": 15.814553260803223, "learning_rate": 2.8128443092263396e-06, "loss": 3.3675, "step": 45280 }, { "epoch": 0.460662841796875, "grad_norm": 8.937992095947266, "learning_rate": 2.812447567594553e-06, "loss": 3.4759, "step": 45285 }, { "epoch": 0.4607137044270833, "grad_norm": 7.906702041625977, "learning_rate": 2.812050817968869e-06, "loss": 3.2888, "step": 45290 }, { "epoch": 0.4607645670572917, "grad_norm": 17.2447452545166, "learning_rate": 2.8116540603594383e-06, "loss": 3.4325, "step": 45295 }, { "epoch": 0.4608154296875, "grad_norm": 13.176146507263184, "learning_rate": 2.811257294776413e-06, "loss": 3.4879, "step": 45300 }, { "epoch": 0.4608662923177083, "grad_norm": 14.100749015808105, "learning_rate": 2.8108605212299435e-06, "loss": 3.6369, "step": 45305 }, { "epoch": 0.4609171549479167, "grad_norm": 8.977651596069336, "learning_rate": 2.8104637397301817e-06, "loss": 3.4339, "step": 45310 }, { "epoch": 0.460968017578125, "grad_norm": 6.882852554321289, "learning_rate": 2.8100669502872773e-06, "loss": 3.4842, "step": 45315 }, { "epoch": 0.4610188802083333, "grad_norm": 12.61129093170166, "learning_rate": 2.8096701529113847e-06, "loss": 3.0149, "step": 45320 }, { "epoch": 0.4610697428385417, "grad_norm": 8.961854934692383, "learning_rate": 2.8092733476126538e-06, "loss": 3.1113, "step": 45325 }, { "epoch": 0.46112060546875, "grad_norm": 11.627150535583496, "learning_rate": 2.8088765344012388e-06, "loss": 3.3965, "step": 45330 }, { "epoch": 0.4611714680989583, "grad_norm": 15.812899589538574, "learning_rate": 2.8084797132872897e-06, "loss": 3.4123, "step": 45335 }, { "epoch": 0.4612223307291667, "grad_norm": 8.41597843170166, "learning_rate": 2.8080828842809604e-06, "loss": 3.2558, "step": 45340 }, { "epoch": 0.461273193359375, "grad_norm": 7.589277744293213, "learning_rate": 2.807686047392404e-06, "loss": 3.5217, "step": 45345 }, { "epoch": 0.4613240559895833, "grad_norm": 11.874876976013184, "learning_rate": 2.807289202631773e-06, "loss": 3.4869, "step": 45350 }, { "epoch": 0.4613749186197917, "grad_norm": 12.37261962890625, "learning_rate": 2.8068923500092205e-06, "loss": 3.2697, "step": 45355 }, { "epoch": 0.46142578125, "grad_norm": 8.338521957397461, "learning_rate": 2.8064954895348994e-06, "loss": 3.5507, "step": 45360 }, { "epoch": 0.4614766438802083, "grad_norm": 7.475709915161133, "learning_rate": 2.8060986212189645e-06, "loss": 3.2731, "step": 45365 }, { "epoch": 0.4615275065104167, "grad_norm": 17.138797760009766, "learning_rate": 2.805701745071569e-06, "loss": 3.1835, "step": 45370 }, { "epoch": 0.461578369140625, "grad_norm": 13.825019836425781, "learning_rate": 2.8053048611028664e-06, "loss": 3.4295, "step": 45375 }, { "epoch": 0.4616292317708333, "grad_norm": 14.243266105651855, "learning_rate": 2.8049079693230114e-06, "loss": 3.3346, "step": 45380 }, { "epoch": 0.4616800944010417, "grad_norm": 12.526808738708496, "learning_rate": 2.804511069742159e-06, "loss": 3.2525, "step": 45385 }, { "epoch": 0.46173095703125, "grad_norm": 12.049077033996582, "learning_rate": 2.804114162370462e-06, "loss": 3.463, "step": 45390 }, { "epoch": 0.4617818196614583, "grad_norm": 7.687005519866943, "learning_rate": 2.8037172472180766e-06, "loss": 2.9475, "step": 45395 }, { "epoch": 0.4618326822916667, "grad_norm": 10.13161563873291, "learning_rate": 2.803320324295158e-06, "loss": 3.3653, "step": 45400 }, { "epoch": 0.461883544921875, "grad_norm": 7.479098796844482, "learning_rate": 2.80292339361186e-06, "loss": 3.8038, "step": 45405 }, { "epoch": 0.4619344075520833, "grad_norm": 13.849470138549805, "learning_rate": 2.8025264551783388e-06, "loss": 3.239, "step": 45410 }, { "epoch": 0.4619852701822917, "grad_norm": 12.716960906982422, "learning_rate": 2.80212950900475e-06, "loss": 3.4989, "step": 45415 }, { "epoch": 0.4620361328125, "grad_norm": 13.745390892028809, "learning_rate": 2.80173255510125e-06, "loss": 3.3843, "step": 45420 }, { "epoch": 0.4620869954427083, "grad_norm": 12.5733003616333, "learning_rate": 2.801335593477994e-06, "loss": 2.9451, "step": 45425 }, { "epoch": 0.4621378580729167, "grad_norm": 9.3145112991333, "learning_rate": 2.800938624145137e-06, "loss": 3.5294, "step": 45430 }, { "epoch": 0.462188720703125, "grad_norm": 12.413433074951172, "learning_rate": 2.8005416471128384e-06, "loss": 3.2586, "step": 45435 }, { "epoch": 0.4622395833333333, "grad_norm": 13.790407180786133, "learning_rate": 2.8001446623912523e-06, "loss": 3.599, "step": 45440 }, { "epoch": 0.4622904459635417, "grad_norm": 10.08675479888916, "learning_rate": 2.7997476699905352e-06, "loss": 3.5403, "step": 45445 }, { "epoch": 0.46234130859375, "grad_norm": 13.204947471618652, "learning_rate": 2.7993506699208463e-06, "loss": 3.2982, "step": 45450 }, { "epoch": 0.4623921712239583, "grad_norm": 15.960038185119629, "learning_rate": 2.798953662192341e-06, "loss": 3.3589, "step": 45455 }, { "epoch": 0.4624430338541667, "grad_norm": 15.051876068115234, "learning_rate": 2.7985566468151763e-06, "loss": 3.1739, "step": 45460 }, { "epoch": 0.462493896484375, "grad_norm": 11.148680686950684, "learning_rate": 2.7981596237995113e-06, "loss": 3.5542, "step": 45465 }, { "epoch": 0.4625447591145833, "grad_norm": 10.029537200927734, "learning_rate": 2.797762593155503e-06, "loss": 3.2435, "step": 45470 }, { "epoch": 0.4625956217447917, "grad_norm": 11.558324813842773, "learning_rate": 2.797365554893309e-06, "loss": 3.1746, "step": 45475 }, { "epoch": 0.462646484375, "grad_norm": 14.826126098632812, "learning_rate": 2.796968509023089e-06, "loss": 3.0957, "step": 45480 }, { "epoch": 0.4626973470052083, "grad_norm": 8.89865779876709, "learning_rate": 2.796571455554999e-06, "loss": 3.543, "step": 45485 }, { "epoch": 0.4627482096354167, "grad_norm": 13.865911483764648, "learning_rate": 2.7961743944991985e-06, "loss": 3.4724, "step": 45490 }, { "epoch": 0.462799072265625, "grad_norm": 12.434823036193848, "learning_rate": 2.7957773258658466e-06, "loss": 3.517, "step": 45495 }, { "epoch": 0.4628499348958333, "grad_norm": 10.956098556518555, "learning_rate": 2.7953802496651023e-06, "loss": 3.3514, "step": 45500 }, { "epoch": 0.4629007975260417, "grad_norm": 11.819670677185059, "learning_rate": 2.7949831659071237e-06, "loss": 3.0912, "step": 45505 }, { "epoch": 0.46295166015625, "grad_norm": 12.680577278137207, "learning_rate": 2.794586074602071e-06, "loss": 3.3441, "step": 45510 }, { "epoch": 0.4630025227864583, "grad_norm": 12.40794563293457, "learning_rate": 2.794188975760104e-06, "loss": 3.2761, "step": 45515 }, { "epoch": 0.4630533854166667, "grad_norm": 10.796905517578125, "learning_rate": 2.7937918693913808e-06, "loss": 3.3365, "step": 45520 }, { "epoch": 0.463104248046875, "grad_norm": 16.14789390563965, "learning_rate": 2.7933947555060627e-06, "loss": 3.5141, "step": 45525 }, { "epoch": 0.4631551106770833, "grad_norm": 12.617619514465332, "learning_rate": 2.7929976341143095e-06, "loss": 2.8931, "step": 45530 }, { "epoch": 0.4632059733072917, "grad_norm": 10.712579727172852, "learning_rate": 2.7926005052262804e-06, "loss": 3.657, "step": 45535 }, { "epoch": 0.4632568359375, "grad_norm": 11.029067039489746, "learning_rate": 2.7922033688521377e-06, "loss": 4.1094, "step": 45540 }, { "epoch": 0.4633076985677083, "grad_norm": 17.27669906616211, "learning_rate": 2.7918062250020406e-06, "loss": 3.5583, "step": 45545 }, { "epoch": 0.4633585611979167, "grad_norm": 17.485286712646484, "learning_rate": 2.7914090736861503e-06, "loss": 2.9874, "step": 45550 }, { "epoch": 0.463409423828125, "grad_norm": 9.873194694519043, "learning_rate": 2.7910119149146286e-06, "loss": 3.1215, "step": 45555 }, { "epoch": 0.4634602864583333, "grad_norm": 12.955085754394531, "learning_rate": 2.7906147486976347e-06, "loss": 3.2259, "step": 45560 }, { "epoch": 0.4635111490885417, "grad_norm": 11.360563278198242, "learning_rate": 2.790217575045333e-06, "loss": 2.8949, "step": 45565 }, { "epoch": 0.46356201171875, "grad_norm": 14.963033676147461, "learning_rate": 2.7898203939678827e-06, "loss": 3.4932, "step": 45570 }, { "epoch": 0.4636128743489583, "grad_norm": 7.978033542633057, "learning_rate": 2.789423205475446e-06, "loss": 3.2817, "step": 45575 }, { "epoch": 0.4636637369791667, "grad_norm": 9.456161499023438, "learning_rate": 2.7890260095781852e-06, "loss": 3.0742, "step": 45580 }, { "epoch": 0.463714599609375, "grad_norm": 11.976831436157227, "learning_rate": 2.788628806286263e-06, "loss": 3.1118, "step": 45585 }, { "epoch": 0.4637654622395833, "grad_norm": 15.199114799499512, "learning_rate": 2.7882315956098406e-06, "loss": 3.5626, "step": 45590 }, { "epoch": 0.4638163248697917, "grad_norm": 12.016143798828125, "learning_rate": 2.7878343775590823e-06, "loss": 3.4078, "step": 45595 }, { "epoch": 0.4638671875, "grad_norm": 11.1074800491333, "learning_rate": 2.787437152144148e-06, "loss": 3.0236, "step": 45600 }, { "epoch": 0.4639180501302083, "grad_norm": 12.313332557678223, "learning_rate": 2.7870399193752036e-06, "loss": 3.1036, "step": 45605 }, { "epoch": 0.4639689127604167, "grad_norm": 8.143770217895508, "learning_rate": 2.78664267926241e-06, "loss": 3.323, "step": 45610 }, { "epoch": 0.464019775390625, "grad_norm": 11.691008567810059, "learning_rate": 2.7862454318159323e-06, "loss": 3.2382, "step": 45615 }, { "epoch": 0.4640706380208333, "grad_norm": 8.609746932983398, "learning_rate": 2.7858481770459323e-06, "loss": 3.6214, "step": 45620 }, { "epoch": 0.4641215006510417, "grad_norm": 15.312708854675293, "learning_rate": 2.785450914962575e-06, "loss": 3.3398, "step": 45625 }, { "epoch": 0.46417236328125, "grad_norm": 13.080608367919922, "learning_rate": 2.785053645576023e-06, "loss": 3.0225, "step": 45630 }, { "epoch": 0.4642232259114583, "grad_norm": 7.98482084274292, "learning_rate": 2.7846563688964424e-06, "loss": 3.3137, "step": 45635 }, { "epoch": 0.4642740885416667, "grad_norm": 14.453807830810547, "learning_rate": 2.784259084933994e-06, "loss": 3.2648, "step": 45640 }, { "epoch": 0.464324951171875, "grad_norm": 8.011619567871094, "learning_rate": 2.783861793698846e-06, "loss": 3.3836, "step": 45645 }, { "epoch": 0.4643758138020833, "grad_norm": 10.647971153259277, "learning_rate": 2.7834644952011603e-06, "loss": 3.2071, "step": 45650 }, { "epoch": 0.4644266764322917, "grad_norm": 10.191885948181152, "learning_rate": 2.783067189451103e-06, "loss": 3.7233, "step": 45655 }, { "epoch": 0.4644775390625, "grad_norm": 9.946394920349121, "learning_rate": 2.7826698764588395e-06, "loss": 3.0366, "step": 45660 }, { "epoch": 0.4645284016927083, "grad_norm": 10.91606616973877, "learning_rate": 2.782272556234533e-06, "loss": 3.1228, "step": 45665 }, { "epoch": 0.4645792643229167, "grad_norm": 10.437875747680664, "learning_rate": 2.781875228788351e-06, "loss": 3.1384, "step": 45670 }, { "epoch": 0.464630126953125, "grad_norm": 9.868165969848633, "learning_rate": 2.7814778941304575e-06, "loss": 3.3624, "step": 45675 }, { "epoch": 0.4646809895833333, "grad_norm": 11.684338569641113, "learning_rate": 2.7810805522710193e-06, "loss": 3.5561, "step": 45680 }, { "epoch": 0.4647318522135417, "grad_norm": 11.288381576538086, "learning_rate": 2.7806832032202015e-06, "loss": 3.4609, "step": 45685 }, { "epoch": 0.46478271484375, "grad_norm": 8.975887298583984, "learning_rate": 2.78028584698817e-06, "loss": 3.5242, "step": 45690 }, { "epoch": 0.4648335774739583, "grad_norm": 13.19123649597168, "learning_rate": 2.779888483585092e-06, "loss": 3.4243, "step": 45695 }, { "epoch": 0.4648844401041667, "grad_norm": 9.616811752319336, "learning_rate": 2.7794911130211344e-06, "loss": 3.0956, "step": 45700 }, { "epoch": 0.464935302734375, "grad_norm": 16.0772762298584, "learning_rate": 2.7790937353064612e-06, "loss": 3.2974, "step": 45705 }, { "epoch": 0.4649861653645833, "grad_norm": 9.483195304870605, "learning_rate": 2.7786963504512423e-06, "loss": 3.244, "step": 45710 }, { "epoch": 0.4650370279947917, "grad_norm": 15.58304214477539, "learning_rate": 2.7782989584656424e-06, "loss": 3.4533, "step": 45715 }, { "epoch": 0.465087890625, "grad_norm": 9.435249328613281, "learning_rate": 2.77790155935983e-06, "loss": 3.8485, "step": 45720 }, { "epoch": 0.4651387532552083, "grad_norm": 12.428313255310059, "learning_rate": 2.777504153143972e-06, "loss": 3.3872, "step": 45725 }, { "epoch": 0.4651896158854167, "grad_norm": 15.34945297241211, "learning_rate": 2.7771067398282365e-06, "loss": 3.1382, "step": 45730 }, { "epoch": 0.465240478515625, "grad_norm": 13.501898765563965, "learning_rate": 2.77670931942279e-06, "loss": 3.186, "step": 45735 }, { "epoch": 0.4652913411458333, "grad_norm": 14.027562141418457, "learning_rate": 2.7763118919378015e-06, "loss": 3.2039, "step": 45740 }, { "epoch": 0.4653422037760417, "grad_norm": 16.236597061157227, "learning_rate": 2.7759144573834385e-06, "loss": 3.237, "step": 45745 }, { "epoch": 0.46539306640625, "grad_norm": 16.635051727294922, "learning_rate": 2.7755170157698703e-06, "loss": 3.1684, "step": 45750 }, { "epoch": 0.4654439290364583, "grad_norm": 9.709321022033691, "learning_rate": 2.7751195671072634e-06, "loss": 3.7887, "step": 45755 }, { "epoch": 0.4654947916666667, "grad_norm": 11.649523735046387, "learning_rate": 2.7747221114057882e-06, "loss": 3.6312, "step": 45760 }, { "epoch": 0.465545654296875, "grad_norm": 7.71863317489624, "learning_rate": 2.7743246486756133e-06, "loss": 4.0583, "step": 45765 }, { "epoch": 0.4655965169270833, "grad_norm": 15.480148315429688, "learning_rate": 2.7739271789269064e-06, "loss": 3.4472, "step": 45770 }, { "epoch": 0.4656473795572917, "grad_norm": 11.121807098388672, "learning_rate": 2.7735297021698383e-06, "loss": 3.2171, "step": 45775 }, { "epoch": 0.4656982421875, "grad_norm": 10.893674850463867, "learning_rate": 2.773132218414577e-06, "loss": 3.2047, "step": 45780 }, { "epoch": 0.4657491048177083, "grad_norm": 10.893601417541504, "learning_rate": 2.7727347276712935e-06, "loss": 3.7683, "step": 45785 }, { "epoch": 0.4657999674479167, "grad_norm": 13.179238319396973, "learning_rate": 2.7723372299501554e-06, "loss": 3.236, "step": 45790 }, { "epoch": 0.465850830078125, "grad_norm": 12.484517097473145, "learning_rate": 2.771939725261335e-06, "loss": 3.4028, "step": 45795 }, { "epoch": 0.4659016927083333, "grad_norm": 10.28607177734375, "learning_rate": 2.7715422136150003e-06, "loss": 3.4997, "step": 45800 }, { "epoch": 0.4659525553385417, "grad_norm": 12.38172721862793, "learning_rate": 2.771144695021323e-06, "loss": 4.0931, "step": 45805 }, { "epoch": 0.46600341796875, "grad_norm": 9.357684135437012, "learning_rate": 2.7707471694904726e-06, "loss": 3.2736, "step": 45810 }, { "epoch": 0.4660542805989583, "grad_norm": 8.547158241271973, "learning_rate": 2.7703496370326204e-06, "loss": 3.3679, "step": 45815 }, { "epoch": 0.4661051432291667, "grad_norm": 12.835251808166504, "learning_rate": 2.7699520976579366e-06, "loss": 3.553, "step": 45820 }, { "epoch": 0.466156005859375, "grad_norm": 8.556459426879883, "learning_rate": 2.7695545513765927e-06, "loss": 3.4662, "step": 45825 }, { "epoch": 0.4662068684895833, "grad_norm": 8.816329002380371, "learning_rate": 2.7691569981987594e-06, "loss": 3.277, "step": 45830 }, { "epoch": 0.4662577311197917, "grad_norm": 12.441618919372559, "learning_rate": 2.768759438134607e-06, "loss": 3.3367, "step": 45835 }, { "epoch": 0.46630859375, "grad_norm": 8.362746238708496, "learning_rate": 2.7683618711943093e-06, "loss": 3.7603, "step": 45840 }, { "epoch": 0.4663594563802083, "grad_norm": 11.711898803710938, "learning_rate": 2.767964297388037e-06, "loss": 3.4323, "step": 45845 }, { "epoch": 0.4664103190104167, "grad_norm": 14.386340141296387, "learning_rate": 2.76756671672596e-06, "loss": 3.4993, "step": 45850 }, { "epoch": 0.466461181640625, "grad_norm": 11.949966430664062, "learning_rate": 2.7671691292182534e-06, "loss": 3.3782, "step": 45855 }, { "epoch": 0.4665120442708333, "grad_norm": 8.975872993469238, "learning_rate": 2.7667715348750873e-06, "loss": 3.4422, "step": 45860 }, { "epoch": 0.4665629069010417, "grad_norm": 17.242460250854492, "learning_rate": 2.7663739337066352e-06, "loss": 3.2899, "step": 45865 }, { "epoch": 0.46661376953125, "grad_norm": 10.30756950378418, "learning_rate": 2.7659763257230687e-06, "loss": 3.4649, "step": 45870 }, { "epoch": 0.4666646321614583, "grad_norm": 13.584542274475098, "learning_rate": 2.765578710934561e-06, "loss": 3.606, "step": 45875 }, { "epoch": 0.4667154947916667, "grad_norm": 8.844463348388672, "learning_rate": 2.765181089351286e-06, "loss": 3.3156, "step": 45880 }, { "epoch": 0.466766357421875, "grad_norm": 13.212042808532715, "learning_rate": 2.764783460983414e-06, "loss": 3.1015, "step": 45885 }, { "epoch": 0.4668172200520833, "grad_norm": 11.864336967468262, "learning_rate": 2.7643858258411214e-06, "loss": 3.4215, "step": 45890 }, { "epoch": 0.4668680826822917, "grad_norm": 13.197895050048828, "learning_rate": 2.7639881839345795e-06, "loss": 3.1889, "step": 45895 }, { "epoch": 0.4669189453125, "grad_norm": 9.860047340393066, "learning_rate": 2.7635905352739624e-06, "loss": 3.5285, "step": 45900 }, { "epoch": 0.4669698079427083, "grad_norm": 10.454028129577637, "learning_rate": 2.7631928798694435e-06, "loss": 3.3109, "step": 45905 }, { "epoch": 0.4670206705729167, "grad_norm": 10.30970287322998, "learning_rate": 2.7627952177311978e-06, "loss": 3.4374, "step": 45910 }, { "epoch": 0.467071533203125, "grad_norm": 11.030384063720703, "learning_rate": 2.7623975488693984e-06, "loss": 3.2407, "step": 45915 }, { "epoch": 0.4671223958333333, "grad_norm": 8.984940528869629, "learning_rate": 2.7619998732942205e-06, "loss": 3.2256, "step": 45920 }, { "epoch": 0.4671732584635417, "grad_norm": 11.720098495483398, "learning_rate": 2.761602191015837e-06, "loss": 3.124, "step": 45925 }, { "epoch": 0.46722412109375, "grad_norm": 12.977888107299805, "learning_rate": 2.7612045020444243e-06, "loss": 3.0434, "step": 45930 }, { "epoch": 0.4672749837239583, "grad_norm": 14.955964088439941, "learning_rate": 2.760806806390156e-06, "loss": 3.2651, "step": 45935 }, { "epoch": 0.4673258463541667, "grad_norm": 10.134566307067871, "learning_rate": 2.7604091040632076e-06, "loss": 3.2583, "step": 45940 }, { "epoch": 0.467376708984375, "grad_norm": 13.452347755432129, "learning_rate": 2.7600113950737535e-06, "loss": 3.1927, "step": 45945 }, { "epoch": 0.4674275716145833, "grad_norm": 12.402140617370605, "learning_rate": 2.759613679431969e-06, "loss": 3.4382, "step": 45950 }, { "epoch": 0.4674784342447917, "grad_norm": 13.043981552124023, "learning_rate": 2.7592159571480317e-06, "loss": 3.4245, "step": 45955 }, { "epoch": 0.467529296875, "grad_norm": 15.960465431213379, "learning_rate": 2.758818228232114e-06, "loss": 3.0721, "step": 45960 }, { "epoch": 0.4675801595052083, "grad_norm": 10.671195030212402, "learning_rate": 2.758420492694394e-06, "loss": 3.0606, "step": 45965 }, { "epoch": 0.4676310221354167, "grad_norm": 12.669294357299805, "learning_rate": 2.758022750545047e-06, "loss": 3.3118, "step": 45970 }, { "epoch": 0.467681884765625, "grad_norm": 8.216130256652832, "learning_rate": 2.7576250017942484e-06, "loss": 3.3467, "step": 45975 }, { "epoch": 0.4677327473958333, "grad_norm": 11.873502731323242, "learning_rate": 2.757227246452176e-06, "loss": 3.4543, "step": 45980 }, { "epoch": 0.4677836100260417, "grad_norm": 12.08121395111084, "learning_rate": 2.7568294845290054e-06, "loss": 3.1965, "step": 45985 }, { "epoch": 0.46783447265625, "grad_norm": 10.506508827209473, "learning_rate": 2.756431716034913e-06, "loss": 3.118, "step": 45990 }, { "epoch": 0.4678853352864583, "grad_norm": 15.263740539550781, "learning_rate": 2.7560339409800762e-06, "loss": 3.3852, "step": 45995 }, { "epoch": 0.4679361979166667, "grad_norm": 11.982575416564941, "learning_rate": 2.755636159374672e-06, "loss": 3.6118, "step": 46000 }, { "epoch": 0.467987060546875, "grad_norm": 10.696041107177734, "learning_rate": 2.7552383712288766e-06, "loss": 3.3743, "step": 46005 }, { "epoch": 0.4680379231770833, "grad_norm": 13.898693084716797, "learning_rate": 2.754840576552868e-06, "loss": 3.0196, "step": 46010 }, { "epoch": 0.4680887858072917, "grad_norm": 8.868748664855957, "learning_rate": 2.7544427753568237e-06, "loss": 3.2477, "step": 46015 }, { "epoch": 0.4681396484375, "grad_norm": 15.456969261169434, "learning_rate": 2.754044967650922e-06, "loss": 3.7001, "step": 46020 }, { "epoch": 0.4681905110677083, "grad_norm": 11.392976760864258, "learning_rate": 2.753647153445339e-06, "loss": 3.349, "step": 46025 }, { "epoch": 0.4682413736979167, "grad_norm": 10.876288414001465, "learning_rate": 2.753249332750254e-06, "loss": 3.7635, "step": 46030 }, { "epoch": 0.468292236328125, "grad_norm": 19.523954391479492, "learning_rate": 2.7528515055758457e-06, "loss": 3.7524, "step": 46035 }, { "epoch": 0.4683430989583333, "grad_norm": 11.984675407409668, "learning_rate": 2.7524536719322903e-06, "loss": 3.5094, "step": 46040 }, { "epoch": 0.4683939615885417, "grad_norm": 14.17333698272705, "learning_rate": 2.7520558318297686e-06, "loss": 3.3934, "step": 46045 }, { "epoch": 0.46844482421875, "grad_norm": 13.487282752990723, "learning_rate": 2.7516579852784576e-06, "loss": 3.286, "step": 46050 }, { "epoch": 0.4684956868489583, "grad_norm": 14.06745433807373, "learning_rate": 2.7512601322885363e-06, "loss": 3.3532, "step": 46055 }, { "epoch": 0.4685465494791667, "grad_norm": 13.35567855834961, "learning_rate": 2.750862272870185e-06, "loss": 3.1667, "step": 46060 }, { "epoch": 0.468597412109375, "grad_norm": 15.46142292022705, "learning_rate": 2.7504644070335814e-06, "loss": 3.5401, "step": 46065 }, { "epoch": 0.4686482747395833, "grad_norm": 8.861741065979004, "learning_rate": 2.7500665347889054e-06, "loss": 3.4026, "step": 46070 }, { "epoch": 0.4686991373697917, "grad_norm": 13.047086715698242, "learning_rate": 2.7496686561463363e-06, "loss": 3.4934, "step": 46075 }, { "epoch": 0.46875, "grad_norm": 17.196575164794922, "learning_rate": 2.7492707711160537e-06, "loss": 3.9396, "step": 46080 }, { "epoch": 0.4688008626302083, "grad_norm": 10.11226749420166, "learning_rate": 2.7488728797082377e-06, "loss": 3.3455, "step": 46085 }, { "epoch": 0.4688517252604167, "grad_norm": 11.650439262390137, "learning_rate": 2.7484749819330684e-06, "loss": 3.3322, "step": 46090 }, { "epoch": 0.468902587890625, "grad_norm": 10.389031410217285, "learning_rate": 2.748077077800725e-06, "loss": 3.2071, "step": 46095 }, { "epoch": 0.4689534505208333, "grad_norm": 10.494956970214844, "learning_rate": 2.7476791673213888e-06, "loss": 3.2638, "step": 46100 }, { "epoch": 0.4690043131510417, "grad_norm": 8.282896995544434, "learning_rate": 2.7472812505052394e-06, "loss": 3.3806, "step": 46105 }, { "epoch": 0.46905517578125, "grad_norm": 14.236871719360352, "learning_rate": 2.7468833273624586e-06, "loss": 3.4765, "step": 46110 }, { "epoch": 0.4691060384114583, "grad_norm": 11.046870231628418, "learning_rate": 2.7464853979032253e-06, "loss": 3.3718, "step": 46115 }, { "epoch": 0.4691569010416667, "grad_norm": 14.94047737121582, "learning_rate": 2.746087462137722e-06, "loss": 3.4382, "step": 46120 }, { "epoch": 0.469207763671875, "grad_norm": 7.440110683441162, "learning_rate": 2.7456895200761295e-06, "loss": 3.5794, "step": 46125 }, { "epoch": 0.4692586263020833, "grad_norm": 11.102751731872559, "learning_rate": 2.745291571728629e-06, "loss": 3.0727, "step": 46130 }, { "epoch": 0.4693094889322917, "grad_norm": 11.534234046936035, "learning_rate": 2.7448936171054008e-06, "loss": 3.4297, "step": 46135 }, { "epoch": 0.4693603515625, "grad_norm": 10.486431121826172, "learning_rate": 2.7444956562166287e-06, "loss": 3.0962, "step": 46140 }, { "epoch": 0.4694112141927083, "grad_norm": 12.308408737182617, "learning_rate": 2.7440976890724926e-06, "loss": 3.5718, "step": 46145 }, { "epoch": 0.4694620768229167, "grad_norm": 14.86275863647461, "learning_rate": 2.7436997156831744e-06, "loss": 3.3616, "step": 46150 }, { "epoch": 0.469512939453125, "grad_norm": 10.989408493041992, "learning_rate": 2.743301736058858e-06, "loss": 3.4982, "step": 46155 }, { "epoch": 0.4695638020833333, "grad_norm": 16.7119140625, "learning_rate": 2.7429037502097235e-06, "loss": 3.5977, "step": 46160 }, { "epoch": 0.4696146647135417, "grad_norm": 10.82308292388916, "learning_rate": 2.7425057581459547e-06, "loss": 3.2861, "step": 46165 }, { "epoch": 0.46966552734375, "grad_norm": 15.950583457946777, "learning_rate": 2.7421077598777322e-06, "loss": 3.529, "step": 46170 }, { "epoch": 0.4697163899739583, "grad_norm": 10.276703834533691, "learning_rate": 2.7417097554152417e-06, "loss": 3.4298, "step": 46175 }, { "epoch": 0.4697672526041667, "grad_norm": 13.284745216369629, "learning_rate": 2.741311744768664e-06, "loss": 3.1905, "step": 46180 }, { "epoch": 0.469818115234375, "grad_norm": 10.398514747619629, "learning_rate": 2.7409137279481816e-06, "loss": 3.3221, "step": 46185 }, { "epoch": 0.4698689778645833, "grad_norm": 16.240459442138672, "learning_rate": 2.740515704963979e-06, "loss": 3.4604, "step": 46190 }, { "epoch": 0.4699198404947917, "grad_norm": 13.618303298950195, "learning_rate": 2.7401176758262392e-06, "loss": 3.6707, "step": 46195 }, { "epoch": 0.469970703125, "grad_norm": 7.832457542419434, "learning_rate": 2.739719640545146e-06, "loss": 3.0944, "step": 46200 }, { "epoch": 0.4700215657552083, "grad_norm": 8.400361061096191, "learning_rate": 2.739321599130883e-06, "loss": 4.1508, "step": 46205 }, { "epoch": 0.4700724283854167, "grad_norm": 7.780868053436279, "learning_rate": 2.7389235515936317e-06, "loss": 3.2587, "step": 46210 }, { "epoch": 0.470123291015625, "grad_norm": 15.038986206054688, "learning_rate": 2.73852549794358e-06, "loss": 3.0948, "step": 46215 }, { "epoch": 0.4701741536458333, "grad_norm": 13.191540718078613, "learning_rate": 2.738127438190909e-06, "loss": 3.2851, "step": 46220 }, { "epoch": 0.4702250162760417, "grad_norm": 12.649255752563477, "learning_rate": 2.7377293723458045e-06, "loss": 3.2563, "step": 46225 }, { "epoch": 0.47027587890625, "grad_norm": 11.454198837280273, "learning_rate": 2.73733130041845e-06, "loss": 3.1829, "step": 46230 }, { "epoch": 0.4703267415364583, "grad_norm": 14.024190902709961, "learning_rate": 2.7369332224190298e-06, "loss": 2.9479, "step": 46235 }, { "epoch": 0.4703776041666667, "grad_norm": 13.10719108581543, "learning_rate": 2.7365351383577297e-06, "loss": 3.1713, "step": 46240 }, { "epoch": 0.470428466796875, "grad_norm": 10.908052444458008, "learning_rate": 2.7361370482447346e-06, "loss": 2.944, "step": 46245 }, { "epoch": 0.4704793294270833, "grad_norm": 13.364755630493164, "learning_rate": 2.7357389520902282e-06, "loss": 3.4202, "step": 46250 }, { "epoch": 0.4705301920572917, "grad_norm": 13.245467185974121, "learning_rate": 2.7353408499043976e-06, "loss": 3.1811, "step": 46255 }, { "epoch": 0.4705810546875, "grad_norm": 10.978673934936523, "learning_rate": 2.734942741697427e-06, "loss": 3.36, "step": 46260 }, { "epoch": 0.4706319173177083, "grad_norm": 9.301791191101074, "learning_rate": 2.734544627479501e-06, "loss": 3.4495, "step": 46265 }, { "epoch": 0.4706827799479167, "grad_norm": 8.850616455078125, "learning_rate": 2.7341465072608074e-06, "loss": 3.1127, "step": 46270 }, { "epoch": 0.470733642578125, "grad_norm": 14.622509002685547, "learning_rate": 2.7337483810515303e-06, "loss": 3.2434, "step": 46275 }, { "epoch": 0.4707845052083333, "grad_norm": 8.17994213104248, "learning_rate": 2.7333502488618568e-06, "loss": 3.2975, "step": 46280 }, { "epoch": 0.4708353678385417, "grad_norm": 11.39811897277832, "learning_rate": 2.7329521107019723e-06, "loss": 3.5219, "step": 46285 }, { "epoch": 0.47088623046875, "grad_norm": 8.171241760253906, "learning_rate": 2.7325539665820632e-06, "loss": 2.908, "step": 46290 }, { "epoch": 0.4709370930989583, "grad_norm": 10.729769706726074, "learning_rate": 2.7321558165123157e-06, "loss": 2.9641, "step": 46295 }, { "epoch": 0.4709879557291667, "grad_norm": 11.02481746673584, "learning_rate": 2.7317576605029173e-06, "loss": 3.1499, "step": 46300 }, { "epoch": 0.471038818359375, "grad_norm": 12.968914031982422, "learning_rate": 2.7313594985640533e-06, "loss": 3.3203, "step": 46305 }, { "epoch": 0.4710896809895833, "grad_norm": 10.956926345825195, "learning_rate": 2.7309613307059125e-06, "loss": 3.4741, "step": 46310 }, { "epoch": 0.4711405436197917, "grad_norm": 13.60365104675293, "learning_rate": 2.7305631569386793e-06, "loss": 3.5447, "step": 46315 }, { "epoch": 0.47119140625, "grad_norm": 9.149913787841797, "learning_rate": 2.7301649772725435e-06, "loss": 3.5997, "step": 46320 }, { "epoch": 0.4712422688802083, "grad_norm": 15.682628631591797, "learning_rate": 2.7297667917176906e-06, "loss": 3.0661, "step": 46325 }, { "epoch": 0.4712931315104167, "grad_norm": 11.843368530273438, "learning_rate": 2.72936860028431e-06, "loss": 3.6361, "step": 46330 }, { "epoch": 0.471343994140625, "grad_norm": 12.912580490112305, "learning_rate": 2.728970402982587e-06, "loss": 3.3276, "step": 46335 }, { "epoch": 0.4713948567708333, "grad_norm": 13.227678298950195, "learning_rate": 2.728572199822711e-06, "loss": 3.2261, "step": 46340 }, { "epoch": 0.4714457194010417, "grad_norm": 12.401229858398438, "learning_rate": 2.7281739908148693e-06, "loss": 3.0861, "step": 46345 }, { "epoch": 0.47149658203125, "grad_norm": 10.131817817687988, "learning_rate": 2.7277757759692495e-06, "loss": 3.5494, "step": 46350 }, { "epoch": 0.4715474446614583, "grad_norm": 11.628375053405762, "learning_rate": 2.7273775552960413e-06, "loss": 3.3065, "step": 46355 }, { "epoch": 0.4715983072916667, "grad_norm": 8.976502418518066, "learning_rate": 2.726979328805432e-06, "loss": 3.2928, "step": 46360 }, { "epoch": 0.471649169921875, "grad_norm": 14.638002395629883, "learning_rate": 2.7265810965076102e-06, "loss": 3.8144, "step": 46365 }, { "epoch": 0.4717000325520833, "grad_norm": 13.228276252746582, "learning_rate": 2.7261828584127648e-06, "loss": 3.2929, "step": 46370 }, { "epoch": 0.4717508951822917, "grad_norm": 9.379998207092285, "learning_rate": 2.725784614531085e-06, "loss": 3.2328, "step": 46375 }, { "epoch": 0.4718017578125, "grad_norm": 7.740593910217285, "learning_rate": 2.7253863648727585e-06, "loss": 3.829, "step": 46380 }, { "epoch": 0.4718526204427083, "grad_norm": 13.069509506225586, "learning_rate": 2.7249881094479764e-06, "loss": 3.1788, "step": 46385 }, { "epoch": 0.4719034830729167, "grad_norm": 16.901323318481445, "learning_rate": 2.7245898482669265e-06, "loss": 3.1605, "step": 46390 }, { "epoch": 0.471954345703125, "grad_norm": 13.64307975769043, "learning_rate": 2.724191581339798e-06, "loss": 3.7023, "step": 46395 }, { "epoch": 0.4720052083333333, "grad_norm": 10.633169174194336, "learning_rate": 2.7237933086767814e-06, "loss": 3.269, "step": 46400 }, { "epoch": 0.4720560709635417, "grad_norm": 13.924692153930664, "learning_rate": 2.7233950302880658e-06, "loss": 3.2844, "step": 46405 }, { "epoch": 0.47210693359375, "grad_norm": 12.190840721130371, "learning_rate": 2.722996746183841e-06, "loss": 3.726, "step": 46410 }, { "epoch": 0.4721577962239583, "grad_norm": 12.154855728149414, "learning_rate": 2.7225984563742987e-06, "loss": 3.2895, "step": 46415 }, { "epoch": 0.4722086588541667, "grad_norm": 10.432464599609375, "learning_rate": 2.7222001608696264e-06, "loss": 3.3704, "step": 46420 }, { "epoch": 0.472259521484375, "grad_norm": 11.872044563293457, "learning_rate": 2.7218018596800167e-06, "loss": 3.1948, "step": 46425 }, { "epoch": 0.4723103841145833, "grad_norm": 10.191203117370605, "learning_rate": 2.7214035528156572e-06, "loss": 3.1356, "step": 46430 }, { "epoch": 0.4723612467447917, "grad_norm": 11.7601318359375, "learning_rate": 2.721005240286742e-06, "loss": 3.1688, "step": 46435 }, { "epoch": 0.472412109375, "grad_norm": 16.691404342651367, "learning_rate": 2.7206069221034597e-06, "loss": 3.407, "step": 46440 }, { "epoch": 0.4724629720052083, "grad_norm": 16.61736488342285, "learning_rate": 2.720208598276001e-06, "loss": 3.5574, "step": 46445 }, { "epoch": 0.4725138346354167, "grad_norm": 11.950624465942383, "learning_rate": 2.7198102688145583e-06, "loss": 3.7005, "step": 46450 }, { "epoch": 0.472564697265625, "grad_norm": 15.381657600402832, "learning_rate": 2.7194119337293214e-06, "loss": 3.3024, "step": 46455 }, { "epoch": 0.4726155598958333, "grad_norm": 8.248703956604004, "learning_rate": 2.7190135930304824e-06, "loss": 3.1889, "step": 46460 }, { "epoch": 0.4726664225260417, "grad_norm": 11.456612586975098, "learning_rate": 2.7186152467282325e-06, "loss": 3.4829, "step": 46465 }, { "epoch": 0.47271728515625, "grad_norm": 12.047392845153809, "learning_rate": 2.718216894832763e-06, "loss": 3.6619, "step": 46470 }, { "epoch": 0.4727681477864583, "grad_norm": 13.88084888458252, "learning_rate": 2.7178185373542663e-06, "loss": 3.1162, "step": 46475 }, { "epoch": 0.4728190104166667, "grad_norm": 10.259420394897461, "learning_rate": 2.7174201743029343e-06, "loss": 3.4904, "step": 46480 }, { "epoch": 0.472869873046875, "grad_norm": 16.462200164794922, "learning_rate": 2.717021805688958e-06, "loss": 3.3561, "step": 46485 }, { "epoch": 0.4729207356770833, "grad_norm": 14.48259162902832, "learning_rate": 2.7166234315225307e-06, "loss": 3.4951, "step": 46490 }, { "epoch": 0.4729715983072917, "grad_norm": 13.49642276763916, "learning_rate": 2.716225051813844e-06, "loss": 3.3612, "step": 46495 }, { "epoch": 0.4730224609375, "grad_norm": 12.33002758026123, "learning_rate": 2.715826666573091e-06, "loss": 3.3729, "step": 46500 }, { "epoch": 0.4730733235677083, "grad_norm": 14.000799179077148, "learning_rate": 2.715428275810463e-06, "loss": 3.4473, "step": 46505 }, { "epoch": 0.4731241861979167, "grad_norm": 7.098884105682373, "learning_rate": 2.7150298795361544e-06, "loss": 3.2642, "step": 46510 }, { "epoch": 0.473175048828125, "grad_norm": 10.298365592956543, "learning_rate": 2.7146314777603573e-06, "loss": 3.6669, "step": 46515 }, { "epoch": 0.4732259114583333, "grad_norm": 8.720600128173828, "learning_rate": 2.7142330704932647e-06, "loss": 3.2124, "step": 46520 }, { "epoch": 0.4732767740885417, "grad_norm": 12.410454750061035, "learning_rate": 2.7138346577450692e-06, "loss": 3.5289, "step": 46525 }, { "epoch": 0.47332763671875, "grad_norm": 10.719010353088379, "learning_rate": 2.7134362395259655e-06, "loss": 2.9803, "step": 46530 }, { "epoch": 0.4733784993489583, "grad_norm": 9.425874710083008, "learning_rate": 2.713037815846145e-06, "loss": 3.6764, "step": 46535 }, { "epoch": 0.4734293619791667, "grad_norm": 8.810243606567383, "learning_rate": 2.7126393867158034e-06, "loss": 3.5622, "step": 46540 }, { "epoch": 0.473480224609375, "grad_norm": 11.177980422973633, "learning_rate": 2.7122409521451344e-06, "loss": 3.3716, "step": 46545 }, { "epoch": 0.4735310872395833, "grad_norm": 12.672170639038086, "learning_rate": 2.7118425121443286e-06, "loss": 3.3576, "step": 46550 }, { "epoch": 0.4735819498697917, "grad_norm": 9.455219268798828, "learning_rate": 2.7114440667235842e-06, "loss": 3.1201, "step": 46555 }, { "epoch": 0.4736328125, "grad_norm": 13.621195793151855, "learning_rate": 2.7110456158930924e-06, "loss": 3.1756, "step": 46560 }, { "epoch": 0.4736836751302083, "grad_norm": 13.35601806640625, "learning_rate": 2.7106471596630498e-06, "loss": 3.4384, "step": 46565 }, { "epoch": 0.4737345377604167, "grad_norm": 12.736628532409668, "learning_rate": 2.7102486980436486e-06, "loss": 3.0624, "step": 46570 }, { "epoch": 0.473785400390625, "grad_norm": 12.159968376159668, "learning_rate": 2.7098502310450843e-06, "loss": 3.013, "step": 46575 }, { "epoch": 0.4738362630208333, "grad_norm": 13.247224807739258, "learning_rate": 2.709451758677552e-06, "loss": 3.506, "step": 46580 }, { "epoch": 0.4738871256510417, "grad_norm": 13.124889373779297, "learning_rate": 2.7090532809512455e-06, "loss": 2.9816, "step": 46585 }, { "epoch": 0.47393798828125, "grad_norm": 12.223262786865234, "learning_rate": 2.7086547978763604e-06, "loss": 3.3751, "step": 46590 }, { "epoch": 0.4739888509114583, "grad_norm": 11.755989074707031, "learning_rate": 2.708256309463093e-06, "loss": 3.5024, "step": 46595 }, { "epoch": 0.4740397135416667, "grad_norm": 8.518555641174316, "learning_rate": 2.7078578157216356e-06, "loss": 3.2307, "step": 46600 }, { "epoch": 0.474090576171875, "grad_norm": 11.394265174865723, "learning_rate": 2.7074593166621864e-06, "loss": 3.1565, "step": 46605 }, { "epoch": 0.4741414388020833, "grad_norm": 12.099921226501465, "learning_rate": 2.7070608122949394e-06, "loss": 3.2933, "step": 46610 }, { "epoch": 0.4741923014322917, "grad_norm": 14.934525489807129, "learning_rate": 2.706662302630091e-06, "loss": 3.0822, "step": 46615 }, { "epoch": 0.4742431640625, "grad_norm": 6.391138553619385, "learning_rate": 2.706263787677836e-06, "loss": 3.2745, "step": 46620 }, { "epoch": 0.4742940266927083, "grad_norm": 10.021805763244629, "learning_rate": 2.705865267448371e-06, "loss": 3.5125, "step": 46625 }, { "epoch": 0.4743448893229167, "grad_norm": 15.852633476257324, "learning_rate": 2.7054667419518915e-06, "loss": 3.794, "step": 46630 }, { "epoch": 0.474395751953125, "grad_norm": 15.297828674316406, "learning_rate": 2.705068211198595e-06, "loss": 3.2434, "step": 46635 }, { "epoch": 0.4744466145833333, "grad_norm": 12.144735336303711, "learning_rate": 2.704669675198676e-06, "loss": 3.2596, "step": 46640 }, { "epoch": 0.4744974772135417, "grad_norm": 10.186820030212402, "learning_rate": 2.7042711339623322e-06, "loss": 3.4567, "step": 46645 }, { "epoch": 0.47454833984375, "grad_norm": 10.479610443115234, "learning_rate": 2.7038725874997603e-06, "loss": 3.6553, "step": 46650 }, { "epoch": 0.4745992024739583, "grad_norm": 8.197158813476562, "learning_rate": 2.703474035821156e-06, "loss": 3.331, "step": 46655 }, { "epoch": 0.4746500651041667, "grad_norm": 13.642806053161621, "learning_rate": 2.7030754789367174e-06, "loss": 3.8816, "step": 46660 }, { "epoch": 0.474700927734375, "grad_norm": 13.100593566894531, "learning_rate": 2.7026769168566397e-06, "loss": 3.3208, "step": 46665 }, { "epoch": 0.4747517903645833, "grad_norm": 13.655210494995117, "learning_rate": 2.7022783495911228e-06, "loss": 2.952, "step": 46670 }, { "epoch": 0.4748026529947917, "grad_norm": 12.086578369140625, "learning_rate": 2.7018797771503608e-06, "loss": 3.1542, "step": 46675 }, { "epoch": 0.474853515625, "grad_norm": 11.18812370300293, "learning_rate": 2.7014811995445532e-06, "loss": 3.2304, "step": 46680 }, { "epoch": 0.4749043782552083, "grad_norm": 9.631187438964844, "learning_rate": 2.7010826167838967e-06, "loss": 3.2375, "step": 46685 }, { "epoch": 0.4749552408854167, "grad_norm": 13.716019630432129, "learning_rate": 2.700684028878589e-06, "loss": 3.3886, "step": 46690 }, { "epoch": 0.475006103515625, "grad_norm": 9.768837928771973, "learning_rate": 2.700285435838828e-06, "loss": 3.4435, "step": 46695 }, { "epoch": 0.4750569661458333, "grad_norm": 14.271130561828613, "learning_rate": 2.6998868376748123e-06, "loss": 3.3833, "step": 46700 }, { "epoch": 0.4751078287760417, "grad_norm": 17.306480407714844, "learning_rate": 2.6994882343967376e-06, "loss": 3.5262, "step": 46705 }, { "epoch": 0.47515869140625, "grad_norm": 11.069417953491211, "learning_rate": 2.6990896260148057e-06, "loss": 3.0509, "step": 46710 }, { "epoch": 0.4752095540364583, "grad_norm": 14.676597595214844, "learning_rate": 2.698691012539211e-06, "loss": 2.9301, "step": 46715 }, { "epoch": 0.4752604166666667, "grad_norm": 9.63838005065918, "learning_rate": 2.6982923939801555e-06, "loss": 3.1717, "step": 46720 }, { "epoch": 0.475311279296875, "grad_norm": 8.0972261428833, "learning_rate": 2.6978937703478354e-06, "loss": 3.1371, "step": 46725 }, { "epoch": 0.4753621419270833, "grad_norm": 10.335068702697754, "learning_rate": 2.6974951416524505e-06, "loss": 3.2931, "step": 46730 }, { "epoch": 0.4754130045572917, "grad_norm": 9.206419944763184, "learning_rate": 2.6970965079041985e-06, "loss": 3.309, "step": 46735 }, { "epoch": 0.4754638671875, "grad_norm": 14.041919708251953, "learning_rate": 2.696697869113279e-06, "loss": 3.267, "step": 46740 }, { "epoch": 0.4755147298177083, "grad_norm": 13.776942253112793, "learning_rate": 2.696299225289892e-06, "loss": 3.3834, "step": 46745 }, { "epoch": 0.4755655924479167, "grad_norm": 16.299365997314453, "learning_rate": 2.6959005764442348e-06, "loss": 3.2208, "step": 46750 }, { "epoch": 0.475616455078125, "grad_norm": 9.975279808044434, "learning_rate": 2.6955019225865083e-06, "loss": 3.4005, "step": 46755 }, { "epoch": 0.4756673177083333, "grad_norm": 17.89759063720703, "learning_rate": 2.6951032637269113e-06, "loss": 3.0012, "step": 46760 }, { "epoch": 0.4757181803385417, "grad_norm": 10.703070640563965, "learning_rate": 2.6947045998756445e-06, "loss": 3.1897, "step": 46765 }, { "epoch": 0.47576904296875, "grad_norm": 11.555328369140625, "learning_rate": 2.6943059310429054e-06, "loss": 3.7983, "step": 46770 }, { "epoch": 0.4758199055989583, "grad_norm": 9.706049919128418, "learning_rate": 2.6939072572388956e-06, "loss": 3.3247, "step": 46775 }, { "epoch": 0.4758707682291667, "grad_norm": 16.109375, "learning_rate": 2.6935085784738147e-06, "loss": 3.2998, "step": 46780 }, { "epoch": 0.475921630859375, "grad_norm": 14.335375785827637, "learning_rate": 2.6931098947578626e-06, "loss": 3.28, "step": 46785 }, { "epoch": 0.4759724934895833, "grad_norm": 14.95987606048584, "learning_rate": 2.6927112061012393e-06, "loss": 3.2788, "step": 46790 }, { "epoch": 0.4760233561197917, "grad_norm": 12.191413879394531, "learning_rate": 2.6923125125141457e-06, "loss": 3.3106, "step": 46795 }, { "epoch": 0.47607421875, "grad_norm": 14.423506736755371, "learning_rate": 2.6919138140067826e-06, "loss": 3.478, "step": 46800 }, { "epoch": 0.4761250813802083, "grad_norm": 14.733509063720703, "learning_rate": 2.6915151105893494e-06, "loss": 3.2981, "step": 46805 }, { "epoch": 0.4761759440104167, "grad_norm": 13.273368835449219, "learning_rate": 2.6911164022720474e-06, "loss": 3.504, "step": 46810 }, { "epoch": 0.476226806640625, "grad_norm": 13.908172607421875, "learning_rate": 2.6907176890650784e-06, "loss": 3.4394, "step": 46815 }, { "epoch": 0.4762776692708333, "grad_norm": 11.612787246704102, "learning_rate": 2.6903189709786414e-06, "loss": 3.3358, "step": 46820 }, { "epoch": 0.4763285319010417, "grad_norm": 10.148160934448242, "learning_rate": 2.68992024802294e-06, "loss": 3.5282, "step": 46825 }, { "epoch": 0.47637939453125, "grad_norm": 12.438175201416016, "learning_rate": 2.6895215202081727e-06, "loss": 3.3597, "step": 46830 }, { "epoch": 0.4764302571614583, "grad_norm": 11.603988647460938, "learning_rate": 2.689122787544543e-06, "loss": 3.1457, "step": 46835 }, { "epoch": 0.4764811197916667, "grad_norm": 15.418655395507812, "learning_rate": 2.6887240500422513e-06, "loss": 3.7434, "step": 46840 }, { "epoch": 0.476531982421875, "grad_norm": 14.989448547363281, "learning_rate": 2.6883253077114994e-06, "loss": 3.2839, "step": 46845 }, { "epoch": 0.4765828450520833, "grad_norm": 13.838479995727539, "learning_rate": 2.6879265605624895e-06, "loss": 3.3141, "step": 46850 }, { "epoch": 0.4766337076822917, "grad_norm": 11.194694519042969, "learning_rate": 2.6875278086054225e-06, "loss": 3.299, "step": 46855 }, { "epoch": 0.4766845703125, "grad_norm": 16.84820556640625, "learning_rate": 2.6871290518505017e-06, "loss": 3.3779, "step": 46860 }, { "epoch": 0.4767354329427083, "grad_norm": 13.52037239074707, "learning_rate": 2.686730290307928e-06, "loss": 3.0612, "step": 46865 }, { "epoch": 0.4767862955729167, "grad_norm": 19.260265350341797, "learning_rate": 2.686331523987904e-06, "loss": 3.7525, "step": 46870 }, { "epoch": 0.476837158203125, "grad_norm": 8.696489334106445, "learning_rate": 2.6859327529006326e-06, "loss": 3.2166, "step": 46875 }, { "epoch": 0.4768880208333333, "grad_norm": 8.214338302612305, "learning_rate": 2.685533977056316e-06, "loss": 3.0088, "step": 46880 }, { "epoch": 0.4769388834635417, "grad_norm": 13.720746040344238, "learning_rate": 2.685135196465155e-06, "loss": 3.4988, "step": 46885 }, { "epoch": 0.47698974609375, "grad_norm": 12.400075912475586, "learning_rate": 2.6847364111373557e-06, "loss": 2.9571, "step": 46890 }, { "epoch": 0.4770406087239583, "grad_norm": 13.83202838897705, "learning_rate": 2.6843376210831184e-06, "loss": 3.375, "step": 46895 }, { "epoch": 0.4770914713541667, "grad_norm": 12.201261520385742, "learning_rate": 2.683938826312647e-06, "loss": 3.3864, "step": 46900 }, { "epoch": 0.477142333984375, "grad_norm": 8.518744468688965, "learning_rate": 2.683540026836144e-06, "loss": 3.5712, "step": 46905 }, { "epoch": 0.4771931966145833, "grad_norm": 8.686223030090332, "learning_rate": 2.6831412226638125e-06, "loss": 3.9282, "step": 46910 }, { "epoch": 0.4772440592447917, "grad_norm": 11.989502906799316, "learning_rate": 2.682742413805857e-06, "loss": 3.6695, "step": 46915 }, { "epoch": 0.477294921875, "grad_norm": 11.652146339416504, "learning_rate": 2.6823436002724802e-06, "loss": 3.4997, "step": 46920 }, { "epoch": 0.4773457845052083, "grad_norm": 10.252928733825684, "learning_rate": 2.6819447820738847e-06, "loss": 3.2355, "step": 46925 }, { "epoch": 0.4773966471354167, "grad_norm": 13.859277725219727, "learning_rate": 2.6815459592202765e-06, "loss": 3.2546, "step": 46930 }, { "epoch": 0.477447509765625, "grad_norm": 15.216449737548828, "learning_rate": 2.6811471317218567e-06, "loss": 3.3368, "step": 46935 }, { "epoch": 0.4774983723958333, "grad_norm": 9.995338439941406, "learning_rate": 2.6807482995888306e-06, "loss": 3.0775, "step": 46940 }, { "epoch": 0.4775492350260417, "grad_norm": 14.418883323669434, "learning_rate": 2.680349462831403e-06, "loss": 3.3683, "step": 46945 }, { "epoch": 0.47760009765625, "grad_norm": 14.491920471191406, "learning_rate": 2.679950621459776e-06, "loss": 3.5651, "step": 46950 }, { "epoch": 0.4776509602864583, "grad_norm": 9.108541488647461, "learning_rate": 2.6795517754841562e-06, "loss": 3.2236, "step": 46955 }, { "epoch": 0.4777018229166667, "grad_norm": 12.77468204498291, "learning_rate": 2.6791529249147456e-06, "loss": 3.0829, "step": 46960 }, { "epoch": 0.477752685546875, "grad_norm": 13.290841102600098, "learning_rate": 2.6787540697617505e-06, "loss": 3.1515, "step": 46965 }, { "epoch": 0.4778035481770833, "grad_norm": 13.552038192749023, "learning_rate": 2.678355210035375e-06, "loss": 3.3246, "step": 46970 }, { "epoch": 0.4778544108072917, "grad_norm": 11.967382431030273, "learning_rate": 2.677956345745823e-06, "loss": 3.3275, "step": 46975 }, { "epoch": 0.4779052734375, "grad_norm": 11.457630157470703, "learning_rate": 2.677557476903301e-06, "loss": 3.331, "step": 46980 }, { "epoch": 0.4779561360677083, "grad_norm": 10.440465927124023, "learning_rate": 2.677158603518013e-06, "loss": 3.2845, "step": 46985 }, { "epoch": 0.4780069986979167, "grad_norm": 10.605171203613281, "learning_rate": 2.6767597256001633e-06, "loss": 3.3102, "step": 46990 }, { "epoch": 0.478057861328125, "grad_norm": 8.721869468688965, "learning_rate": 2.6763608431599586e-06, "loss": 2.8082, "step": 46995 }, { "epoch": 0.4781087239583333, "grad_norm": 9.492325782775879, "learning_rate": 2.6759619562076035e-06, "loss": 3.2523, "step": 47000 }, { "epoch": 0.4781595865885417, "grad_norm": 15.880738258361816, "learning_rate": 2.6755630647533034e-06, "loss": 3.6552, "step": 47005 }, { "epoch": 0.47821044921875, "grad_norm": 12.2164306640625, "learning_rate": 2.6751641688072633e-06, "loss": 3.652, "step": 47010 }, { "epoch": 0.4782613118489583, "grad_norm": 14.72760009765625, "learning_rate": 2.6747652683796897e-06, "loss": 3.2615, "step": 47015 }, { "epoch": 0.4783121744791667, "grad_norm": 11.249184608459473, "learning_rate": 2.674366363480789e-06, "loss": 3.4039, "step": 47020 }, { "epoch": 0.478363037109375, "grad_norm": 21.686203002929688, "learning_rate": 2.673967454120765e-06, "loss": 3.4197, "step": 47025 }, { "epoch": 0.4784138997395833, "grad_norm": 15.506630897521973, "learning_rate": 2.6735685403098255e-06, "loss": 3.3828, "step": 47030 }, { "epoch": 0.4784647623697917, "grad_norm": 14.081663131713867, "learning_rate": 2.673169622058176e-06, "loss": 3.1585, "step": 47035 }, { "epoch": 0.478515625, "grad_norm": 12.858463287353516, "learning_rate": 2.6727706993760227e-06, "loss": 3.1958, "step": 47040 }, { "epoch": 0.4785664876302083, "grad_norm": 10.969271659851074, "learning_rate": 2.672371772273572e-06, "loss": 3.5695, "step": 47045 }, { "epoch": 0.4786173502604167, "grad_norm": 11.234213829040527, "learning_rate": 2.6719728407610313e-06, "loss": 3.6693, "step": 47050 }, { "epoch": 0.478668212890625, "grad_norm": 13.769668579101562, "learning_rate": 2.6715739048486044e-06, "loss": 3.1101, "step": 47055 }, { "epoch": 0.4787190755208333, "grad_norm": 14.294875144958496, "learning_rate": 2.671174964546502e-06, "loss": 3.248, "step": 47060 }, { "epoch": 0.4787699381510417, "grad_norm": 13.281566619873047, "learning_rate": 2.6707760198649268e-06, "loss": 3.5763, "step": 47065 }, { "epoch": 0.47882080078125, "grad_norm": 14.868240356445312, "learning_rate": 2.670377070814089e-06, "loss": 3.3147, "step": 47070 }, { "epoch": 0.4788716634114583, "grad_norm": 9.297042846679688, "learning_rate": 2.6699781174041935e-06, "loss": 2.9528, "step": 47075 }, { "epoch": 0.4789225260416667, "grad_norm": 10.145970344543457, "learning_rate": 2.6695791596454477e-06, "loss": 3.4101, "step": 47080 }, { "epoch": 0.478973388671875, "grad_norm": 8.714816093444824, "learning_rate": 2.66918019754806e-06, "loss": 3.1934, "step": 47085 }, { "epoch": 0.4790242513020833, "grad_norm": 12.525447845458984, "learning_rate": 2.668781231122237e-06, "loss": 3.3849, "step": 47090 }, { "epoch": 0.4790751139322917, "grad_norm": 15.579602241516113, "learning_rate": 2.6683822603781856e-06, "loss": 3.3557, "step": 47095 }, { "epoch": 0.4791259765625, "grad_norm": 12.996421813964844, "learning_rate": 2.6679832853261153e-06, "loss": 3.3016, "step": 47100 }, { "epoch": 0.4791768391927083, "grad_norm": 15.442605018615723, "learning_rate": 2.6675843059762307e-06, "loss": 3.3641, "step": 47105 }, { "epoch": 0.4792277018229167, "grad_norm": 14.138955116271973, "learning_rate": 2.667185322338743e-06, "loss": 3.3516, "step": 47110 }, { "epoch": 0.479278564453125, "grad_norm": 12.985639572143555, "learning_rate": 2.6667863344238576e-06, "loss": 3.4175, "step": 47115 }, { "epoch": 0.4793294270833333, "grad_norm": 13.8716402053833, "learning_rate": 2.666387342241783e-06, "loss": 3.1962, "step": 47120 }, { "epoch": 0.4793802897135417, "grad_norm": 10.476211547851562, "learning_rate": 2.6659883458027284e-06, "loss": 3.3406, "step": 47125 }, { "epoch": 0.47943115234375, "grad_norm": 16.908771514892578, "learning_rate": 2.6655893451169013e-06, "loss": 3.4647, "step": 47130 }, { "epoch": 0.4794820149739583, "grad_norm": 9.55135440826416, "learning_rate": 2.6651903401945097e-06, "loss": 3.6185, "step": 47135 }, { "epoch": 0.4795328776041667, "grad_norm": 13.74426555633545, "learning_rate": 2.6647913310457625e-06, "loss": 3.5157, "step": 47140 }, { "epoch": 0.479583740234375, "grad_norm": 10.291647911071777, "learning_rate": 2.6643923176808678e-06, "loss": 3.2924, "step": 47145 }, { "epoch": 0.4796346028645833, "grad_norm": 10.82001781463623, "learning_rate": 2.6639933001100347e-06, "loss": 4.0843, "step": 47150 }, { "epoch": 0.4796854654947917, "grad_norm": 10.689411163330078, "learning_rate": 2.663594278343473e-06, "loss": 3.1606, "step": 47155 }, { "epoch": 0.479736328125, "grad_norm": 12.379398345947266, "learning_rate": 2.663195252391389e-06, "loss": 3.0887, "step": 47160 }, { "epoch": 0.4797871907552083, "grad_norm": 13.633697509765625, "learning_rate": 2.6627962222639945e-06, "loss": 3.2125, "step": 47165 }, { "epoch": 0.4798380533854167, "grad_norm": 14.551989555358887, "learning_rate": 2.662397187971496e-06, "loss": 3.0531, "step": 47170 }, { "epoch": 0.479888916015625, "grad_norm": 11.615093231201172, "learning_rate": 2.6619981495241047e-06, "loss": 3.2043, "step": 47175 }, { "epoch": 0.4799397786458333, "grad_norm": 8.46528434753418, "learning_rate": 2.661599106932029e-06, "loss": 3.3052, "step": 47180 }, { "epoch": 0.4799906412760417, "grad_norm": 13.535140037536621, "learning_rate": 2.661200060205478e-06, "loss": 3.2812, "step": 47185 }, { "epoch": 0.48004150390625, "grad_norm": 9.564166069030762, "learning_rate": 2.6608010093546626e-06, "loss": 3.0153, "step": 47190 }, { "epoch": 0.4800923665364583, "grad_norm": 13.626891136169434, "learning_rate": 2.6604019543897907e-06, "loss": 3.2109, "step": 47195 }, { "epoch": 0.4801432291666667, "grad_norm": 12.772491455078125, "learning_rate": 2.6600028953210733e-06, "loss": 3.2223, "step": 47200 }, { "epoch": 0.480194091796875, "grad_norm": 9.005960464477539, "learning_rate": 2.65960383215872e-06, "loss": 3.5045, "step": 47205 }, { "epoch": 0.4802449544270833, "grad_norm": 8.408181190490723, "learning_rate": 2.6592047649129395e-06, "loss": 3.6569, "step": 47210 }, { "epoch": 0.4802958170572917, "grad_norm": 13.204599380493164, "learning_rate": 2.6588056935939434e-06, "loss": 3.4019, "step": 47215 }, { "epoch": 0.4803466796875, "grad_norm": 7.732039451599121, "learning_rate": 2.658406618211941e-06, "loss": 3.2256, "step": 47220 }, { "epoch": 0.4803975423177083, "grad_norm": 8.743809700012207, "learning_rate": 2.6580075387771432e-06, "loss": 3.6135, "step": 47225 }, { "epoch": 0.4804484049479167, "grad_norm": 14.294320106506348, "learning_rate": 2.65760845529976e-06, "loss": 3.4267, "step": 47230 }, { "epoch": 0.480499267578125, "grad_norm": 13.660139083862305, "learning_rate": 2.6572093677900013e-06, "loss": 3.468, "step": 47235 }, { "epoch": 0.4805501302083333, "grad_norm": 13.269354820251465, "learning_rate": 2.6568102762580784e-06, "loss": 3.2078, "step": 47240 }, { "epoch": 0.4806009928385417, "grad_norm": 11.7168550491333, "learning_rate": 2.6564111807142017e-06, "loss": 3.388, "step": 47245 }, { "epoch": 0.48065185546875, "grad_norm": 9.902116775512695, "learning_rate": 2.6560120811685826e-06, "loss": 3.5782, "step": 47250 }, { "epoch": 0.4807027180989583, "grad_norm": 8.437605857849121, "learning_rate": 2.6556129776314304e-06, "loss": 3.279, "step": 47255 }, { "epoch": 0.4807535807291667, "grad_norm": 14.715630531311035, "learning_rate": 2.6552138701129575e-06, "loss": 2.9661, "step": 47260 }, { "epoch": 0.480804443359375, "grad_norm": 16.577116012573242, "learning_rate": 2.6548147586233747e-06, "loss": 3.4551, "step": 47265 }, { "epoch": 0.4808553059895833, "grad_norm": 10.214470863342285, "learning_rate": 2.6544156431728934e-06, "loss": 3.3104, "step": 47270 }, { "epoch": 0.4809061686197917, "grad_norm": 13.728621482849121, "learning_rate": 2.6540165237717235e-06, "loss": 3.2211, "step": 47275 }, { "epoch": 0.48095703125, "grad_norm": 10.931455612182617, "learning_rate": 2.6536174004300783e-06, "loss": 2.9952, "step": 47280 }, { "epoch": 0.4810078938802083, "grad_norm": 12.8707275390625, "learning_rate": 2.653218273158168e-06, "loss": 3.3947, "step": 47285 }, { "epoch": 0.4810587565104167, "grad_norm": 11.696076393127441, "learning_rate": 2.6528191419662043e-06, "loss": 3.3118, "step": 47290 }, { "epoch": 0.481109619140625, "grad_norm": 11.292045593261719, "learning_rate": 2.652420006864399e-06, "loss": 3.5174, "step": 47295 }, { "epoch": 0.4811604817708333, "grad_norm": 11.5274658203125, "learning_rate": 2.6520208678629637e-06, "loss": 3.4944, "step": 47300 }, { "epoch": 0.4812113444010417, "grad_norm": 13.709296226501465, "learning_rate": 2.651621724972111e-06, "loss": 3.5253, "step": 47305 }, { "epoch": 0.48126220703125, "grad_norm": 13.923632621765137, "learning_rate": 2.651222578202053e-06, "loss": 3.2138, "step": 47310 }, { "epoch": 0.4813130696614583, "grad_norm": 13.395757675170898, "learning_rate": 2.6508234275629996e-06, "loss": 3.4306, "step": 47315 }, { "epoch": 0.4813639322916667, "grad_norm": 14.269627571105957, "learning_rate": 2.6504242730651664e-06, "loss": 3.2937, "step": 47320 }, { "epoch": 0.481414794921875, "grad_norm": 11.33421516418457, "learning_rate": 2.650025114718763e-06, "loss": 2.8693, "step": 47325 }, { "epoch": 0.4814656575520833, "grad_norm": 12.996232032775879, "learning_rate": 2.649625952534001e-06, "loss": 3.3136, "step": 47330 }, { "epoch": 0.4815165201822917, "grad_norm": 12.771052360534668, "learning_rate": 2.6492267865210967e-06, "loss": 3.3747, "step": 47335 }, { "epoch": 0.4815673828125, "grad_norm": 11.761916160583496, "learning_rate": 2.648827616690259e-06, "loss": 3.9257, "step": 47340 }, { "epoch": 0.4816182454427083, "grad_norm": 7.735947608947754, "learning_rate": 2.648428443051703e-06, "loss": 3.2851, "step": 47345 }, { "epoch": 0.4816691080729167, "grad_norm": 17.554040908813477, "learning_rate": 2.64802926561564e-06, "loss": 3.3919, "step": 47350 }, { "epoch": 0.481719970703125, "grad_norm": 7.973572254180908, "learning_rate": 2.6476300843922844e-06, "loss": 3.1638, "step": 47355 }, { "epoch": 0.4817708333333333, "grad_norm": 9.76625919342041, "learning_rate": 2.6472308993918465e-06, "loss": 3.3234, "step": 47360 }, { "epoch": 0.4818216959635417, "grad_norm": 8.04384708404541, "learning_rate": 2.646831710624542e-06, "loss": 3.2401, "step": 47365 }, { "epoch": 0.48187255859375, "grad_norm": 11.144789695739746, "learning_rate": 2.6464325181005824e-06, "loss": 3.1819, "step": 47370 }, { "epoch": 0.4819234212239583, "grad_norm": 17.865840911865234, "learning_rate": 2.646033321830182e-06, "loss": 3.6869, "step": 47375 }, { "epoch": 0.4819742838541667, "grad_norm": 11.546830177307129, "learning_rate": 2.6456341218235525e-06, "loss": 3.2895, "step": 47380 }, { "epoch": 0.482025146484375, "grad_norm": 17.063457489013672, "learning_rate": 2.6452349180909103e-06, "loss": 3.4353, "step": 47385 }, { "epoch": 0.4820760091145833, "grad_norm": 13.253219604492188, "learning_rate": 2.644835710642466e-06, "loss": 3.2576, "step": 47390 }, { "epoch": 0.4821268717447917, "grad_norm": 9.14925765991211, "learning_rate": 2.6444364994884348e-06, "loss": 3.4164, "step": 47395 }, { "epoch": 0.482177734375, "grad_norm": 11.442546844482422, "learning_rate": 2.6440372846390293e-06, "loss": 3.2947, "step": 47400 }, { "epoch": 0.4822285970052083, "grad_norm": 12.624979019165039, "learning_rate": 2.6436380661044648e-06, "loss": 4.0104, "step": 47405 }, { "epoch": 0.4822794596354167, "grad_norm": 8.066694259643555, "learning_rate": 2.643238843894954e-06, "loss": 2.85, "step": 47410 }, { "epoch": 0.482330322265625, "grad_norm": 9.832465171813965, "learning_rate": 2.6428396180207115e-06, "loss": 3.0949, "step": 47415 }, { "epoch": 0.4823811848958333, "grad_norm": 12.458390235900879, "learning_rate": 2.6424403884919515e-06, "loss": 3.596, "step": 47420 }, { "epoch": 0.4824320475260417, "grad_norm": 10.591827392578125, "learning_rate": 2.6420411553188878e-06, "loss": 3.4678, "step": 47425 }, { "epoch": 0.48248291015625, "grad_norm": 10.282755851745605, "learning_rate": 2.641641918511734e-06, "loss": 3.4219, "step": 47430 }, { "epoch": 0.4825337727864583, "grad_norm": 10.006214141845703, "learning_rate": 2.641242678080706e-06, "loss": 3.3134, "step": 47435 }, { "epoch": 0.4825846354166667, "grad_norm": 14.434442520141602, "learning_rate": 2.640843434036018e-06, "loss": 3.4595, "step": 47440 }, { "epoch": 0.482635498046875, "grad_norm": 8.330063819885254, "learning_rate": 2.6404441863878832e-06, "loss": 3.4476, "step": 47445 }, { "epoch": 0.4826863606770833, "grad_norm": 21.621471405029297, "learning_rate": 2.640044935146518e-06, "loss": 3.7481, "step": 47450 }, { "epoch": 0.4827372233072917, "grad_norm": 15.059220314025879, "learning_rate": 2.639645680322136e-06, "loss": 3.0038, "step": 47455 }, { "epoch": 0.4827880859375, "grad_norm": 13.724431037902832, "learning_rate": 2.639246421924952e-06, "loss": 3.3143, "step": 47460 }, { "epoch": 0.4828389485677083, "grad_norm": 15.076763153076172, "learning_rate": 2.6388471599651815e-06, "loss": 3.2902, "step": 47465 }, { "epoch": 0.4828898111979167, "grad_norm": 6.995392322540283, "learning_rate": 2.6384478944530397e-06, "loss": 3.4319, "step": 47470 }, { "epoch": 0.482940673828125, "grad_norm": 12.359930992126465, "learning_rate": 2.638048625398741e-06, "loss": 3.4122, "step": 47475 }, { "epoch": 0.4829915364583333, "grad_norm": 8.89985179901123, "learning_rate": 2.6376493528125014e-06, "loss": 3.0402, "step": 47480 }, { "epoch": 0.4830423990885417, "grad_norm": 15.408087730407715, "learning_rate": 2.6372500767045344e-06, "loss": 3.0548, "step": 47485 }, { "epoch": 0.48309326171875, "grad_norm": 12.19315242767334, "learning_rate": 2.636850797085058e-06, "loss": 3.2569, "step": 47490 }, { "epoch": 0.4831441243489583, "grad_norm": 14.964292526245117, "learning_rate": 2.636451513964286e-06, "loss": 3.6578, "step": 47495 }, { "epoch": 0.4831949869791667, "grad_norm": 15.417756080627441, "learning_rate": 2.636052227352435e-06, "loss": 3.4411, "step": 47500 }, { "epoch": 0.483245849609375, "grad_norm": 10.532073974609375, "learning_rate": 2.635652937259719e-06, "loss": 3.3885, "step": 47505 }, { "epoch": 0.4832967122395833, "grad_norm": 8.670329093933105, "learning_rate": 2.6352536436963548e-06, "loss": 3.315, "step": 47510 }, { "epoch": 0.4833475748697917, "grad_norm": 9.610486030578613, "learning_rate": 2.634854346672559e-06, "loss": 3.4857, "step": 47515 }, { "epoch": 0.4833984375, "grad_norm": 15.70473861694336, "learning_rate": 2.634455046198546e-06, "loss": 3.2049, "step": 47520 }, { "epoch": 0.4834493001302083, "grad_norm": 12.451311111450195, "learning_rate": 2.6340557422845324e-06, "loss": 3.6464, "step": 47525 }, { "epoch": 0.4835001627604167, "grad_norm": 13.600598335266113, "learning_rate": 2.633656434940735e-06, "loss": 3.6325, "step": 47530 }, { "epoch": 0.483551025390625, "grad_norm": 8.907392501831055, "learning_rate": 2.6332571241773693e-06, "loss": 3.3728, "step": 47535 }, { "epoch": 0.4836018880208333, "grad_norm": 15.609326362609863, "learning_rate": 2.6328578100046514e-06, "loss": 3.5422, "step": 47540 }, { "epoch": 0.4836527506510417, "grad_norm": 16.75021743774414, "learning_rate": 2.6324584924327982e-06, "loss": 3.0167, "step": 47545 }, { "epoch": 0.48370361328125, "grad_norm": 7.8447346687316895, "learning_rate": 2.6320591714720256e-06, "loss": 3.3293, "step": 47550 }, { "epoch": 0.4837544759114583, "grad_norm": 12.660308837890625, "learning_rate": 2.631659847132551e-06, "loss": 3.2761, "step": 47555 }, { "epoch": 0.4838053385416667, "grad_norm": 8.696551322937012, "learning_rate": 2.6312605194245894e-06, "loss": 3.1498, "step": 47560 }, { "epoch": 0.483856201171875, "grad_norm": 15.263006210327148, "learning_rate": 2.6308611883583592e-06, "loss": 3.5946, "step": 47565 }, { "epoch": 0.4839070638020833, "grad_norm": 9.13163948059082, "learning_rate": 2.6304618539440763e-06, "loss": 3.4685, "step": 47570 }, { "epoch": 0.4839579264322917, "grad_norm": 10.969802856445312, "learning_rate": 2.630062516191958e-06, "loss": 3.3894, "step": 47575 }, { "epoch": 0.4840087890625, "grad_norm": 8.642189025878906, "learning_rate": 2.6296631751122208e-06, "loss": 3.314, "step": 47580 }, { "epoch": 0.4840596516927083, "grad_norm": 15.151670455932617, "learning_rate": 2.629263830715082e-06, "loss": 2.9757, "step": 47585 }, { "epoch": 0.4841105143229167, "grad_norm": 11.499610900878906, "learning_rate": 2.628864483010759e-06, "loss": 3.4151, "step": 47590 }, { "epoch": 0.484161376953125, "grad_norm": 13.054522514343262, "learning_rate": 2.628465132009469e-06, "loss": 3.5306, "step": 47595 }, { "epoch": 0.4842122395833333, "grad_norm": 15.709760665893555, "learning_rate": 2.628065777721428e-06, "loss": 3.5197, "step": 47600 }, { "epoch": 0.4842631022135417, "grad_norm": 7.843846797943115, "learning_rate": 2.6276664201568553e-06, "loss": 3.3044, "step": 47605 }, { "epoch": 0.48431396484375, "grad_norm": 13.429792404174805, "learning_rate": 2.6272670593259674e-06, "loss": 3.3137, "step": 47610 }, { "epoch": 0.4843648274739583, "grad_norm": 12.689478874206543, "learning_rate": 2.6268676952389815e-06, "loss": 3.4891, "step": 47615 }, { "epoch": 0.4844156901041667, "grad_norm": 8.793680191040039, "learning_rate": 2.6264683279061166e-06, "loss": 3.6951, "step": 47620 }, { "epoch": 0.484466552734375, "grad_norm": 11.071285247802734, "learning_rate": 2.6260689573375885e-06, "loss": 3.2139, "step": 47625 }, { "epoch": 0.4845174153645833, "grad_norm": 8.690736770629883, "learning_rate": 2.6256695835436164e-06, "loss": 3.5408, "step": 47630 }, { "epoch": 0.4845682779947917, "grad_norm": 13.136496543884277, "learning_rate": 2.625270206534418e-06, "loss": 3.121, "step": 47635 }, { "epoch": 0.484619140625, "grad_norm": 10.826349258422852, "learning_rate": 2.6248708263202105e-06, "loss": 3.7452, "step": 47640 }, { "epoch": 0.4846700032552083, "grad_norm": 14.85303020477295, "learning_rate": 2.6244714429112126e-06, "loss": 3.6537, "step": 47645 }, { "epoch": 0.4847208658854167, "grad_norm": 15.568806648254395, "learning_rate": 2.624072056317643e-06, "loss": 3.1806, "step": 47650 }, { "epoch": 0.484771728515625, "grad_norm": 9.954267501831055, "learning_rate": 2.6236726665497186e-06, "loss": 3.5269, "step": 47655 }, { "epoch": 0.4848225911458333, "grad_norm": 12.706521034240723, "learning_rate": 2.6232732736176593e-06, "loss": 3.9319, "step": 47660 }, { "epoch": 0.4848734537760417, "grad_norm": 11.31127643585205, "learning_rate": 2.622873877531681e-06, "loss": 3.0772, "step": 47665 }, { "epoch": 0.48492431640625, "grad_norm": 14.69467830657959, "learning_rate": 2.622474478302005e-06, "loss": 3.0709, "step": 47670 }, { "epoch": 0.4849751790364583, "grad_norm": 13.351578712463379, "learning_rate": 2.622075075938848e-06, "loss": 3.3081, "step": 47675 }, { "epoch": 0.4850260416666667, "grad_norm": 12.570734024047852, "learning_rate": 2.6216756704524294e-06, "loss": 3.3593, "step": 47680 }, { "epoch": 0.485076904296875, "grad_norm": 14.045585632324219, "learning_rate": 2.621276261852967e-06, "loss": 3.3143, "step": 47685 }, { "epoch": 0.4851277669270833, "grad_norm": 13.33901596069336, "learning_rate": 2.6208768501506802e-06, "loss": 3.7336, "step": 47690 }, { "epoch": 0.4851786295572917, "grad_norm": 11.250321388244629, "learning_rate": 2.6204774353557883e-06, "loss": 3.3786, "step": 47695 }, { "epoch": 0.4852294921875, "grad_norm": 12.41751480102539, "learning_rate": 2.62007801747851e-06, "loss": 2.9969, "step": 47700 }, { "epoch": 0.4852803548177083, "grad_norm": 13.19685173034668, "learning_rate": 2.6196785965290635e-06, "loss": 3.379, "step": 47705 }, { "epoch": 0.4853312174479167, "grad_norm": 9.21330738067627, "learning_rate": 2.6192791725176687e-06, "loss": 3.3872, "step": 47710 }, { "epoch": 0.485382080078125, "grad_norm": 14.119096755981445, "learning_rate": 2.618879745454544e-06, "loss": 3.1882, "step": 47715 }, { "epoch": 0.4854329427083333, "grad_norm": 15.260636329650879, "learning_rate": 2.61848031534991e-06, "loss": 2.9966, "step": 47720 }, { "epoch": 0.4854838053385417, "grad_norm": 10.559808731079102, "learning_rate": 2.6180808822139854e-06, "loss": 3.0944, "step": 47725 }, { "epoch": 0.48553466796875, "grad_norm": 12.172982215881348, "learning_rate": 2.617681446056988e-06, "loss": 3.019, "step": 47730 }, { "epoch": 0.4855855305989583, "grad_norm": 14.875792503356934, "learning_rate": 2.6172820068891403e-06, "loss": 3.4954, "step": 47735 }, { "epoch": 0.4856363932291667, "grad_norm": 12.607369422912598, "learning_rate": 2.616882564720659e-06, "loss": 3.3797, "step": 47740 }, { "epoch": 0.485687255859375, "grad_norm": 14.492063522338867, "learning_rate": 2.6164831195617655e-06, "loss": 3.7695, "step": 47745 }, { "epoch": 0.4857381184895833, "grad_norm": 9.796468734741211, "learning_rate": 2.6160836714226796e-06, "loss": 3.4647, "step": 47750 }, { "epoch": 0.4857889811197917, "grad_norm": 11.6259765625, "learning_rate": 2.6156842203136196e-06, "loss": 3.1608, "step": 47755 }, { "epoch": 0.48583984375, "grad_norm": 8.976288795471191, "learning_rate": 2.6152847662448067e-06, "loss": 3.7266, "step": 47760 }, { "epoch": 0.4858907063802083, "grad_norm": 9.242552757263184, "learning_rate": 2.6148853092264607e-06, "loss": 3.4968, "step": 47765 }, { "epoch": 0.4859415690104167, "grad_norm": 9.405117988586426, "learning_rate": 2.6144858492688003e-06, "loss": 3.2472, "step": 47770 }, { "epoch": 0.485992431640625, "grad_norm": 13.579270362854004, "learning_rate": 2.6140863863820475e-06, "loss": 3.0759, "step": 47775 }, { "epoch": 0.4860432942708333, "grad_norm": 14.4361572265625, "learning_rate": 2.6136869205764216e-06, "loss": 3.3667, "step": 47780 }, { "epoch": 0.4860941569010417, "grad_norm": 13.49770736694336, "learning_rate": 2.6132874518621423e-06, "loss": 3.4313, "step": 47785 }, { "epoch": 0.48614501953125, "grad_norm": 12.228191375732422, "learning_rate": 2.61288798024943e-06, "loss": 3.0355, "step": 47790 }, { "epoch": 0.4861958821614583, "grad_norm": 9.5272216796875, "learning_rate": 2.612488505748506e-06, "loss": 3.2651, "step": 47795 }, { "epoch": 0.4862467447916667, "grad_norm": 12.149260520935059, "learning_rate": 2.6120890283695905e-06, "loss": 3.6675, "step": 47800 }, { "epoch": 0.486297607421875, "grad_norm": 10.929622650146484, "learning_rate": 2.6116895481229033e-06, "loss": 3.3229, "step": 47805 }, { "epoch": 0.4863484700520833, "grad_norm": 15.386078834533691, "learning_rate": 2.611290065018666e-06, "loss": 3.3882, "step": 47810 }, { "epoch": 0.4863993326822917, "grad_norm": 6.863919258117676, "learning_rate": 2.6108905790670987e-06, "loss": 3.9467, "step": 47815 }, { "epoch": 0.4864501953125, "grad_norm": 11.039124488830566, "learning_rate": 2.610491090278421e-06, "loss": 3.0379, "step": 47820 }, { "epoch": 0.4865010579427083, "grad_norm": 12.934539794921875, "learning_rate": 2.6100915986628562e-06, "loss": 3.4597, "step": 47825 }, { "epoch": 0.4865519205729167, "grad_norm": 13.209774017333984, "learning_rate": 2.609692104230624e-06, "loss": 3.5327, "step": 47830 }, { "epoch": 0.486602783203125, "grad_norm": 10.195297241210938, "learning_rate": 2.609292606991944e-06, "loss": 3.6324, "step": 47835 }, { "epoch": 0.4866536458333333, "grad_norm": 14.529973983764648, "learning_rate": 2.60889310695704e-06, "loss": 3.2091, "step": 47840 }, { "epoch": 0.4867045084635417, "grad_norm": 15.05029582977295, "learning_rate": 2.6084936041361315e-06, "loss": 3.3564, "step": 47845 }, { "epoch": 0.48675537109375, "grad_norm": 10.648780822753906, "learning_rate": 2.6080940985394393e-06, "loss": 2.9378, "step": 47850 }, { "epoch": 0.4868062337239583, "grad_norm": 15.958991050720215, "learning_rate": 2.6076945901771854e-06, "loss": 3.5244, "step": 47855 }, { "epoch": 0.4868570963541667, "grad_norm": 9.16628360748291, "learning_rate": 2.607295079059591e-06, "loss": 3.666, "step": 47860 }, { "epoch": 0.486907958984375, "grad_norm": 13.665358543395996, "learning_rate": 2.606895565196877e-06, "loss": 3.5797, "step": 47865 }, { "epoch": 0.4869588216145833, "grad_norm": 13.204471588134766, "learning_rate": 2.6064960485992657e-06, "loss": 3.0869, "step": 47870 }, { "epoch": 0.4870096842447917, "grad_norm": 9.907011032104492, "learning_rate": 2.6060965292769784e-06, "loss": 3.6301, "step": 47875 }, { "epoch": 0.487060546875, "grad_norm": 7.6872053146362305, "learning_rate": 2.6056970072402365e-06, "loss": 3.0106, "step": 47880 }, { "epoch": 0.4871114095052083, "grad_norm": 10.525590896606445, "learning_rate": 2.605297482499262e-06, "loss": 3.1349, "step": 47885 }, { "epoch": 0.4871622721354167, "grad_norm": 13.306690216064453, "learning_rate": 2.6048979550642755e-06, "loss": 3.7184, "step": 47890 }, { "epoch": 0.487213134765625, "grad_norm": 8.08199405670166, "learning_rate": 2.6044984249455e-06, "loss": 3.631, "step": 47895 }, { "epoch": 0.4872639973958333, "grad_norm": 14.965778350830078, "learning_rate": 2.6040988921531575e-06, "loss": 3.4085, "step": 47900 }, { "epoch": 0.4873148600260417, "grad_norm": 11.588021278381348, "learning_rate": 2.6036993566974693e-06, "loss": 2.9787, "step": 47905 }, { "epoch": 0.48736572265625, "grad_norm": 8.804408073425293, "learning_rate": 2.6032998185886575e-06, "loss": 2.9821, "step": 47910 }, { "epoch": 0.4874165852864583, "grad_norm": 14.51441478729248, "learning_rate": 2.6029002778369445e-06, "loss": 3.24, "step": 47915 }, { "epoch": 0.4874674479166667, "grad_norm": 10.842745780944824, "learning_rate": 2.6025007344525527e-06, "loss": 3.3752, "step": 47920 }, { "epoch": 0.487518310546875, "grad_norm": 15.25305461883545, "learning_rate": 2.6021011884457025e-06, "loss": 3.3172, "step": 47925 }, { "epoch": 0.4875691731770833, "grad_norm": 11.409184455871582, "learning_rate": 2.6017016398266187e-06, "loss": 3.2808, "step": 47930 }, { "epoch": 0.4876200358072917, "grad_norm": 13.894691467285156, "learning_rate": 2.601302088605523e-06, "loss": 3.082, "step": 47935 }, { "epoch": 0.4876708984375, "grad_norm": 11.15825080871582, "learning_rate": 2.6009025347926357e-06, "loss": 3.525, "step": 47940 }, { "epoch": 0.4877217610677083, "grad_norm": 12.198467254638672, "learning_rate": 2.600502978398183e-06, "loss": 3.4651, "step": 47945 }, { "epoch": 0.4877726236979167, "grad_norm": 17.750070571899414, "learning_rate": 2.6001034194323838e-06, "loss": 3.5371, "step": 47950 }, { "epoch": 0.487823486328125, "grad_norm": 9.900705337524414, "learning_rate": 2.5997038579054636e-06, "loss": 3.3005, "step": 47955 }, { "epoch": 0.4878743489583333, "grad_norm": 11.648094177246094, "learning_rate": 2.5993042938276423e-06, "loss": 3.3285, "step": 47960 }, { "epoch": 0.4879252115885417, "grad_norm": 17.3449764251709, "learning_rate": 2.5989047272091457e-06, "loss": 3.7537, "step": 47965 }, { "epoch": 0.48797607421875, "grad_norm": 10.940754890441895, "learning_rate": 2.598505158060194e-06, "loss": 3.6213, "step": 47970 }, { "epoch": 0.4880269368489583, "grad_norm": 11.876534461975098, "learning_rate": 2.5981055863910115e-06, "loss": 3.2027, "step": 47975 }, { "epoch": 0.4880777994791667, "grad_norm": 11.180745124816895, "learning_rate": 2.597706012211821e-06, "loss": 3.1598, "step": 47980 }, { "epoch": 0.488128662109375, "grad_norm": 19.479246139526367, "learning_rate": 2.597306435532846e-06, "loss": 3.4352, "step": 47985 }, { "epoch": 0.4881795247395833, "grad_norm": 6.949069976806641, "learning_rate": 2.5969068563643076e-06, "loss": 2.9822, "step": 47990 }, { "epoch": 0.4882303873697917, "grad_norm": 13.0645751953125, "learning_rate": 2.596507274716431e-06, "loss": 3.5882, "step": 47995 }, { "epoch": 0.48828125, "grad_norm": 14.730225563049316, "learning_rate": 2.5961076905994386e-06, "loss": 3.2193, "step": 48000 }, { "epoch": 0.4883321126302083, "grad_norm": 14.940589904785156, "learning_rate": 2.5957081040235536e-06, "loss": 3.6021, "step": 48005 }, { "epoch": 0.4883829752604167, "grad_norm": 9.172715187072754, "learning_rate": 2.5953085149989993e-06, "loss": 3.2141, "step": 48010 }, { "epoch": 0.488433837890625, "grad_norm": 12.160663604736328, "learning_rate": 2.5949089235359997e-06, "loss": 3.2393, "step": 48015 }, { "epoch": 0.4884847005208333, "grad_norm": 9.29576587677002, "learning_rate": 2.594509329644777e-06, "loss": 3.3738, "step": 48020 }, { "epoch": 0.4885355631510417, "grad_norm": 8.26999568939209, "learning_rate": 2.5941097333355562e-06, "loss": 3.4324, "step": 48025 }, { "epoch": 0.48858642578125, "grad_norm": 12.226856231689453, "learning_rate": 2.59371013461856e-06, "loss": 3.3112, "step": 48030 }, { "epoch": 0.4886372884114583, "grad_norm": 12.230896949768066, "learning_rate": 2.593310533504012e-06, "loss": 3.2613, "step": 48035 }, { "epoch": 0.4886881510416667, "grad_norm": 7.127318382263184, "learning_rate": 2.592910930002136e-06, "loss": 3.5765, "step": 48040 }, { "epoch": 0.488739013671875, "grad_norm": 6.903397560119629, "learning_rate": 2.5925113241231566e-06, "loss": 3.5333, "step": 48045 }, { "epoch": 0.4887898763020833, "grad_norm": 14.250541687011719, "learning_rate": 2.592111715877297e-06, "loss": 3.2821, "step": 48050 }, { "epoch": 0.4888407389322917, "grad_norm": 10.005630493164062, "learning_rate": 2.5917121052747795e-06, "loss": 3.3414, "step": 48055 }, { "epoch": 0.4888916015625, "grad_norm": 14.25844955444336, "learning_rate": 2.5913124923258308e-06, "loss": 3.4396, "step": 48060 }, { "epoch": 0.4889424641927083, "grad_norm": 11.528429985046387, "learning_rate": 2.5909128770406737e-06, "loss": 3.6652, "step": 48065 }, { "epoch": 0.4889933268229167, "grad_norm": 11.45598316192627, "learning_rate": 2.5905132594295316e-06, "loss": 3.5595, "step": 48070 }, { "epoch": 0.489044189453125, "grad_norm": 11.216914176940918, "learning_rate": 2.5901136395026294e-06, "loss": 3.5003, "step": 48075 }, { "epoch": 0.4890950520833333, "grad_norm": 16.073488235473633, "learning_rate": 2.589714017270191e-06, "loss": 3.3486, "step": 48080 }, { "epoch": 0.4891459147135417, "grad_norm": 13.13344669342041, "learning_rate": 2.589314392742441e-06, "loss": 3.7033, "step": 48085 }, { "epoch": 0.48919677734375, "grad_norm": 9.148369789123535, "learning_rate": 2.588914765929604e-06, "loss": 3.5296, "step": 48090 }, { "epoch": 0.4892476399739583, "grad_norm": 16.00242042541504, "learning_rate": 2.588515136841902e-06, "loss": 3.1364, "step": 48095 }, { "epoch": 0.4892985026041667, "grad_norm": 16.407093048095703, "learning_rate": 2.588115505489563e-06, "loss": 3.4385, "step": 48100 }, { "epoch": 0.489349365234375, "grad_norm": 13.131834983825684, "learning_rate": 2.587715871882808e-06, "loss": 3.7845, "step": 48105 }, { "epoch": 0.4894002278645833, "grad_norm": 13.327959060668945, "learning_rate": 2.5873162360318648e-06, "loss": 3.2955, "step": 48110 }, { "epoch": 0.4894510904947917, "grad_norm": 9.44006061553955, "learning_rate": 2.586916597946956e-06, "loss": 3.2827, "step": 48115 }, { "epoch": 0.489501953125, "grad_norm": 13.902898788452148, "learning_rate": 2.5865169576383055e-06, "loss": 3.2474, "step": 48120 }, { "epoch": 0.4895528157552083, "grad_norm": 12.939776420593262, "learning_rate": 2.5861173151161404e-06, "loss": 3.6203, "step": 48125 }, { "epoch": 0.4896036783854167, "grad_norm": 15.50821590423584, "learning_rate": 2.585717670390684e-06, "loss": 3.5654, "step": 48130 }, { "epoch": 0.489654541015625, "grad_norm": 13.119051933288574, "learning_rate": 2.585318023472161e-06, "loss": 3.1919, "step": 48135 }, { "epoch": 0.4897054036458333, "grad_norm": 10.716808319091797, "learning_rate": 2.5849183743707967e-06, "loss": 3.2412, "step": 48140 }, { "epoch": 0.4897562662760417, "grad_norm": 12.619991302490234, "learning_rate": 2.584518723096816e-06, "loss": 3.2517, "step": 48145 }, { "epoch": 0.48980712890625, "grad_norm": 13.602775573730469, "learning_rate": 2.5841190696604434e-06, "loss": 2.9025, "step": 48150 }, { "epoch": 0.4898579915364583, "grad_norm": 11.507429122924805, "learning_rate": 2.5837194140719057e-06, "loss": 3.4202, "step": 48155 }, { "epoch": 0.4899088541666667, "grad_norm": 12.635565757751465, "learning_rate": 2.5833197563414246e-06, "loss": 3.4805, "step": 48160 }, { "epoch": 0.489959716796875, "grad_norm": 15.815251350402832, "learning_rate": 2.5829200964792287e-06, "loss": 3.9183, "step": 48165 }, { "epoch": 0.4900105794270833, "grad_norm": 13.597136497497559, "learning_rate": 2.5825204344955414e-06, "loss": 3.4959, "step": 48170 }, { "epoch": 0.4900614420572917, "grad_norm": 10.966267585754395, "learning_rate": 2.5821207704005885e-06, "loss": 3.4393, "step": 48175 }, { "epoch": 0.4901123046875, "grad_norm": 12.54188346862793, "learning_rate": 2.5817211042045954e-06, "loss": 3.4757, "step": 48180 }, { "epoch": 0.4901631673177083, "grad_norm": 8.404973983764648, "learning_rate": 2.581321435917786e-06, "loss": 3.1954, "step": 48185 }, { "epoch": 0.4902140299479167, "grad_norm": 11.254782676696777, "learning_rate": 2.5809217655503882e-06, "loss": 3.3841, "step": 48190 }, { "epoch": 0.490264892578125, "grad_norm": 14.785955429077148, "learning_rate": 2.580522093112626e-06, "loss": 3.095, "step": 48195 }, { "epoch": 0.4903157552083333, "grad_norm": 16.44007110595703, "learning_rate": 2.5801224186147243e-06, "loss": 3.0962, "step": 48200 }, { "epoch": 0.4903666178385417, "grad_norm": 15.266268730163574, "learning_rate": 2.5797227420669107e-06, "loss": 3.2877, "step": 48205 }, { "epoch": 0.49041748046875, "grad_norm": 11.603970527648926, "learning_rate": 2.5793230634794087e-06, "loss": 3.2051, "step": 48210 }, { "epoch": 0.4904683430989583, "grad_norm": 10.646866798400879, "learning_rate": 2.578923382862446e-06, "loss": 3.1391, "step": 48215 }, { "epoch": 0.4905192057291667, "grad_norm": 12.524734497070312, "learning_rate": 2.5785237002262463e-06, "loss": 3.5244, "step": 48220 }, { "epoch": 0.490570068359375, "grad_norm": 9.726661682128906, "learning_rate": 2.578124015581036e-06, "loss": 3.2464, "step": 48225 }, { "epoch": 0.4906209309895833, "grad_norm": 9.554346084594727, "learning_rate": 2.5777243289370425e-06, "loss": 3.1529, "step": 48230 }, { "epoch": 0.4906717936197917, "grad_norm": 15.186952590942383, "learning_rate": 2.57732464030449e-06, "loss": 3.4499, "step": 48235 }, { "epoch": 0.49072265625, "grad_norm": 9.830705642700195, "learning_rate": 2.5769249496936044e-06, "loss": 3.4578, "step": 48240 }, { "epoch": 0.4907735188802083, "grad_norm": 11.41324520111084, "learning_rate": 2.5765252571146127e-06, "loss": 3.267, "step": 48245 }, { "epoch": 0.4908243815104167, "grad_norm": 10.870372772216797, "learning_rate": 2.57612556257774e-06, "loss": 3.5487, "step": 48250 }, { "epoch": 0.490875244140625, "grad_norm": 15.754440307617188, "learning_rate": 2.575725866093213e-06, "loss": 3.463, "step": 48255 }, { "epoch": 0.4909261067708333, "grad_norm": 14.528594017028809, "learning_rate": 2.575326167671258e-06, "loss": 3.1897, "step": 48260 }, { "epoch": 0.4909769694010417, "grad_norm": 10.713245391845703, "learning_rate": 2.5749264673221005e-06, "loss": 3.5306, "step": 48265 }, { "epoch": 0.49102783203125, "grad_norm": 11.782358169555664, "learning_rate": 2.5745267650559674e-06, "loss": 3.3349, "step": 48270 }, { "epoch": 0.4910786946614583, "grad_norm": 14.125024795532227, "learning_rate": 2.574127060883084e-06, "loss": 3.4315, "step": 48275 }, { "epoch": 0.4911295572916667, "grad_norm": 11.369453430175781, "learning_rate": 2.573727354813678e-06, "loss": 3.0201, "step": 48280 }, { "epoch": 0.491180419921875, "grad_norm": 8.513801574707031, "learning_rate": 2.5733276468579755e-06, "loss": 3.1815, "step": 48285 }, { "epoch": 0.4912312825520833, "grad_norm": 10.482697486877441, "learning_rate": 2.5729279370262017e-06, "loss": 3.2698, "step": 48290 }, { "epoch": 0.4912821451822917, "grad_norm": 13.890192031860352, "learning_rate": 2.5725282253285837e-06, "loss": 3.5445, "step": 48295 }, { "epoch": 0.4913330078125, "grad_norm": 13.60200309753418, "learning_rate": 2.572128511775349e-06, "loss": 3.5674, "step": 48300 }, { "epoch": 0.4913838704427083, "grad_norm": 17.395017623901367, "learning_rate": 2.5717287963767227e-06, "loss": 3.266, "step": 48305 }, { "epoch": 0.4914347330729167, "grad_norm": 10.49577522277832, "learning_rate": 2.5713290791429334e-06, "loss": 3.5678, "step": 48310 }, { "epoch": 0.491485595703125, "grad_norm": 9.473282814025879, "learning_rate": 2.570929360084205e-06, "loss": 3.2889, "step": 48315 }, { "epoch": 0.4915364583333333, "grad_norm": 11.267034530639648, "learning_rate": 2.5705296392107666e-06, "loss": 3.3956, "step": 48320 }, { "epoch": 0.4915873209635417, "grad_norm": 15.123723030090332, "learning_rate": 2.570129916532844e-06, "loss": 3.3584, "step": 48325 }, { "epoch": 0.49163818359375, "grad_norm": 13.88943099975586, "learning_rate": 2.5697301920606636e-06, "loss": 3.3674, "step": 48330 }, { "epoch": 0.4916890462239583, "grad_norm": 9.391311645507812, "learning_rate": 2.569330465804454e-06, "loss": 3.3737, "step": 48335 }, { "epoch": 0.4917399088541667, "grad_norm": 12.441389083862305, "learning_rate": 2.5689307377744394e-06, "loss": 2.9606, "step": 48340 }, { "epoch": 0.491790771484375, "grad_norm": 12.627106666564941, "learning_rate": 2.568531007980849e-06, "loss": 3.5774, "step": 48345 }, { "epoch": 0.4918416341145833, "grad_norm": 12.987388610839844, "learning_rate": 2.568131276433909e-06, "loss": 3.6828, "step": 48350 }, { "epoch": 0.4918924967447917, "grad_norm": 7.0246171951293945, "learning_rate": 2.5677315431438467e-06, "loss": 3.4315, "step": 48355 }, { "epoch": 0.491943359375, "grad_norm": 7.368935585021973, "learning_rate": 2.5673318081208883e-06, "loss": 3.259, "step": 48360 }, { "epoch": 0.4919942220052083, "grad_norm": 14.435423851013184, "learning_rate": 2.5669320713752618e-06, "loss": 3.3111, "step": 48365 }, { "epoch": 0.4920450846354167, "grad_norm": 14.978443145751953, "learning_rate": 2.566532332917194e-06, "loss": 3.1992, "step": 48370 }, { "epoch": 0.492095947265625, "grad_norm": 7.657479763031006, "learning_rate": 2.5661325927569124e-06, "loss": 3.4598, "step": 48375 }, { "epoch": 0.4921468098958333, "grad_norm": 11.787341117858887, "learning_rate": 2.5657328509046437e-06, "loss": 3.3408, "step": 48380 }, { "epoch": 0.4921976725260417, "grad_norm": 11.836021423339844, "learning_rate": 2.565333107370617e-06, "loss": 3.2878, "step": 48385 }, { "epoch": 0.49224853515625, "grad_norm": 10.637925148010254, "learning_rate": 2.564933362165056e-06, "loss": 3.5864, "step": 48390 }, { "epoch": 0.4922993977864583, "grad_norm": 10.810693740844727, "learning_rate": 2.564533615298192e-06, "loss": 3.2705, "step": 48395 }, { "epoch": 0.4923502604166667, "grad_norm": 13.188955307006836, "learning_rate": 2.5641338667802503e-06, "loss": 2.8802, "step": 48400 }, { "epoch": 0.492401123046875, "grad_norm": 12.960467338562012, "learning_rate": 2.5637341166214585e-06, "loss": 3.7479, "step": 48405 }, { "epoch": 0.4924519856770833, "grad_norm": 13.326948165893555, "learning_rate": 2.5633343648320445e-06, "loss": 3.3727, "step": 48410 }, { "epoch": 0.4925028483072917, "grad_norm": 10.184150695800781, "learning_rate": 2.5629346114222357e-06, "loss": 3.1968, "step": 48415 }, { "epoch": 0.4925537109375, "grad_norm": 11.197040557861328, "learning_rate": 2.5625348564022604e-06, "loss": 3.4119, "step": 48420 }, { "epoch": 0.4926045735677083, "grad_norm": 17.48925018310547, "learning_rate": 2.5621350997823454e-06, "loss": 3.5111, "step": 48425 }, { "epoch": 0.4926554361979167, "grad_norm": 8.670833587646484, "learning_rate": 2.561735341572718e-06, "loss": 3.3153, "step": 48430 }, { "epoch": 0.492706298828125, "grad_norm": 11.009799003601074, "learning_rate": 2.5613355817836073e-06, "loss": 3.3655, "step": 48435 }, { "epoch": 0.4927571614583333, "grad_norm": 10.905400276184082, "learning_rate": 2.5609358204252404e-06, "loss": 3.5021, "step": 48440 }, { "epoch": 0.4928080240885417, "grad_norm": 16.436967849731445, "learning_rate": 2.560536057507844e-06, "loss": 3.2442, "step": 48445 }, { "epoch": 0.49285888671875, "grad_norm": 9.947446823120117, "learning_rate": 2.560136293041648e-06, "loss": 3.5158, "step": 48450 }, { "epoch": 0.4929097493489583, "grad_norm": 10.083742141723633, "learning_rate": 2.5597365270368784e-06, "loss": 3.4966, "step": 48455 }, { "epoch": 0.4929606119791667, "grad_norm": 10.116064071655273, "learning_rate": 2.559336759503764e-06, "loss": 3.1833, "step": 48460 }, { "epoch": 0.493011474609375, "grad_norm": 13.628241539001465, "learning_rate": 2.5589369904525328e-06, "loss": 3.4158, "step": 48465 }, { "epoch": 0.4930623372395833, "grad_norm": 10.907445907592773, "learning_rate": 2.558537219893413e-06, "loss": 3.4594, "step": 48470 }, { "epoch": 0.4931131998697917, "grad_norm": 10.239285469055176, "learning_rate": 2.5581374478366315e-06, "loss": 3.1526, "step": 48475 }, { "epoch": 0.4931640625, "grad_norm": 8.00221061706543, "learning_rate": 2.557737674292419e-06, "loss": 3.5418, "step": 48480 }, { "epoch": 0.4932149251302083, "grad_norm": 7.948908805847168, "learning_rate": 2.5573378992709997e-06, "loss": 3.5753, "step": 48485 }, { "epoch": 0.4932657877604167, "grad_norm": 14.463628768920898, "learning_rate": 2.5569381227826047e-06, "loss": 3.1937, "step": 48490 }, { "epoch": 0.493316650390625, "grad_norm": 10.312268257141113, "learning_rate": 2.556538344837461e-06, "loss": 3.0908, "step": 48495 }, { "epoch": 0.4933675130208333, "grad_norm": 13.522259712219238, "learning_rate": 2.5561385654457977e-06, "loss": 3.4039, "step": 48500 }, { "epoch": 0.4934183756510417, "grad_norm": 18.827590942382812, "learning_rate": 2.5557387846178424e-06, "loss": 3.5376, "step": 48505 }, { "epoch": 0.49346923828125, "grad_norm": 12.525690078735352, "learning_rate": 2.5553390023638232e-06, "loss": 3.4584, "step": 48510 }, { "epoch": 0.4935201009114583, "grad_norm": 11.270380020141602, "learning_rate": 2.5549392186939686e-06, "loss": 3.3056, "step": 48515 }, { "epoch": 0.4935709635416667, "grad_norm": 13.468146324157715, "learning_rate": 2.554539433618507e-06, "loss": 3.4103, "step": 48520 }, { "epoch": 0.493621826171875, "grad_norm": 12.6211519241333, "learning_rate": 2.5541396471476677e-06, "loss": 3.5486, "step": 48525 }, { "epoch": 0.4936726888020833, "grad_norm": 15.345010757446289, "learning_rate": 2.553739859291678e-06, "loss": 4.056, "step": 48530 }, { "epoch": 0.4937235514322917, "grad_norm": 9.263572692871094, "learning_rate": 2.553340070060766e-06, "loss": 3.4742, "step": 48535 }, { "epoch": 0.4937744140625, "grad_norm": 13.135659217834473, "learning_rate": 2.552940279465162e-06, "loss": 3.0927, "step": 48540 }, { "epoch": 0.4938252766927083, "grad_norm": 9.30755615234375, "learning_rate": 2.552540487515093e-06, "loss": 3.3484, "step": 48545 }, { "epoch": 0.4938761393229167, "grad_norm": 14.75592041015625, "learning_rate": 2.552140694220788e-06, "loss": 3.3657, "step": 48550 }, { "epoch": 0.493927001953125, "grad_norm": 14.432010650634766, "learning_rate": 2.5517408995924766e-06, "loss": 3.3975, "step": 48555 }, { "epoch": 0.4939778645833333, "grad_norm": 9.98413372039795, "learning_rate": 2.551341103640385e-06, "loss": 3.1357, "step": 48560 }, { "epoch": 0.4940287272135417, "grad_norm": 13.80256175994873, "learning_rate": 2.5509413063747453e-06, "loss": 3.1355, "step": 48565 }, { "epoch": 0.49407958984375, "grad_norm": 11.76331615447998, "learning_rate": 2.5505415078057827e-06, "loss": 3.4596, "step": 48570 }, { "epoch": 0.4941304524739583, "grad_norm": 10.180501937866211, "learning_rate": 2.5501417079437287e-06, "loss": 3.5006, "step": 48575 }, { "epoch": 0.4941813151041667, "grad_norm": 7.968472003936768, "learning_rate": 2.5497419067988098e-06, "loss": 3.1618, "step": 48580 }, { "epoch": 0.494232177734375, "grad_norm": 10.584066390991211, "learning_rate": 2.5493421043812573e-06, "loss": 3.2592, "step": 48585 }, { "epoch": 0.4942830403645833, "grad_norm": 16.969036102294922, "learning_rate": 2.548942300701298e-06, "loss": 3.4475, "step": 48590 }, { "epoch": 0.4943339029947917, "grad_norm": 10.872123718261719, "learning_rate": 2.548542495769162e-06, "loss": 3.6009, "step": 48595 }, { "epoch": 0.494384765625, "grad_norm": 11.284872055053711, "learning_rate": 2.5481426895950775e-06, "loss": 3.1851, "step": 48600 }, { "epoch": 0.4944356282552083, "grad_norm": 12.281546592712402, "learning_rate": 2.5477428821892734e-06, "loss": 3.1344, "step": 48605 }, { "epoch": 0.4944864908854167, "grad_norm": 13.539896011352539, "learning_rate": 2.5473430735619797e-06, "loss": 3.4482, "step": 48610 }, { "epoch": 0.494537353515625, "grad_norm": 12.465642929077148, "learning_rate": 2.5469432637234238e-06, "loss": 3.2899, "step": 48615 }, { "epoch": 0.4945882161458333, "grad_norm": 8.407029151916504, "learning_rate": 2.546543452683836e-06, "loss": 3.3725, "step": 48620 }, { "epoch": 0.4946390787760417, "grad_norm": 99.68233489990234, "learning_rate": 2.546143640453445e-06, "loss": 3.8263, "step": 48625 }, { "epoch": 0.49468994140625, "grad_norm": 9.953444480895996, "learning_rate": 2.54574382704248e-06, "loss": 4.2646, "step": 48630 }, { "epoch": 0.4947408040364583, "grad_norm": 12.155915260314941, "learning_rate": 2.54534401246117e-06, "loss": 3.2373, "step": 48635 }, { "epoch": 0.4947916666666667, "grad_norm": 11.434561729431152, "learning_rate": 2.544944196719744e-06, "loss": 3.3427, "step": 48640 }, { "epoch": 0.494842529296875, "grad_norm": 13.522272109985352, "learning_rate": 2.5445443798284314e-06, "loss": 3.5858, "step": 48645 }, { "epoch": 0.4948933919270833, "grad_norm": 10.04487133026123, "learning_rate": 2.544144561797462e-06, "loss": 3.2289, "step": 48650 }, { "epoch": 0.4949442545572917, "grad_norm": 16.41366958618164, "learning_rate": 2.543744742637064e-06, "loss": 3.3868, "step": 48655 }, { "epoch": 0.4949951171875, "grad_norm": 15.050728797912598, "learning_rate": 2.5433449223574674e-06, "loss": 3.2918, "step": 48660 }, { "epoch": 0.4950459798177083, "grad_norm": 13.474639892578125, "learning_rate": 2.5429451009689006e-06, "loss": 3.3679, "step": 48665 }, { "epoch": 0.4950968424479167, "grad_norm": 11.2426118850708, "learning_rate": 2.542545278481594e-06, "loss": 3.277, "step": 48670 }, { "epoch": 0.495147705078125, "grad_norm": 8.831446647644043, "learning_rate": 2.5421454549057765e-06, "loss": 3.3783, "step": 48675 }, { "epoch": 0.4951985677083333, "grad_norm": 9.274211883544922, "learning_rate": 2.5417456302516775e-06, "loss": 3.1851, "step": 48680 }, { "epoch": 0.4952494303385417, "grad_norm": 10.510271072387695, "learning_rate": 2.5413458045295263e-06, "loss": 3.0859, "step": 48685 }, { "epoch": 0.49530029296875, "grad_norm": 8.18698501586914, "learning_rate": 2.5409459777495525e-06, "loss": 2.9118, "step": 48690 }, { "epoch": 0.4953511555989583, "grad_norm": 14.988253593444824, "learning_rate": 2.5405461499219857e-06, "loss": 3.2874, "step": 48695 }, { "epoch": 0.4954020182291667, "grad_norm": 11.119796752929688, "learning_rate": 2.540146321057056e-06, "loss": 3.4541, "step": 48700 }, { "epoch": 0.495452880859375, "grad_norm": 11.139017105102539, "learning_rate": 2.5397464911649904e-06, "loss": 3.5297, "step": 48705 }, { "epoch": 0.4955037434895833, "grad_norm": 12.22435188293457, "learning_rate": 2.5393466602560215e-06, "loss": 3.1274, "step": 48710 }, { "epoch": 0.4955546061197917, "grad_norm": 8.149970054626465, "learning_rate": 2.5389468283403772e-06, "loss": 2.9917, "step": 48715 }, { "epoch": 0.49560546875, "grad_norm": 8.23815631866455, "learning_rate": 2.5385469954282876e-06, "loss": 3.3093, "step": 48720 }, { "epoch": 0.4956563313802083, "grad_norm": 7.144126892089844, "learning_rate": 2.5381471615299828e-06, "loss": 3.3526, "step": 48725 }, { "epoch": 0.4957071940104167, "grad_norm": 8.10865306854248, "learning_rate": 2.5377473266556914e-06, "loss": 3.5419, "step": 48730 }, { "epoch": 0.495758056640625, "grad_norm": 12.645817756652832, "learning_rate": 2.5373474908156444e-06, "loss": 3.5775, "step": 48735 }, { "epoch": 0.4958089192708333, "grad_norm": 14.494380950927734, "learning_rate": 2.5369476540200693e-06, "loss": 3.3425, "step": 48740 }, { "epoch": 0.4958597819010417, "grad_norm": 6.908821105957031, "learning_rate": 2.5365478162791986e-06, "loss": 2.9617, "step": 48745 }, { "epoch": 0.49591064453125, "grad_norm": 16.56468963623047, "learning_rate": 2.5361479776032594e-06, "loss": 3.5609, "step": 48750 }, { "epoch": 0.4959615071614583, "grad_norm": 14.772183418273926, "learning_rate": 2.5357481380024836e-06, "loss": 3.4348, "step": 48755 }, { "epoch": 0.4960123697916667, "grad_norm": 9.944372177124023, "learning_rate": 2.5353482974870997e-06, "loss": 3.1382, "step": 48760 }, { "epoch": 0.496063232421875, "grad_norm": 7.180305004119873, "learning_rate": 2.5349484560673388e-06, "loss": 3.3043, "step": 48765 }, { "epoch": 0.4961140950520833, "grad_norm": 10.013150215148926, "learning_rate": 2.5345486137534287e-06, "loss": 3.4011, "step": 48770 }, { "epoch": 0.4961649576822917, "grad_norm": 10.231666564941406, "learning_rate": 2.5341487705556016e-06, "loss": 3.2933, "step": 48775 }, { "epoch": 0.4962158203125, "grad_norm": 16.945215225219727, "learning_rate": 2.533748926484086e-06, "loss": 3.5064, "step": 48780 }, { "epoch": 0.4962666829427083, "grad_norm": 14.766674041748047, "learning_rate": 2.533349081549112e-06, "loss": 3.152, "step": 48785 }, { "epoch": 0.4963175455729167, "grad_norm": 10.92806339263916, "learning_rate": 2.53294923576091e-06, "loss": 3.3627, "step": 48790 }, { "epoch": 0.496368408203125, "grad_norm": 14.547791481018066, "learning_rate": 2.532549389129709e-06, "loss": 3.7166, "step": 48795 }, { "epoch": 0.4964192708333333, "grad_norm": 13.458353042602539, "learning_rate": 2.5321495416657394e-06, "loss": 3.249, "step": 48800 }, { "epoch": 0.4964701334635417, "grad_norm": 13.802599906921387, "learning_rate": 2.531749693379232e-06, "loss": 3.2997, "step": 48805 }, { "epoch": 0.49652099609375, "grad_norm": 9.486502647399902, "learning_rate": 2.531349844280416e-06, "loss": 3.3716, "step": 48810 }, { "epoch": 0.4965718587239583, "grad_norm": 12.073533058166504, "learning_rate": 2.5309499943795217e-06, "loss": 3.1021, "step": 48815 }, { "epoch": 0.4966227213541667, "grad_norm": 13.449509620666504, "learning_rate": 2.530550143686779e-06, "loss": 3.4323, "step": 48820 }, { "epoch": 0.496673583984375, "grad_norm": 15.299708366394043, "learning_rate": 2.530150292212418e-06, "loss": 3.4064, "step": 48825 }, { "epoch": 0.4967244466145833, "grad_norm": 14.66781234741211, "learning_rate": 2.5297504399666702e-06, "loss": 3.2232, "step": 48830 }, { "epoch": 0.4967753092447917, "grad_norm": 11.898265838623047, "learning_rate": 2.529350586959763e-06, "loss": 3.2682, "step": 48835 }, { "epoch": 0.496826171875, "grad_norm": 10.700338363647461, "learning_rate": 2.5289507332019286e-06, "loss": 2.9856, "step": 48840 }, { "epoch": 0.4968770345052083, "grad_norm": 12.895772933959961, "learning_rate": 2.528550878703396e-06, "loss": 3.3705, "step": 48845 }, { "epoch": 0.4969278971354167, "grad_norm": 12.710967063903809, "learning_rate": 2.5281510234743966e-06, "loss": 3.3809, "step": 48850 }, { "epoch": 0.496978759765625, "grad_norm": 12.692770004272461, "learning_rate": 2.5277511675251598e-06, "loss": 3.0422, "step": 48855 }, { "epoch": 0.4970296223958333, "grad_norm": 14.448827743530273, "learning_rate": 2.5273513108659153e-06, "loss": 3.4203, "step": 48860 }, { "epoch": 0.4970804850260417, "grad_norm": 14.509695053100586, "learning_rate": 2.5269514535068947e-06, "loss": 3.4038, "step": 48865 }, { "epoch": 0.49713134765625, "grad_norm": 9.705320358276367, "learning_rate": 2.5265515954583273e-06, "loss": 3.1937, "step": 48870 }, { "epoch": 0.4971822102864583, "grad_norm": 16.22066307067871, "learning_rate": 2.5261517367304434e-06, "loss": 3.2324, "step": 48875 }, { "epoch": 0.4972330729166667, "grad_norm": 14.455748558044434, "learning_rate": 2.525751877333474e-06, "loss": 3.1136, "step": 48880 }, { "epoch": 0.497283935546875, "grad_norm": 15.725226402282715, "learning_rate": 2.5253520172776484e-06, "loss": 3.4166, "step": 48885 }, { "epoch": 0.4973347981770833, "grad_norm": 11.71474552154541, "learning_rate": 2.5249521565731983e-06, "loss": 3.1958, "step": 48890 }, { "epoch": 0.4973856608072917, "grad_norm": 10.169915199279785, "learning_rate": 2.524552295230353e-06, "loss": 3.2446, "step": 48895 }, { "epoch": 0.4974365234375, "grad_norm": 15.470590591430664, "learning_rate": 2.5241524332593425e-06, "loss": 3.4225, "step": 48900 }, { "epoch": 0.4974873860677083, "grad_norm": 12.102554321289062, "learning_rate": 2.523752570670398e-06, "loss": 3.1009, "step": 48905 }, { "epoch": 0.4975382486979167, "grad_norm": 8.739709854125977, "learning_rate": 2.5233527074737493e-06, "loss": 3.4734, "step": 48910 }, { "epoch": 0.497589111328125, "grad_norm": 12.60647201538086, "learning_rate": 2.5229528436796274e-06, "loss": 3.4694, "step": 48915 }, { "epoch": 0.4976399739583333, "grad_norm": 7.893643856048584, "learning_rate": 2.5225529792982623e-06, "loss": 3.2898, "step": 48920 }, { "epoch": 0.4976908365885417, "grad_norm": 7.657857894897461, "learning_rate": 2.522153114339885e-06, "loss": 3.2166, "step": 48925 }, { "epoch": 0.49774169921875, "grad_norm": 14.99483871459961, "learning_rate": 2.5217532488147245e-06, "loss": 4.0929, "step": 48930 }, { "epoch": 0.4977925618489583, "grad_norm": 13.64297866821289, "learning_rate": 2.521353382733013e-06, "loss": 3.0839, "step": 48935 }, { "epoch": 0.4978434244791667, "grad_norm": 8.626524925231934, "learning_rate": 2.5209535161049804e-06, "loss": 3.2194, "step": 48940 }, { "epoch": 0.497894287109375, "grad_norm": 11.188443183898926, "learning_rate": 2.520553648940857e-06, "loss": 3.2343, "step": 48945 }, { "epoch": 0.4979451497395833, "grad_norm": 17.096988677978516, "learning_rate": 2.520153781250873e-06, "loss": 3.1586, "step": 48950 }, { "epoch": 0.4979960123697917, "grad_norm": 14.983606338500977, "learning_rate": 2.51975391304526e-06, "loss": 4.0485, "step": 48955 }, { "epoch": 0.498046875, "grad_norm": 14.723469734191895, "learning_rate": 2.5193540443342474e-06, "loss": 3.3887, "step": 48960 }, { "epoch": 0.4980977376302083, "grad_norm": 7.029719352722168, "learning_rate": 2.518954175128066e-06, "loss": 3.2619, "step": 48965 }, { "epoch": 0.4981486002604167, "grad_norm": 14.817591667175293, "learning_rate": 2.5185543054369467e-06, "loss": 3.7279, "step": 48970 }, { "epoch": 0.498199462890625, "grad_norm": 9.7324857711792, "learning_rate": 2.5181544352711194e-06, "loss": 3.1242, "step": 48975 }, { "epoch": 0.4982503255208333, "grad_norm": 11.890584945678711, "learning_rate": 2.5177545646408158e-06, "loss": 3.2638, "step": 48980 }, { "epoch": 0.4983011881510417, "grad_norm": 13.200179100036621, "learning_rate": 2.517354693556266e-06, "loss": 3.1061, "step": 48985 }, { "epoch": 0.49835205078125, "grad_norm": 11.83781909942627, "learning_rate": 2.5169548220276997e-06, "loss": 3.4721, "step": 48990 }, { "epoch": 0.4984029134114583, "grad_norm": 15.258880615234375, "learning_rate": 2.516554950065349e-06, "loss": 3.2813, "step": 48995 }, { "epoch": 0.4984537760416667, "grad_norm": 12.589689254760742, "learning_rate": 2.5161550776794434e-06, "loss": 3.4539, "step": 49000 }, { "epoch": 0.498504638671875, "grad_norm": 13.380403518676758, "learning_rate": 2.5157552048802136e-06, "loss": 3.1147, "step": 49005 }, { "epoch": 0.4985555013020833, "grad_norm": 10.888894081115723, "learning_rate": 2.515355331677892e-06, "loss": 3.4481, "step": 49010 }, { "epoch": 0.4986063639322917, "grad_norm": 8.734400749206543, "learning_rate": 2.514955458082706e-06, "loss": 3.6778, "step": 49015 }, { "epoch": 0.4986572265625, "grad_norm": 13.816628456115723, "learning_rate": 2.5145555841048896e-06, "loss": 3.3654, "step": 49020 }, { "epoch": 0.4987080891927083, "grad_norm": 14.505660057067871, "learning_rate": 2.5141557097546722e-06, "loss": 3.0168, "step": 49025 }, { "epoch": 0.4987589518229167, "grad_norm": 14.234870910644531, "learning_rate": 2.513755835042283e-06, "loss": 3.6649, "step": 49030 }, { "epoch": 0.498809814453125, "grad_norm": 17.624862670898438, "learning_rate": 2.513355959977955e-06, "loss": 3.7414, "step": 49035 }, { "epoch": 0.4988606770833333, "grad_norm": 14.029349327087402, "learning_rate": 2.5129560845719173e-06, "loss": 3.109, "step": 49040 }, { "epoch": 0.4989115397135417, "grad_norm": 9.352248191833496, "learning_rate": 2.512556208834402e-06, "loss": 3.5068, "step": 49045 }, { "epoch": 0.49896240234375, "grad_norm": 11.51716423034668, "learning_rate": 2.5121563327756393e-06, "loss": 3.312, "step": 49050 }, { "epoch": 0.4990132649739583, "grad_norm": 12.597025871276855, "learning_rate": 2.5117564564058582e-06, "loss": 3.4719, "step": 49055 }, { "epoch": 0.4990641276041667, "grad_norm": 9.703937530517578, "learning_rate": 2.5113565797352923e-06, "loss": 2.6813, "step": 49060 }, { "epoch": 0.499114990234375, "grad_norm": 11.819278717041016, "learning_rate": 2.5109567027741704e-06, "loss": 3.2599, "step": 49065 }, { "epoch": 0.4991658528645833, "grad_norm": 11.828073501586914, "learning_rate": 2.5105568255327246e-06, "loss": 3.2094, "step": 49070 }, { "epoch": 0.4992167154947917, "grad_norm": 10.16787052154541, "learning_rate": 2.510156948021184e-06, "loss": 3.8233, "step": 49075 }, { "epoch": 0.499267578125, "grad_norm": 10.422018051147461, "learning_rate": 2.5097570702497803e-06, "loss": 3.1277, "step": 49080 }, { "epoch": 0.4993184407552083, "grad_norm": 13.755646705627441, "learning_rate": 2.5093571922287444e-06, "loss": 4.0196, "step": 49085 }, { "epoch": 0.4993693033854167, "grad_norm": 10.642024040222168, "learning_rate": 2.5089573139683082e-06, "loss": 3.3442, "step": 49090 }, { "epoch": 0.499420166015625, "grad_norm": 11.056506156921387, "learning_rate": 2.5085574354786997e-06, "loss": 3.3985, "step": 49095 }, { "epoch": 0.4994710286458333, "grad_norm": 13.28608512878418, "learning_rate": 2.508157556770152e-06, "loss": 3.1175, "step": 49100 }, { "epoch": 0.4995218912760417, "grad_norm": 13.478775024414062, "learning_rate": 2.5077576778528945e-06, "loss": 3.2193, "step": 49105 }, { "epoch": 0.49957275390625, "grad_norm": 14.44498062133789, "learning_rate": 2.5073577987371587e-06, "loss": 3.6488, "step": 49110 }, { "epoch": 0.4996236165364583, "grad_norm": 11.836501121520996, "learning_rate": 2.5069579194331763e-06, "loss": 3.1957, "step": 49115 }, { "epoch": 0.4996744791666667, "grad_norm": 11.179645538330078, "learning_rate": 2.506558039951176e-06, "loss": 3.1457, "step": 49120 }, { "epoch": 0.499725341796875, "grad_norm": 11.630443572998047, "learning_rate": 2.5061581603013905e-06, "loss": 3.6474, "step": 49125 }, { "epoch": 0.4997762044270833, "grad_norm": 14.907173156738281, "learning_rate": 2.50575828049405e-06, "loss": 3.2522, "step": 49130 }, { "epoch": 0.4998270670572917, "grad_norm": 17.425331115722656, "learning_rate": 2.505358400539385e-06, "loss": 3.0893, "step": 49135 }, { "epoch": 0.4998779296875, "grad_norm": 13.06432819366455, "learning_rate": 2.504958520447627e-06, "loss": 3.0929, "step": 49140 }, { "epoch": 0.4999287923177083, "grad_norm": 9.655762672424316, "learning_rate": 2.5045586402290055e-06, "loss": 3.2749, "step": 49145 }, { "epoch": 0.4999796549479167, "grad_norm": 8.963852882385254, "learning_rate": 2.504158759893753e-06, "loss": 3.7212, "step": 49150 }, { "epoch": 0.5, "eval_loss": 3.3498637676239014, "eval_runtime": 156.6867, "eval_samples_per_second": 12.809, "eval_steps_per_second": 12.809, "step": 49152 }, { "epoch": 0.500030517578125, "grad_norm": 9.196493148803711, "learning_rate": 2.5037588794521e-06, "loss": 3.5239, "step": 49155 }, { "epoch": 0.5000813802083334, "grad_norm": 8.475089073181152, "learning_rate": 2.503358998914276e-06, "loss": 3.4656, "step": 49160 }, { "epoch": 0.5001322428385416, "grad_norm": 16.432350158691406, "learning_rate": 2.502959118290514e-06, "loss": 3.1871, "step": 49165 }, { "epoch": 0.50018310546875, "grad_norm": 15.631834030151367, "learning_rate": 2.5025592375910416e-06, "loss": 3.5826, "step": 49170 }, { "epoch": 0.5002339680989584, "grad_norm": 15.452563285827637, "learning_rate": 2.5021593568260938e-06, "loss": 3.5809, "step": 49175 }, { "epoch": 0.5002848307291666, "grad_norm": 12.592869758605957, "learning_rate": 2.501759476005898e-06, "loss": 3.5671, "step": 49180 }, { "epoch": 0.500335693359375, "grad_norm": 13.242794036865234, "learning_rate": 2.501359595140688e-06, "loss": 3.2827, "step": 49185 }, { "epoch": 0.5003865559895834, "grad_norm": 13.345170021057129, "learning_rate": 2.5009597142406917e-06, "loss": 3.2537, "step": 49190 }, { "epoch": 0.5004374186197916, "grad_norm": 11.155458450317383, "learning_rate": 2.500559833316141e-06, "loss": 3.0264, "step": 49195 }, { "epoch": 0.50048828125, "grad_norm": 11.095985412597656, "learning_rate": 2.5001599523772687e-06, "loss": 3.3933, "step": 49200 }, { "epoch": 0.5005391438802084, "grad_norm": 10.589454650878906, "learning_rate": 2.499760071434303e-06, "loss": 3.6001, "step": 49205 }, { "epoch": 0.5005900065104166, "grad_norm": 7.216246604919434, "learning_rate": 2.4993601904974755e-06, "loss": 3.1535, "step": 49210 }, { "epoch": 0.500640869140625, "grad_norm": 9.691969871520996, "learning_rate": 2.4989603095770184e-06, "loss": 3.4616, "step": 49215 }, { "epoch": 0.5006917317708334, "grad_norm": 13.116881370544434, "learning_rate": 2.4985604286831606e-06, "loss": 3.1803, "step": 49220 }, { "epoch": 0.5007425944010416, "grad_norm": 7.126953125, "learning_rate": 2.4981605478261343e-06, "loss": 3.4157, "step": 49225 }, { "epoch": 0.50079345703125, "grad_norm": 9.899238586425781, "learning_rate": 2.49776066701617e-06, "loss": 3.3096, "step": 49230 }, { "epoch": 0.5008443196614584, "grad_norm": 11.79799747467041, "learning_rate": 2.4973607862634987e-06, "loss": 3.4077, "step": 49235 }, { "epoch": 0.5008951822916666, "grad_norm": 7.4908246994018555, "learning_rate": 2.4969609055783502e-06, "loss": 3.031, "step": 49240 }, { "epoch": 0.500946044921875, "grad_norm": 15.956377029418945, "learning_rate": 2.4965610249709573e-06, "loss": 3.8845, "step": 49245 }, { "epoch": 0.5009969075520834, "grad_norm": 16.496440887451172, "learning_rate": 2.4961611444515495e-06, "loss": 3.5862, "step": 49250 }, { "epoch": 0.5010477701822916, "grad_norm": 15.384745597839355, "learning_rate": 2.495761264030358e-06, "loss": 3.2148, "step": 49255 }, { "epoch": 0.5010986328125, "grad_norm": 8.837871551513672, "learning_rate": 2.4953613837176127e-06, "loss": 3.1969, "step": 49260 }, { "epoch": 0.5011494954427084, "grad_norm": 9.887128829956055, "learning_rate": 2.494961503523546e-06, "loss": 3.1775, "step": 49265 }, { "epoch": 0.5012003580729166, "grad_norm": 12.647445678710938, "learning_rate": 2.494561623458388e-06, "loss": 3.177, "step": 49270 }, { "epoch": 0.501251220703125, "grad_norm": 9.768755912780762, "learning_rate": 2.4941617435323695e-06, "loss": 3.2917, "step": 49275 }, { "epoch": 0.5013020833333334, "grad_norm": 13.205989837646484, "learning_rate": 2.4937618637557207e-06, "loss": 3.0655, "step": 49280 }, { "epoch": 0.5013529459635416, "grad_norm": 11.150520324707031, "learning_rate": 2.4933619841386744e-06, "loss": 3.437, "step": 49285 }, { "epoch": 0.50140380859375, "grad_norm": 8.906401634216309, "learning_rate": 2.4929621046914595e-06, "loss": 3.4999, "step": 49290 }, { "epoch": 0.5014546712239584, "grad_norm": 9.648660659790039, "learning_rate": 2.4925622254243078e-06, "loss": 3.5996, "step": 49295 }, { "epoch": 0.5015055338541666, "grad_norm": 7.539660453796387, "learning_rate": 2.492162346347449e-06, "loss": 3.0006, "step": 49300 }, { "epoch": 0.501556396484375, "grad_norm": 12.649576187133789, "learning_rate": 2.4917624674711154e-06, "loss": 3.2997, "step": 49305 }, { "epoch": 0.5016072591145834, "grad_norm": 12.197525024414062, "learning_rate": 2.4913625888055374e-06, "loss": 3.3524, "step": 49310 }, { "epoch": 0.5016581217447916, "grad_norm": 13.816476821899414, "learning_rate": 2.490962710360944e-06, "loss": 3.1124, "step": 49315 }, { "epoch": 0.501708984375, "grad_norm": 8.340036392211914, "learning_rate": 2.490562832147569e-06, "loss": 3.5732, "step": 49320 }, { "epoch": 0.5017598470052084, "grad_norm": 7.469123363494873, "learning_rate": 2.490162954175642e-06, "loss": 3.2823, "step": 49325 }, { "epoch": 0.5018107096354166, "grad_norm": 8.699426651000977, "learning_rate": 2.4897630764553926e-06, "loss": 3.4334, "step": 49330 }, { "epoch": 0.501861572265625, "grad_norm": 15.343707084655762, "learning_rate": 2.489363198997052e-06, "loss": 3.2377, "step": 49335 }, { "epoch": 0.5019124348958334, "grad_norm": 9.360434532165527, "learning_rate": 2.4889633218108524e-06, "loss": 3.5039, "step": 49340 }, { "epoch": 0.5019632975260416, "grad_norm": 13.143653869628906, "learning_rate": 2.4885634449070234e-06, "loss": 3.4251, "step": 49345 }, { "epoch": 0.50201416015625, "grad_norm": 13.156907081604004, "learning_rate": 2.488163568295796e-06, "loss": 3.1726, "step": 49350 }, { "epoch": 0.5020650227864584, "grad_norm": 7.87513542175293, "learning_rate": 2.4877636919873997e-06, "loss": 3.2848, "step": 49355 }, { "epoch": 0.5021158854166666, "grad_norm": 9.70824146270752, "learning_rate": 2.487363815992068e-06, "loss": 3.5863, "step": 49360 }, { "epoch": 0.502166748046875, "grad_norm": 10.080727577209473, "learning_rate": 2.4869639403200295e-06, "loss": 3.5781, "step": 49365 }, { "epoch": 0.5022176106770834, "grad_norm": 7.460448265075684, "learning_rate": 2.486564064981516e-06, "loss": 3.4143, "step": 49370 }, { "epoch": 0.5022684733072916, "grad_norm": 11.609356880187988, "learning_rate": 2.486164189986757e-06, "loss": 3.2316, "step": 49375 }, { "epoch": 0.5023193359375, "grad_norm": 9.654780387878418, "learning_rate": 2.485764315345984e-06, "loss": 3.3201, "step": 49380 }, { "epoch": 0.5023701985677084, "grad_norm": 11.209192276000977, "learning_rate": 2.4853644410694284e-06, "loss": 3.371, "step": 49385 }, { "epoch": 0.5024210611979166, "grad_norm": 12.05045223236084, "learning_rate": 2.4849645671673193e-06, "loss": 3.4816, "step": 49390 }, { "epoch": 0.502471923828125, "grad_norm": 11.425850868225098, "learning_rate": 2.4845646936498878e-06, "loss": 3.9463, "step": 49395 }, { "epoch": 0.5025227864583334, "grad_norm": 8.453120231628418, "learning_rate": 2.4841648205273663e-06, "loss": 3.5582, "step": 49400 }, { "epoch": 0.5025736490885416, "grad_norm": 8.841841697692871, "learning_rate": 2.483764947809983e-06, "loss": 3.4371, "step": 49405 }, { "epoch": 0.50262451171875, "grad_norm": 7.723162651062012, "learning_rate": 2.4833650755079693e-06, "loss": 3.3702, "step": 49410 }, { "epoch": 0.5026753743489584, "grad_norm": 10.669869422912598, "learning_rate": 2.482965203631558e-06, "loss": 3.3359, "step": 49415 }, { "epoch": 0.5027262369791666, "grad_norm": 8.969093322753906, "learning_rate": 2.4825653321909765e-06, "loss": 3.2873, "step": 49420 }, { "epoch": 0.502777099609375, "grad_norm": 17.20841407775879, "learning_rate": 2.4821654611964575e-06, "loss": 3.6631, "step": 49425 }, { "epoch": 0.5028279622395834, "grad_norm": 15.726045608520508, "learning_rate": 2.48176559065823e-06, "loss": 2.9552, "step": 49430 }, { "epoch": 0.5028788248697916, "grad_norm": 8.223189353942871, "learning_rate": 2.4813657205865265e-06, "loss": 3.1761, "step": 49435 }, { "epoch": 0.5029296875, "grad_norm": 8.99612808227539, "learning_rate": 2.4809658509915766e-06, "loss": 3.4041, "step": 49440 }, { "epoch": 0.5029805501302084, "grad_norm": 11.7155122756958, "learning_rate": 2.4805659818836105e-06, "loss": 3.3462, "step": 49445 }, { "epoch": 0.5030314127604166, "grad_norm": 16.708206176757812, "learning_rate": 2.4801661132728587e-06, "loss": 3.405, "step": 49450 }, { "epoch": 0.503082275390625, "grad_norm": 7.648775100708008, "learning_rate": 2.479766245169553e-06, "loss": 3.6026, "step": 49455 }, { "epoch": 0.5031331380208334, "grad_norm": 16.03279685974121, "learning_rate": 2.479366377583923e-06, "loss": 3.4089, "step": 49460 }, { "epoch": 0.5031840006510416, "grad_norm": 8.516480445861816, "learning_rate": 2.4789665105261995e-06, "loss": 3.0617, "step": 49465 }, { "epoch": 0.50323486328125, "grad_norm": 15.769489288330078, "learning_rate": 2.478566644006612e-06, "loss": 3.3682, "step": 49470 }, { "epoch": 0.5032857259114584, "grad_norm": 9.968362808227539, "learning_rate": 2.4781667780353922e-06, "loss": 3.2163, "step": 49475 }, { "epoch": 0.5033365885416666, "grad_norm": 10.482285499572754, "learning_rate": 2.477766912622771e-06, "loss": 3.2009, "step": 49480 }, { "epoch": 0.503387451171875, "grad_norm": 13.688324928283691, "learning_rate": 2.4773670477789774e-06, "loss": 3.3076, "step": 49485 }, { "epoch": 0.5034383138020834, "grad_norm": 11.150995254516602, "learning_rate": 2.476967183514242e-06, "loss": 3.38, "step": 49490 }, { "epoch": 0.5034891764322916, "grad_norm": 13.235206604003906, "learning_rate": 2.4765673198387963e-06, "loss": 3.1943, "step": 49495 }, { "epoch": 0.5035400390625, "grad_norm": 12.570960998535156, "learning_rate": 2.4761674567628707e-06, "loss": 3.5157, "step": 49500 }, { "epoch": 0.5035909016927084, "grad_norm": 9.195624351501465, "learning_rate": 2.4757675942966937e-06, "loss": 3.2672, "step": 49505 }, { "epoch": 0.5036417643229166, "grad_norm": 8.501916885375977, "learning_rate": 2.4753677324504978e-06, "loss": 3.6237, "step": 49510 }, { "epoch": 0.503692626953125, "grad_norm": 11.110610961914062, "learning_rate": 2.4749678712345134e-06, "loss": 3.9753, "step": 49515 }, { "epoch": 0.5037434895833334, "grad_norm": 12.429216384887695, "learning_rate": 2.4745680106589694e-06, "loss": 3.4606, "step": 49520 }, { "epoch": 0.5037943522135416, "grad_norm": 12.383891105651855, "learning_rate": 2.474168150734096e-06, "loss": 3.1723, "step": 49525 }, { "epoch": 0.50384521484375, "grad_norm": 14.52224349975586, "learning_rate": 2.473768291470126e-06, "loss": 3.3007, "step": 49530 }, { "epoch": 0.5038960774739584, "grad_norm": 13.140124320983887, "learning_rate": 2.473368432877287e-06, "loss": 3.3674, "step": 49535 }, { "epoch": 0.5039469401041666, "grad_norm": 297.5426330566406, "learning_rate": 2.472968574965811e-06, "loss": 3.9067, "step": 49540 }, { "epoch": 0.503997802734375, "grad_norm": 13.380870819091797, "learning_rate": 2.4725687177459266e-06, "loss": 3.3042, "step": 49545 }, { "epoch": 0.5040486653645834, "grad_norm": 12.111773490905762, "learning_rate": 2.4721688612278657e-06, "loss": 3.2508, "step": 49550 }, { "epoch": 0.5040995279947916, "grad_norm": 8.741081237792969, "learning_rate": 2.4717690054218583e-06, "loss": 3.265, "step": 49555 }, { "epoch": 0.504150390625, "grad_norm": 11.820341110229492, "learning_rate": 2.4713691503381333e-06, "loss": 3.3668, "step": 49560 }, { "epoch": 0.5042012532552084, "grad_norm": 10.02950668334961, "learning_rate": 2.470969295986922e-06, "loss": 3.0367, "step": 49565 }, { "epoch": 0.5042521158854166, "grad_norm": 10.921381950378418, "learning_rate": 2.4705694423784548e-06, "loss": 3.1557, "step": 49570 }, { "epoch": 0.504302978515625, "grad_norm": 9.535286903381348, "learning_rate": 2.4701695895229613e-06, "loss": 3.1932, "step": 49575 }, { "epoch": 0.5043538411458334, "grad_norm": 13.039703369140625, "learning_rate": 2.469769737430672e-06, "loss": 3.4789, "step": 49580 }, { "epoch": 0.5044047037760416, "grad_norm": 9.973648071289062, "learning_rate": 2.4693698861118162e-06, "loss": 3.3607, "step": 49585 }, { "epoch": 0.50445556640625, "grad_norm": 16.38591194152832, "learning_rate": 2.4689700355766247e-06, "loss": 3.0819, "step": 49590 }, { "epoch": 0.5045064290364584, "grad_norm": 15.450492858886719, "learning_rate": 2.4685701858353283e-06, "loss": 3.5412, "step": 49595 }, { "epoch": 0.5045572916666666, "grad_norm": 16.321855545043945, "learning_rate": 2.468170336898155e-06, "loss": 3.1058, "step": 49600 }, { "epoch": 0.504608154296875, "grad_norm": 11.17414379119873, "learning_rate": 2.4677704887753367e-06, "loss": 3.4829, "step": 49605 }, { "epoch": 0.5046590169270834, "grad_norm": 14.2786865234375, "learning_rate": 2.4673706414771036e-06, "loss": 3.3495, "step": 49610 }, { "epoch": 0.5047098795572916, "grad_norm": 10.139547348022461, "learning_rate": 2.4669707950136837e-06, "loss": 3.4174, "step": 49615 }, { "epoch": 0.5047607421875, "grad_norm": 7.030655384063721, "learning_rate": 2.466570949395308e-06, "loss": 3.5727, "step": 49620 }, { "epoch": 0.5048116048177084, "grad_norm": 13.815516471862793, "learning_rate": 2.466171104632208e-06, "loss": 3.296, "step": 49625 }, { "epoch": 0.5048624674479166, "grad_norm": 10.212800025939941, "learning_rate": 2.4657712607346115e-06, "loss": 3.2526, "step": 49630 }, { "epoch": 0.504913330078125, "grad_norm": 13.8051176071167, "learning_rate": 2.4653714177127495e-06, "loss": 3.8579, "step": 49635 }, { "epoch": 0.5049641927083334, "grad_norm": 10.22461986541748, "learning_rate": 2.4649715755768502e-06, "loss": 3.1761, "step": 49640 }, { "epoch": 0.5050150553385416, "grad_norm": 9.084115982055664, "learning_rate": 2.4645717343371465e-06, "loss": 3.2978, "step": 49645 }, { "epoch": 0.50506591796875, "grad_norm": 12.154102325439453, "learning_rate": 2.464171894003866e-06, "loss": 3.3534, "step": 49650 }, { "epoch": 0.5051167805989584, "grad_norm": 16.16970443725586, "learning_rate": 2.463772054587239e-06, "loss": 3.3953, "step": 49655 }, { "epoch": 0.5051676432291666, "grad_norm": 12.5291109085083, "learning_rate": 2.463372216097495e-06, "loss": 3.4107, "step": 49660 }, { "epoch": 0.505218505859375, "grad_norm": 8.317669868469238, "learning_rate": 2.462972378544865e-06, "loss": 3.3117, "step": 49665 }, { "epoch": 0.5052693684895834, "grad_norm": 12.62435245513916, "learning_rate": 2.462572541939578e-06, "loss": 2.8869, "step": 49670 }, { "epoch": 0.5053202311197916, "grad_norm": 9.405922889709473, "learning_rate": 2.462172706291863e-06, "loss": 3.1812, "step": 49675 }, { "epoch": 0.50537109375, "grad_norm": 15.787854194641113, "learning_rate": 2.4617728716119502e-06, "loss": 3.0783, "step": 49680 }, { "epoch": 0.5054219563802084, "grad_norm": 7.9198102951049805, "learning_rate": 2.46137303791007e-06, "loss": 3.4253, "step": 49685 }, { "epoch": 0.5054728190104166, "grad_norm": 13.76949405670166, "learning_rate": 2.4609732051964514e-06, "loss": 3.4277, "step": 49690 }, { "epoch": 0.505523681640625, "grad_norm": 10.015167236328125, "learning_rate": 2.460573373481323e-06, "loss": 3.4706, "step": 49695 }, { "epoch": 0.5055745442708334, "grad_norm": 11.512467384338379, "learning_rate": 2.460173542774917e-06, "loss": 3.4938, "step": 49700 }, { "epoch": 0.5056254069010416, "grad_norm": 10.98910903930664, "learning_rate": 2.4597737130874612e-06, "loss": 3.3244, "step": 49705 }, { "epoch": 0.50567626953125, "grad_norm": 10.6687650680542, "learning_rate": 2.4593738844291855e-06, "loss": 3.7278, "step": 49710 }, { "epoch": 0.5057271321614584, "grad_norm": 8.300183296203613, "learning_rate": 2.4589740568103183e-06, "loss": 3.469, "step": 49715 }, { "epoch": 0.5057779947916666, "grad_norm": 10.843482971191406, "learning_rate": 2.458574230241091e-06, "loss": 3.217, "step": 49720 }, { "epoch": 0.505828857421875, "grad_norm": 14.605001449584961, "learning_rate": 2.458174404731732e-06, "loss": 3.3045, "step": 49725 }, { "epoch": 0.5058797200520834, "grad_norm": 10.476160049438477, "learning_rate": 2.4577745802924713e-06, "loss": 3.3574, "step": 49730 }, { "epoch": 0.5059305826822916, "grad_norm": 9.061910629272461, "learning_rate": 2.457374756933537e-06, "loss": 3.0889, "step": 49735 }, { "epoch": 0.5059814453125, "grad_norm": 13.6966552734375, "learning_rate": 2.4569749346651604e-06, "loss": 5.5981, "step": 49740 }, { "epoch": 0.5060323079427084, "grad_norm": 11.65805435180664, "learning_rate": 2.4565751134975693e-06, "loss": 3.4213, "step": 49745 }, { "epoch": 0.5060831705729166, "grad_norm": 8.059772491455078, "learning_rate": 2.4561752934409945e-06, "loss": 3.1742, "step": 49750 }, { "epoch": 0.506134033203125, "grad_norm": 8.113338470458984, "learning_rate": 2.4557754745056635e-06, "loss": 3.5701, "step": 49755 }, { "epoch": 0.5061848958333334, "grad_norm": 8.002399444580078, "learning_rate": 2.455375656701806e-06, "loss": 3.16, "step": 49760 }, { "epoch": 0.5062357584635416, "grad_norm": 13.288044929504395, "learning_rate": 2.4549758400396532e-06, "loss": 3.2662, "step": 49765 }, { "epoch": 0.50628662109375, "grad_norm": 14.36279296875, "learning_rate": 2.4545760245294325e-06, "loss": 3.2286, "step": 49770 }, { "epoch": 0.5063374837239584, "grad_norm": 13.100310325622559, "learning_rate": 2.4541762101813722e-06, "loss": 3.6597, "step": 49775 }, { "epoch": 0.5063883463541666, "grad_norm": 15.703715324401855, "learning_rate": 2.4537763970057043e-06, "loss": 3.2327, "step": 49780 }, { "epoch": 0.506439208984375, "grad_norm": 13.610279083251953, "learning_rate": 2.4533765850126557e-06, "loss": 3.4582, "step": 49785 }, { "epoch": 0.5064900716145834, "grad_norm": 15.596902847290039, "learning_rate": 2.4529767742124565e-06, "loss": 3.2701, "step": 49790 }, { "epoch": 0.5065409342447916, "grad_norm": 14.749031066894531, "learning_rate": 2.4525769646153338e-06, "loss": 3.1786, "step": 49795 }, { "epoch": 0.506591796875, "grad_norm": 12.403141975402832, "learning_rate": 2.45217715623152e-06, "loss": 3.2539, "step": 49800 }, { "epoch": 0.5066426595052084, "grad_norm": 13.405444145202637, "learning_rate": 2.4517773490712417e-06, "loss": 3.5059, "step": 49805 }, { "epoch": 0.5066935221354166, "grad_norm": 9.99687385559082, "learning_rate": 2.4513775431447278e-06, "loss": 3.0976, "step": 49810 }, { "epoch": 0.506744384765625, "grad_norm": 14.107109069824219, "learning_rate": 2.450977738462209e-06, "loss": 3.3232, "step": 49815 }, { "epoch": 0.5067952473958334, "grad_norm": 11.486820220947266, "learning_rate": 2.450577935033913e-06, "loss": 3.5945, "step": 49820 }, { "epoch": 0.5068461100260416, "grad_norm": 8.772841453552246, "learning_rate": 2.450178132870069e-06, "loss": 3.1173, "step": 49825 }, { "epoch": 0.50689697265625, "grad_norm": 11.41469669342041, "learning_rate": 2.449778331980905e-06, "loss": 3.1338, "step": 49830 }, { "epoch": 0.5069478352864584, "grad_norm": 14.06910228729248, "learning_rate": 2.4493785323766504e-06, "loss": 3.4256, "step": 49835 }, { "epoch": 0.5069986979166666, "grad_norm": 10.561040878295898, "learning_rate": 2.448978734067535e-06, "loss": 3.2773, "step": 49840 }, { "epoch": 0.507049560546875, "grad_norm": 13.252824783325195, "learning_rate": 2.4485789370637857e-06, "loss": 3.2069, "step": 49845 }, { "epoch": 0.5071004231770834, "grad_norm": 15.160521507263184, "learning_rate": 2.448179141375632e-06, "loss": 2.8845, "step": 49850 }, { "epoch": 0.5071512858072916, "grad_norm": 14.707564353942871, "learning_rate": 2.4477793470133033e-06, "loss": 3.173, "step": 49855 }, { "epoch": 0.5072021484375, "grad_norm": 12.910215377807617, "learning_rate": 2.4473795539870275e-06, "loss": 3.2612, "step": 49860 }, { "epoch": 0.5072530110677084, "grad_norm": 12.413802146911621, "learning_rate": 2.446979762307034e-06, "loss": 3.0549, "step": 49865 }, { "epoch": 0.5073038736979166, "grad_norm": 14.505427360534668, "learning_rate": 2.446579971983549e-06, "loss": 3.0984, "step": 49870 }, { "epoch": 0.507354736328125, "grad_norm": 16.360313415527344, "learning_rate": 2.446180183026804e-06, "loss": 3.2245, "step": 49875 }, { "epoch": 0.5074055989583334, "grad_norm": 12.215571403503418, "learning_rate": 2.4457803954470264e-06, "loss": 3.4219, "step": 49880 }, { "epoch": 0.5074564615885416, "grad_norm": 9.959635734558105, "learning_rate": 2.4453806092544442e-06, "loss": 3.2679, "step": 49885 }, { "epoch": 0.50750732421875, "grad_norm": 11.354060173034668, "learning_rate": 2.444980824459285e-06, "loss": 3.3806, "step": 49890 }, { "epoch": 0.5075581868489584, "grad_norm": 12.338563919067383, "learning_rate": 2.44458104107178e-06, "loss": 3.0743, "step": 49895 }, { "epoch": 0.5076090494791666, "grad_norm": 12.6554594039917, "learning_rate": 2.4441812591021553e-06, "loss": 3.8411, "step": 49900 }, { "epoch": 0.507659912109375, "grad_norm": 14.3016939163208, "learning_rate": 2.4437814785606394e-06, "loss": 3.4705, "step": 49905 }, { "epoch": 0.5077107747395834, "grad_norm": 10.152817726135254, "learning_rate": 2.443381699457462e-06, "loss": 3.1855, "step": 49910 }, { "epoch": 0.5077616373697916, "grad_norm": 14.350810050964355, "learning_rate": 2.44298192180285e-06, "loss": 3.6002, "step": 49915 }, { "epoch": 0.5078125, "grad_norm": 13.546785354614258, "learning_rate": 2.4425821456070326e-06, "loss": 3.6468, "step": 49920 }, { "epoch": 0.5078633626302084, "grad_norm": 7.947472095489502, "learning_rate": 2.442182370880236e-06, "loss": 3.3876, "step": 49925 }, { "epoch": 0.5079142252604166, "grad_norm": 9.699104309082031, "learning_rate": 2.4417825976326908e-06, "loss": 3.4474, "step": 49930 }, { "epoch": 0.507965087890625, "grad_norm": 11.072478294372559, "learning_rate": 2.441382825874624e-06, "loss": 3.6583, "step": 49935 }, { "epoch": 0.5080159505208334, "grad_norm": 11.983139038085938, "learning_rate": 2.4409830556162635e-06, "loss": 3.1169, "step": 49940 }, { "epoch": 0.5080668131510416, "grad_norm": 8.136286735534668, "learning_rate": 2.440583286867837e-06, "loss": 3.1332, "step": 49945 }, { "epoch": 0.50811767578125, "grad_norm": 13.545182228088379, "learning_rate": 2.440183519639573e-06, "loss": 3.0072, "step": 49950 }, { "epoch": 0.5081685384114584, "grad_norm": 16.493642807006836, "learning_rate": 2.439783753941701e-06, "loss": 3.238, "step": 49955 }, { "epoch": 0.5082194010416666, "grad_norm": 16.028322219848633, "learning_rate": 2.4393839897844463e-06, "loss": 3.2214, "step": 49960 }, { "epoch": 0.508270263671875, "grad_norm": 13.38894271850586, "learning_rate": 2.438984227178037e-06, "loss": 3.4751, "step": 49965 }, { "epoch": 0.5083211263020834, "grad_norm": 9.154212951660156, "learning_rate": 2.4385844661327034e-06, "loss": 3.4803, "step": 49970 }, { "epoch": 0.5083719889322916, "grad_norm": 12.187919616699219, "learning_rate": 2.438184706658671e-06, "loss": 3.3993, "step": 49975 }, { "epoch": 0.5084228515625, "grad_norm": 8.677459716796875, "learning_rate": 2.4377849487661686e-06, "loss": 3.4102, "step": 49980 }, { "epoch": 0.5084737141927084, "grad_norm": 15.56594181060791, "learning_rate": 2.437385192465423e-06, "loss": 3.432, "step": 49985 }, { "epoch": 0.5085245768229166, "grad_norm": 15.693167686462402, "learning_rate": 2.436985437766662e-06, "loss": 3.3972, "step": 49990 }, { "epoch": 0.508575439453125, "grad_norm": 17.262100219726562, "learning_rate": 2.4365856846801148e-06, "loss": 3.3424, "step": 49995 }, { "epoch": 0.5086263020833334, "grad_norm": 14.669502258300781, "learning_rate": 2.4361859332160063e-06, "loss": 3.4352, "step": 50000 }, { "epoch": 0.5086771647135416, "grad_norm": 13.700075149536133, "learning_rate": 2.4357861833845665e-06, "loss": 3.1324, "step": 50005 }, { "epoch": 0.50872802734375, "grad_norm": 13.19578742980957, "learning_rate": 2.4353864351960225e-06, "loss": 3.5918, "step": 50010 }, { "epoch": 0.5087788899739584, "grad_norm": 8.245698928833008, "learning_rate": 2.434986688660601e-06, "loss": 3.2106, "step": 50015 }, { "epoch": 0.5088297526041666, "grad_norm": 10.24449348449707, "learning_rate": 2.4345869437885285e-06, "loss": 3.3516, "step": 50020 }, { "epoch": 0.508880615234375, "grad_norm": 8.788253784179688, "learning_rate": 2.434187200590035e-06, "loss": 3.3597, "step": 50025 }, { "epoch": 0.5089314778645834, "grad_norm": 9.52047061920166, "learning_rate": 2.433787459075346e-06, "loss": 3.2025, "step": 50030 }, { "epoch": 0.5089823404947916, "grad_norm": 9.388339042663574, "learning_rate": 2.433387719254689e-06, "loss": 3.3501, "step": 50035 }, { "epoch": 0.509033203125, "grad_norm": 20.20309829711914, "learning_rate": 2.432987981138291e-06, "loss": 3.6578, "step": 50040 }, { "epoch": 0.5090840657552084, "grad_norm": 13.675063133239746, "learning_rate": 2.43258824473638e-06, "loss": 3.4585, "step": 50045 }, { "epoch": 0.5091349283854166, "grad_norm": 12.403952598571777, "learning_rate": 2.4321885100591833e-06, "loss": 3.2217, "step": 50050 }, { "epoch": 0.509185791015625, "grad_norm": 9.077116966247559, "learning_rate": 2.431788777116927e-06, "loss": 3.248, "step": 50055 }, { "epoch": 0.5092366536458334, "grad_norm": 10.033145904541016, "learning_rate": 2.431389045919838e-06, "loss": 3.2762, "step": 50060 }, { "epoch": 0.5092875162760416, "grad_norm": 9.409383773803711, "learning_rate": 2.430989316478145e-06, "loss": 3.191, "step": 50065 }, { "epoch": 0.50933837890625, "grad_norm": 9.033283233642578, "learning_rate": 2.4305895888020737e-06, "loss": 3.4433, "step": 50070 }, { "epoch": 0.5093892415364584, "grad_norm": 16.920303344726562, "learning_rate": 2.4301898629018516e-06, "loss": 3.1023, "step": 50075 }, { "epoch": 0.5094401041666666, "grad_norm": 13.073542594909668, "learning_rate": 2.4297901387877042e-06, "loss": 3.2759, "step": 50080 }, { "epoch": 0.509490966796875, "grad_norm": 7.634866237640381, "learning_rate": 2.4293904164698606e-06, "loss": 3.5395, "step": 50085 }, { "epoch": 0.5095418294270834, "grad_norm": 10.092267990112305, "learning_rate": 2.4289906959585463e-06, "loss": 3.4359, "step": 50090 }, { "epoch": 0.5095926920572916, "grad_norm": 12.581851959228516, "learning_rate": 2.428590977263987e-06, "loss": 3.5123, "step": 50095 }, { "epoch": 0.5096435546875, "grad_norm": 13.736969947814941, "learning_rate": 2.428191260396412e-06, "loss": 3.0601, "step": 50100 }, { "epoch": 0.5096944173177084, "grad_norm": 8.191214561462402, "learning_rate": 2.427791545366046e-06, "loss": 3.0694, "step": 50105 }, { "epoch": 0.5097452799479166, "grad_norm": 11.425572395324707, "learning_rate": 2.427391832183117e-06, "loss": 3.3201, "step": 50110 }, { "epoch": 0.509796142578125, "grad_norm": 6.909358024597168, "learning_rate": 2.4269921208578496e-06, "loss": 2.9903, "step": 50115 }, { "epoch": 0.5098470052083334, "grad_norm": 13.281767845153809, "learning_rate": 2.426592411400472e-06, "loss": 3.3728, "step": 50120 }, { "epoch": 0.5098978678385416, "grad_norm": 9.757806777954102, "learning_rate": 2.4261927038212104e-06, "loss": 3.9518, "step": 50125 }, { "epoch": 0.50994873046875, "grad_norm": 12.427955627441406, "learning_rate": 2.425792998130291e-06, "loss": 3.4458, "step": 50130 }, { "epoch": 0.5099995930989584, "grad_norm": 9.13679027557373, "learning_rate": 2.425393294337939e-06, "loss": 3.3046, "step": 50135 }, { "epoch": 0.5100504557291666, "grad_norm": 11.100677490234375, "learning_rate": 2.4249935924543833e-06, "loss": 3.5699, "step": 50140 }, { "epoch": 0.510101318359375, "grad_norm": 16.57285499572754, "learning_rate": 2.4245938924898484e-06, "loss": 3.4786, "step": 50145 }, { "epoch": 0.5101521809895834, "grad_norm": 13.360235214233398, "learning_rate": 2.424194194454561e-06, "loss": 3.0517, "step": 50150 }, { "epoch": 0.5102030436197916, "grad_norm": 12.516863822937012, "learning_rate": 2.4237944983587465e-06, "loss": 3.709, "step": 50155 }, { "epoch": 0.51025390625, "grad_norm": 12.673425674438477, "learning_rate": 2.423394804212632e-06, "loss": 3.3519, "step": 50160 }, { "epoch": 0.5103047688802084, "grad_norm": 16.423259735107422, "learning_rate": 2.422995112026444e-06, "loss": 3.4322, "step": 50165 }, { "epoch": 0.5103556315104166, "grad_norm": 15.741813659667969, "learning_rate": 2.422595421810407e-06, "loss": 3.4376, "step": 50170 }, { "epoch": 0.510406494140625, "grad_norm": 9.000436782836914, "learning_rate": 2.4221957335747474e-06, "loss": 3.5496, "step": 50175 }, { "epoch": 0.5104573567708334, "grad_norm": 11.855289459228516, "learning_rate": 2.4217960473296923e-06, "loss": 3.5276, "step": 50180 }, { "epoch": 0.5105082194010416, "grad_norm": 10.971668243408203, "learning_rate": 2.4213963630854667e-06, "loss": 3.2312, "step": 50185 }, { "epoch": 0.51055908203125, "grad_norm": 15.227359771728516, "learning_rate": 2.420996680852296e-06, "loss": 3.5039, "step": 50190 }, { "epoch": 0.5106099446614584, "grad_norm": 7.392257213592529, "learning_rate": 2.420597000640407e-06, "loss": 3.7844, "step": 50195 }, { "epoch": 0.5106608072916666, "grad_norm": 10.824780464172363, "learning_rate": 2.4201973224600252e-06, "loss": 3.2045, "step": 50200 }, { "epoch": 0.510711669921875, "grad_norm": 15.995607376098633, "learning_rate": 2.419797646321376e-06, "loss": 3.509, "step": 50205 }, { "epoch": 0.5107625325520834, "grad_norm": 10.681689262390137, "learning_rate": 2.419397972234684e-06, "loss": 3.3356, "step": 50210 }, { "epoch": 0.5108133951822916, "grad_norm": 13.102217674255371, "learning_rate": 2.4189983002101762e-06, "loss": 3.5439, "step": 50215 }, { "epoch": 0.5108642578125, "grad_norm": 8.509812355041504, "learning_rate": 2.418598630258078e-06, "loss": 3.1117, "step": 50220 }, { "epoch": 0.5109151204427084, "grad_norm": 14.604217529296875, "learning_rate": 2.4181989623886147e-06, "loss": 3.1855, "step": 50225 }, { "epoch": 0.5109659830729166, "grad_norm": 10.331098556518555, "learning_rate": 2.4177992966120105e-06, "loss": 3.0951, "step": 50230 }, { "epoch": 0.511016845703125, "grad_norm": 12.546608924865723, "learning_rate": 2.4173996329384933e-06, "loss": 3.2482, "step": 50235 }, { "epoch": 0.5110677083333334, "grad_norm": 12.26623249053955, "learning_rate": 2.416999971378286e-06, "loss": 3.5527, "step": 50240 }, { "epoch": 0.5111185709635416, "grad_norm": 13.634391784667969, "learning_rate": 2.4166003119416154e-06, "loss": 3.1796, "step": 50245 }, { "epoch": 0.51116943359375, "grad_norm": 12.512577056884766, "learning_rate": 2.4162006546387047e-06, "loss": 3.0403, "step": 50250 }, { "epoch": 0.5112202962239584, "grad_norm": 11.685742378234863, "learning_rate": 2.4158009994797816e-06, "loss": 3.6144, "step": 50255 }, { "epoch": 0.5112711588541666, "grad_norm": 7.155736446380615, "learning_rate": 2.4154013464750697e-06, "loss": 3.35, "step": 50260 }, { "epoch": 0.511322021484375, "grad_norm": 14.116155624389648, "learning_rate": 2.415001695634795e-06, "loss": 3.2603, "step": 50265 }, { "epoch": 0.5113728841145834, "grad_norm": 16.665386199951172, "learning_rate": 2.4146020469691803e-06, "loss": 3.3092, "step": 50270 }, { "epoch": 0.5114237467447916, "grad_norm": 8.797182083129883, "learning_rate": 2.4142024004884525e-06, "loss": 3.2307, "step": 50275 }, { "epoch": 0.511474609375, "grad_norm": 10.283164024353027, "learning_rate": 2.4138027562028368e-06, "loss": 2.9887, "step": 50280 }, { "epoch": 0.5115254720052084, "grad_norm": 16.19517707824707, "learning_rate": 2.4134031141225565e-06, "loss": 3.5166, "step": 50285 }, { "epoch": 0.5115763346354166, "grad_norm": 16.54936408996582, "learning_rate": 2.4130034742578365e-06, "loss": 3.17, "step": 50290 }, { "epoch": 0.511627197265625, "grad_norm": 17.40081024169922, "learning_rate": 2.412603836618903e-06, "loss": 3.3569, "step": 50295 }, { "epoch": 0.5116780598958334, "grad_norm": 8.195357322692871, "learning_rate": 2.4122042012159793e-06, "loss": 3.4386, "step": 50300 }, { "epoch": 0.5117289225260416, "grad_norm": 10.756447792053223, "learning_rate": 2.4118045680592893e-06, "loss": 3.425, "step": 50305 }, { "epoch": 0.51177978515625, "grad_norm": 10.567279815673828, "learning_rate": 2.41140493715906e-06, "loss": 3.1411, "step": 50310 }, { "epoch": 0.5118306477864584, "grad_norm": 10.493538856506348, "learning_rate": 2.4110053085255137e-06, "loss": 3.7064, "step": 50315 }, { "epoch": 0.5118815104166666, "grad_norm": 12.735881805419922, "learning_rate": 2.410605682168876e-06, "loss": 3.4582, "step": 50320 }, { "epoch": 0.511932373046875, "grad_norm": 14.975763320922852, "learning_rate": 2.4102060580993696e-06, "loss": 2.8556, "step": 50325 }, { "epoch": 0.5119832356770834, "grad_norm": 14.091262817382812, "learning_rate": 2.4098064363272205e-06, "loss": 3.2849, "step": 50330 }, { "epoch": 0.5120340983072916, "grad_norm": 15.417219161987305, "learning_rate": 2.409406816862653e-06, "loss": 3.2253, "step": 50335 }, { "epoch": 0.5120849609375, "grad_norm": 14.360461235046387, "learning_rate": 2.40900719971589e-06, "loss": 3.3464, "step": 50340 }, { "epoch": 0.5121358235677084, "grad_norm": 10.996293067932129, "learning_rate": 2.408607584897156e-06, "loss": 3.5622, "step": 50345 }, { "epoch": 0.5121866861979166, "grad_norm": 8.267298698425293, "learning_rate": 2.4082079724166756e-06, "loss": 3.1208, "step": 50350 }, { "epoch": 0.512237548828125, "grad_norm": 8.640914916992188, "learning_rate": 2.4078083622846726e-06, "loss": 2.9342, "step": 50355 }, { "epoch": 0.5122884114583334, "grad_norm": 12.42250919342041, "learning_rate": 2.407408754511371e-06, "loss": 3.2516, "step": 50360 }, { "epoch": 0.5123392740885416, "grad_norm": 9.798332214355469, "learning_rate": 2.4070091491069934e-06, "loss": 3.4086, "step": 50365 }, { "epoch": 0.51239013671875, "grad_norm": 15.376659393310547, "learning_rate": 2.4066095460817654e-06, "loss": 3.5751, "step": 50370 }, { "epoch": 0.5124409993489584, "grad_norm": 9.240889549255371, "learning_rate": 2.4062099454459107e-06, "loss": 3.3049, "step": 50375 }, { "epoch": 0.5124918619791666, "grad_norm": 15.17997932434082, "learning_rate": 2.4058103472096515e-06, "loss": 3.3706, "step": 50380 }, { "epoch": 0.512542724609375, "grad_norm": 11.492697715759277, "learning_rate": 2.4054107513832117e-06, "loss": 3.4086, "step": 50385 }, { "epoch": 0.5125935872395834, "grad_norm": 9.998835563659668, "learning_rate": 2.4050111579768166e-06, "loss": 2.9284, "step": 50390 }, { "epoch": 0.5126444498697916, "grad_norm": 11.688558578491211, "learning_rate": 2.4046115670006877e-06, "loss": 3.3235, "step": 50395 }, { "epoch": 0.5126953125, "grad_norm": 11.273649215698242, "learning_rate": 2.4042119784650493e-06, "loss": 3.3909, "step": 50400 }, { "epoch": 0.5127461751302084, "grad_norm": 10.354995727539062, "learning_rate": 2.403812392380125e-06, "loss": 3.4836, "step": 50405 }, { "epoch": 0.5127970377604166, "grad_norm": 9.114692687988281, "learning_rate": 2.4034128087561383e-06, "loss": 3.4699, "step": 50410 }, { "epoch": 0.512847900390625, "grad_norm": 11.493762016296387, "learning_rate": 2.403013227603311e-06, "loss": 3.1074, "step": 50415 }, { "epoch": 0.5128987630208334, "grad_norm": 691.8250122070312, "learning_rate": 2.4026136489318676e-06, "loss": 3.792, "step": 50420 }, { "epoch": 0.5129496256510416, "grad_norm": 13.626561164855957, "learning_rate": 2.4022140727520313e-06, "loss": 3.4223, "step": 50425 }, { "epoch": 0.51300048828125, "grad_norm": 6.908864498138428, "learning_rate": 2.4018144990740245e-06, "loss": 3.213, "step": 50430 }, { "epoch": 0.5130513509114584, "grad_norm": 13.467784881591797, "learning_rate": 2.401414927908071e-06, "loss": 3.7017, "step": 50435 }, { "epoch": 0.5131022135416666, "grad_norm": 15.351990699768066, "learning_rate": 2.4010153592643923e-06, "loss": 3.4501, "step": 50440 }, { "epoch": 0.513153076171875, "grad_norm": 12.327286720275879, "learning_rate": 2.400615793153212e-06, "loss": 3.5289, "step": 50445 }, { "epoch": 0.5132039388020834, "grad_norm": 14.21029281616211, "learning_rate": 2.4002162295847545e-06, "loss": 3.0235, "step": 50450 }, { "epoch": 0.5132548014322916, "grad_norm": 12.524280548095703, "learning_rate": 2.3998166685692402e-06, "loss": 3.6627, "step": 50455 }, { "epoch": 0.5133056640625, "grad_norm": 12.760085105895996, "learning_rate": 2.399417110116892e-06, "loss": 3.4636, "step": 50460 }, { "epoch": 0.5133565266927084, "grad_norm": 10.160909652709961, "learning_rate": 2.3990175542379347e-06, "loss": 3.5232, "step": 50465 }, { "epoch": 0.5134073893229166, "grad_norm": 17.44170570373535, "learning_rate": 2.3986180009425887e-06, "loss": 3.5578, "step": 50470 }, { "epoch": 0.513458251953125, "grad_norm": 10.31970500946045, "learning_rate": 2.3982184502410774e-06, "loss": 3.331, "step": 50475 }, { "epoch": 0.5135091145833334, "grad_norm": 10.512755393981934, "learning_rate": 2.397818902143622e-06, "loss": 3.3922, "step": 50480 }, { "epoch": 0.5135599772135416, "grad_norm": 9.803839683532715, "learning_rate": 2.397419356660446e-06, "loss": 3.3884, "step": 50485 }, { "epoch": 0.51361083984375, "grad_norm": 13.15937328338623, "learning_rate": 2.3970198138017724e-06, "loss": 3.4839, "step": 50490 }, { "epoch": 0.5136617024739584, "grad_norm": 9.590703964233398, "learning_rate": 2.396620273577821e-06, "loss": 3.0772, "step": 50495 }, { "epoch": 0.5137125651041666, "grad_norm": 14.244230270385742, "learning_rate": 2.3962207359988166e-06, "loss": 3.1122, "step": 50500 }, { "epoch": 0.513763427734375, "grad_norm": 12.112451553344727, "learning_rate": 2.39582120107498e-06, "loss": 3.1967, "step": 50505 }, { "epoch": 0.5138142903645834, "grad_norm": 14.854276657104492, "learning_rate": 2.3954216688165328e-06, "loss": 3.6053, "step": 50510 }, { "epoch": 0.5138651529947916, "grad_norm": 8.958395004272461, "learning_rate": 2.395022139233697e-06, "loss": 3.2142, "step": 50515 }, { "epoch": 0.513916015625, "grad_norm": 8.370209693908691, "learning_rate": 2.3946226123366962e-06, "loss": 3.4084, "step": 50520 }, { "epoch": 0.5139668782552084, "grad_norm": 14.15273380279541, "learning_rate": 2.39422308813575e-06, "loss": 3.4198, "step": 50525 }, { "epoch": 0.5140177408854166, "grad_norm": 10.030532836914062, "learning_rate": 2.3938235666410815e-06, "loss": 3.345, "step": 50530 }, { "epoch": 0.514068603515625, "grad_norm": 12.817328453063965, "learning_rate": 2.3934240478629113e-06, "loss": 3.5023, "step": 50535 }, { "epoch": 0.5141194661458334, "grad_norm": 8.550262451171875, "learning_rate": 2.3930245318114617e-06, "loss": 3.4911, "step": 50540 }, { "epoch": 0.5141703287760416, "grad_norm": 8.891758918762207, "learning_rate": 2.3926250184969547e-06, "loss": 3.4614, "step": 50545 }, { "epoch": 0.51422119140625, "grad_norm": 16.2994384765625, "learning_rate": 2.3922255079296106e-06, "loss": 3.341, "step": 50550 }, { "epoch": 0.5142720540364584, "grad_norm": 13.644462585449219, "learning_rate": 2.391826000119651e-06, "loss": 3.1707, "step": 50555 }, { "epoch": 0.5143229166666666, "grad_norm": 11.300737380981445, "learning_rate": 2.391426495077298e-06, "loss": 3.1572, "step": 50560 }, { "epoch": 0.514373779296875, "grad_norm": 13.053349494934082, "learning_rate": 2.3910269928127723e-06, "loss": 3.1483, "step": 50565 }, { "epoch": 0.5144246419270834, "grad_norm": 10.000075340270996, "learning_rate": 2.3906274933362955e-06, "loss": 3.4378, "step": 50570 }, { "epoch": 0.5144755045572916, "grad_norm": 15.421857833862305, "learning_rate": 2.3902279966580873e-06, "loss": 3.5272, "step": 50575 }, { "epoch": 0.5145263671875, "grad_norm": 7.433443546295166, "learning_rate": 2.389828502788371e-06, "loss": 3.3488, "step": 50580 }, { "epoch": 0.5145772298177084, "grad_norm": 11.181934356689453, "learning_rate": 2.3894290117373657e-06, "loss": 3.2142, "step": 50585 }, { "epoch": 0.5146280924479166, "grad_norm": 13.431914329528809, "learning_rate": 2.389029523515293e-06, "loss": 3.3927, "step": 50590 }, { "epoch": 0.514678955078125, "grad_norm": 13.219193458557129, "learning_rate": 2.388630038132374e-06, "loss": 3.3451, "step": 50595 }, { "epoch": 0.5147298177083334, "grad_norm": 15.109130859375, "learning_rate": 2.388230555598829e-06, "loss": 3.4154, "step": 50600 }, { "epoch": 0.5147806803385416, "grad_norm": 13.607009887695312, "learning_rate": 2.387831075924879e-06, "loss": 3.0114, "step": 50605 }, { "epoch": 0.51483154296875, "grad_norm": 13.028376579284668, "learning_rate": 2.3874315991207434e-06, "loss": 3.5488, "step": 50610 }, { "epoch": 0.5148824055989584, "grad_norm": 11.688109397888184, "learning_rate": 2.387032125196644e-06, "loss": 3.1004, "step": 50615 }, { "epoch": 0.5149332682291666, "grad_norm": 10.281679153442383, "learning_rate": 2.3866326541628017e-06, "loss": 3.3787, "step": 50620 }, { "epoch": 0.514984130859375, "grad_norm": 13.672207832336426, "learning_rate": 2.3862331860294353e-06, "loss": 3.0825, "step": 50625 }, { "epoch": 0.5150349934895834, "grad_norm": 13.863525390625, "learning_rate": 2.3858337208067654e-06, "loss": 3.4848, "step": 50630 }, { "epoch": 0.5150858561197916, "grad_norm": 15.515698432922363, "learning_rate": 2.3854342585050138e-06, "loss": 3.2334, "step": 50635 }, { "epoch": 0.51513671875, "grad_norm": 11.521782875061035, "learning_rate": 2.385034799134399e-06, "loss": 3.2403, "step": 50640 }, { "epoch": 0.5151875813802084, "grad_norm": 14.413559913635254, "learning_rate": 2.3846353427051423e-06, "loss": 3.2055, "step": 50645 }, { "epoch": 0.5152384440104166, "grad_norm": 11.58481502532959, "learning_rate": 2.3842358892274614e-06, "loss": 3.4076, "step": 50650 }, { "epoch": 0.515289306640625, "grad_norm": 9.105278015136719, "learning_rate": 2.383836438711579e-06, "loss": 3.4334, "step": 50655 }, { "epoch": 0.5153401692708334, "grad_norm": 13.805219650268555, "learning_rate": 2.383436991167714e-06, "loss": 3.4611, "step": 50660 }, { "epoch": 0.5153910319010416, "grad_norm": 14.29794979095459, "learning_rate": 2.3830375466060855e-06, "loss": 3.3283, "step": 50665 }, { "epoch": 0.51544189453125, "grad_norm": 14.205374717712402, "learning_rate": 2.3826381050369128e-06, "loss": 3.3684, "step": 50670 }, { "epoch": 0.5154927571614584, "grad_norm": 10.655013084411621, "learning_rate": 2.382238666470418e-06, "loss": 3.6278, "step": 50675 }, { "epoch": 0.5155436197916666, "grad_norm": 10.02477741241455, "learning_rate": 2.3818392309168183e-06, "loss": 3.0367, "step": 50680 }, { "epoch": 0.515594482421875, "grad_norm": 8.622747421264648, "learning_rate": 2.381439798386333e-06, "loss": 3.2649, "step": 50685 }, { "epoch": 0.5156453450520834, "grad_norm": 16.067399978637695, "learning_rate": 2.3810403688891835e-06, "loss": 3.3215, "step": 50690 }, { "epoch": 0.5156962076822916, "grad_norm": 8.392431259155273, "learning_rate": 2.3806409424355877e-06, "loss": 3.3279, "step": 50695 }, { "epoch": 0.5157470703125, "grad_norm": 14.65240478515625, "learning_rate": 2.380241519035766e-06, "loss": 3.3661, "step": 50700 }, { "epoch": 0.5157979329427084, "grad_norm": 15.060720443725586, "learning_rate": 2.3798420986999343e-06, "loss": 3.2802, "step": 50705 }, { "epoch": 0.5158487955729166, "grad_norm": 8.779844284057617, "learning_rate": 2.379442681438316e-06, "loss": 3.4106, "step": 50710 }, { "epoch": 0.515899658203125, "grad_norm": 13.472846031188965, "learning_rate": 2.3790432672611275e-06, "loss": 3.6712, "step": 50715 }, { "epoch": 0.5159505208333334, "grad_norm": 12.658249855041504, "learning_rate": 2.3786438561785887e-06, "loss": 3.3399, "step": 50720 }, { "epoch": 0.5160013834635416, "grad_norm": 15.144787788391113, "learning_rate": 2.3782444482009173e-06, "loss": 3.2874, "step": 50725 }, { "epoch": 0.51605224609375, "grad_norm": 11.097823143005371, "learning_rate": 2.3778450433383333e-06, "loss": 3.4698, "step": 50730 }, { "epoch": 0.5161031087239584, "grad_norm": 12.127666473388672, "learning_rate": 2.377445641601055e-06, "loss": 3.1847, "step": 50735 }, { "epoch": 0.5161539713541666, "grad_norm": 8.602930068969727, "learning_rate": 2.377046242999301e-06, "loss": 3.3704, "step": 50740 }, { "epoch": 0.516204833984375, "grad_norm": 9.570191383361816, "learning_rate": 2.3766468475432887e-06, "loss": 3.4845, "step": 50745 }, { "epoch": 0.5162556966145834, "grad_norm": 12.587255477905273, "learning_rate": 2.376247455243239e-06, "loss": 3.1955, "step": 50750 }, { "epoch": 0.5163065592447916, "grad_norm": 9.452386856079102, "learning_rate": 2.3758480661093683e-06, "loss": 3.3975, "step": 50755 }, { "epoch": 0.516357421875, "grad_norm": 10.14846134185791, "learning_rate": 2.375448680151896e-06, "loss": 3.3591, "step": 50760 }, { "epoch": 0.5164082845052084, "grad_norm": 9.86368179321289, "learning_rate": 2.3750492973810383e-06, "loss": 3.1278, "step": 50765 }, { "epoch": 0.5164591471354166, "grad_norm": 10.385574340820312, "learning_rate": 2.3746499178070155e-06, "loss": 3.4764, "step": 50770 }, { "epoch": 0.516510009765625, "grad_norm": 12.00722599029541, "learning_rate": 2.3742505414400454e-06, "loss": 3.3049, "step": 50775 }, { "epoch": 0.5165608723958334, "grad_norm": 13.417591094970703, "learning_rate": 2.373851168290344e-06, "loss": 3.1833, "step": 50780 }, { "epoch": 0.5166117350260416, "grad_norm": 15.702399253845215, "learning_rate": 2.373451798368131e-06, "loss": 3.1977, "step": 50785 }, { "epoch": 0.51666259765625, "grad_norm": 8.82960033416748, "learning_rate": 2.3730524316836247e-06, "loss": 3.4057, "step": 50790 }, { "epoch": 0.5167134602864584, "grad_norm": 11.624604225158691, "learning_rate": 2.372653068247041e-06, "loss": 3.5341, "step": 50795 }, { "epoch": 0.5167643229166666, "grad_norm": 14.807909965515137, "learning_rate": 2.372253708068598e-06, "loss": 3.719, "step": 50800 }, { "epoch": 0.516815185546875, "grad_norm": 20.163330078125, "learning_rate": 2.371854351158515e-06, "loss": 3.3702, "step": 50805 }, { "epoch": 0.5168660481770834, "grad_norm": 15.910099029541016, "learning_rate": 2.371454997527007e-06, "loss": 3.7183, "step": 50810 }, { "epoch": 0.5169169108072916, "grad_norm": 14.499089241027832, "learning_rate": 2.3710556471842934e-06, "loss": 3.5245, "step": 50815 }, { "epoch": 0.5169677734375, "grad_norm": 9.212532043457031, "learning_rate": 2.370656300140589e-06, "loss": 3.3738, "step": 50820 }, { "epoch": 0.5170186360677084, "grad_norm": 14.878519058227539, "learning_rate": 2.370256956406113e-06, "loss": 3.2764, "step": 50825 }, { "epoch": 0.5170694986979166, "grad_norm": 13.573134422302246, "learning_rate": 2.369857615991083e-06, "loss": 3.3369, "step": 50830 }, { "epoch": 0.517120361328125, "grad_norm": 9.683137893676758, "learning_rate": 2.3694582789057145e-06, "loss": 3.1725, "step": 50835 }, { "epoch": 0.5171712239583334, "grad_norm": 12.827288627624512, "learning_rate": 2.369058945160224e-06, "loss": 3.366, "step": 50840 }, { "epoch": 0.5172220865885416, "grad_norm": 13.126194953918457, "learning_rate": 2.3686596147648306e-06, "loss": 3.2969, "step": 50845 }, { "epoch": 0.51727294921875, "grad_norm": 17.425718307495117, "learning_rate": 2.3682602877297497e-06, "loss": 3.1351, "step": 50850 }, { "epoch": 0.5173238118489584, "grad_norm": 12.274630546569824, "learning_rate": 2.367860964065198e-06, "loss": 3.4009, "step": 50855 }, { "epoch": 0.5173746744791666, "grad_norm": 13.878703117370605, "learning_rate": 2.367461643781391e-06, "loss": 3.0991, "step": 50860 }, { "epoch": 0.517425537109375, "grad_norm": 13.84969425201416, "learning_rate": 2.367062326888548e-06, "loss": 3.1251, "step": 50865 }, { "epoch": 0.5174763997395834, "grad_norm": 7.590649604797363, "learning_rate": 2.3666630133968833e-06, "loss": 3.6181, "step": 50870 }, { "epoch": 0.5175272623697916, "grad_norm": 8.43957805633545, "learning_rate": 2.366263703316614e-06, "loss": 3.6765, "step": 50875 }, { "epoch": 0.517578125, "grad_norm": 13.262419700622559, "learning_rate": 2.3658643966579554e-06, "loss": 3.394, "step": 50880 }, { "epoch": 0.5176289876302084, "grad_norm": 13.628838539123535, "learning_rate": 2.365465093431125e-06, "loss": 3.6081, "step": 50885 }, { "epoch": 0.5176798502604166, "grad_norm": 16.391429901123047, "learning_rate": 2.3650657936463386e-06, "loss": 3.781, "step": 50890 }, { "epoch": 0.517730712890625, "grad_norm": 16.025196075439453, "learning_rate": 2.364666497313811e-06, "loss": 3.2152, "step": 50895 }, { "epoch": 0.5177815755208334, "grad_norm": 9.452840805053711, "learning_rate": 2.3642672044437597e-06, "loss": 3.3014, "step": 50900 }, { "epoch": 0.5178324381510416, "grad_norm": 12.161417007446289, "learning_rate": 2.3638679150464e-06, "loss": 3.2071, "step": 50905 }, { "epoch": 0.51788330078125, "grad_norm": 10.940399169921875, "learning_rate": 2.363468629131947e-06, "loss": 3.0984, "step": 50910 }, { "epoch": 0.5179341634114584, "grad_norm": 9.861112594604492, "learning_rate": 2.3630693467106164e-06, "loss": 3.2047, "step": 50915 }, { "epoch": 0.5179850260416666, "grad_norm": 9.728883743286133, "learning_rate": 2.3626700677926253e-06, "loss": 3.5047, "step": 50920 }, { "epoch": 0.518035888671875, "grad_norm": 14.600275993347168, "learning_rate": 2.362270792388187e-06, "loss": 3.7304, "step": 50925 }, { "epoch": 0.5180867513020834, "grad_norm": 12.862422943115234, "learning_rate": 2.3618715205075187e-06, "loss": 3.7425, "step": 50930 }, { "epoch": 0.5181376139322916, "grad_norm": 10.666069030761719, "learning_rate": 2.3614722521608332e-06, "loss": 3.2768, "step": 50935 }, { "epoch": 0.5181884765625, "grad_norm": 12.862142562866211, "learning_rate": 2.3610729873583484e-06, "loss": 3.1389, "step": 50940 }, { "epoch": 0.5182393391927084, "grad_norm": 13.986405372619629, "learning_rate": 2.3606737261102785e-06, "loss": 3.0806, "step": 50945 }, { "epoch": 0.5182902018229166, "grad_norm": 11.19984245300293, "learning_rate": 2.360274468426838e-06, "loss": 3.6976, "step": 50950 }, { "epoch": 0.518341064453125, "grad_norm": 12.949975967407227, "learning_rate": 2.3598752143182414e-06, "loss": 3.2316, "step": 50955 }, { "epoch": 0.5183919270833334, "grad_norm": 14.107664108276367, "learning_rate": 2.359475963794705e-06, "loss": 3.7034, "step": 50960 }, { "epoch": 0.5184427897135416, "grad_norm": 16.346643447875977, "learning_rate": 2.359076716866443e-06, "loss": 3.275, "step": 50965 }, { "epoch": 0.51849365234375, "grad_norm": 10.664952278137207, "learning_rate": 2.3586774735436697e-06, "loss": 3.1661, "step": 50970 }, { "epoch": 0.5185445149739584, "grad_norm": 9.160752296447754, "learning_rate": 2.358278233836599e-06, "loss": 3.2862, "step": 50975 }, { "epoch": 0.5185953776041666, "grad_norm": 15.120304107666016, "learning_rate": 2.3578789977554465e-06, "loss": 3.3029, "step": 50980 }, { "epoch": 0.518646240234375, "grad_norm": 12.193244934082031, "learning_rate": 2.3574797653104266e-06, "loss": 2.9656, "step": 50985 }, { "epoch": 0.5186971028645834, "grad_norm": 10.405268669128418, "learning_rate": 2.3570805365117523e-06, "loss": 3.1787, "step": 50990 }, { "epoch": 0.5187479654947916, "grad_norm": 13.421646118164062, "learning_rate": 2.3566813113696387e-06, "loss": 3.4537, "step": 50995 }, { "epoch": 0.518798828125, "grad_norm": 9.784440994262695, "learning_rate": 2.3562820898943007e-06, "loss": 3.3648, "step": 51000 }, { "epoch": 0.5188496907552084, "grad_norm": 9.541216850280762, "learning_rate": 2.3558828720959505e-06, "loss": 3.6462, "step": 51005 }, { "epoch": 0.5189005533854166, "grad_norm": 13.812661170959473, "learning_rate": 2.355483657984803e-06, "loss": 3.1048, "step": 51010 }, { "epoch": 0.518951416015625, "grad_norm": 12.758337020874023, "learning_rate": 2.3550844475710714e-06, "loss": 3.4573, "step": 51015 }, { "epoch": 0.5190022786458334, "grad_norm": 8.859757423400879, "learning_rate": 2.3546852408649713e-06, "loss": 3.2942, "step": 51020 }, { "epoch": 0.5190531412760416, "grad_norm": 13.27428913116455, "learning_rate": 2.354286037876715e-06, "loss": 3.3289, "step": 51025 }, { "epoch": 0.51910400390625, "grad_norm": 7.497504711151123, "learning_rate": 2.3538868386165133e-06, "loss": 3.3419, "step": 51030 }, { "epoch": 0.5191548665364584, "grad_norm": 12.467268943786621, "learning_rate": 2.353487643094584e-06, "loss": 3.3565, "step": 51035 }, { "epoch": 0.5192057291666666, "grad_norm": 15.2796630859375, "learning_rate": 2.3530884513211387e-06, "loss": 3.4037, "step": 51040 }, { "epoch": 0.519256591796875, "grad_norm": 13.033889770507812, "learning_rate": 2.352689263306391e-06, "loss": 3.5713, "step": 51045 }, { "epoch": 0.5193074544270834, "grad_norm": 15.384819984436035, "learning_rate": 2.352290079060552e-06, "loss": 3.6361, "step": 51050 }, { "epoch": 0.5193583170572916, "grad_norm": 12.511043548583984, "learning_rate": 2.351890898593837e-06, "loss": 3.4419, "step": 51055 }, { "epoch": 0.5194091796875, "grad_norm": 12.416461944580078, "learning_rate": 2.351491721916459e-06, "loss": 4.2458, "step": 51060 }, { "epoch": 0.5194600423177084, "grad_norm": 12.38286304473877, "learning_rate": 2.3510925490386296e-06, "loss": 3.8934, "step": 51065 }, { "epoch": 0.5195109049479166, "grad_norm": 9.572614669799805, "learning_rate": 2.3506933799705613e-06, "loss": 3.1409, "step": 51070 }, { "epoch": 0.519561767578125, "grad_norm": 11.439574241638184, "learning_rate": 2.3502942147224684e-06, "loss": 3.4687, "step": 51075 }, { "epoch": 0.5196126302083334, "grad_norm": 14.704670906066895, "learning_rate": 2.3498950533045624e-06, "loss": 3.1802, "step": 51080 }, { "epoch": 0.5196634928385416, "grad_norm": 40.19412612915039, "learning_rate": 2.349495895727055e-06, "loss": 3.3865, "step": 51085 }, { "epoch": 0.51971435546875, "grad_norm": 11.445558547973633, "learning_rate": 2.349096742000161e-06, "loss": 3.2296, "step": 51090 }, { "epoch": 0.5197652180989584, "grad_norm": 13.202363014221191, "learning_rate": 2.34869759213409e-06, "loss": 3.9636, "step": 51095 }, { "epoch": 0.5198160807291666, "grad_norm": 8.832242965698242, "learning_rate": 2.3482984461390562e-06, "loss": 3.1758, "step": 51100 }, { "epoch": 0.519866943359375, "grad_norm": 9.222075462341309, "learning_rate": 2.347899304025269e-06, "loss": 3.2653, "step": 51105 }, { "epoch": 0.5199178059895834, "grad_norm": 12.401978492736816, "learning_rate": 2.3475001658029432e-06, "loss": 3.3345, "step": 51110 }, { "epoch": 0.5199686686197916, "grad_norm": 7.745174884796143, "learning_rate": 2.34710103148229e-06, "loss": 3.5809, "step": 51115 }, { "epoch": 0.52001953125, "grad_norm": 13.621798515319824, "learning_rate": 2.3467019010735203e-06, "loss": 3.3218, "step": 51120 }, { "epoch": 0.5200703938802084, "grad_norm": 14.074609756469727, "learning_rate": 2.346302774586845e-06, "loss": 3.9371, "step": 51125 }, { "epoch": 0.5201212565104166, "grad_norm": 11.33251953125, "learning_rate": 2.345903652032478e-06, "loss": 3.6171, "step": 51130 }, { "epoch": 0.520172119140625, "grad_norm": 15.287870407104492, "learning_rate": 2.3455045334206294e-06, "loss": 3.5255, "step": 51135 }, { "epoch": 0.5202229817708334, "grad_norm": 7.853886604309082, "learning_rate": 2.345105418761511e-06, "loss": 3.231, "step": 51140 }, { "epoch": 0.5202738444010416, "grad_norm": 9.591255187988281, "learning_rate": 2.3447063080653327e-06, "loss": 3.2378, "step": 51145 }, { "epoch": 0.52032470703125, "grad_norm": 13.270482063293457, "learning_rate": 2.344307201342307e-06, "loss": 3.4815, "step": 51150 }, { "epoch": 0.5203755696614584, "grad_norm": 19.019012451171875, "learning_rate": 2.3439080986026454e-06, "loss": 3.2569, "step": 51155 }, { "epoch": 0.5204264322916666, "grad_norm": 15.56859302520752, "learning_rate": 2.3435089998565576e-06, "loss": 3.5787, "step": 51160 }, { "epoch": 0.520477294921875, "grad_norm": 11.9397554397583, "learning_rate": 2.3431099051142545e-06, "loss": 3.2714, "step": 51165 }, { "epoch": 0.5205281575520834, "grad_norm": 14.755770683288574, "learning_rate": 2.342710814385948e-06, "loss": 3.5029, "step": 51170 }, { "epoch": 0.5205790201822916, "grad_norm": 9.2692232131958, "learning_rate": 2.342311727681848e-06, "loss": 3.3179, "step": 51175 }, { "epoch": 0.5206298828125, "grad_norm": 11.290838241577148, "learning_rate": 2.3419126450121642e-06, "loss": 3.2469, "step": 51180 }, { "epoch": 0.5206807454427084, "grad_norm": 9.631206512451172, "learning_rate": 2.3415135663871083e-06, "loss": 3.3658, "step": 51185 }, { "epoch": 0.5207316080729166, "grad_norm": 13.162626266479492, "learning_rate": 2.341114491816891e-06, "loss": 3.1031, "step": 51190 }, { "epoch": 0.520782470703125, "grad_norm": 15.770508766174316, "learning_rate": 2.340715421311721e-06, "loss": 3.6408, "step": 51195 }, { "epoch": 0.5208333333333334, "grad_norm": 14.972073554992676, "learning_rate": 2.3403163548818093e-06, "loss": 3.098, "step": 51200 }, { "epoch": 0.5208841959635416, "grad_norm": 12.583430290222168, "learning_rate": 2.339917292537366e-06, "loss": 3.1795, "step": 51205 }, { "epoch": 0.52093505859375, "grad_norm": 14.268254280090332, "learning_rate": 2.339518234288601e-06, "loss": 3.473, "step": 51210 }, { "epoch": 0.5209859212239584, "grad_norm": 13.213074684143066, "learning_rate": 2.3391191801457243e-06, "loss": 3.4323, "step": 51215 }, { "epoch": 0.5210367838541666, "grad_norm": 14.242136001586914, "learning_rate": 2.338720130118944e-06, "loss": 3.3706, "step": 51220 }, { "epoch": 0.521087646484375, "grad_norm": 10.817700386047363, "learning_rate": 2.3383210842184713e-06, "loss": 3.2105, "step": 51225 }, { "epoch": 0.5211385091145834, "grad_norm": 12.685539245605469, "learning_rate": 2.3379220424545164e-06, "loss": 3.3286, "step": 51230 }, { "epoch": 0.5211893717447916, "grad_norm": 12.610810279846191, "learning_rate": 2.3375230048372866e-06, "loss": 3.3509, "step": 51235 }, { "epoch": 0.521240234375, "grad_norm": 12.87966537475586, "learning_rate": 2.3371239713769918e-06, "loss": 3.0719, "step": 51240 }, { "epoch": 0.5212910970052084, "grad_norm": 15.29201602935791, "learning_rate": 2.336724942083843e-06, "loss": 3.5089, "step": 51245 }, { "epoch": 0.5213419596354166, "grad_norm": 6.654240608215332, "learning_rate": 2.3363259169680472e-06, "loss": 2.8616, "step": 51250 }, { "epoch": 0.521392822265625, "grad_norm": 13.208778381347656, "learning_rate": 2.3359268960398145e-06, "loss": 3.2127, "step": 51255 }, { "epoch": 0.5214436848958334, "grad_norm": 12.543509483337402, "learning_rate": 2.335527879309352e-06, "loss": 3.5661, "step": 51260 }, { "epoch": 0.5214945475260416, "grad_norm": 11.997392654418945, "learning_rate": 2.3351288667868705e-06, "loss": 3.1548, "step": 51265 }, { "epoch": 0.52154541015625, "grad_norm": 9.279683113098145, "learning_rate": 2.334729858482578e-06, "loss": 3.0211, "step": 51270 }, { "epoch": 0.5215962727864584, "grad_norm": 12.218412399291992, "learning_rate": 2.3343308544066823e-06, "loss": 3.2288, "step": 51275 }, { "epoch": 0.5216471354166666, "grad_norm": 11.581144332885742, "learning_rate": 2.3339318545693924e-06, "loss": 3.3161, "step": 51280 }, { "epoch": 0.521697998046875, "grad_norm": 13.967557907104492, "learning_rate": 2.3335328589809177e-06, "loss": 3.2799, "step": 51285 }, { "epoch": 0.5217488606770834, "grad_norm": 10.300307273864746, "learning_rate": 2.3331338676514648e-06, "loss": 3.5126, "step": 51290 }, { "epoch": 0.5217997233072916, "grad_norm": 11.853108406066895, "learning_rate": 2.3327348805912417e-06, "loss": 3.2095, "step": 51295 }, { "epoch": 0.5218505859375, "grad_norm": 13.32164192199707, "learning_rate": 2.332335897810458e-06, "loss": 3.4154, "step": 51300 }, { "epoch": 0.5219014485677084, "grad_norm": 17.006755828857422, "learning_rate": 2.3319369193193203e-06, "loss": 3.0868, "step": 51305 }, { "epoch": 0.5219523111979166, "grad_norm": 11.63281536102295, "learning_rate": 2.331537945128037e-06, "loss": 3.0452, "step": 51310 }, { "epoch": 0.522003173828125, "grad_norm": 8.596928596496582, "learning_rate": 2.3311389752468144e-06, "loss": 3.5104, "step": 51315 }, { "epoch": 0.5220540364583334, "grad_norm": 13.741214752197266, "learning_rate": 2.330740009685862e-06, "loss": 3.1809, "step": 51320 }, { "epoch": 0.5221048990885416, "grad_norm": 9.251409530639648, "learning_rate": 2.3303410484553863e-06, "loss": 3.7666, "step": 51325 }, { "epoch": 0.52215576171875, "grad_norm": 13.186622619628906, "learning_rate": 2.3299420915655942e-06, "loss": 3.5376, "step": 51330 }, { "epoch": 0.5222066243489584, "grad_norm": 10.63139533996582, "learning_rate": 2.329543139026693e-06, "loss": 3.2363, "step": 51335 }, { "epoch": 0.5222574869791666, "grad_norm": 15.087735176086426, "learning_rate": 2.3291441908488908e-06, "loss": 2.8915, "step": 51340 }, { "epoch": 0.522308349609375, "grad_norm": 8.385112762451172, "learning_rate": 2.328745247042394e-06, "loss": 3.5765, "step": 51345 }, { "epoch": 0.5223592122395834, "grad_norm": 7.616689205169678, "learning_rate": 2.3283463076174097e-06, "loss": 3.5431, "step": 51350 }, { "epoch": 0.5224100748697916, "grad_norm": 8.684670448303223, "learning_rate": 2.3279473725841437e-06, "loss": 3.1274, "step": 51355 }, { "epoch": 0.5224609375, "grad_norm": 14.156767845153809, "learning_rate": 2.3275484419528045e-06, "loss": 3.6883, "step": 51360 }, { "epoch": 0.5225118001302084, "grad_norm": 17.684885025024414, "learning_rate": 2.3271495157335965e-06, "loss": 3.2219, "step": 51365 }, { "epoch": 0.5225626627604166, "grad_norm": 11.786378860473633, "learning_rate": 2.3267505939367284e-06, "loss": 3.0865, "step": 51370 }, { "epoch": 0.522613525390625, "grad_norm": 11.846403121948242, "learning_rate": 2.3263516765724036e-06, "loss": 3.993, "step": 51375 }, { "epoch": 0.5226643880208334, "grad_norm": 12.272603988647461, "learning_rate": 2.325952763650831e-06, "loss": 3.5374, "step": 51380 }, { "epoch": 0.5227152506510416, "grad_norm": 10.249964714050293, "learning_rate": 2.3255538551822157e-06, "loss": 3.1231, "step": 51385 }, { "epoch": 0.52276611328125, "grad_norm": 12.157092094421387, "learning_rate": 2.325154951176763e-06, "loss": 3.5138, "step": 51390 }, { "epoch": 0.5228169759114584, "grad_norm": 11.733100891113281, "learning_rate": 2.32475605164468e-06, "loss": 3.5915, "step": 51395 }, { "epoch": 0.5228678385416666, "grad_norm": 9.639185905456543, "learning_rate": 2.3243571565961724e-06, "loss": 3.3849, "step": 51400 }, { "epoch": 0.522918701171875, "grad_norm": 13.651537895202637, "learning_rate": 2.3239582660414446e-06, "loss": 3.294, "step": 51405 }, { "epoch": 0.5229695638020834, "grad_norm": 8.989803314208984, "learning_rate": 2.3235593799907022e-06, "loss": 3.4397, "step": 51410 }, { "epoch": 0.5230204264322916, "grad_norm": 9.389153480529785, "learning_rate": 2.3231604984541525e-06, "loss": 3.2164, "step": 51415 }, { "epoch": 0.5230712890625, "grad_norm": 13.706945419311523, "learning_rate": 2.322761621441999e-06, "loss": 3.1938, "step": 51420 }, { "epoch": 0.5231221516927084, "grad_norm": 14.718073844909668, "learning_rate": 2.3223627489644478e-06, "loss": 3.3858, "step": 51425 }, { "epoch": 0.5231730143229166, "grad_norm": 14.35042667388916, "learning_rate": 2.321963881031703e-06, "loss": 3.2756, "step": 51430 }, { "epoch": 0.523223876953125, "grad_norm": 7.722215175628662, "learning_rate": 2.32156501765397e-06, "loss": 3.3564, "step": 51435 }, { "epoch": 0.5232747395833334, "grad_norm": 13.248672485351562, "learning_rate": 2.3211661588414546e-06, "loss": 3.3001, "step": 51440 }, { "epoch": 0.5233256022135416, "grad_norm": 9.243656158447266, "learning_rate": 2.32076730460436e-06, "loss": 3.3604, "step": 51445 }, { "epoch": 0.52337646484375, "grad_norm": 8.182433128356934, "learning_rate": 2.320368454952891e-06, "loss": 3.0259, "step": 51450 }, { "epoch": 0.5234273274739584, "grad_norm": 14.732831001281738, "learning_rate": 2.3199696098972535e-06, "loss": 3.588, "step": 51455 }, { "epoch": 0.5234781901041666, "grad_norm": 11.189901351928711, "learning_rate": 2.31957076944765e-06, "loss": 3.3701, "step": 51460 }, { "epoch": 0.523529052734375, "grad_norm": 10.825516700744629, "learning_rate": 2.3191719336142864e-06, "loss": 3.2404, "step": 51465 }, { "epoch": 0.5235799153645834, "grad_norm": 14.079922676086426, "learning_rate": 2.318773102407365e-06, "loss": 3.1259, "step": 51470 }, { "epoch": 0.5236307779947916, "grad_norm": 15.275527000427246, "learning_rate": 2.318374275837092e-06, "loss": 3.4218, "step": 51475 }, { "epoch": 0.523681640625, "grad_norm": 13.454855918884277, "learning_rate": 2.3179754539136693e-06, "loss": 3.2168, "step": 51480 }, { "epoch": 0.5237325032552084, "grad_norm": 10.511768341064453, "learning_rate": 2.317576636647301e-06, "loss": 3.367, "step": 51485 }, { "epoch": 0.5237833658854166, "grad_norm": 12.318464279174805, "learning_rate": 2.317177824048192e-06, "loss": 2.9529, "step": 51490 }, { "epoch": 0.523834228515625, "grad_norm": 9.753463745117188, "learning_rate": 2.316779016126545e-06, "loss": 3.1118, "step": 51495 }, { "epoch": 0.5238850911458334, "grad_norm": 12.015084266662598, "learning_rate": 2.3163802128925637e-06, "loss": 3.4823, "step": 51500 }, { "epoch": 0.5239359537760416, "grad_norm": 10.5300874710083, "learning_rate": 2.3159814143564506e-06, "loss": 3.3846, "step": 51505 }, { "epoch": 0.52398681640625, "grad_norm": 9.35047435760498, "learning_rate": 2.315582620528409e-06, "loss": 3.1737, "step": 51510 }, { "epoch": 0.5240376790364584, "grad_norm": 10.02412223815918, "learning_rate": 2.3151838314186437e-06, "loss": 3.1713, "step": 51515 }, { "epoch": 0.5240885416666666, "grad_norm": 14.379279136657715, "learning_rate": 2.3147850470373558e-06, "loss": 3.3371, "step": 51520 }, { "epoch": 0.524139404296875, "grad_norm": 9.510961532592773, "learning_rate": 2.3143862673947475e-06, "loss": 3.3166, "step": 51525 }, { "epoch": 0.5241902669270834, "grad_norm": 12.390170097351074, "learning_rate": 2.3139874925010235e-06, "loss": 3.0833, "step": 51530 }, { "epoch": 0.5242411295572916, "grad_norm": 16.991884231567383, "learning_rate": 2.313588722366385e-06, "loss": 3.4757, "step": 51535 }, { "epoch": 0.5242919921875, "grad_norm": 10.350554466247559, "learning_rate": 2.313189957001036e-06, "loss": 3.18, "step": 51540 }, { "epoch": 0.5243428548177084, "grad_norm": 11.769200325012207, "learning_rate": 2.312791196415176e-06, "loss": 3.317, "step": 51545 }, { "epoch": 0.5243937174479166, "grad_norm": 13.403620719909668, "learning_rate": 2.3123924406190098e-06, "loss": 5.084, "step": 51550 }, { "epoch": 0.524444580078125, "grad_norm": 15.726815223693848, "learning_rate": 2.311993689622739e-06, "loss": 3.2429, "step": 51555 }, { "epoch": 0.5244954427083334, "grad_norm": 11.416741371154785, "learning_rate": 2.311594943436564e-06, "loss": 3.0246, "step": 51560 }, { "epoch": 0.5245463053385416, "grad_norm": 13.953176498413086, "learning_rate": 2.311196202070688e-06, "loss": 3.0309, "step": 51565 }, { "epoch": 0.52459716796875, "grad_norm": 11.300572395324707, "learning_rate": 2.3107974655353125e-06, "loss": 3.1699, "step": 51570 }, { "epoch": 0.5246480305989584, "grad_norm": 13.902636528015137, "learning_rate": 2.3103987338406394e-06, "loss": 3.2971, "step": 51575 }, { "epoch": 0.5246988932291666, "grad_norm": 14.147994041442871, "learning_rate": 2.310000006996869e-06, "loss": 2.9933, "step": 51580 }, { "epoch": 0.524749755859375, "grad_norm": 10.730506896972656, "learning_rate": 2.309601285014204e-06, "loss": 3.0943, "step": 51585 }, { "epoch": 0.5248006184895834, "grad_norm": 15.618927001953125, "learning_rate": 2.3092025679028447e-06, "loss": 3.2672, "step": 51590 }, { "epoch": 0.5248514811197916, "grad_norm": 11.174661636352539, "learning_rate": 2.3088038556729934e-06, "loss": 3.5472, "step": 51595 }, { "epoch": 0.52490234375, "grad_norm": 14.61530590057373, "learning_rate": 2.3084051483348485e-06, "loss": 3.1849, "step": 51600 }, { "epoch": 0.5249532063802084, "grad_norm": 7.89102029800415, "learning_rate": 2.3080064458986136e-06, "loss": 3.3216, "step": 51605 }, { "epoch": 0.5250040690104166, "grad_norm": 12.069526672363281, "learning_rate": 2.3076077483744884e-06, "loss": 3.2755, "step": 51610 }, { "epoch": 0.525054931640625, "grad_norm": 12.264208793640137, "learning_rate": 2.307209055772673e-06, "loss": 3.3825, "step": 51615 }, { "epoch": 0.5251057942708334, "grad_norm": 18.144428253173828, "learning_rate": 2.3068103681033672e-06, "loss": 3.5544, "step": 51620 }, { "epoch": 0.5251566569010416, "grad_norm": 12.813551902770996, "learning_rate": 2.306411685376774e-06, "loss": 3.4971, "step": 51625 }, { "epoch": 0.52520751953125, "grad_norm": 10.434419631958008, "learning_rate": 2.3060130076030907e-06, "loss": 3.1101, "step": 51630 }, { "epoch": 0.5252583821614584, "grad_norm": 10.288335800170898, "learning_rate": 2.30561433479252e-06, "loss": 3.1028, "step": 51635 }, { "epoch": 0.5253092447916666, "grad_norm": 9.808342933654785, "learning_rate": 2.305215666955258e-06, "loss": 3.2641, "step": 51640 }, { "epoch": 0.525360107421875, "grad_norm": 11.223031044006348, "learning_rate": 2.3048170041015095e-06, "loss": 3.0565, "step": 51645 }, { "epoch": 0.5254109700520834, "grad_norm": 13.923452377319336, "learning_rate": 2.3044183462414706e-06, "loss": 3.2248, "step": 51650 }, { "epoch": 0.5254618326822916, "grad_norm": 9.677695274353027, "learning_rate": 2.304019693385343e-06, "loss": 3.2461, "step": 51655 }, { "epoch": 0.5255126953125, "grad_norm": 14.185688018798828, "learning_rate": 2.3036210455433236e-06, "loss": 3.252, "step": 51660 }, { "epoch": 0.5255635579427084, "grad_norm": 9.480224609375, "learning_rate": 2.3032224027256137e-06, "loss": 3.7707, "step": 51665 }, { "epoch": 0.5256144205729166, "grad_norm": 14.351865768432617, "learning_rate": 2.3028237649424127e-06, "loss": 3.4189, "step": 51670 }, { "epoch": 0.525665283203125, "grad_norm": 7.74800443649292, "learning_rate": 2.3024251322039175e-06, "loss": 3.4073, "step": 51675 }, { "epoch": 0.5257161458333334, "grad_norm": 11.025547981262207, "learning_rate": 2.3020265045203295e-06, "loss": 3.0491, "step": 51680 }, { "epoch": 0.5257670084635416, "grad_norm": 9.585078239440918, "learning_rate": 2.301627881901847e-06, "loss": 3.041, "step": 51685 }, { "epoch": 0.52581787109375, "grad_norm": 12.840312957763672, "learning_rate": 2.301229264358667e-06, "loss": 3.3416, "step": 51690 }, { "epoch": 0.5258687337239584, "grad_norm": 13.87100887298584, "learning_rate": 2.300830651900989e-06, "loss": 3.3369, "step": 51695 }, { "epoch": 0.5259195963541666, "grad_norm": 7.698983669281006, "learning_rate": 2.3004320445390126e-06, "loss": 3.2423, "step": 51700 }, { "epoch": 0.525970458984375, "grad_norm": 12.41983413696289, "learning_rate": 2.3000334422829344e-06, "loss": 3.0699, "step": 51705 }, { "epoch": 0.5260213216145834, "grad_norm": 10.423108100891113, "learning_rate": 2.299634845142954e-06, "loss": 3.4923, "step": 51710 }, { "epoch": 0.5260721842447916, "grad_norm": 9.7667236328125, "learning_rate": 2.2992362531292674e-06, "loss": 3.4751, "step": 51715 }, { "epoch": 0.526123046875, "grad_norm": 7.892971992492676, "learning_rate": 2.2988376662520744e-06, "loss": 3.2019, "step": 51720 }, { "epoch": 0.5261739095052084, "grad_norm": 10.178163528442383, "learning_rate": 2.298439084521572e-06, "loss": 3.52, "step": 51725 }, { "epoch": 0.5262247721354166, "grad_norm": 12.569657325744629, "learning_rate": 2.2980405079479577e-06, "loss": 2.9527, "step": 51730 }, { "epoch": 0.526275634765625, "grad_norm": 12.244410514831543, "learning_rate": 2.297641936541429e-06, "loss": 3.2829, "step": 51735 }, { "epoch": 0.5263264973958334, "grad_norm": 8.741074562072754, "learning_rate": 2.2972433703121835e-06, "loss": 3.2828, "step": 51740 }, { "epoch": 0.5263773600260416, "grad_norm": 12.224329948425293, "learning_rate": 2.2968448092704186e-06, "loss": 3.366, "step": 51745 }, { "epoch": 0.52642822265625, "grad_norm": 15.625667572021484, "learning_rate": 2.2964462534263313e-06, "loss": 3.4129, "step": 51750 }, { "epoch": 0.5264790852864584, "grad_norm": 14.998575210571289, "learning_rate": 2.296047702790117e-06, "loss": 3.1494, "step": 51755 }, { "epoch": 0.5265299479166666, "grad_norm": 11.03321647644043, "learning_rate": 2.295649157371975e-06, "loss": 3.4301, "step": 51760 }, { "epoch": 0.526580810546875, "grad_norm": 12.48939037322998, "learning_rate": 2.295250617182101e-06, "loss": 3.3599, "step": 51765 }, { "epoch": 0.5266316731770834, "grad_norm": 10.886321067810059, "learning_rate": 2.2948520822306906e-06, "loss": 3.5091, "step": 51770 }, { "epoch": 0.5266825358072916, "grad_norm": 19.278493881225586, "learning_rate": 2.294453552527941e-06, "loss": 3.2808, "step": 51775 }, { "epoch": 0.5267333984375, "grad_norm": 11.889963150024414, "learning_rate": 2.29405502808405e-06, "loss": 3.1522, "step": 51780 }, { "epoch": 0.5267842610677084, "grad_norm": 13.487797737121582, "learning_rate": 2.293656508909211e-06, "loss": 3.5732, "step": 51785 }, { "epoch": 0.5268351236979166, "grad_norm": 13.162525177001953, "learning_rate": 2.2932579950136215e-06, "loss": 3.6082, "step": 51790 }, { "epoch": 0.526885986328125, "grad_norm": 9.454289436340332, "learning_rate": 2.292859486407477e-06, "loss": 3.153, "step": 51795 }, { "epoch": 0.5269368489583334, "grad_norm": 9.60610294342041, "learning_rate": 2.2924609831009745e-06, "loss": 3.205, "step": 51800 }, { "epoch": 0.5269877115885416, "grad_norm": 8.241484642028809, "learning_rate": 2.2920624851043077e-06, "loss": 3.2567, "step": 51805 }, { "epoch": 0.52703857421875, "grad_norm": 9.547820091247559, "learning_rate": 2.2916639924276727e-06, "loss": 3.6178, "step": 51810 }, { "epoch": 0.5270894368489584, "grad_norm": 13.794641494750977, "learning_rate": 2.2912655050812657e-06, "loss": 3.4859, "step": 51815 }, { "epoch": 0.5271402994791666, "grad_norm": 16.587139129638672, "learning_rate": 2.290867023075281e-06, "loss": 3.2263, "step": 51820 }, { "epoch": 0.527191162109375, "grad_norm": 16.068571090698242, "learning_rate": 2.2904685464199144e-06, "loss": 3.5605, "step": 51825 }, { "epoch": 0.5272420247395834, "grad_norm": 13.112360000610352, "learning_rate": 2.2900700751253595e-06, "loss": 3.6708, "step": 51830 }, { "epoch": 0.5272928873697916, "grad_norm": 10.92347526550293, "learning_rate": 2.2896716092018125e-06, "loss": 3.278, "step": 51835 }, { "epoch": 0.52734375, "grad_norm": 11.687875747680664, "learning_rate": 2.289273148659468e-06, "loss": 3.218, "step": 51840 }, { "epoch": 0.5273946126302084, "grad_norm": 12.249234199523926, "learning_rate": 2.2888746935085194e-06, "loss": 3.1509, "step": 51845 }, { "epoch": 0.5274454752604166, "grad_norm": 11.752399444580078, "learning_rate": 2.2884762437591614e-06, "loss": 3.3611, "step": 51850 }, { "epoch": 0.527496337890625, "grad_norm": 11.20430850982666, "learning_rate": 2.2880777994215893e-06, "loss": 3.5416, "step": 51855 }, { "epoch": 0.5275472005208334, "grad_norm": 10.635947227478027, "learning_rate": 2.287679360505996e-06, "loss": 3.4996, "step": 51860 }, { "epoch": 0.5275980631510416, "grad_norm": 12.224534034729004, "learning_rate": 2.2872809270225756e-06, "loss": 3.6393, "step": 51865 }, { "epoch": 0.52764892578125, "grad_norm": 9.653603553771973, "learning_rate": 2.2868824989815235e-06, "loss": 3.3679, "step": 51870 }, { "epoch": 0.5276997884114584, "grad_norm": 15.11462688446045, "learning_rate": 2.286484076393031e-06, "loss": 3.3414, "step": 51875 }, { "epoch": 0.5277506510416666, "grad_norm": 9.089104652404785, "learning_rate": 2.2860856592672937e-06, "loss": 3.5182, "step": 51880 }, { "epoch": 0.527801513671875, "grad_norm": 11.646150588989258, "learning_rate": 2.285687247614503e-06, "loss": 3.7024, "step": 51885 }, { "epoch": 0.5278523763020834, "grad_norm": 11.647123336791992, "learning_rate": 2.285288841444854e-06, "loss": 2.8685, "step": 51890 }, { "epoch": 0.5279032389322916, "grad_norm": 10.295969009399414, "learning_rate": 2.2848904407685392e-06, "loss": 3.3256, "step": 51895 }, { "epoch": 0.5279541015625, "grad_norm": 13.555588722229004, "learning_rate": 2.2844920455957515e-06, "loss": 3.3036, "step": 51900 }, { "epoch": 0.5280049641927084, "grad_norm": 16.191238403320312, "learning_rate": 2.2840936559366827e-06, "loss": 3.0817, "step": 51905 }, { "epoch": 0.5280558268229166, "grad_norm": 12.715621948242188, "learning_rate": 2.283695271801528e-06, "loss": 3.6958, "step": 51910 }, { "epoch": 0.528106689453125, "grad_norm": 15.330379486083984, "learning_rate": 2.2832968932004777e-06, "loss": 3.2108, "step": 51915 }, { "epoch": 0.5281575520833334, "grad_norm": 13.766459465026855, "learning_rate": 2.2828985201437257e-06, "loss": 3.4376, "step": 51920 }, { "epoch": 0.5282084147135416, "grad_norm": 11.895289421081543, "learning_rate": 2.2825001526414626e-06, "loss": 2.946, "step": 51925 }, { "epoch": 0.52825927734375, "grad_norm": 12.957642555236816, "learning_rate": 2.282101790703882e-06, "loss": 3.0694, "step": 51930 }, { "epoch": 0.5283101399739584, "grad_norm": 12.726977348327637, "learning_rate": 2.281703434341176e-06, "loss": 3.1, "step": 51935 }, { "epoch": 0.5283610026041666, "grad_norm": 10.04681396484375, "learning_rate": 2.281305083563535e-06, "loss": 3.5245, "step": 51940 }, { "epoch": 0.528411865234375, "grad_norm": 9.157989501953125, "learning_rate": 2.2809067383811514e-06, "loss": 3.4185, "step": 51945 }, { "epoch": 0.5284627278645834, "grad_norm": 11.363675117492676, "learning_rate": 2.2805083988042178e-06, "loss": 2.9623, "step": 51950 }, { "epoch": 0.5285135904947916, "grad_norm": 18.07217025756836, "learning_rate": 2.280110064842925e-06, "loss": 3.286, "step": 51955 }, { "epoch": 0.528564453125, "grad_norm": 11.635565757751465, "learning_rate": 2.279711736507463e-06, "loss": 3.6186, "step": 51960 }, { "epoch": 0.5286153157552084, "grad_norm": 14.110377311706543, "learning_rate": 2.2793134138080243e-06, "loss": 2.961, "step": 51965 }, { "epoch": 0.5286661783854166, "grad_norm": 9.997685432434082, "learning_rate": 2.2789150967548e-06, "loss": 3.1046, "step": 51970 }, { "epoch": 0.528717041015625, "grad_norm": 10.754714012145996, "learning_rate": 2.2785167853579808e-06, "loss": 3.6072, "step": 51975 }, { "epoch": 0.5287679036458334, "grad_norm": 9.805929183959961, "learning_rate": 2.2781184796277557e-06, "loss": 3.3923, "step": 51980 }, { "epoch": 0.5288187662760416, "grad_norm": 9.262873649597168, "learning_rate": 2.2777201795743187e-06, "loss": 3.3945, "step": 51985 }, { "epoch": 0.52886962890625, "grad_norm": 9.637592315673828, "learning_rate": 2.277321885207857e-06, "loss": 3.1716, "step": 51990 }, { "epoch": 0.5289204915364584, "grad_norm": 7.525077819824219, "learning_rate": 2.2769235965385632e-06, "loss": 3.2199, "step": 51995 }, { "epoch": 0.5289713541666666, "grad_norm": 11.829056739807129, "learning_rate": 2.276525313576625e-06, "loss": 3.1419, "step": 52000 }, { "epoch": 0.529022216796875, "grad_norm": 10.279789924621582, "learning_rate": 2.2761270363322338e-06, "loss": 2.9866, "step": 52005 }, { "epoch": 0.5290730794270834, "grad_norm": 17.86949920654297, "learning_rate": 2.2757287648155803e-06, "loss": 3.3813, "step": 52010 }, { "epoch": 0.5291239420572916, "grad_norm": 8.782398223876953, "learning_rate": 2.2753304990368525e-06, "loss": 3.3566, "step": 52015 }, { "epoch": 0.5291748046875, "grad_norm": 10.083893775939941, "learning_rate": 2.27493223900624e-06, "loss": 3.37, "step": 52020 }, { "epoch": 0.5292256673177084, "grad_norm": 12.253583908081055, "learning_rate": 2.274533984733934e-06, "loss": 3.3056, "step": 52025 }, { "epoch": 0.5292765299479166, "grad_norm": 10.968459129333496, "learning_rate": 2.274135736230122e-06, "loss": 3.1398, "step": 52030 }, { "epoch": 0.529327392578125, "grad_norm": 13.678936004638672, "learning_rate": 2.2737374935049946e-06, "loss": 3.0078, "step": 52035 }, { "epoch": 0.5293782552083334, "grad_norm": 14.763940811157227, "learning_rate": 2.2733392565687377e-06, "loss": 3.3282, "step": 52040 }, { "epoch": 0.5294291178385416, "grad_norm": 8.088820457458496, "learning_rate": 2.2729410254315437e-06, "loss": 3.1962, "step": 52045 }, { "epoch": 0.52947998046875, "grad_norm": 13.720199584960938, "learning_rate": 2.2725428001036003e-06, "loss": 3.3752, "step": 52050 }, { "epoch": 0.5295308430989584, "grad_norm": 13.48266315460205, "learning_rate": 2.272144580595094e-06, "loss": 3.7975, "step": 52055 }, { "epoch": 0.5295817057291666, "grad_norm": 11.64840316772461, "learning_rate": 2.271746366916215e-06, "loss": 3.4526, "step": 52060 }, { "epoch": 0.529632568359375, "grad_norm": 18.696725845336914, "learning_rate": 2.2713481590771516e-06, "loss": 3.4287, "step": 52065 }, { "epoch": 0.5296834309895834, "grad_norm": 14.861400604248047, "learning_rate": 2.270949957088091e-06, "loss": 3.342, "step": 52070 }, { "epoch": 0.5297342936197916, "grad_norm": 7.6815876960754395, "learning_rate": 2.270551760959221e-06, "loss": 3.5648, "step": 52075 }, { "epoch": 0.52978515625, "grad_norm": 9.49208927154541, "learning_rate": 2.270153570700731e-06, "loss": 3.0591, "step": 52080 }, { "epoch": 0.5298360188802084, "grad_norm": 11.423577308654785, "learning_rate": 2.269755386322807e-06, "loss": 3.3461, "step": 52085 }, { "epoch": 0.5298868815104166, "grad_norm": 8.663924217224121, "learning_rate": 2.2693572078356367e-06, "loss": 3.2918, "step": 52090 }, { "epoch": 0.529937744140625, "grad_norm": 17.198579788208008, "learning_rate": 2.2689590352494063e-06, "loss": 2.9642, "step": 52095 }, { "epoch": 0.5299886067708334, "grad_norm": 7.101471424102783, "learning_rate": 2.268560868574306e-06, "loss": 3.2453, "step": 52100 }, { "epoch": 0.5300394694010416, "grad_norm": 12.237752914428711, "learning_rate": 2.2681627078205205e-06, "loss": 3.2128, "step": 52105 }, { "epoch": 0.53009033203125, "grad_norm": 9.680429458618164, "learning_rate": 2.2677645529982375e-06, "loss": 3.3987, "step": 52110 }, { "epoch": 0.5301411946614584, "grad_norm": 10.857959747314453, "learning_rate": 2.2673664041176425e-06, "loss": 3.1378, "step": 52115 }, { "epoch": 0.5301920572916666, "grad_norm": 10.731316566467285, "learning_rate": 2.266968261188923e-06, "loss": 3.4948, "step": 52120 }, { "epoch": 0.530242919921875, "grad_norm": 14.895269393920898, "learning_rate": 2.2665701242222666e-06, "loss": 3.3257, "step": 52125 }, { "epoch": 0.5302937825520834, "grad_norm": 17.698509216308594, "learning_rate": 2.266171993227857e-06, "loss": 3.9778, "step": 52130 }, { "epoch": 0.5303446451822916, "grad_norm": 12.754210472106934, "learning_rate": 2.2657738682158812e-06, "loss": 3.0264, "step": 52135 }, { "epoch": 0.5303955078125, "grad_norm": 12.280755996704102, "learning_rate": 2.265375749196527e-06, "loss": 3.2183, "step": 52140 }, { "epoch": 0.5304463704427084, "grad_norm": 9.895425796508789, "learning_rate": 2.2649776361799777e-06, "loss": 3.253, "step": 52145 }, { "epoch": 0.5304972330729166, "grad_norm": 14.172664642333984, "learning_rate": 2.26457952917642e-06, "loss": 3.5023, "step": 52150 }, { "epoch": 0.530548095703125, "grad_norm": 17.2047119140625, "learning_rate": 2.264181428196039e-06, "loss": 2.8805, "step": 52155 }, { "epoch": 0.5305989583333334, "grad_norm": 9.435441970825195, "learning_rate": 2.2637833332490203e-06, "loss": 3.1764, "step": 52160 }, { "epoch": 0.5306498209635416, "grad_norm": 12.391535758972168, "learning_rate": 2.2633852443455497e-06, "loss": 3.2382, "step": 52165 }, { "epoch": 0.53070068359375, "grad_norm": 13.898744583129883, "learning_rate": 2.2629871614958104e-06, "loss": 3.1006, "step": 52170 }, { "epoch": 0.5307515462239584, "grad_norm": 12.971320152282715, "learning_rate": 2.262589084709989e-06, "loss": 3.5297, "step": 52175 }, { "epoch": 0.5308024088541666, "grad_norm": 8.470605850219727, "learning_rate": 2.26219101399827e-06, "loss": 3.3854, "step": 52180 }, { "epoch": 0.530853271484375, "grad_norm": 10.319451332092285, "learning_rate": 2.261792949370837e-06, "loss": 3.3321, "step": 52185 }, { "epoch": 0.5309041341145834, "grad_norm": 8.604473114013672, "learning_rate": 2.2613948908378748e-06, "loss": 3.3155, "step": 52190 }, { "epoch": 0.5309549967447916, "grad_norm": 9.907164573669434, "learning_rate": 2.260996838409568e-06, "loss": 3.2708, "step": 52195 }, { "epoch": 0.531005859375, "grad_norm": 9.253422737121582, "learning_rate": 2.2605987920961004e-06, "loss": 3.3554, "step": 52200 }, { "epoch": 0.5310567220052084, "grad_norm": 11.168255805969238, "learning_rate": 2.260200751907657e-06, "loss": 3.3023, "step": 52205 }, { "epoch": 0.5311075846354166, "grad_norm": 6.434413433074951, "learning_rate": 2.2598027178544186e-06, "loss": 3.239, "step": 52210 }, { "epoch": 0.531158447265625, "grad_norm": 10.66589641571045, "learning_rate": 2.259404689946572e-06, "loss": 3.4369, "step": 52215 }, { "epoch": 0.5312093098958334, "grad_norm": 11.483353614807129, "learning_rate": 2.259006668194299e-06, "loss": 3.2524, "step": 52220 }, { "epoch": 0.5312601725260416, "grad_norm": 14.248106002807617, "learning_rate": 2.2586086526077837e-06, "loss": 3.3969, "step": 52225 }, { "epoch": 0.53131103515625, "grad_norm": 16.453853607177734, "learning_rate": 2.258210643197208e-06, "loss": 2.9747, "step": 52230 }, { "epoch": 0.5313618977864584, "grad_norm": 11.427420616149902, "learning_rate": 2.2578126399727564e-06, "loss": 3.4672, "step": 52235 }, { "epoch": 0.5314127604166666, "grad_norm": 12.872052192687988, "learning_rate": 2.257414642944611e-06, "loss": 3.1296, "step": 52240 }, { "epoch": 0.531463623046875, "grad_norm": 17.460342407226562, "learning_rate": 2.2570166521229545e-06, "loss": 3.1822, "step": 52245 }, { "epoch": 0.5315144856770834, "grad_norm": 15.031963348388672, "learning_rate": 2.256618667517968e-06, "loss": 3.5697, "step": 52250 }, { "epoch": 0.5315653483072916, "grad_norm": 18.319963455200195, "learning_rate": 2.2562206891398367e-06, "loss": 3.3926, "step": 52255 }, { "epoch": 0.5316162109375, "grad_norm": 15.066197395324707, "learning_rate": 2.2558227169987407e-06, "loss": 3.341, "step": 52260 }, { "epoch": 0.5316670735677084, "grad_norm": 9.521345138549805, "learning_rate": 2.255424751104862e-06, "loss": 3.0247, "step": 52265 }, { "epoch": 0.5317179361979166, "grad_norm": 16.426836013793945, "learning_rate": 2.2550267914683845e-06, "loss": 3.0748, "step": 52270 }, { "epoch": 0.531768798828125, "grad_norm": 7.882839679718018, "learning_rate": 2.2546288380994876e-06, "loss": 3.1708, "step": 52275 }, { "epoch": 0.5318196614583334, "grad_norm": 17.430219650268555, "learning_rate": 2.2542308910083546e-06, "loss": 3.5405, "step": 52280 }, { "epoch": 0.5318705240885416, "grad_norm": 12.507336616516113, "learning_rate": 2.2538329502051647e-06, "loss": 3.4098, "step": 52285 }, { "epoch": 0.53192138671875, "grad_norm": 16.876689910888672, "learning_rate": 2.253435015700101e-06, "loss": 3.5465, "step": 52290 }, { "epoch": 0.5319722493489584, "grad_norm": 13.726040840148926, "learning_rate": 2.253037087503345e-06, "loss": 3.726, "step": 52295 }, { "epoch": 0.5320231119791666, "grad_norm": 9.711737632751465, "learning_rate": 2.252639165625075e-06, "loss": 3.4336, "step": 52300 }, { "epoch": 0.532073974609375, "grad_norm": 14.651636123657227, "learning_rate": 2.2522412500754733e-06, "loss": 3.1465, "step": 52305 }, { "epoch": 0.5321248372395834, "grad_norm": 8.425837516784668, "learning_rate": 2.251843340864722e-06, "loss": 3.1489, "step": 52310 }, { "epoch": 0.5321756998697916, "grad_norm": 16.434213638305664, "learning_rate": 2.251445438002999e-06, "loss": 3.6863, "step": 52315 }, { "epoch": 0.5322265625, "grad_norm": 12.590494155883789, "learning_rate": 2.251047541500486e-06, "loss": 3.3995, "step": 52320 }, { "epoch": 0.5322774251302084, "grad_norm": 17.571537017822266, "learning_rate": 2.250649651367362e-06, "loss": 3.4516, "step": 52325 }, { "epoch": 0.5323282877604166, "grad_norm": 13.119302749633789, "learning_rate": 2.2502517676138074e-06, "loss": 3.4861, "step": 52330 }, { "epoch": 0.532379150390625, "grad_norm": 13.304254531860352, "learning_rate": 2.2498538902500035e-06, "loss": 3.2404, "step": 52335 }, { "epoch": 0.5324300130208334, "grad_norm": 13.901344299316406, "learning_rate": 2.2494560192861273e-06, "loss": 3.4191, "step": 52340 }, { "epoch": 0.5324808756510416, "grad_norm": 9.741520881652832, "learning_rate": 2.2490581547323593e-06, "loss": 3.2748, "step": 52345 }, { "epoch": 0.53253173828125, "grad_norm": 9.868925094604492, "learning_rate": 2.24866029659888e-06, "loss": 3.1799, "step": 52350 }, { "epoch": 0.5325826009114584, "grad_norm": 9.409064292907715, "learning_rate": 2.248262444895867e-06, "loss": 3.519, "step": 52355 }, { "epoch": 0.5326334635416666, "grad_norm": 13.42024040222168, "learning_rate": 2.2478645996334986e-06, "loss": 3.3728, "step": 52360 }, { "epoch": 0.532684326171875, "grad_norm": 12.795633316040039, "learning_rate": 2.247466760821956e-06, "loss": 3.6022, "step": 52365 }, { "epoch": 0.5327351888020834, "grad_norm": 15.062945365905762, "learning_rate": 2.247068928471416e-06, "loss": 3.4308, "step": 52370 }, { "epoch": 0.5327860514322916, "grad_norm": 14.683167457580566, "learning_rate": 2.2466711025920583e-06, "loss": 3.3802, "step": 52375 }, { "epoch": 0.5328369140625, "grad_norm": 8.117255210876465, "learning_rate": 2.246273283194059e-06, "loss": 3.3244, "step": 52380 }, { "epoch": 0.5328877766927084, "grad_norm": 9.679817199707031, "learning_rate": 2.2458754702875983e-06, "loss": 3.496, "step": 52385 }, { "epoch": 0.5329386393229166, "grad_norm": 15.520284652709961, "learning_rate": 2.245477663882854e-06, "loss": 3.8227, "step": 52390 }, { "epoch": 0.532989501953125, "grad_norm": 9.276087760925293, "learning_rate": 2.2450798639900026e-06, "loss": 3.3125, "step": 52395 }, { "epoch": 0.5330403645833334, "grad_norm": 8.678460121154785, "learning_rate": 2.244682070619222e-06, "loss": 3.4275, "step": 52400 }, { "epoch": 0.5330912272135416, "grad_norm": 12.090228080749512, "learning_rate": 2.244284283780691e-06, "loss": 3.719, "step": 52405 }, { "epoch": 0.53314208984375, "grad_norm": 15.500432968139648, "learning_rate": 2.243886503484586e-06, "loss": 3.1714, "step": 52410 }, { "epoch": 0.5331929524739584, "grad_norm": 14.30467414855957, "learning_rate": 2.243488729741084e-06, "loss": 3.1552, "step": 52415 }, { "epoch": 0.5332438151041666, "grad_norm": 15.131646156311035, "learning_rate": 2.243090962560361e-06, "loss": 3.0603, "step": 52420 }, { "epoch": 0.533294677734375, "grad_norm": 12.336730003356934, "learning_rate": 2.242693201952596e-06, "loss": 3.3295, "step": 52425 }, { "epoch": 0.5333455403645834, "grad_norm": 9.866353988647461, "learning_rate": 2.2422954479279646e-06, "loss": 2.873, "step": 52430 }, { "epoch": 0.5333964029947916, "grad_norm": 10.846232414245605, "learning_rate": 2.241897700496643e-06, "loss": 3.5189, "step": 52435 }, { "epoch": 0.533447265625, "grad_norm": 11.261388778686523, "learning_rate": 2.2414999596688066e-06, "loss": 3.1236, "step": 52440 }, { "epoch": 0.5334981282552084, "grad_norm": 14.13904857635498, "learning_rate": 2.241102225454633e-06, "loss": 3.3259, "step": 52445 }, { "epoch": 0.5335489908854166, "grad_norm": 8.11526870727539, "learning_rate": 2.240704497864298e-06, "loss": 3.1958, "step": 52450 }, { "epoch": 0.533599853515625, "grad_norm": 14.283095359802246, "learning_rate": 2.2403067769079765e-06, "loss": 3.111, "step": 52455 }, { "epoch": 0.5336507161458334, "grad_norm": 9.867772102355957, "learning_rate": 2.239909062595844e-06, "loss": 3.2556, "step": 52460 }, { "epoch": 0.5337015787760416, "grad_norm": 15.871087074279785, "learning_rate": 2.239511354938078e-06, "loss": 2.8123, "step": 52465 }, { "epoch": 0.53375244140625, "grad_norm": 12.044535636901855, "learning_rate": 2.2391136539448514e-06, "loss": 3.3877, "step": 52470 }, { "epoch": 0.5338033040364584, "grad_norm": 9.333099365234375, "learning_rate": 2.2387159596263396e-06, "loss": 3.6512, "step": 52475 }, { "epoch": 0.5338541666666666, "grad_norm": 12.61330795288086, "learning_rate": 2.238318271992719e-06, "loss": 3.3015, "step": 52480 }, { "epoch": 0.533905029296875, "grad_norm": 8.70833969116211, "learning_rate": 2.2379205910541627e-06, "loss": 3.2452, "step": 52485 }, { "epoch": 0.5339558919270834, "grad_norm": 9.98299503326416, "learning_rate": 2.237522916820847e-06, "loss": 3.3227, "step": 52490 }, { "epoch": 0.5340067545572916, "grad_norm": 15.434283256530762, "learning_rate": 2.237125249302944e-06, "loss": 3.4599, "step": 52495 }, { "epoch": 0.5340576171875, "grad_norm": 16.889253616333008, "learning_rate": 2.2367275885106294e-06, "loss": 3.2198, "step": 52500 }, { "epoch": 0.5341084798177084, "grad_norm": 9.48607063293457, "learning_rate": 2.236329934454078e-06, "loss": 3.1513, "step": 52505 }, { "epoch": 0.5341593424479166, "grad_norm": 16.795494079589844, "learning_rate": 2.235932287143462e-06, "loss": 3.3567, "step": 52510 }, { "epoch": 0.534210205078125, "grad_norm": 14.146149635314941, "learning_rate": 2.235534646588955e-06, "loss": 3.3844, "step": 52515 }, { "epoch": 0.5342610677083334, "grad_norm": 12.229581832885742, "learning_rate": 2.235137012800733e-06, "loss": 3.6554, "step": 52520 }, { "epoch": 0.5343119303385416, "grad_norm": 11.907014846801758, "learning_rate": 2.2347393857889667e-06, "loss": 3.3006, "step": 52525 }, { "epoch": 0.53436279296875, "grad_norm": 13.531723022460938, "learning_rate": 2.234341765563831e-06, "loss": 3.2998, "step": 52530 }, { "epoch": 0.5344136555989584, "grad_norm": 15.982670783996582, "learning_rate": 2.2339441521354977e-06, "loss": 3.3834, "step": 52535 }, { "epoch": 0.5344645182291666, "grad_norm": 15.747007369995117, "learning_rate": 2.23354654551414e-06, "loss": 3.299, "step": 52540 }, { "epoch": 0.534515380859375, "grad_norm": 11.762879371643066, "learning_rate": 2.233148945709932e-06, "loss": 3.095, "step": 52545 }, { "epoch": 0.5345662434895834, "grad_norm": 13.616257667541504, "learning_rate": 2.232751352733044e-06, "loss": 3.2841, "step": 52550 }, { "epoch": 0.5346171061197916, "grad_norm": 15.09197998046875, "learning_rate": 2.232353766593649e-06, "loss": 3.4964, "step": 52555 }, { "epoch": 0.53466796875, "grad_norm": 9.675505638122559, "learning_rate": 2.2319561873019198e-06, "loss": 3.1382, "step": 52560 }, { "epoch": 0.5347188313802084, "grad_norm": 13.677727699279785, "learning_rate": 2.2315586148680283e-06, "loss": 3.1489, "step": 52565 }, { "epoch": 0.5347696940104166, "grad_norm": 8.051255226135254, "learning_rate": 2.2311610493021455e-06, "loss": 3.1315, "step": 52570 }, { "epoch": 0.534820556640625, "grad_norm": 13.022601127624512, "learning_rate": 2.230763490614444e-06, "loss": 3.163, "step": 52575 }, { "epoch": 0.5348714192708334, "grad_norm": 14.174161911010742, "learning_rate": 2.230365938815095e-06, "loss": 3.1091, "step": 52580 }, { "epoch": 0.5349222819010416, "grad_norm": 8.802834510803223, "learning_rate": 2.229968393914269e-06, "loss": 3.4788, "step": 52585 }, { "epoch": 0.53497314453125, "grad_norm": 7.659459590911865, "learning_rate": 2.2295708559221373e-06, "loss": 3.6628, "step": 52590 }, { "epoch": 0.5350240071614584, "grad_norm": 10.533792495727539, "learning_rate": 2.2291733248488723e-06, "loss": 3.253, "step": 52595 }, { "epoch": 0.5350748697916666, "grad_norm": 12.855596542358398, "learning_rate": 2.228775800704643e-06, "loss": 3.3276, "step": 52600 }, { "epoch": 0.535125732421875, "grad_norm": 12.619338989257812, "learning_rate": 2.228378283499621e-06, "loss": 3.0476, "step": 52605 }, { "epoch": 0.5351765950520834, "grad_norm": 10.090911865234375, "learning_rate": 2.2279807732439753e-06, "loss": 3.5128, "step": 52610 }, { "epoch": 0.5352274576822916, "grad_norm": 9.089913368225098, "learning_rate": 2.227583269947877e-06, "loss": 3.2851, "step": 52615 }, { "epoch": 0.5352783203125, "grad_norm": 17.8261775970459, "learning_rate": 2.2271857736214973e-06, "loss": 4.0462, "step": 52620 }, { "epoch": 0.5353291829427084, "grad_norm": 11.043100357055664, "learning_rate": 2.2267882842750044e-06, "loss": 3.5272, "step": 52625 }, { "epoch": 0.5353800455729166, "grad_norm": 10.309991836547852, "learning_rate": 2.226390801918568e-06, "loss": 3.1993, "step": 52630 }, { "epoch": 0.535430908203125, "grad_norm": 8.734068870544434, "learning_rate": 2.2259933265623584e-06, "loss": 3.4426, "step": 52635 }, { "epoch": 0.5354817708333334, "grad_norm": 13.374344825744629, "learning_rate": 2.2255958582165447e-06, "loss": 3.252, "step": 52640 }, { "epoch": 0.5355326334635416, "grad_norm": 13.33709716796875, "learning_rate": 2.2251983968912968e-06, "loss": 3.2941, "step": 52645 }, { "epoch": 0.53558349609375, "grad_norm": 13.283416748046875, "learning_rate": 2.224800942596781e-06, "loss": 3.4063, "step": 52650 }, { "epoch": 0.5356343587239584, "grad_norm": 12.268871307373047, "learning_rate": 2.2244034953431685e-06, "loss": 3.1474, "step": 52655 }, { "epoch": 0.5356852213541666, "grad_norm": 17.27332305908203, "learning_rate": 2.2240060551406276e-06, "loss": 3.7438, "step": 52660 }, { "epoch": 0.535736083984375, "grad_norm": 8.489108085632324, "learning_rate": 2.223608621999326e-06, "loss": 3.0981, "step": 52665 }, { "epoch": 0.5357869466145834, "grad_norm": 14.006270408630371, "learning_rate": 2.2232111959294325e-06, "loss": 3.0433, "step": 52670 }, { "epoch": 0.5358378092447916, "grad_norm": 10.502427101135254, "learning_rate": 2.2228137769411156e-06, "loss": 3.2598, "step": 52675 }, { "epoch": 0.535888671875, "grad_norm": 13.823921203613281, "learning_rate": 2.2224163650445417e-06, "loss": 3.4957, "step": 52680 }, { "epoch": 0.5359395345052084, "grad_norm": 12.201239585876465, "learning_rate": 2.2220189602498788e-06, "loss": 3.3212, "step": 52685 }, { "epoch": 0.5359903971354166, "grad_norm": 15.84398078918457, "learning_rate": 2.221621562567296e-06, "loss": 3.3958, "step": 52690 }, { "epoch": 0.536041259765625, "grad_norm": 10.147021293640137, "learning_rate": 2.2212241720069592e-06, "loss": 3.2461, "step": 52695 }, { "epoch": 0.5360921223958334, "grad_norm": 9.781103134155273, "learning_rate": 2.220826788579036e-06, "loss": 3.3178, "step": 52700 }, { "epoch": 0.5361429850260416, "grad_norm": 8.797540664672852, "learning_rate": 2.2204294122936925e-06, "loss": 3.4735, "step": 52705 }, { "epoch": 0.53619384765625, "grad_norm": 12.501797676086426, "learning_rate": 2.220032043161097e-06, "loss": 3.4536, "step": 52710 }, { "epoch": 0.5362447102864584, "grad_norm": 11.115304946899414, "learning_rate": 2.219634681191415e-06, "loss": 3.3875, "step": 52715 }, { "epoch": 0.5362955729166666, "grad_norm": 10.252839088439941, "learning_rate": 2.219237326394814e-06, "loss": 3.3677, "step": 52720 }, { "epoch": 0.536346435546875, "grad_norm": 11.421342849731445, "learning_rate": 2.218839978781458e-06, "loss": 3.1049, "step": 52725 }, { "epoch": 0.5363972981770834, "grad_norm": 12.959375381469727, "learning_rate": 2.2184426383615155e-06, "loss": 3.1831, "step": 52730 }, { "epoch": 0.5364481608072916, "grad_norm": 14.955923080444336, "learning_rate": 2.2180453051451515e-06, "loss": 3.4734, "step": 52735 }, { "epoch": 0.5364990234375, "grad_norm": 9.893755912780762, "learning_rate": 2.217647979142531e-06, "loss": 3.146, "step": 52740 }, { "epoch": 0.5365498860677084, "grad_norm": 10.388473510742188, "learning_rate": 2.21725066036382e-06, "loss": 3.3632, "step": 52745 }, { "epoch": 0.5366007486979166, "grad_norm": 9.242728233337402, "learning_rate": 2.216853348819184e-06, "loss": 3.7097, "step": 52750 }, { "epoch": 0.536651611328125, "grad_norm": 13.42584228515625, "learning_rate": 2.2164560445187883e-06, "loss": 3.2877, "step": 52755 }, { "epoch": 0.5367024739583334, "grad_norm": 12.122518539428711, "learning_rate": 2.2160587474727962e-06, "loss": 3.1327, "step": 52760 }, { "epoch": 0.5367533365885416, "grad_norm": 10.502169609069824, "learning_rate": 2.2156614576913755e-06, "loss": 3.4177, "step": 52765 }, { "epoch": 0.53680419921875, "grad_norm": 12.630999565124512, "learning_rate": 2.2152641751846883e-06, "loss": 3.1985, "step": 52770 }, { "epoch": 0.5368550618489584, "grad_norm": 13.926748275756836, "learning_rate": 2.2148668999629e-06, "loss": 3.281, "step": 52775 }, { "epoch": 0.5369059244791666, "grad_norm": 10.87210464477539, "learning_rate": 2.2144696320361735e-06, "loss": 3.2454, "step": 52780 }, { "epoch": 0.536956787109375, "grad_norm": 14.919941902160645, "learning_rate": 2.2140723714146747e-06, "loss": 3.4325, "step": 52785 }, { "epoch": 0.5370076497395834, "grad_norm": 14.381443977355957, "learning_rate": 2.2136751181085666e-06, "loss": 3.3376, "step": 52790 }, { "epoch": 0.5370585123697916, "grad_norm": 13.070241928100586, "learning_rate": 2.213277872128012e-06, "loss": 3.3856, "step": 52795 }, { "epoch": 0.537109375, "grad_norm": 9.268564224243164, "learning_rate": 2.2128806334831747e-06, "loss": 3.7792, "step": 52800 }, { "epoch": 0.5371602376302084, "grad_norm": 15.848664283752441, "learning_rate": 2.2124834021842196e-06, "loss": 3.5488, "step": 52805 }, { "epoch": 0.5372111002604166, "grad_norm": 10.323749542236328, "learning_rate": 2.2120861782413083e-06, "loss": 3.3862, "step": 52810 }, { "epoch": 0.537261962890625, "grad_norm": 9.335864067077637, "learning_rate": 2.2116889616646037e-06, "loss": 3.2816, "step": 52815 }, { "epoch": 0.5373128255208334, "grad_norm": 12.387421607971191, "learning_rate": 2.211291752464268e-06, "loss": 3.4637, "step": 52820 }, { "epoch": 0.5373636881510416, "grad_norm": 15.235649108886719, "learning_rate": 2.2108945506504648e-06, "loss": 3.7854, "step": 52825 }, { "epoch": 0.53741455078125, "grad_norm": 12.311189651489258, "learning_rate": 2.2104973562333567e-06, "loss": 3.4655, "step": 52830 }, { "epoch": 0.5374654134114584, "grad_norm": 15.438931465148926, "learning_rate": 2.2101001692231046e-06, "loss": 3.1788, "step": 52835 }, { "epoch": 0.5375162760416666, "grad_norm": 13.147708892822266, "learning_rate": 2.20970298962987e-06, "loss": 3.123, "step": 52840 }, { "epoch": 0.537567138671875, "grad_norm": 12.911852836608887, "learning_rate": 2.209305817463817e-06, "loss": 3.2834, "step": 52845 }, { "epoch": 0.5376180013020834, "grad_norm": 15.99446964263916, "learning_rate": 2.2089086527351054e-06, "loss": 3.5062, "step": 52850 }, { "epoch": 0.5376688639322916, "grad_norm": 12.867097854614258, "learning_rate": 2.208511495453896e-06, "loss": 3.0001, "step": 52855 }, { "epoch": 0.5377197265625, "grad_norm": 10.72048568725586, "learning_rate": 2.2081143456303517e-06, "loss": 3.6095, "step": 52860 }, { "epoch": 0.5377705891927084, "grad_norm": 9.181864738464355, "learning_rate": 2.2077172032746334e-06, "loss": 3.3329, "step": 52865 }, { "epoch": 0.5378214518229166, "grad_norm": 14.825196266174316, "learning_rate": 2.2073200683969e-06, "loss": 3.4629, "step": 52870 }, { "epoch": 0.537872314453125, "grad_norm": 8.038079261779785, "learning_rate": 2.2069229410073134e-06, "loss": 3.4778, "step": 52875 }, { "epoch": 0.5379231770833334, "grad_norm": 12.720928192138672, "learning_rate": 2.2065258211160345e-06, "loss": 3.1995, "step": 52880 }, { "epoch": 0.5379740397135416, "grad_norm": 13.668231010437012, "learning_rate": 2.2061287087332225e-06, "loss": 3.2563, "step": 52885 }, { "epoch": 0.53802490234375, "grad_norm": 13.181472778320312, "learning_rate": 2.2057316038690383e-06, "loss": 3.4019, "step": 52890 }, { "epoch": 0.5380757649739584, "grad_norm": 12.291973114013672, "learning_rate": 2.205334506533641e-06, "loss": 3.1949, "step": 52895 }, { "epoch": 0.5381266276041666, "grad_norm": 11.615266799926758, "learning_rate": 2.2049374167371898e-06, "loss": 3.1898, "step": 52900 }, { "epoch": 0.538177490234375, "grad_norm": 14.333727836608887, "learning_rate": 2.204540334489846e-06, "loss": 3.577, "step": 52905 }, { "epoch": 0.5382283528645834, "grad_norm": 12.227395057678223, "learning_rate": 2.204143259801767e-06, "loss": 3.314, "step": 52910 }, { "epoch": 0.5382792154947916, "grad_norm": 13.9808931350708, "learning_rate": 2.203746192683112e-06, "loss": 3.4432, "step": 52915 }, { "epoch": 0.538330078125, "grad_norm": 12.037018775939941, "learning_rate": 2.203349133144042e-06, "loss": 3.4993, "step": 52920 }, { "epoch": 0.5383809407552084, "grad_norm": 12.9117431640625, "learning_rate": 2.2029520811947134e-06, "loss": 3.4597, "step": 52925 }, { "epoch": 0.5384318033854166, "grad_norm": 10.380311965942383, "learning_rate": 2.2025550368452853e-06, "loss": 3.2176, "step": 52930 }, { "epoch": 0.538482666015625, "grad_norm": 16.426626205444336, "learning_rate": 2.202158000105916e-06, "loss": 3.2336, "step": 52935 }, { "epoch": 0.5385335286458334, "grad_norm": 9.988062858581543, "learning_rate": 2.2017609709867636e-06, "loss": 3.0777, "step": 52940 }, { "epoch": 0.5385843912760416, "grad_norm": 11.464739799499512, "learning_rate": 2.2013639494979865e-06, "loss": 3.2367, "step": 52945 }, { "epoch": 0.53863525390625, "grad_norm": 8.486089706420898, "learning_rate": 2.2009669356497414e-06, "loss": 3.3867, "step": 52950 }, { "epoch": 0.5386861165364584, "grad_norm": 13.624707221984863, "learning_rate": 2.2005699294521867e-06, "loss": 3.5823, "step": 52955 }, { "epoch": 0.5387369791666666, "grad_norm": 13.097014427185059, "learning_rate": 2.2001729309154795e-06, "loss": 3.7487, "step": 52960 }, { "epoch": 0.538787841796875, "grad_norm": 9.218652725219727, "learning_rate": 2.1997759400497764e-06, "loss": 3.0382, "step": 52965 }, { "epoch": 0.5388387044270834, "grad_norm": 11.32375431060791, "learning_rate": 2.199378956865235e-06, "loss": 3.7389, "step": 52970 }, { "epoch": 0.5388895670572916, "grad_norm": 11.567964553833008, "learning_rate": 2.1989819813720116e-06, "loss": 3.7298, "step": 52975 }, { "epoch": 0.5389404296875, "grad_norm": 13.934860229492188, "learning_rate": 2.198585013580263e-06, "loss": 3.3685, "step": 52980 }, { "epoch": 0.5389912923177084, "grad_norm": 13.693037033081055, "learning_rate": 2.1981880535001456e-06, "loss": 3.9366, "step": 52985 }, { "epoch": 0.5390421549479166, "grad_norm": 14.608330726623535, "learning_rate": 2.1977911011418147e-06, "loss": 3.314, "step": 52990 }, { "epoch": 0.539093017578125, "grad_norm": 9.940957069396973, "learning_rate": 2.197394156515427e-06, "loss": 3.2194, "step": 52995 }, { "epoch": 0.5391438802083334, "grad_norm": 8.905831336975098, "learning_rate": 2.1969972196311387e-06, "loss": 3.8342, "step": 53000 }, { "epoch": 0.5391947428385416, "grad_norm": 11.434901237487793, "learning_rate": 2.1966002904991043e-06, "loss": 3.4717, "step": 53005 }, { "epoch": 0.53924560546875, "grad_norm": 9.313929557800293, "learning_rate": 2.196203369129479e-06, "loss": 3.592, "step": 53010 }, { "epoch": 0.5392964680989584, "grad_norm": 14.059566497802734, "learning_rate": 2.1958064555324195e-06, "loss": 3.802, "step": 53015 }, { "epoch": 0.5393473307291666, "grad_norm": 9.69733715057373, "learning_rate": 2.195409549718079e-06, "loss": 2.9602, "step": 53020 }, { "epoch": 0.539398193359375, "grad_norm": 15.862642288208008, "learning_rate": 2.195012651696614e-06, "loss": 4.232, "step": 53025 }, { "epoch": 0.5394490559895834, "grad_norm": 13.767962455749512, "learning_rate": 2.194615761478176e-06, "loss": 3.3887, "step": 53030 }, { "epoch": 0.5394999186197916, "grad_norm": 13.546216011047363, "learning_rate": 2.194218879072923e-06, "loss": 3.4194, "step": 53035 }, { "epoch": 0.53955078125, "grad_norm": 16.30230712890625, "learning_rate": 2.193822004491007e-06, "loss": 2.9932, "step": 53040 }, { "epoch": 0.5396016438802084, "grad_norm": 9.344871520996094, "learning_rate": 2.1934251377425833e-06, "loss": 3.5214, "step": 53045 }, { "epoch": 0.5396525065104166, "grad_norm": 12.343306541442871, "learning_rate": 2.193028278837803e-06, "loss": 3.0739, "step": 53050 }, { "epoch": 0.539703369140625, "grad_norm": 7.4595723152160645, "learning_rate": 2.1926314277868226e-06, "loss": 3.3064, "step": 53055 }, { "epoch": 0.5397542317708334, "grad_norm": 10.087668418884277, "learning_rate": 2.1922345845997947e-06, "loss": 3.1303, "step": 53060 }, { "epoch": 0.5398050944010416, "grad_norm": 9.357985496520996, "learning_rate": 2.191837749286871e-06, "loss": 3.156, "step": 53065 }, { "epoch": 0.53985595703125, "grad_norm": 9.338754653930664, "learning_rate": 2.1914409218582057e-06, "loss": 3.8574, "step": 53070 }, { "epoch": 0.5399068196614584, "grad_norm": 10.841154098510742, "learning_rate": 2.191044102323952e-06, "loss": 3.1361, "step": 53075 }, { "epoch": 0.5399576822916666, "grad_norm": 10.66102409362793, "learning_rate": 2.190647290694261e-06, "loss": 3.5059, "step": 53080 }, { "epoch": 0.540008544921875, "grad_norm": 9.158910751342773, "learning_rate": 2.1902504869792855e-06, "loss": 3.075, "step": 53085 }, { "epoch": 0.5400594075520834, "grad_norm": 15.92582893371582, "learning_rate": 2.1898536911891787e-06, "loss": 3.3651, "step": 53090 }, { "epoch": 0.5401102701822916, "grad_norm": 8.675463676452637, "learning_rate": 2.1894569033340914e-06, "loss": 3.6126, "step": 53095 }, { "epoch": 0.5401611328125, "grad_norm": 12.356219291687012, "learning_rate": 2.1890601234241757e-06, "loss": 3.4649, "step": 53100 }, { "epoch": 0.5402119954427084, "grad_norm": 7.340629577636719, "learning_rate": 2.1886633514695824e-06, "loss": 3.1227, "step": 53105 }, { "epoch": 0.5402628580729166, "grad_norm": 13.032557487487793, "learning_rate": 2.1882665874804637e-06, "loss": 3.4904, "step": 53110 }, { "epoch": 0.540313720703125, "grad_norm": 10.5289306640625, "learning_rate": 2.187869831466971e-06, "loss": 3.3025, "step": 53115 }, { "epoch": 0.5403645833333334, "grad_norm": 13.882126808166504, "learning_rate": 2.1874730834392545e-06, "loss": 3.3192, "step": 53120 }, { "epoch": 0.5404154459635416, "grad_norm": 13.612321853637695, "learning_rate": 2.1870763434074644e-06, "loss": 3.4219, "step": 53125 }, { "epoch": 0.54046630859375, "grad_norm": 8.61611557006836, "learning_rate": 2.1866796113817527e-06, "loss": 3.246, "step": 53130 }, { "epoch": 0.5405171712239584, "grad_norm": 10.256523132324219, "learning_rate": 2.1862828873722684e-06, "loss": 3.3299, "step": 53135 }, { "epoch": 0.5405680338541666, "grad_norm": 13.90639877319336, "learning_rate": 2.185886171389163e-06, "loss": 3.5323, "step": 53140 }, { "epoch": 0.540618896484375, "grad_norm": 12.011414527893066, "learning_rate": 2.185489463442584e-06, "loss": 2.9904, "step": 53145 }, { "epoch": 0.5406697591145834, "grad_norm": 14.602082252502441, "learning_rate": 2.185092763542683e-06, "loss": 3.3221, "step": 53150 }, { "epoch": 0.5407206217447916, "grad_norm": 11.019375801086426, "learning_rate": 2.18469607169961e-06, "loss": 3.2125, "step": 53155 }, { "epoch": 0.540771484375, "grad_norm": 9.490230560302734, "learning_rate": 2.184299387923512e-06, "loss": 3.3152, "step": 53160 }, { "epoch": 0.5408223470052084, "grad_norm": 15.053577423095703, "learning_rate": 2.18390271222454e-06, "loss": 3.3199, "step": 53165 }, { "epoch": 0.5408732096354166, "grad_norm": 9.94269847869873, "learning_rate": 2.183506044612842e-06, "loss": 3.1042, "step": 53170 }, { "epoch": 0.540924072265625, "grad_norm": 14.025388717651367, "learning_rate": 2.183109385098567e-06, "loss": 3.3606, "step": 53175 }, { "epoch": 0.5409749348958334, "grad_norm": 10.789249420166016, "learning_rate": 2.1827127336918622e-06, "loss": 3.2626, "step": 53180 }, { "epoch": 0.5410257975260416, "grad_norm": 15.582239151000977, "learning_rate": 2.182316090402878e-06, "loss": 3.3256, "step": 53185 }, { "epoch": 0.54107666015625, "grad_norm": 11.45635986328125, "learning_rate": 2.181919455241762e-06, "loss": 3.6184, "step": 53190 }, { "epoch": 0.5411275227864584, "grad_norm": 9.036690711975098, "learning_rate": 2.1815228282186603e-06, "loss": 3.4225, "step": 53195 }, { "epoch": 0.5411783854166666, "grad_norm": 11.719807624816895, "learning_rate": 2.181126209343721e-06, "loss": 3.565, "step": 53200 }, { "epoch": 0.541229248046875, "grad_norm": 12.068906784057617, "learning_rate": 2.1807295986270934e-06, "loss": 3.1241, "step": 53205 }, { "epoch": 0.5412801106770834, "grad_norm": 12.981789588928223, "learning_rate": 2.1803329960789227e-06, "loss": 3.2856, "step": 53210 }, { "epoch": 0.5413309733072916, "grad_norm": 15.0866060256958, "learning_rate": 2.1799364017093574e-06, "loss": 3.6841, "step": 53215 }, { "epoch": 0.5413818359375, "grad_norm": 9.662513732910156, "learning_rate": 2.1795398155285415e-06, "loss": 3.1087, "step": 53220 }, { "epoch": 0.5414326985677084, "grad_norm": 10.991047859191895, "learning_rate": 2.1791432375466247e-06, "loss": 3.3624, "step": 53225 }, { "epoch": 0.5414835611979166, "grad_norm": 10.34939956665039, "learning_rate": 2.1787466677737523e-06, "loss": 3.3209, "step": 53230 }, { "epoch": 0.541534423828125, "grad_norm": 13.844328880310059, "learning_rate": 2.17835010622007e-06, "loss": 3.1816, "step": 53235 }, { "epoch": 0.5415852864583334, "grad_norm": 12.248176574707031, "learning_rate": 2.177953552895723e-06, "loss": 3.3928, "step": 53240 }, { "epoch": 0.5416361490885416, "grad_norm": 12.704273223876953, "learning_rate": 2.1775570078108597e-06, "loss": 3.3041, "step": 53245 }, { "epoch": 0.54168701171875, "grad_norm": 7.139502048492432, "learning_rate": 2.177160470975623e-06, "loss": 3.214, "step": 53250 }, { "epoch": 0.5417378743489584, "grad_norm": 16.03692054748535, "learning_rate": 2.1767639424001586e-06, "loss": 3.3918, "step": 53255 }, { "epoch": 0.5417887369791666, "grad_norm": 14.646202087402344, "learning_rate": 2.1763674220946134e-06, "loss": 3.6005, "step": 53260 }, { "epoch": 0.541839599609375, "grad_norm": 10.91369342803955, "learning_rate": 2.1759709100691303e-06, "loss": 3.0894, "step": 53265 }, { "epoch": 0.5418904622395834, "grad_norm": 18.816396713256836, "learning_rate": 2.1755744063338556e-06, "loss": 3.5276, "step": 53270 }, { "epoch": 0.5419413248697916, "grad_norm": 13.356977462768555, "learning_rate": 2.175177910898931e-06, "loss": 3.3633, "step": 53275 }, { "epoch": 0.5419921875, "grad_norm": 14.471107482910156, "learning_rate": 2.1747814237745036e-06, "loss": 3.6021, "step": 53280 }, { "epoch": 0.5420430501302084, "grad_norm": 8.016794204711914, "learning_rate": 2.174384944970717e-06, "loss": 3.8809, "step": 53285 }, { "epoch": 0.5420939127604166, "grad_norm": 9.090747833251953, "learning_rate": 2.1739884744977134e-06, "loss": 3.44, "step": 53290 }, { "epoch": 0.542144775390625, "grad_norm": 14.494226455688477, "learning_rate": 2.1735920123656375e-06, "loss": 3.4437, "step": 53295 }, { "epoch": 0.5421956380208334, "grad_norm": 10.311285018920898, "learning_rate": 2.173195558584633e-06, "loss": 3.5714, "step": 53300 }, { "epoch": 0.5422465006510416, "grad_norm": 10.30700969696045, "learning_rate": 2.172799113164842e-06, "loss": 3.3391, "step": 53305 }, { "epoch": 0.54229736328125, "grad_norm": 15.927813529968262, "learning_rate": 2.1724026761164096e-06, "loss": 3.0528, "step": 53310 }, { "epoch": 0.5423482259114584, "grad_norm": 9.194511413574219, "learning_rate": 2.172006247449476e-06, "loss": 3.4069, "step": 53315 }, { "epoch": 0.5423990885416666, "grad_norm": 11.598966598510742, "learning_rate": 2.1716098271741846e-06, "loss": 3.8443, "step": 53320 }, { "epoch": 0.542449951171875, "grad_norm": 14.813419342041016, "learning_rate": 2.1712134153006786e-06, "loss": 3.066, "step": 53325 }, { "epoch": 0.5425008138020834, "grad_norm": 11.64278793334961, "learning_rate": 2.1708170118390992e-06, "loss": 3.6079, "step": 53330 }, { "epoch": 0.5425516764322916, "grad_norm": 12.371272087097168, "learning_rate": 2.170420616799588e-06, "loss": 3.0563, "step": 53335 }, { "epoch": 0.5426025390625, "grad_norm": 13.104665756225586, "learning_rate": 2.1700242301922875e-06, "loss": 3.3068, "step": 53340 }, { "epoch": 0.5426534016927084, "grad_norm": 8.993667602539062, "learning_rate": 2.169627852027339e-06, "loss": 3.176, "step": 53345 }, { "epoch": 0.5427042643229166, "grad_norm": 10.01164436340332, "learning_rate": 2.169231482314883e-06, "loss": 3.2864, "step": 53350 }, { "epoch": 0.542755126953125, "grad_norm": 15.4075927734375, "learning_rate": 2.168835121065062e-06, "loss": 3.3562, "step": 53355 }, { "epoch": 0.5428059895833334, "grad_norm": 11.776107788085938, "learning_rate": 2.168438768288016e-06, "loss": 3.7907, "step": 53360 }, { "epoch": 0.5428568522135416, "grad_norm": 13.539200782775879, "learning_rate": 2.1680424239938853e-06, "loss": 3.2214, "step": 53365 }, { "epoch": 0.54290771484375, "grad_norm": 14.385930061340332, "learning_rate": 2.16764608819281e-06, "loss": 3.2643, "step": 53370 }, { "epoch": 0.5429585774739584, "grad_norm": 15.621609687805176, "learning_rate": 2.1672497608949312e-06, "loss": 3.3567, "step": 53375 }, { "epoch": 0.5430094401041666, "grad_norm": 12.618790626525879, "learning_rate": 2.1668534421103886e-06, "loss": 3.2828, "step": 53380 }, { "epoch": 0.543060302734375, "grad_norm": 19.181137084960938, "learning_rate": 2.1664571318493216e-06, "loss": 3.3321, "step": 53385 }, { "epoch": 0.5431111653645834, "grad_norm": 12.526358604431152, "learning_rate": 2.1660608301218693e-06, "loss": 3.0963, "step": 53390 }, { "epoch": 0.5431620279947916, "grad_norm": 7.121951103210449, "learning_rate": 2.165664536938172e-06, "loss": 3.5169, "step": 53395 }, { "epoch": 0.543212890625, "grad_norm": 8.991941452026367, "learning_rate": 2.1652682523083686e-06, "loss": 3.3319, "step": 53400 }, { "epoch": 0.5432637532552084, "grad_norm": 13.199124336242676, "learning_rate": 2.164871976242597e-06, "loss": 3.5861, "step": 53405 }, { "epoch": 0.5433146158854166, "grad_norm": 14.945271492004395, "learning_rate": 2.164475708750996e-06, "loss": 3.3048, "step": 53410 }, { "epoch": 0.543365478515625, "grad_norm": 9.011767387390137, "learning_rate": 2.164079449843706e-06, "loss": 3.6937, "step": 53415 }, { "epoch": 0.5434163411458334, "grad_norm": 12.211606979370117, "learning_rate": 2.163683199530862e-06, "loss": 3.1385, "step": 53420 }, { "epoch": 0.5434672037760416, "grad_norm": 9.78544807434082, "learning_rate": 2.163286957822605e-06, "loss": 3.3107, "step": 53425 }, { "epoch": 0.54351806640625, "grad_norm": 12.810589790344238, "learning_rate": 2.1628907247290704e-06, "loss": 2.9619, "step": 53430 }, { "epoch": 0.5435689290364584, "grad_norm": 20.296228408813477, "learning_rate": 2.162494500260397e-06, "loss": 3.5441, "step": 53435 }, { "epoch": 0.5436197916666666, "grad_norm": 9.777166366577148, "learning_rate": 2.1620982844267222e-06, "loss": 3.3543, "step": 53440 }, { "epoch": 0.543670654296875, "grad_norm": 12.75098705291748, "learning_rate": 2.161702077238182e-06, "loss": 3.4497, "step": 53445 }, { "epoch": 0.5437215169270834, "grad_norm": 10.42978286743164, "learning_rate": 2.161305878704914e-06, "loss": 3.2527, "step": 53450 }, { "epoch": 0.5437723795572916, "grad_norm": 11.12440013885498, "learning_rate": 2.1609096888370557e-06, "loss": 3.281, "step": 53455 }, { "epoch": 0.5438232421875, "grad_norm": 12.265634536743164, "learning_rate": 2.160513507644742e-06, "loss": 3.3271, "step": 53460 }, { "epoch": 0.5438741048177084, "grad_norm": 13.81308364868164, "learning_rate": 2.1601173351381093e-06, "loss": 3.4146, "step": 53465 }, { "epoch": 0.5439249674479166, "grad_norm": 10.630414009094238, "learning_rate": 2.159721171327295e-06, "loss": 3.4962, "step": 53470 }, { "epoch": 0.543975830078125, "grad_norm": 11.793816566467285, "learning_rate": 2.1593250162224333e-06, "loss": 3.708, "step": 53475 }, { "epoch": 0.5440266927083334, "grad_norm": 15.098947525024414, "learning_rate": 2.158928869833661e-06, "loss": 3.7219, "step": 53480 }, { "epoch": 0.5440775553385416, "grad_norm": 9.953935623168945, "learning_rate": 2.158532732171111e-06, "loss": 3.079, "step": 53485 }, { "epoch": 0.54412841796875, "grad_norm": 11.695734977722168, "learning_rate": 2.1581366032449217e-06, "loss": 3.1605, "step": 53490 }, { "epoch": 0.5441792805989584, "grad_norm": 13.40351390838623, "learning_rate": 2.157740483065226e-06, "loss": 3.398, "step": 53495 }, { "epoch": 0.5442301432291666, "grad_norm": 7.162520885467529, "learning_rate": 2.157344371642159e-06, "loss": 3.6939, "step": 53500 }, { "epoch": 0.544281005859375, "grad_norm": 17.510847091674805, "learning_rate": 2.1569482689858547e-06, "loss": 3.5553, "step": 53505 }, { "epoch": 0.5443318684895834, "grad_norm": 10.181023597717285, "learning_rate": 2.1565521751064476e-06, "loss": 3.1076, "step": 53510 }, { "epoch": 0.5443827311197916, "grad_norm": 13.848320007324219, "learning_rate": 2.1561560900140726e-06, "loss": 3.4043, "step": 53515 }, { "epoch": 0.54443359375, "grad_norm": 12.950430870056152, "learning_rate": 2.1557600137188615e-06, "loss": 3.055, "step": 53520 }, { "epoch": 0.5444844563802084, "grad_norm": 12.266128540039062, "learning_rate": 2.155363946230949e-06, "loss": 3.3766, "step": 53525 }, { "epoch": 0.5445353190104166, "grad_norm": 10.952404022216797, "learning_rate": 2.1549678875604688e-06, "loss": 3.4065, "step": 53530 }, { "epoch": 0.544586181640625, "grad_norm": 13.889093399047852, "learning_rate": 2.154571837717553e-06, "loss": 3.7138, "step": 53535 }, { "epoch": 0.5446370442708334, "grad_norm": 14.20893383026123, "learning_rate": 2.1541757967123356e-06, "loss": 3.283, "step": 53540 }, { "epoch": 0.5446879069010416, "grad_norm": 13.899380683898926, "learning_rate": 2.1537797645549467e-06, "loss": 3.0658, "step": 53545 }, { "epoch": 0.54473876953125, "grad_norm": 13.060921669006348, "learning_rate": 2.153383741255522e-06, "loss": 3.6988, "step": 53550 }, { "epoch": 0.5447896321614584, "grad_norm": 10.030902862548828, "learning_rate": 2.152987726824192e-06, "loss": 3.0602, "step": 53555 }, { "epoch": 0.5448404947916666, "grad_norm": 16.82111358642578, "learning_rate": 2.1525917212710877e-06, "loss": 3.9291, "step": 53560 }, { "epoch": 0.544891357421875, "grad_norm": 10.103955268859863, "learning_rate": 2.1521957246063427e-06, "loss": 3.28, "step": 53565 }, { "epoch": 0.5449422200520834, "grad_norm": 9.024051666259766, "learning_rate": 2.151799736840088e-06, "loss": 3.4298, "step": 53570 }, { "epoch": 0.5449930826822916, "grad_norm": 9.407695770263672, "learning_rate": 2.151403757982454e-06, "loss": 3.4351, "step": 53575 }, { "epoch": 0.5450439453125, "grad_norm": 8.37774658203125, "learning_rate": 2.151007788043572e-06, "loss": 3.2921, "step": 53580 }, { "epoch": 0.5450948079427084, "grad_norm": 15.555733680725098, "learning_rate": 2.1506118270335734e-06, "loss": 3.1459, "step": 53585 }, { "epoch": 0.5451456705729166, "grad_norm": 15.700858116149902, "learning_rate": 2.1502158749625885e-06, "loss": 3.2357, "step": 53590 }, { "epoch": 0.545196533203125, "grad_norm": 9.189980506896973, "learning_rate": 2.1498199318407477e-06, "loss": 4.0753, "step": 53595 }, { "epoch": 0.5452473958333334, "grad_norm": 15.427556037902832, "learning_rate": 2.14942399767818e-06, "loss": 3.0574, "step": 53600 }, { "epoch": 0.5452982584635416, "grad_norm": 15.837459564208984, "learning_rate": 2.1490280724850164e-06, "loss": 3.5831, "step": 53605 }, { "epoch": 0.54534912109375, "grad_norm": 13.636536598205566, "learning_rate": 2.1486321562713875e-06, "loss": 3.0997, "step": 53610 }, { "epoch": 0.5453999837239584, "grad_norm": 12.569819450378418, "learning_rate": 2.148236249047421e-06, "loss": 2.9296, "step": 53615 }, { "epoch": 0.5454508463541666, "grad_norm": 7.152266025543213, "learning_rate": 2.1478403508232458e-06, "loss": 2.8501, "step": 53620 }, { "epoch": 0.545501708984375, "grad_norm": 9.717489242553711, "learning_rate": 2.1474444616089928e-06, "loss": 3.5302, "step": 53625 }, { "epoch": 0.5455525716145834, "grad_norm": 9.396700859069824, "learning_rate": 2.1470485814147894e-06, "loss": 3.2379, "step": 53630 }, { "epoch": 0.5456034342447916, "grad_norm": 16.059377670288086, "learning_rate": 2.146652710250765e-06, "loss": 3.1214, "step": 53635 }, { "epoch": 0.545654296875, "grad_norm": 15.743471145629883, "learning_rate": 2.1462568481270455e-06, "loss": 3.4218, "step": 53640 }, { "epoch": 0.5457051595052084, "grad_norm": 11.109064102172852, "learning_rate": 2.145860995053762e-06, "loss": 3.4594, "step": 53645 }, { "epoch": 0.5457560221354166, "grad_norm": 11.835540771484375, "learning_rate": 2.1454651510410412e-06, "loss": 3.3928, "step": 53650 }, { "epoch": 0.545806884765625, "grad_norm": 9.042723655700684, "learning_rate": 2.1450693160990093e-06, "loss": 3.4778, "step": 53655 }, { "epoch": 0.5458577473958334, "grad_norm": 14.188231468200684, "learning_rate": 2.144673490237796e-06, "loss": 3.1003, "step": 53660 }, { "epoch": 0.5459086100260416, "grad_norm": 8.52377986907959, "learning_rate": 2.1442776734675268e-06, "loss": 3.7347, "step": 53665 }, { "epoch": 0.54595947265625, "grad_norm": 12.667838096618652, "learning_rate": 2.14388186579833e-06, "loss": 3.6695, "step": 53670 }, { "epoch": 0.5460103352864584, "grad_norm": 11.661608695983887, "learning_rate": 2.1434860672403298e-06, "loss": 3.3877, "step": 53675 }, { "epoch": 0.5460611979166666, "grad_norm": 12.163783073425293, "learning_rate": 2.1430902778036543e-06, "loss": 3.2375, "step": 53680 }, { "epoch": 0.546112060546875, "grad_norm": 13.89341926574707, "learning_rate": 2.1426944974984303e-06, "loss": 2.9204, "step": 53685 }, { "epoch": 0.5461629231770834, "grad_norm": 9.899314880371094, "learning_rate": 2.142298726334782e-06, "loss": 3.5734, "step": 53690 }, { "epoch": 0.5462137858072916, "grad_norm": 10.043965339660645, "learning_rate": 2.141902964322836e-06, "loss": 3.4135, "step": 53695 }, { "epoch": 0.5462646484375, "grad_norm": 14.102566719055176, "learning_rate": 2.1415072114727187e-06, "loss": 3.5811, "step": 53700 }, { "epoch": 0.5463155110677084, "grad_norm": 9.399636268615723, "learning_rate": 2.1411114677945537e-06, "loss": 3.2885, "step": 53705 }, { "epoch": 0.5463663736979166, "grad_norm": 9.890459060668945, "learning_rate": 2.140715733298467e-06, "loss": 3.4513, "step": 53710 }, { "epoch": 0.546417236328125, "grad_norm": 9.972284317016602, "learning_rate": 2.1403200079945826e-06, "loss": 3.2842, "step": 53715 }, { "epoch": 0.5464680989583334, "grad_norm": 17.944011688232422, "learning_rate": 2.1399242918930257e-06, "loss": 3.1879, "step": 53720 }, { "epoch": 0.5465189615885416, "grad_norm": 10.539843559265137, "learning_rate": 2.139528585003921e-06, "loss": 3.2273, "step": 53725 }, { "epoch": 0.54656982421875, "grad_norm": 10.64560317993164, "learning_rate": 2.139132887337392e-06, "loss": 2.9197, "step": 53730 }, { "epoch": 0.5466206868489584, "grad_norm": 10.019573211669922, "learning_rate": 2.1387371989035614e-06, "loss": 3.4138, "step": 53735 }, { "epoch": 0.5466715494791666, "grad_norm": 6.66783332824707, "learning_rate": 2.1383415197125553e-06, "loss": 3.1166, "step": 53740 }, { "epoch": 0.546722412109375, "grad_norm": 14.750569343566895, "learning_rate": 2.1379458497744952e-06, "loss": 3.0363, "step": 53745 }, { "epoch": 0.5467732747395834, "grad_norm": 10.087311744689941, "learning_rate": 2.137550189099504e-06, "loss": 2.8389, "step": 53750 }, { "epoch": 0.5468241373697916, "grad_norm": 14.355230331420898, "learning_rate": 2.1371545376977063e-06, "loss": 3.4085, "step": 53755 }, { "epoch": 0.546875, "grad_norm": 10.355266571044922, "learning_rate": 2.1367588955792233e-06, "loss": 2.9538, "step": 53760 }, { "epoch": 0.5469258626302084, "grad_norm": 11.927539825439453, "learning_rate": 2.1363632627541786e-06, "loss": 3.5174, "step": 53765 }, { "epoch": 0.5469767252604166, "grad_norm": 10.795658111572266, "learning_rate": 2.1359676392326926e-06, "loss": 3.3532, "step": 53770 }, { "epoch": 0.547027587890625, "grad_norm": 7.102908611297607, "learning_rate": 2.1355720250248884e-06, "loss": 3.3258, "step": 53775 }, { "epoch": 0.5470784505208334, "grad_norm": 13.276028633117676, "learning_rate": 2.135176420140888e-06, "loss": 3.2748, "step": 53780 }, { "epoch": 0.5471293131510416, "grad_norm": 12.033732414245605, "learning_rate": 2.1347808245908124e-06, "loss": 3.4438, "step": 53785 }, { "epoch": 0.54718017578125, "grad_norm": 9.38341999053955, "learning_rate": 2.1343852383847824e-06, "loss": 3.5425, "step": 53790 }, { "epoch": 0.5472310384114584, "grad_norm": 8.150219917297363, "learning_rate": 2.1339896615329196e-06, "loss": 3.1475, "step": 53795 }, { "epoch": 0.5472819010416666, "grad_norm": 37.85032653808594, "learning_rate": 2.1335940940453452e-06, "loss": 3.4975, "step": 53800 }, { "epoch": 0.547332763671875, "grad_norm": 10.433250427246094, "learning_rate": 2.1331985359321784e-06, "loss": 3.3001, "step": 53805 }, { "epoch": 0.5473836263020834, "grad_norm": 12.532647132873535, "learning_rate": 2.1328029872035396e-06, "loss": 3.1557, "step": 53810 }, { "epoch": 0.5474344889322916, "grad_norm": 10.558504104614258, "learning_rate": 2.1324074478695506e-06, "loss": 3.2694, "step": 53815 }, { "epoch": 0.5474853515625, "grad_norm": 16.129058837890625, "learning_rate": 2.132011917940329e-06, "loss": 3.8683, "step": 53820 }, { "epoch": 0.5475362141927084, "grad_norm": 10.495721817016602, "learning_rate": 2.131616397425996e-06, "loss": 3.4622, "step": 53825 }, { "epoch": 0.5475870768229166, "grad_norm": 11.091641426086426, "learning_rate": 2.131220886336669e-06, "loss": 3.2935, "step": 53830 }, { "epoch": 0.547637939453125, "grad_norm": 9.288651466369629, "learning_rate": 2.130825384682469e-06, "loss": 3.5211, "step": 53835 }, { "epoch": 0.5476888020833334, "grad_norm": 10.28441047668457, "learning_rate": 2.1304298924735147e-06, "loss": 3.0458, "step": 53840 }, { "epoch": 0.5477396647135416, "grad_norm": 11.29616928100586, "learning_rate": 2.1300344097199222e-06, "loss": 3.6167, "step": 53845 }, { "epoch": 0.54779052734375, "grad_norm": 9.837610244750977, "learning_rate": 2.1296389364318124e-06, "loss": 3.6602, "step": 53850 }, { "epoch": 0.5478413899739584, "grad_norm": 11.58377456665039, "learning_rate": 2.1292434726193035e-06, "loss": 3.5914, "step": 53855 }, { "epoch": 0.5478922526041666, "grad_norm": 13.809196472167969, "learning_rate": 2.128848018292512e-06, "loss": 3.2624, "step": 53860 }, { "epoch": 0.547943115234375, "grad_norm": 11.803674697875977, "learning_rate": 2.1284525734615553e-06, "loss": 3.3709, "step": 53865 }, { "epoch": 0.5479939778645834, "grad_norm": 9.340080261230469, "learning_rate": 2.1280571381365525e-06, "loss": 3.6047, "step": 53870 }, { "epoch": 0.5480448404947916, "grad_norm": 16.43390655517578, "learning_rate": 2.1276617123276192e-06, "loss": 4.1752, "step": 53875 }, { "epoch": 0.548095703125, "grad_norm": 12.21694564819336, "learning_rate": 2.127266296044873e-06, "loss": 3.217, "step": 53880 }, { "epoch": 0.5481465657552084, "grad_norm": 11.829559326171875, "learning_rate": 2.1268708892984293e-06, "loss": 3.5265, "step": 53885 }, { "epoch": 0.5481974283854166, "grad_norm": 8.994977951049805, "learning_rate": 2.126475492098406e-06, "loss": 2.9463, "step": 53890 }, { "epoch": 0.548248291015625, "grad_norm": 9.751565933227539, "learning_rate": 2.1260801044549195e-06, "loss": 3.4813, "step": 53895 }, { "epoch": 0.5482991536458334, "grad_norm": 10.996219635009766, "learning_rate": 2.1256847263780843e-06, "loss": 3.1345, "step": 53900 }, { "epoch": 0.5483500162760416, "grad_norm": 13.151937484741211, "learning_rate": 2.1252893578780158e-06, "loss": 3.6332, "step": 53905 }, { "epoch": 0.54840087890625, "grad_norm": 12.586631774902344, "learning_rate": 2.1248939989648313e-06, "loss": 3.2873, "step": 53910 }, { "epoch": 0.5484517415364584, "grad_norm": 15.285003662109375, "learning_rate": 2.1244986496486445e-06, "loss": 3.5958, "step": 53915 }, { "epoch": 0.5485026041666666, "grad_norm": 13.51598072052002, "learning_rate": 2.1241033099395715e-06, "loss": 3.9591, "step": 53920 }, { "epoch": 0.548553466796875, "grad_norm": 13.146869659423828, "learning_rate": 2.1237079798477253e-06, "loss": 3.6961, "step": 53925 }, { "epoch": 0.5486043294270834, "grad_norm": 12.482857704162598, "learning_rate": 2.1233126593832214e-06, "loss": 3.3824, "step": 53930 }, { "epoch": 0.5486551920572916, "grad_norm": 11.18795108795166, "learning_rate": 2.1229173485561744e-06, "loss": 3.0001, "step": 53935 }, { "epoch": 0.5487060546875, "grad_norm": 8.530762672424316, "learning_rate": 2.122522047376696e-06, "loss": 3.4197, "step": 53940 }, { "epoch": 0.5487569173177084, "grad_norm": 14.566081047058105, "learning_rate": 2.1221267558549037e-06, "loss": 3.2567, "step": 53945 }, { "epoch": 0.5488077799479166, "grad_norm": 6.921778202056885, "learning_rate": 2.1217314740009076e-06, "loss": 3.4137, "step": 53950 }, { "epoch": 0.548858642578125, "grad_norm": 10.239953994750977, "learning_rate": 2.1213362018248227e-06, "loss": 3.3854, "step": 53955 }, { "epoch": 0.5489095052083334, "grad_norm": 12.461688041687012, "learning_rate": 2.1209409393367607e-06, "loss": 3.7388, "step": 53960 }, { "epoch": 0.5489603678385416, "grad_norm": 13.59146499633789, "learning_rate": 2.120545686546835e-06, "loss": 3.5482, "step": 53965 }, { "epoch": 0.54901123046875, "grad_norm": 12.854676246643066, "learning_rate": 2.1201504434651587e-06, "loss": 3.5777, "step": 53970 }, { "epoch": 0.5490620930989584, "grad_norm": 11.088582992553711, "learning_rate": 2.1197552101018425e-06, "loss": 3.1657, "step": 53975 }, { "epoch": 0.5491129557291666, "grad_norm": 12.971543312072754, "learning_rate": 2.119359986466999e-06, "loss": 3.5671, "step": 53980 }, { "epoch": 0.549163818359375, "grad_norm": 7.972337245941162, "learning_rate": 2.1189647725707406e-06, "loss": 3.4042, "step": 53985 }, { "epoch": 0.5492146809895834, "grad_norm": 9.62437915802002, "learning_rate": 2.118569568423178e-06, "loss": 3.5359, "step": 53990 }, { "epoch": 0.5492655436197916, "grad_norm": 9.917858123779297, "learning_rate": 2.1181743740344233e-06, "loss": 3.604, "step": 53995 }, { "epoch": 0.54931640625, "grad_norm": 13.30362319946289, "learning_rate": 2.117779189414585e-06, "loss": 3.9732, "step": 54000 }, { "epoch": 0.5493672688802084, "grad_norm": 12.902823448181152, "learning_rate": 2.117384014573777e-06, "loss": 3.0754, "step": 54005 }, { "epoch": 0.5494181315104166, "grad_norm": 9.599853515625, "learning_rate": 2.116988849522108e-06, "loss": 3.4358, "step": 54010 }, { "epoch": 0.549468994140625, "grad_norm": 13.125391960144043, "learning_rate": 2.116593694269688e-06, "loss": 3.2702, "step": 54015 }, { "epoch": 0.5495198567708334, "grad_norm": 9.132649421691895, "learning_rate": 2.1161985488266275e-06, "loss": 3.3415, "step": 54020 }, { "epoch": 0.5495707194010416, "grad_norm": 9.473170280456543, "learning_rate": 2.115803413203037e-06, "loss": 3.3436, "step": 54025 }, { "epoch": 0.54962158203125, "grad_norm": 11.078560829162598, "learning_rate": 2.1154082874090244e-06, "loss": 3.0247, "step": 54030 }, { "epoch": 0.5496724446614584, "grad_norm": 12.930471420288086, "learning_rate": 2.115013171454699e-06, "loss": 3.2621, "step": 54035 }, { "epoch": 0.5497233072916666, "grad_norm": 15.67178726196289, "learning_rate": 2.1146180653501717e-06, "loss": 3.6102, "step": 54040 }, { "epoch": 0.549774169921875, "grad_norm": 11.1826810836792, "learning_rate": 2.1142229691055494e-06, "loss": 3.6965, "step": 54045 }, { "epoch": 0.5498250325520834, "grad_norm": 8.767780303955078, "learning_rate": 2.1138278827309413e-06, "loss": 3.1587, "step": 54050 }, { "epoch": 0.5498758951822916, "grad_norm": 11.69979476928711, "learning_rate": 2.113432806236454e-06, "loss": 3.5907, "step": 54055 }, { "epoch": 0.5499267578125, "grad_norm": 11.563873291015625, "learning_rate": 2.1130377396321976e-06, "loss": 3.079, "step": 54060 }, { "epoch": 0.5499776204427084, "grad_norm": 9.545299530029297, "learning_rate": 2.1126426829282794e-06, "loss": 3.4689, "step": 54065 }, { "epoch": 0.5500284830729166, "grad_norm": 14.429900169372559, "learning_rate": 2.1122476361348056e-06, "loss": 3.2776, "step": 54070 }, { "epoch": 0.550079345703125, "grad_norm": 12.01415729522705, "learning_rate": 2.1118525992618837e-06, "loss": 3.2068, "step": 54075 }, { "epoch": 0.5501302083333334, "grad_norm": 9.425143241882324, "learning_rate": 2.1114575723196222e-06, "loss": 3.2896, "step": 54080 }, { "epoch": 0.5501810709635416, "grad_norm": 12.544426918029785, "learning_rate": 2.1110625553181263e-06, "loss": 3.1276, "step": 54085 }, { "epoch": 0.55023193359375, "grad_norm": 11.73938274383545, "learning_rate": 2.110667548267503e-06, "loss": 3.239, "step": 54090 }, { "epoch": 0.5502827962239584, "grad_norm": 7.796879291534424, "learning_rate": 2.1102725511778566e-06, "loss": 3.7686, "step": 54095 }, { "epoch": 0.5503336588541666, "grad_norm": 15.59985065460205, "learning_rate": 2.109877564059296e-06, "loss": 3.0617, "step": 54100 }, { "epoch": 0.550384521484375, "grad_norm": 18.140830993652344, "learning_rate": 2.1094825869219255e-06, "loss": 3.4484, "step": 54105 }, { "epoch": 0.5504353841145834, "grad_norm": 11.294111251831055, "learning_rate": 2.1090876197758508e-06, "loss": 3.4144, "step": 54110 }, { "epoch": 0.5504862467447916, "grad_norm": 8.844625473022461, "learning_rate": 2.108692662631175e-06, "loss": 3.117, "step": 54115 }, { "epoch": 0.550537109375, "grad_norm": 11.545036315917969, "learning_rate": 2.108297715498006e-06, "loss": 3.8671, "step": 54120 }, { "epoch": 0.5505879720052084, "grad_norm": 10.198552131652832, "learning_rate": 2.1079027783864472e-06, "loss": 2.9302, "step": 54125 }, { "epoch": 0.5506388346354166, "grad_norm": 12.223042488098145, "learning_rate": 2.1075078513066026e-06, "loss": 3.0021, "step": 54130 }, { "epoch": 0.550689697265625, "grad_norm": 10.66480827331543, "learning_rate": 2.1071129342685758e-06, "loss": 3.2338, "step": 54135 }, { "epoch": 0.5507405598958334, "grad_norm": 14.180126190185547, "learning_rate": 2.1067180272824723e-06, "loss": 2.97, "step": 54140 }, { "epoch": 0.5507914225260416, "grad_norm": 10.506875991821289, "learning_rate": 2.106323130358395e-06, "loss": 3.6545, "step": 54145 }, { "epoch": 0.55084228515625, "grad_norm": 9.22285270690918, "learning_rate": 2.105928243506446e-06, "loss": 3.3539, "step": 54150 }, { "epoch": 0.5508931477864584, "grad_norm": 11.771363258361816, "learning_rate": 2.1055333667367307e-06, "loss": 2.9404, "step": 54155 }, { "epoch": 0.5509440104166666, "grad_norm": 9.148426055908203, "learning_rate": 2.1051385000593506e-06, "loss": 3.7044, "step": 54160 }, { "epoch": 0.550994873046875, "grad_norm": 10.212970733642578, "learning_rate": 2.1047436434844084e-06, "loss": 3.4446, "step": 54165 }, { "epoch": 0.5510457356770834, "grad_norm": 13.897493362426758, "learning_rate": 2.1043487970220057e-06, "loss": 2.7659, "step": 54170 }, { "epoch": 0.5510965983072916, "grad_norm": 9.90019702911377, "learning_rate": 2.103953960682245e-06, "loss": 3.2357, "step": 54175 }, { "epoch": 0.5511474609375, "grad_norm": 12.732852935791016, "learning_rate": 2.10355913447523e-06, "loss": 3.6353, "step": 54180 }, { "epoch": 0.5511983235677084, "grad_norm": 14.46264934539795, "learning_rate": 2.1031643184110596e-06, "loss": 3.103, "step": 54185 }, { "epoch": 0.5512491861979166, "grad_norm": 11.0048246383667, "learning_rate": 2.102769512499835e-06, "loss": 3.1272, "step": 54190 }, { "epoch": 0.551300048828125, "grad_norm": 16.377975463867188, "learning_rate": 2.10237471675166e-06, "loss": 3.5995, "step": 54195 }, { "epoch": 0.5513509114583334, "grad_norm": 14.817015647888184, "learning_rate": 2.1019799311766327e-06, "loss": 3.1704, "step": 54200 }, { "epoch": 0.5514017740885416, "grad_norm": 8.748543739318848, "learning_rate": 2.101585155784855e-06, "loss": 3.4014, "step": 54205 }, { "epoch": 0.55145263671875, "grad_norm": 11.921297073364258, "learning_rate": 2.1011903905864263e-06, "loss": 3.2395, "step": 54210 }, { "epoch": 0.5515034993489584, "grad_norm": 12.566230773925781, "learning_rate": 2.1007956355914465e-06, "loss": 3.5617, "step": 54215 }, { "epoch": 0.5515543619791666, "grad_norm": 12.688462257385254, "learning_rate": 2.1004008908100166e-06, "loss": 3.3699, "step": 54220 }, { "epoch": 0.551605224609375, "grad_norm": 14.409541130065918, "learning_rate": 2.100006156252235e-06, "loss": 3.4616, "step": 54225 }, { "epoch": 0.5516560872395834, "grad_norm": 9.698654174804688, "learning_rate": 2.0996114319282006e-06, "loss": 3.3425, "step": 54230 }, { "epoch": 0.5517069498697916, "grad_norm": 8.120442390441895, "learning_rate": 2.099216717848013e-06, "loss": 3.4025, "step": 54235 }, { "epoch": 0.5517578125, "grad_norm": 11.480592727661133, "learning_rate": 2.098822014021771e-06, "loss": 3.2517, "step": 54240 }, { "epoch": 0.5518086751302084, "grad_norm": 18.86686897277832, "learning_rate": 2.0984273204595722e-06, "loss": 4.1665, "step": 54245 }, { "epoch": 0.5518595377604166, "grad_norm": 14.03760814666748, "learning_rate": 2.0980326371715155e-06, "loss": 3.5493, "step": 54250 }, { "epoch": 0.551910400390625, "grad_norm": 7.9557929039001465, "learning_rate": 2.097637964167699e-06, "loss": 3.3811, "step": 54255 }, { "epoch": 0.5519612630208334, "grad_norm": 13.905202865600586, "learning_rate": 2.0972433014582193e-06, "loss": 3.3876, "step": 54260 }, { "epoch": 0.5520121256510416, "grad_norm": 10.3096284866333, "learning_rate": 2.0968486490531735e-06, "loss": 3.1963, "step": 54265 }, { "epoch": 0.55206298828125, "grad_norm": 10.08290958404541, "learning_rate": 2.0964540069626606e-06, "loss": 3.4092, "step": 54270 }, { "epoch": 0.5521138509114584, "grad_norm": 14.121380805969238, "learning_rate": 2.0960593751967758e-06, "loss": 4.521, "step": 54275 }, { "epoch": 0.5521647135416666, "grad_norm": 9.761064529418945, "learning_rate": 2.095664753765617e-06, "loss": 3.3897, "step": 54280 }, { "epoch": 0.552215576171875, "grad_norm": 10.380366325378418, "learning_rate": 2.095270142679278e-06, "loss": 3.1439, "step": 54285 }, { "epoch": 0.5522664388020834, "grad_norm": 9.43635368347168, "learning_rate": 2.0948755419478576e-06, "loss": 3.3955, "step": 54290 }, { "epoch": 0.5523173014322916, "grad_norm": 14.51638412475586, "learning_rate": 2.094480951581451e-06, "loss": 3.3379, "step": 54295 }, { "epoch": 0.5523681640625, "grad_norm": 14.440349578857422, "learning_rate": 2.0940863715901518e-06, "loss": 3.3932, "step": 54300 }, { "epoch": 0.5524190266927084, "grad_norm": 11.273665428161621, "learning_rate": 2.093691801984057e-06, "loss": 3.5603, "step": 54305 }, { "epoch": 0.5524698893229166, "grad_norm": 18.805217742919922, "learning_rate": 2.0932972427732613e-06, "loss": 3.1834, "step": 54310 }, { "epoch": 0.552520751953125, "grad_norm": 11.681893348693848, "learning_rate": 2.0929026939678595e-06, "loss": 3.1483, "step": 54315 }, { "epoch": 0.5525716145833334, "grad_norm": 12.73479175567627, "learning_rate": 2.0925081555779457e-06, "loss": 3.036, "step": 54320 }, { "epoch": 0.5526224772135416, "grad_norm": 16.403100967407227, "learning_rate": 2.0921136276136134e-06, "loss": 3.5831, "step": 54325 }, { "epoch": 0.55267333984375, "grad_norm": 7.362949371337891, "learning_rate": 2.0917191100849577e-06, "loss": 3.2312, "step": 54330 }, { "epoch": 0.5527242024739584, "grad_norm": 8.387533187866211, "learning_rate": 2.0913246030020725e-06, "loss": 3.2315, "step": 54335 }, { "epoch": 0.5527750651041666, "grad_norm": 15.326143264770508, "learning_rate": 2.090930106375049e-06, "loss": 3.3588, "step": 54340 }, { "epoch": 0.552825927734375, "grad_norm": 11.431089401245117, "learning_rate": 2.0905356202139827e-06, "loss": 3.2824, "step": 54345 }, { "epoch": 0.5528767903645834, "grad_norm": 8.371926307678223, "learning_rate": 2.090141144528966e-06, "loss": 3.5238, "step": 54350 }, { "epoch": 0.5529276529947916, "grad_norm": 18.97978973388672, "learning_rate": 2.08974667933009e-06, "loss": 4.286, "step": 54355 }, { "epoch": 0.552978515625, "grad_norm": 11.31583309173584, "learning_rate": 2.089352224627448e-06, "loss": 3.5481, "step": 54360 }, { "epoch": 0.5530293782552084, "grad_norm": 7.666788578033447, "learning_rate": 2.088957780431133e-06, "loss": 3.1631, "step": 54365 }, { "epoch": 0.5530802408854166, "grad_norm": 7.157726287841797, "learning_rate": 2.0885633467512352e-06, "loss": 3.4058, "step": 54370 }, { "epoch": 0.553131103515625, "grad_norm": 9.22585678100586, "learning_rate": 2.088168923597847e-06, "loss": 3.6942, "step": 54375 }, { "epoch": 0.5531819661458334, "grad_norm": 15.494122505187988, "learning_rate": 2.0877745109810584e-06, "loss": 3.2869, "step": 54380 }, { "epoch": 0.5532328287760416, "grad_norm": 8.315948486328125, "learning_rate": 2.0873801089109615e-06, "loss": 3.2937, "step": 54385 }, { "epoch": 0.55328369140625, "grad_norm": 14.754796028137207, "learning_rate": 2.0869857173976475e-06, "loss": 3.1085, "step": 54390 }, { "epoch": 0.5533345540364584, "grad_norm": 16.696014404296875, "learning_rate": 2.086591336451205e-06, "loss": 2.9874, "step": 54395 }, { "epoch": 0.5533854166666666, "grad_norm": 9.401805877685547, "learning_rate": 2.0861969660817254e-06, "loss": 3.4339, "step": 54400 }, { "epoch": 0.553436279296875, "grad_norm": 10.695304870605469, "learning_rate": 2.085802606299298e-06, "loss": 3.4864, "step": 54405 }, { "epoch": 0.5534871419270834, "grad_norm": 8.707365989685059, "learning_rate": 2.0854082571140138e-06, "loss": 3.4632, "step": 54410 }, { "epoch": 0.5535380045572916, "grad_norm": 7.770190238952637, "learning_rate": 2.0850139185359608e-06, "loss": 3.3616, "step": 54415 }, { "epoch": 0.5535888671875, "grad_norm": 15.296331405639648, "learning_rate": 2.0846195905752273e-06, "loss": 3.1424, "step": 54420 }, { "epoch": 0.5536397298177084, "grad_norm": 14.825443267822266, "learning_rate": 2.0842252732419045e-06, "loss": 3.6531, "step": 54425 }, { "epoch": 0.5536905924479166, "grad_norm": 13.824195861816406, "learning_rate": 2.083830966546079e-06, "loss": 3.5766, "step": 54430 }, { "epoch": 0.553741455078125, "grad_norm": 9.839577674865723, "learning_rate": 2.0834366704978386e-06, "loss": 3.9144, "step": 54435 }, { "epoch": 0.5537923177083334, "grad_norm": 11.540263175964355, "learning_rate": 2.083042385107274e-06, "loss": 3.3083, "step": 54440 }, { "epoch": 0.5538431803385416, "grad_norm": 9.633585929870605, "learning_rate": 2.08264811038447e-06, "loss": 3.3286, "step": 54445 }, { "epoch": 0.55389404296875, "grad_norm": 9.796147346496582, "learning_rate": 2.082253846339516e-06, "loss": 3.2135, "step": 54450 }, { "epoch": 0.5539449055989584, "grad_norm": 9.02984619140625, "learning_rate": 2.081859592982497e-06, "loss": 3.2556, "step": 54455 }, { "epoch": 0.5539957682291666, "grad_norm": 14.30068302154541, "learning_rate": 2.081465350323502e-06, "loss": 3.1485, "step": 54460 }, { "epoch": 0.554046630859375, "grad_norm": 8.195463180541992, "learning_rate": 2.0810711183726173e-06, "loss": 3.653, "step": 54465 }, { "epoch": 0.5540974934895834, "grad_norm": 16.552175521850586, "learning_rate": 2.0806768971399283e-06, "loss": 3.0628, "step": 54470 }, { "epoch": 0.5541483561197916, "grad_norm": 15.321861267089844, "learning_rate": 2.0802826866355207e-06, "loss": 3.3406, "step": 54475 }, { "epoch": 0.55419921875, "grad_norm": 11.74885368347168, "learning_rate": 2.0798884868694824e-06, "loss": 2.9268, "step": 54480 }, { "epoch": 0.5542500813802084, "grad_norm": 14.062115669250488, "learning_rate": 2.079494297851897e-06, "loss": 3.3394, "step": 54485 }, { "epoch": 0.5543009440104166, "grad_norm": 9.796475410461426, "learning_rate": 2.079100119592851e-06, "loss": 3.3353, "step": 54490 }, { "epoch": 0.554351806640625, "grad_norm": 12.919037818908691, "learning_rate": 2.078705952102427e-06, "loss": 3.256, "step": 54495 }, { "epoch": 0.5544026692708334, "grad_norm": 9.963675498962402, "learning_rate": 2.0783117953907123e-06, "loss": 3.8251, "step": 54500 }, { "epoch": 0.5544535319010416, "grad_norm": 12.951997756958008, "learning_rate": 2.077917649467791e-06, "loss": 3.1518, "step": 54505 }, { "epoch": 0.55450439453125, "grad_norm": 14.596063613891602, "learning_rate": 2.0775235143437463e-06, "loss": 3.0804, "step": 54510 }, { "epoch": 0.5545552571614584, "grad_norm": 9.925080299377441, "learning_rate": 2.0771293900286616e-06, "loss": 3.5082, "step": 54515 }, { "epoch": 0.5546061197916666, "grad_norm": 8.489387512207031, "learning_rate": 2.076735276532622e-06, "loss": 3.8833, "step": 54520 }, { "epoch": 0.554656982421875, "grad_norm": 17.317609786987305, "learning_rate": 2.0763411738657097e-06, "loss": 3.565, "step": 54525 }, { "epoch": 0.5547078450520834, "grad_norm": 9.189920425415039, "learning_rate": 2.0759470820380074e-06, "loss": 3.6864, "step": 54530 }, { "epoch": 0.5547587076822916, "grad_norm": 15.213544845581055, "learning_rate": 2.0755530010596e-06, "loss": 3.2918, "step": 54535 }, { "epoch": 0.5548095703125, "grad_norm": 11.895380020141602, "learning_rate": 2.0751589309405673e-06, "loss": 3.0242, "step": 54540 }, { "epoch": 0.5548604329427084, "grad_norm": 10.334179878234863, "learning_rate": 2.0747648716909936e-06, "loss": 3.477, "step": 54545 }, { "epoch": 0.5549112955729166, "grad_norm": 11.6627779006958, "learning_rate": 2.074370823320958e-06, "loss": 3.2691, "step": 54550 }, { "epoch": 0.554962158203125, "grad_norm": 12.616467475891113, "learning_rate": 2.073976785840546e-06, "loss": 3.4009, "step": 54555 }, { "epoch": 0.5550130208333334, "grad_norm": 9.53610897064209, "learning_rate": 2.0735827592598356e-06, "loss": 3.8242, "step": 54560 }, { "epoch": 0.5550638834635416, "grad_norm": 13.089378356933594, "learning_rate": 2.0731887435889105e-06, "loss": 3.2991, "step": 54565 }, { "epoch": 0.55511474609375, "grad_norm": 12.170549392700195, "learning_rate": 2.072794738837849e-06, "loss": 3.2447, "step": 54570 }, { "epoch": 0.5551656087239584, "grad_norm": 9.022259712219238, "learning_rate": 2.0724007450167335e-06, "loss": 3.2896, "step": 54575 }, { "epoch": 0.5552164713541666, "grad_norm": 14.788315773010254, "learning_rate": 2.072006762135644e-06, "loss": 3.1942, "step": 54580 }, { "epoch": 0.555267333984375, "grad_norm": 12.025224685668945, "learning_rate": 2.0716127902046595e-06, "loss": 3.3759, "step": 54585 }, { "epoch": 0.5553181966145834, "grad_norm": 13.401224136352539, "learning_rate": 2.0712188292338596e-06, "loss": 3.3078, "step": 54590 }, { "epoch": 0.5553690592447916, "grad_norm": 10.349852561950684, "learning_rate": 2.0708248792333257e-06, "loss": 3.5264, "step": 54595 }, { "epoch": 0.555419921875, "grad_norm": 9.907820701599121, "learning_rate": 2.0704309402131344e-06, "loss": 3.2487, "step": 54600 }, { "epoch": 0.5554707845052084, "grad_norm": 12.225205421447754, "learning_rate": 2.0700370121833666e-06, "loss": 3.2967, "step": 54605 }, { "epoch": 0.5555216471354166, "grad_norm": 12.409201622009277, "learning_rate": 2.069643095154099e-06, "loss": 3.1588, "step": 54610 }, { "epoch": 0.555572509765625, "grad_norm": 7.892660140991211, "learning_rate": 2.069249189135411e-06, "loss": 3.132, "step": 54615 }, { "epoch": 0.5556233723958334, "grad_norm": 17.70599937438965, "learning_rate": 2.068855294137381e-06, "loss": 3.6297, "step": 54620 }, { "epoch": 0.5556742350260416, "grad_norm": 12.921568870544434, "learning_rate": 2.0684614101700852e-06, "loss": 3.8524, "step": 54625 }, { "epoch": 0.55572509765625, "grad_norm": 10.770087242126465, "learning_rate": 2.0680675372436025e-06, "loss": 3.2557, "step": 54630 }, { "epoch": 0.5557759602864584, "grad_norm": 11.178862571716309, "learning_rate": 2.06767367536801e-06, "loss": 3.6751, "step": 54635 }, { "epoch": 0.5558268229166666, "grad_norm": 12.682517051696777, "learning_rate": 2.0672798245533833e-06, "loss": 3.5117, "step": 54640 }, { "epoch": 0.555877685546875, "grad_norm": 10.491020202636719, "learning_rate": 2.0668859848097995e-06, "loss": 3.1276, "step": 54645 }, { "epoch": 0.5559285481770834, "grad_norm": 14.331825256347656, "learning_rate": 2.0664921561473363e-06, "loss": 3.2829, "step": 54650 }, { "epoch": 0.5559794108072916, "grad_norm": 10.737212181091309, "learning_rate": 2.0660983385760676e-06, "loss": 3.1904, "step": 54655 }, { "epoch": 0.5560302734375, "grad_norm": 14.648859977722168, "learning_rate": 2.0657045321060704e-06, "loss": 2.9052, "step": 54660 }, { "epoch": 0.5560811360677084, "grad_norm": 15.264992713928223, "learning_rate": 2.0653107367474193e-06, "loss": 3.3429, "step": 54665 }, { "epoch": 0.5561319986979166, "grad_norm": 15.288416862487793, "learning_rate": 2.0649169525101904e-06, "loss": 3.1437, "step": 54670 }, { "epoch": 0.556182861328125, "grad_norm": 10.921524047851562, "learning_rate": 2.0645231794044584e-06, "loss": 3.3432, "step": 54675 }, { "epoch": 0.5562337239583334, "grad_norm": 15.968602180480957, "learning_rate": 2.064129417440297e-06, "loss": 3.2523, "step": 54680 }, { "epoch": 0.5562845865885416, "grad_norm": 12.748848915100098, "learning_rate": 2.0637356666277813e-06, "loss": 3.4739, "step": 54685 }, { "epoch": 0.55633544921875, "grad_norm": 9.147810935974121, "learning_rate": 2.0633419269769856e-06, "loss": 3.4638, "step": 54690 }, { "epoch": 0.5563863118489584, "grad_norm": 13.364506721496582, "learning_rate": 2.062948198497983e-06, "loss": 3.3763, "step": 54695 }, { "epoch": 0.5564371744791666, "grad_norm": 8.214322090148926, "learning_rate": 2.0625544812008475e-06, "loss": 3.2868, "step": 54700 }, { "epoch": 0.556488037109375, "grad_norm": 11.215323448181152, "learning_rate": 2.06216077509565e-06, "loss": 3.552, "step": 54705 }, { "epoch": 0.5565388997395834, "grad_norm": 10.461332321166992, "learning_rate": 2.0617670801924674e-06, "loss": 3.3382, "step": 54710 }, { "epoch": 0.5565897623697916, "grad_norm": 11.168867111206055, "learning_rate": 2.0613733965013693e-06, "loss": 3.1396, "step": 54715 }, { "epoch": 0.556640625, "grad_norm": 12.699355125427246, "learning_rate": 2.06097972403243e-06, "loss": 3.6432, "step": 54720 }, { "epoch": 0.5566914876302084, "grad_norm": 9.216696739196777, "learning_rate": 2.0605860627957187e-06, "loss": 3.6198, "step": 54725 }, { "epoch": 0.5567423502604166, "grad_norm": 10.07630443572998, "learning_rate": 2.0601924128013095e-06, "loss": 3.3492, "step": 54730 }, { "epoch": 0.556793212890625, "grad_norm": 13.220158576965332, "learning_rate": 2.059798774059274e-06, "loss": 3.1742, "step": 54735 }, { "epoch": 0.5568440755208334, "grad_norm": 15.62512493133545, "learning_rate": 2.059405146579681e-06, "loss": 3.2923, "step": 54740 }, { "epoch": 0.5568949381510416, "grad_norm": 12.246667861938477, "learning_rate": 2.0590115303726037e-06, "loss": 3.3294, "step": 54745 }, { "epoch": 0.55694580078125, "grad_norm": 7.680668830871582, "learning_rate": 2.0586179254481124e-06, "loss": 2.9745, "step": 54750 }, { "epoch": 0.5569966634114584, "grad_norm": 10.86223030090332, "learning_rate": 2.0582243318162767e-06, "loss": 3.0465, "step": 54755 }, { "epoch": 0.5570475260416666, "grad_norm": 14.095746994018555, "learning_rate": 2.0578307494871656e-06, "loss": 3.6199, "step": 54760 }, { "epoch": 0.557098388671875, "grad_norm": 16.408039093017578, "learning_rate": 2.057437178470851e-06, "loss": 3.1182, "step": 54765 }, { "epoch": 0.5571492513020834, "grad_norm": 11.257684707641602, "learning_rate": 2.0570436187774013e-06, "loss": 3.4447, "step": 54770 }, { "epoch": 0.5572001139322916, "grad_norm": 17.170454025268555, "learning_rate": 2.056650070416886e-06, "loss": 3.0475, "step": 54775 }, { "epoch": 0.5572509765625, "grad_norm": 8.76863956451416, "learning_rate": 2.056256533399372e-06, "loss": 3.2926, "step": 54780 }, { "epoch": 0.5573018391927084, "grad_norm": 16.362281799316406, "learning_rate": 2.0558630077349305e-06, "loss": 3.7876, "step": 54785 }, { "epoch": 0.5573527018229166, "grad_norm": 9.774134635925293, "learning_rate": 2.055469493433629e-06, "loss": 3.3734, "step": 54790 }, { "epoch": 0.557403564453125, "grad_norm": 13.329634666442871, "learning_rate": 2.0550759905055346e-06, "loss": 3.4319, "step": 54795 }, { "epoch": 0.5574544270833334, "grad_norm": 7.764184474945068, "learning_rate": 2.054682498960715e-06, "loss": 3.158, "step": 54800 }, { "epoch": 0.5575052897135416, "grad_norm": 12.539365768432617, "learning_rate": 2.0542890188092398e-06, "loss": 3.1955, "step": 54805 }, { "epoch": 0.55755615234375, "grad_norm": 17.243762969970703, "learning_rate": 2.053895550061173e-06, "loss": 3.5431, "step": 54810 }, { "epoch": 0.5576070149739584, "grad_norm": 12.289092063903809, "learning_rate": 2.0535020927265835e-06, "loss": 2.9041, "step": 54815 }, { "epoch": 0.5576578776041666, "grad_norm": 13.950140953063965, "learning_rate": 2.053108646815536e-06, "loss": 3.3025, "step": 54820 }, { "epoch": 0.557708740234375, "grad_norm": 11.045097351074219, "learning_rate": 2.052715212338099e-06, "loss": 3.3789, "step": 54825 }, { "epoch": 0.5577596028645834, "grad_norm": 12.279302597045898, "learning_rate": 2.0523217893043372e-06, "loss": 3.5081, "step": 54830 }, { "epoch": 0.5578104654947916, "grad_norm": 11.898847579956055, "learning_rate": 2.0519283777243154e-06, "loss": 3.2755, "step": 54835 }, { "epoch": 0.557861328125, "grad_norm": 10.195027351379395, "learning_rate": 2.0515349776081005e-06, "loss": 3.508, "step": 54840 }, { "epoch": 0.5579121907552084, "grad_norm": 12.330281257629395, "learning_rate": 2.0511415889657576e-06, "loss": 3.2828, "step": 54845 }, { "epoch": 0.5579630533854166, "grad_norm": 8.676578521728516, "learning_rate": 2.0507482118073497e-06, "loss": 3.3831, "step": 54850 }, { "epoch": 0.558013916015625, "grad_norm": 6.987087726593018, "learning_rate": 2.050354846142942e-06, "loss": 3.5374, "step": 54855 }, { "epoch": 0.5580647786458334, "grad_norm": 11.584981918334961, "learning_rate": 2.0499614919825998e-06, "loss": 3.3075, "step": 54860 }, { "epoch": 0.5581156412760416, "grad_norm": 9.410149574279785, "learning_rate": 2.0495681493363862e-06, "loss": 3.3489, "step": 54865 }, { "epoch": 0.55816650390625, "grad_norm": 12.8016996383667, "learning_rate": 2.049174818214365e-06, "loss": 3.0027, "step": 54870 }, { "epoch": 0.5582173665364584, "grad_norm": 11.745713233947754, "learning_rate": 2.048781498626598e-06, "loss": 3.2402, "step": 54875 }, { "epoch": 0.5582682291666666, "grad_norm": 8.826766967773438, "learning_rate": 2.0483881905831503e-06, "loss": 3.4843, "step": 54880 }, { "epoch": 0.558319091796875, "grad_norm": 13.848268508911133, "learning_rate": 2.047994894094084e-06, "loss": 3.2401, "step": 54885 }, { "epoch": 0.5583699544270834, "grad_norm": 15.071582794189453, "learning_rate": 2.0476016091694607e-06, "loss": 3.133, "step": 54890 }, { "epoch": 0.5584208170572916, "grad_norm": 16.107677459716797, "learning_rate": 2.0472083358193425e-06, "loss": 3.4667, "step": 54895 }, { "epoch": 0.5584716796875, "grad_norm": 13.1598539352417, "learning_rate": 2.0468150740537924e-06, "loss": 3.3724, "step": 54900 }, { "epoch": 0.5585225423177084, "grad_norm": 14.7920503616333, "learning_rate": 2.0464218238828714e-06, "loss": 3.6829, "step": 54905 }, { "epoch": 0.5585734049479166, "grad_norm": 13.697108268737793, "learning_rate": 2.04602858531664e-06, "loss": 3.2593, "step": 54910 }, { "epoch": 0.558624267578125, "grad_norm": 10.984542846679688, "learning_rate": 2.0456353583651594e-06, "loss": 3.4175, "step": 54915 }, { "epoch": 0.5586751302083334, "grad_norm": 11.960285186767578, "learning_rate": 2.0452421430384913e-06, "loss": 3.1616, "step": 54920 }, { "epoch": 0.5587259928385416, "grad_norm": 7.636691093444824, "learning_rate": 2.044848939346695e-06, "loss": 3.4549, "step": 54925 }, { "epoch": 0.55877685546875, "grad_norm": 11.358219146728516, "learning_rate": 2.0444557472998295e-06, "loss": 3.4507, "step": 54930 }, { "epoch": 0.5588277180989584, "grad_norm": 8.738129615783691, "learning_rate": 2.044062566907957e-06, "loss": 3.0457, "step": 54935 }, { "epoch": 0.5588785807291666, "grad_norm": 10.17060661315918, "learning_rate": 2.0436693981811356e-06, "loss": 3.1116, "step": 54940 }, { "epoch": 0.558929443359375, "grad_norm": 10.410772323608398, "learning_rate": 2.0432762411294246e-06, "loss": 3.4821, "step": 54945 }, { "epoch": 0.5589803059895834, "grad_norm": 12.336195945739746, "learning_rate": 2.042883095762882e-06, "loss": 3.1465, "step": 54950 }, { "epoch": 0.5590311686197916, "grad_norm": 12.679303169250488, "learning_rate": 2.0424899620915674e-06, "loss": 3.4706, "step": 54955 }, { "epoch": 0.55908203125, "grad_norm": 9.3101224899292, "learning_rate": 2.042096840125539e-06, "loss": 2.8378, "step": 54960 }, { "epoch": 0.5591328938802084, "grad_norm": 12.529419898986816, "learning_rate": 2.041703729874854e-06, "loss": 3.4076, "step": 54965 }, { "epoch": 0.5591837565104166, "grad_norm": 48.43590545654297, "learning_rate": 2.04131063134957e-06, "loss": 3.2729, "step": 54970 }, { "epoch": 0.559234619140625, "grad_norm": 15.862727165222168, "learning_rate": 2.0409175445597455e-06, "loss": 3.2223, "step": 54975 }, { "epoch": 0.5592854817708334, "grad_norm": 14.386452674865723, "learning_rate": 2.040524469515437e-06, "loss": 3.4878, "step": 54980 }, { "epoch": 0.5593363444010416, "grad_norm": 9.835855484008789, "learning_rate": 2.0401314062267007e-06, "loss": 3.6074, "step": 54985 }, { "epoch": 0.55938720703125, "grad_norm": 12.619324684143066, "learning_rate": 2.039738354703593e-06, "loss": 3.1502, "step": 54990 }, { "epoch": 0.5594380696614584, "grad_norm": 7.411263942718506, "learning_rate": 2.0393453149561706e-06, "loss": 3.6218, "step": 54995 }, { "epoch": 0.5594889322916666, "grad_norm": 7.863607883453369, "learning_rate": 2.03895228699449e-06, "loss": 3.1499, "step": 55000 }, { "epoch": 0.559539794921875, "grad_norm": 14.606014251708984, "learning_rate": 2.038559270828605e-06, "loss": 3.4186, "step": 55005 }, { "epoch": 0.5595906575520834, "grad_norm": 15.155403137207031, "learning_rate": 2.038166266468571e-06, "loss": 3.3658, "step": 55010 }, { "epoch": 0.5596415201822916, "grad_norm": 11.197957038879395, "learning_rate": 2.0377732739244447e-06, "loss": 2.9844, "step": 55015 }, { "epoch": 0.5596923828125, "grad_norm": 14.66739273071289, "learning_rate": 2.0373802932062798e-06, "loss": 3.361, "step": 55020 }, { "epoch": 0.5597432454427084, "grad_norm": 13.307490348815918, "learning_rate": 2.0369873243241293e-06, "loss": 3.3566, "step": 55025 }, { "epoch": 0.5597941080729166, "grad_norm": 9.766070365905762, "learning_rate": 2.036594367288049e-06, "loss": 3.1445, "step": 55030 }, { "epoch": 0.559844970703125, "grad_norm": 8.896641731262207, "learning_rate": 2.0362014221080924e-06, "loss": 3.0715, "step": 55035 }, { "epoch": 0.5598958333333334, "grad_norm": 10.176886558532715, "learning_rate": 2.035808488794312e-06, "loss": 3.1849, "step": 55040 }, { "epoch": 0.5599466959635416, "grad_norm": 12.1671724319458, "learning_rate": 2.0354155673567612e-06, "loss": 3.9336, "step": 55045 }, { "epoch": 0.55999755859375, "grad_norm": 15.339162826538086, "learning_rate": 2.0350226578054935e-06, "loss": 3.4378, "step": 55050 }, { "epoch": 0.5600484212239584, "grad_norm": 11.908868789672852, "learning_rate": 2.0346297601505605e-06, "loss": 3.4508, "step": 55055 }, { "epoch": 0.5600992838541666, "grad_norm": 13.39874267578125, "learning_rate": 2.0342368744020155e-06, "loss": 3.5067, "step": 55060 }, { "epoch": 0.560150146484375, "grad_norm": 12.38049602508545, "learning_rate": 2.033844000569908e-06, "loss": 3.3802, "step": 55065 }, { "epoch": 0.5602010091145834, "grad_norm": 9.690685272216797, "learning_rate": 2.033451138664293e-06, "loss": 3.0584, "step": 55070 }, { "epoch": 0.5602518717447916, "grad_norm": 8.989315032958984, "learning_rate": 2.0330582886952196e-06, "loss": 3.4424, "step": 55075 }, { "epoch": 0.560302734375, "grad_norm": 17.36121940612793, "learning_rate": 2.0326654506727393e-06, "loss": 3.2294, "step": 55080 }, { "epoch": 0.5603535970052084, "grad_norm": 15.613003730773926, "learning_rate": 2.0322726246069015e-06, "loss": 3.4868, "step": 55085 }, { "epoch": 0.5604044596354166, "grad_norm": 13.500045776367188, "learning_rate": 2.031879810507759e-06, "loss": 3.1505, "step": 55090 }, { "epoch": 0.560455322265625, "grad_norm": 9.693497657775879, "learning_rate": 2.0314870083853604e-06, "loss": 3.2669, "step": 55095 }, { "epoch": 0.5605061848958334, "grad_norm": 8.206704139709473, "learning_rate": 2.031094218249756e-06, "loss": 2.9043, "step": 55100 }, { "epoch": 0.5605570475260416, "grad_norm": 12.259492874145508, "learning_rate": 2.0307014401109935e-06, "loss": 3.3699, "step": 55105 }, { "epoch": 0.56060791015625, "grad_norm": 10.723469734191895, "learning_rate": 2.0303086739791246e-06, "loss": 3.4771, "step": 55110 }, { "epoch": 0.5606587727864584, "grad_norm": 15.417384147644043, "learning_rate": 2.029915919864197e-06, "loss": 3.1352, "step": 55115 }, { "epoch": 0.5607096354166666, "grad_norm": 8.692520141601562, "learning_rate": 2.029523177776258e-06, "loss": 3.1695, "step": 55120 }, { "epoch": 0.560760498046875, "grad_norm": 11.361414909362793, "learning_rate": 2.0291304477253577e-06, "loss": 3.0225, "step": 55125 }, { "epoch": 0.5608113606770834, "grad_norm": 8.461770057678223, "learning_rate": 2.028737729721544e-06, "loss": 3.4996, "step": 55130 }, { "epoch": 0.5608622233072916, "grad_norm": 14.262664794921875, "learning_rate": 2.028345023774863e-06, "loss": 3.5087, "step": 55135 }, { "epoch": 0.5609130859375, "grad_norm": 10.160379409790039, "learning_rate": 2.027952329895362e-06, "loss": 3.847, "step": 55140 }, { "epoch": 0.5609639485677084, "grad_norm": 18.16651725769043, "learning_rate": 2.0275596480930902e-06, "loss": 3.2377, "step": 55145 }, { "epoch": 0.5610148111979166, "grad_norm": 15.301115036010742, "learning_rate": 2.0271669783780923e-06, "loss": 3.1241, "step": 55150 }, { "epoch": 0.561065673828125, "grad_norm": 10.699058532714844, "learning_rate": 2.0267743207604156e-06, "loss": 3.7011, "step": 55155 }, { "epoch": 0.5611165364583334, "grad_norm": 7.996247291564941, "learning_rate": 2.0263816752501046e-06, "loss": 3.0694, "step": 55160 }, { "epoch": 0.5611673990885416, "grad_norm": 10.596744537353516, "learning_rate": 2.025989041857207e-06, "loss": 3.2339, "step": 55165 }, { "epoch": 0.56121826171875, "grad_norm": 11.930685997009277, "learning_rate": 2.0255964205917676e-06, "loss": 3.6971, "step": 55170 }, { "epoch": 0.5612691243489584, "grad_norm": 10.017671585083008, "learning_rate": 2.025203811463831e-06, "loss": 3.1608, "step": 55175 }, { "epoch": 0.5613199869791666, "grad_norm": 14.618124008178711, "learning_rate": 2.024811214483442e-06, "loss": 3.3134, "step": 55180 }, { "epoch": 0.561370849609375, "grad_norm": 11.243496894836426, "learning_rate": 2.0244186296606456e-06, "loss": 3.3153, "step": 55185 }, { "epoch": 0.5614217122395834, "grad_norm": 14.103216171264648, "learning_rate": 2.0240260570054866e-06, "loss": 3.324, "step": 55190 }, { "epoch": 0.5614725748697916, "grad_norm": 10.062374114990234, "learning_rate": 2.0236334965280073e-06, "loss": 3.4501, "step": 55195 }, { "epoch": 0.5615234375, "grad_norm": 11.451557159423828, "learning_rate": 2.023240948238252e-06, "loss": 3.3229, "step": 55200 }, { "epoch": 0.5615743001302084, "grad_norm": 11.037567138671875, "learning_rate": 2.022848412146265e-06, "loss": 3.2838, "step": 55205 }, { "epoch": 0.5616251627604166, "grad_norm": 9.876566886901855, "learning_rate": 2.0224558882620874e-06, "loss": 3.4612, "step": 55210 }, { "epoch": 0.561676025390625, "grad_norm": 8.376761436462402, "learning_rate": 2.0220633765957634e-06, "loss": 3.2515, "step": 55215 }, { "epoch": 0.5617268880208334, "grad_norm": 12.039939880371094, "learning_rate": 2.0216708771573336e-06, "loss": 3.21, "step": 55220 }, { "epoch": 0.5617777506510416, "grad_norm": 9.83841323852539, "learning_rate": 2.0212783899568418e-06, "loss": 3.0499, "step": 55225 }, { "epoch": 0.56182861328125, "grad_norm": 13.492292404174805, "learning_rate": 2.020885915004329e-06, "loss": 3.6028, "step": 55230 }, { "epoch": 0.5618794759114584, "grad_norm": 9.677485466003418, "learning_rate": 2.0204934523098357e-06, "loss": 3.1137, "step": 55235 }, { "epoch": 0.5619303385416666, "grad_norm": 14.501381874084473, "learning_rate": 2.020101001883404e-06, "loss": 3.2436, "step": 55240 }, { "epoch": 0.561981201171875, "grad_norm": 8.117530822753906, "learning_rate": 2.0197085637350757e-06, "loss": 3.0716, "step": 55245 }, { "epoch": 0.5620320638020834, "grad_norm": 16.134164810180664, "learning_rate": 2.0193161378748883e-06, "loss": 3.0136, "step": 55250 }, { "epoch": 0.5620829264322916, "grad_norm": 7.026928901672363, "learning_rate": 2.018923724312884e-06, "loss": 3.3786, "step": 55255 }, { "epoch": 0.5621337890625, "grad_norm": 12.431653022766113, "learning_rate": 2.018531323059103e-06, "loss": 3.3799, "step": 55260 }, { "epoch": 0.5621846516927084, "grad_norm": 12.698046684265137, "learning_rate": 2.018138934123583e-06, "loss": 3.7676, "step": 55265 }, { "epoch": 0.5622355143229166, "grad_norm": 10.076068878173828, "learning_rate": 2.0177465575163652e-06, "loss": 3.2673, "step": 55270 }, { "epoch": 0.562286376953125, "grad_norm": 16.043195724487305, "learning_rate": 2.017354193247486e-06, "loss": 3.6135, "step": 55275 }, { "epoch": 0.5623372395833334, "grad_norm": 12.170609474182129, "learning_rate": 2.0169618413269864e-06, "loss": 3.4841, "step": 55280 }, { "epoch": 0.5623881022135416, "grad_norm": 15.926358222961426, "learning_rate": 2.016569501764904e-06, "loss": 3.3622, "step": 55285 }, { "epoch": 0.56243896484375, "grad_norm": 9.513419151306152, "learning_rate": 2.0161771745712753e-06, "loss": 3.1698, "step": 55290 }, { "epoch": 0.5624898274739584, "grad_norm": 11.159460067749023, "learning_rate": 2.0157848597561388e-06, "loss": 3.432, "step": 55295 }, { "epoch": 0.5625406901041666, "grad_norm": 14.436287879943848, "learning_rate": 2.015392557329533e-06, "loss": 3.3272, "step": 55300 }, { "epoch": 0.562591552734375, "grad_norm": 11.197450637817383, "learning_rate": 2.015000267301493e-06, "loss": 3.2097, "step": 55305 }, { "epoch": 0.5626424153645834, "grad_norm": 9.489493370056152, "learning_rate": 2.014607989682057e-06, "loss": 3.3393, "step": 55310 }, { "epoch": 0.5626932779947916, "grad_norm": 9.723332405090332, "learning_rate": 2.014215724481259e-06, "loss": 3.5027, "step": 55315 }, { "epoch": 0.562744140625, "grad_norm": 8.030701637268066, "learning_rate": 2.0138234717091375e-06, "loss": 3.4648, "step": 55320 }, { "epoch": 0.5627950032552084, "grad_norm": 11.252326011657715, "learning_rate": 2.0134312313757277e-06, "loss": 3.2391, "step": 55325 }, { "epoch": 0.5628458658854166, "grad_norm": 9.977880477905273, "learning_rate": 2.013039003491063e-06, "loss": 3.6818, "step": 55330 }, { "epoch": 0.562896728515625, "grad_norm": 12.451231002807617, "learning_rate": 2.012646788065181e-06, "loss": 3.3159, "step": 55335 }, { "epoch": 0.5629475911458334, "grad_norm": 14.259267807006836, "learning_rate": 2.0122545851081154e-06, "loss": 3.3659, "step": 55340 }, { "epoch": 0.5629984537760416, "grad_norm": 8.799775123596191, "learning_rate": 2.011862394629901e-06, "loss": 3.3504, "step": 55345 }, { "epoch": 0.56304931640625, "grad_norm": 13.567157745361328, "learning_rate": 2.011470216640571e-06, "loss": 3.4156, "step": 55350 }, { "epoch": 0.5631001790364584, "grad_norm": 8.953998565673828, "learning_rate": 2.0110780511501593e-06, "loss": 3.1224, "step": 55355 }, { "epoch": 0.5631510416666666, "grad_norm": 13.455957412719727, "learning_rate": 2.010685898168701e-06, "loss": 3.1641, "step": 55360 }, { "epoch": 0.563201904296875, "grad_norm": 10.784248352050781, "learning_rate": 2.0102937577062274e-06, "loss": 3.3754, "step": 55365 }, { "epoch": 0.5632527669270834, "grad_norm": 15.13234806060791, "learning_rate": 2.009901629772771e-06, "loss": 3.2727, "step": 55370 }, { "epoch": 0.5633036295572916, "grad_norm": 12.457405090332031, "learning_rate": 2.0095095143783666e-06, "loss": 3.4566, "step": 55375 }, { "epoch": 0.5633544921875, "grad_norm": 13.739633560180664, "learning_rate": 2.0091174115330443e-06, "loss": 3.4025, "step": 55380 }, { "epoch": 0.5634053548177084, "grad_norm": 15.087024688720703, "learning_rate": 2.008725321246838e-06, "loss": 3.0332, "step": 55385 }, { "epoch": 0.5634562174479166, "grad_norm": 8.560545921325684, "learning_rate": 2.008333243529776e-06, "loss": 3.7152, "step": 55390 }, { "epoch": 0.563507080078125, "grad_norm": 13.509528160095215, "learning_rate": 2.007941178391892e-06, "loss": 3.2793, "step": 55395 }, { "epoch": 0.5635579427083334, "grad_norm": 19.74806785583496, "learning_rate": 2.007549125843217e-06, "loss": 3.2927, "step": 55400 }, { "epoch": 0.5636088053385416, "grad_norm": 13.209090232849121, "learning_rate": 2.0071570858937804e-06, "loss": 3.1358, "step": 55405 }, { "epoch": 0.56365966796875, "grad_norm": 11.439593315124512, "learning_rate": 2.0067650585536124e-06, "loss": 3.3403, "step": 55410 }, { "epoch": 0.5637105305989584, "grad_norm": 15.847519874572754, "learning_rate": 2.0063730438327445e-06, "loss": 3.0433, "step": 55415 }, { "epoch": 0.5637613932291666, "grad_norm": 16.50002670288086, "learning_rate": 2.0059810417412047e-06, "loss": 3.5169, "step": 55420 }, { "epoch": 0.563812255859375, "grad_norm": 15.249661445617676, "learning_rate": 2.0055890522890225e-06, "loss": 3.4845, "step": 55425 }, { "epoch": 0.5638631184895834, "grad_norm": 13.227401733398438, "learning_rate": 2.0051970754862282e-06, "loss": 3.2742, "step": 55430 }, { "epoch": 0.5639139811197916, "grad_norm": 62.592098236083984, "learning_rate": 2.0048051113428487e-06, "loss": 4.0365, "step": 55435 }, { "epoch": 0.56396484375, "grad_norm": 10.982181549072266, "learning_rate": 2.0044131598689137e-06, "loss": 3.4353, "step": 55440 }, { "epoch": 0.5640157063802084, "grad_norm": 9.214542388916016, "learning_rate": 2.0040212210744495e-06, "loss": 3.6739, "step": 55445 }, { "epoch": 0.5640665690104166, "grad_norm": 13.622384071350098, "learning_rate": 2.0036292949694855e-06, "loss": 3.0846, "step": 55450 }, { "epoch": 0.564117431640625, "grad_norm": 19.547794342041016, "learning_rate": 2.0032373815640487e-06, "loss": 3.2805, "step": 55455 }, { "epoch": 0.5641682942708334, "grad_norm": 9.698598861694336, "learning_rate": 2.002845480868165e-06, "loss": 3.0059, "step": 55460 }, { "epoch": 0.5642191569010416, "grad_norm": 12.346816062927246, "learning_rate": 2.002453592891862e-06, "loss": 2.9825, "step": 55465 }, { "epoch": 0.56427001953125, "grad_norm": 12.297981262207031, "learning_rate": 2.002061717645166e-06, "loss": 3.9478, "step": 55470 }, { "epoch": 0.5643208821614584, "grad_norm": 12.369157791137695, "learning_rate": 2.001669855138103e-06, "loss": 3.379, "step": 55475 }, { "epoch": 0.5643717447916666, "grad_norm": 13.843544006347656, "learning_rate": 2.001278005380699e-06, "loss": 3.3144, "step": 55480 }, { "epoch": 0.564422607421875, "grad_norm": 11.357953071594238, "learning_rate": 2.000886168382978e-06, "loss": 3.1551, "step": 55485 }, { "epoch": 0.5644734700520834, "grad_norm": 11.626884460449219, "learning_rate": 2.0004943441549668e-06, "loss": 3.3464, "step": 55490 }, { "epoch": 0.5645243326822916, "grad_norm": 12.29926872253418, "learning_rate": 2.0001025327066893e-06, "loss": 3.8347, "step": 55495 }, { "epoch": 0.5645751953125, "grad_norm": 7.423537731170654, "learning_rate": 1.99971073404817e-06, "loss": 3.3202, "step": 55500 }, { "epoch": 0.5646260579427084, "grad_norm": 14.192968368530273, "learning_rate": 1.9993189481894328e-06, "loss": 3.5049, "step": 55505 }, { "epoch": 0.5646769205729166, "grad_norm": 11.486786842346191, "learning_rate": 1.9989271751405016e-06, "loss": 3.5, "step": 55510 }, { "epoch": 0.564727783203125, "grad_norm": 11.493895530700684, "learning_rate": 1.998535414911401e-06, "loss": 3.218, "step": 55515 }, { "epoch": 0.5647786458333334, "grad_norm": 14.027342796325684, "learning_rate": 1.9981436675121514e-06, "loss": 3.3089, "step": 55520 }, { "epoch": 0.5648295084635416, "grad_norm": 14.80307674407959, "learning_rate": 1.9977519329527777e-06, "loss": 3.0861, "step": 55525 }, { "epoch": 0.56488037109375, "grad_norm": 10.044238090515137, "learning_rate": 1.997360211243302e-06, "loss": 3.6657, "step": 55530 }, { "epoch": 0.5649312337239584, "grad_norm": 12.431305885314941, "learning_rate": 1.9969685023937462e-06, "loss": 3.2477, "step": 55535 }, { "epoch": 0.5649820963541666, "grad_norm": 8.12856388092041, "learning_rate": 1.996576806414131e-06, "loss": 3.1989, "step": 55540 }, { "epoch": 0.565032958984375, "grad_norm": 10.21527099609375, "learning_rate": 1.99618512331448e-06, "loss": 3.4905, "step": 55545 }, { "epoch": 0.5650838216145834, "grad_norm": 11.607525825500488, "learning_rate": 1.995793453104813e-06, "loss": 3.9561, "step": 55550 }, { "epoch": 0.5651346842447916, "grad_norm": 9.88409423828125, "learning_rate": 1.995401795795151e-06, "loss": 3.1461, "step": 55555 }, { "epoch": 0.565185546875, "grad_norm": 12.363259315490723, "learning_rate": 1.995010151395514e-06, "loss": 3.8604, "step": 55560 }, { "epoch": 0.5652364095052084, "grad_norm": 15.324316024780273, "learning_rate": 1.994618519915923e-06, "loss": 3.7273, "step": 55565 }, { "epoch": 0.5652872721354166, "grad_norm": 10.770191192626953, "learning_rate": 1.994226901366398e-06, "loss": 3.4032, "step": 55570 }, { "epoch": 0.565338134765625, "grad_norm": 13.22590160369873, "learning_rate": 1.9938352957569566e-06, "loss": 3.3282, "step": 55575 }, { "epoch": 0.5653889973958334, "grad_norm": 13.094693183898926, "learning_rate": 1.9934437030976194e-06, "loss": 3.7382, "step": 55580 }, { "epoch": 0.5654398600260416, "grad_norm": 9.559659004211426, "learning_rate": 1.993052123398406e-06, "loss": 3.203, "step": 55585 }, { "epoch": 0.56549072265625, "grad_norm": 11.719879150390625, "learning_rate": 1.9926605566693327e-06, "loss": 3.6259, "step": 55590 }, { "epoch": 0.5655415852864584, "grad_norm": 13.236265182495117, "learning_rate": 1.99226900292042e-06, "loss": 3.1326, "step": 55595 }, { "epoch": 0.5655924479166666, "grad_norm": 10.034022331237793, "learning_rate": 1.991877462161683e-06, "loss": 3.3176, "step": 55600 }, { "epoch": 0.565643310546875, "grad_norm": 12.410802841186523, "learning_rate": 1.9914859344031414e-06, "loss": 3.0259, "step": 55605 }, { "epoch": 0.5656941731770834, "grad_norm": 11.027775764465332, "learning_rate": 1.991094419654812e-06, "loss": 3.165, "step": 55610 }, { "epoch": 0.5657450358072916, "grad_norm": 11.108048439025879, "learning_rate": 1.9907029179267105e-06, "loss": 3.4322, "step": 55615 }, { "epoch": 0.5657958984375, "grad_norm": 14.826831817626953, "learning_rate": 1.990311429228855e-06, "loss": 3.4251, "step": 55620 }, { "epoch": 0.5658467610677084, "grad_norm": 9.523362159729004, "learning_rate": 1.9899199535712603e-06, "loss": 3.2679, "step": 55625 }, { "epoch": 0.5658976236979166, "grad_norm": 13.137377738952637, "learning_rate": 1.989528490963943e-06, "loss": 3.3582, "step": 55630 }, { "epoch": 0.565948486328125, "grad_norm": 14.75580883026123, "learning_rate": 1.9891370414169174e-06, "loss": 3.4774, "step": 55635 }, { "epoch": 0.5659993489583334, "grad_norm": 11.692719459533691, "learning_rate": 1.9887456049401997e-06, "loss": 3.224, "step": 55640 }, { "epoch": 0.5660502115885416, "grad_norm": 10.250654220581055, "learning_rate": 1.9883541815438058e-06, "loss": 3.1661, "step": 55645 }, { "epoch": 0.56610107421875, "grad_norm": 7.867267608642578, "learning_rate": 1.987962771237748e-06, "loss": 3.4142, "step": 55650 }, { "epoch": 0.5661519368489584, "grad_norm": 17.736726760864258, "learning_rate": 1.9875713740320407e-06, "loss": 3.1155, "step": 55655 }, { "epoch": 0.5662027994791666, "grad_norm": 6.9726386070251465, "learning_rate": 1.9871799899366994e-06, "loss": 3.2602, "step": 55660 }, { "epoch": 0.566253662109375, "grad_norm": 8.041133880615234, "learning_rate": 1.9867886189617362e-06, "loss": 2.9021, "step": 55665 }, { "epoch": 0.5663045247395834, "grad_norm": 10.1356201171875, "learning_rate": 1.986397261117165e-06, "loss": 3.5967, "step": 55670 }, { "epoch": 0.5663553873697916, "grad_norm": 10.222390174865723, "learning_rate": 1.9860059164129967e-06, "loss": 3.4714, "step": 55675 }, { "epoch": 0.56640625, "grad_norm": 11.000347137451172, "learning_rate": 1.9856145848592464e-06, "loss": 3.6656, "step": 55680 }, { "epoch": 0.5664571126302084, "grad_norm": 12.125871658325195, "learning_rate": 1.985223266465925e-06, "loss": 3.3548, "step": 55685 }, { "epoch": 0.5665079752604166, "grad_norm": 12.343113899230957, "learning_rate": 1.984831961243044e-06, "loss": 3.4821, "step": 55690 }, { "epoch": 0.566558837890625, "grad_norm": 13.80804443359375, "learning_rate": 1.9844406692006147e-06, "loss": 3.9151, "step": 55695 }, { "epoch": 0.5666097005208334, "grad_norm": 10.661865234375, "learning_rate": 1.9840493903486496e-06, "loss": 3.3434, "step": 55700 }, { "epoch": 0.5666605631510416, "grad_norm": 7.703371524810791, "learning_rate": 1.983658124697158e-06, "loss": 3.1871, "step": 55705 }, { "epoch": 0.56671142578125, "grad_norm": 14.116416931152344, "learning_rate": 1.9832668722561506e-06, "loss": 3.2558, "step": 55710 }, { "epoch": 0.5667622884114584, "grad_norm": 11.84914493560791, "learning_rate": 1.9828756330356386e-06, "loss": 2.9877, "step": 55715 }, { "epoch": 0.5668131510416666, "grad_norm": 11.780055046081543, "learning_rate": 1.9824844070456305e-06, "loss": 3.5846, "step": 55720 }, { "epoch": 0.566864013671875, "grad_norm": 12.862578392028809, "learning_rate": 1.9820931942961364e-06, "loss": 3.0623, "step": 55725 }, { "epoch": 0.5669148763020834, "grad_norm": 13.045098304748535, "learning_rate": 1.9817019947971646e-06, "loss": 3.0973, "step": 55730 }, { "epoch": 0.5669657389322916, "grad_norm": 16.13922882080078, "learning_rate": 1.9813108085587248e-06, "loss": 3.186, "step": 55735 }, { "epoch": 0.5670166015625, "grad_norm": 13.725215911865234, "learning_rate": 1.9809196355908255e-06, "loss": 3.4581, "step": 55740 }, { "epoch": 0.5670674641927084, "grad_norm": 16.00213050842285, "learning_rate": 1.9805284759034737e-06, "loss": 3.1418, "step": 55745 }, { "epoch": 0.5671183268229166, "grad_norm": 10.054238319396973, "learning_rate": 1.9801373295066774e-06, "loss": 3.1284, "step": 55750 }, { "epoch": 0.567169189453125, "grad_norm": 14.31942367553711, "learning_rate": 1.9797461964104452e-06, "loss": 3.4611, "step": 55755 }, { "epoch": 0.5672200520833334, "grad_norm": 8.36902141571045, "learning_rate": 1.9793550766247827e-06, "loss": 3.2506, "step": 55760 }, { "epoch": 0.5672709147135416, "grad_norm": 11.869385719299316, "learning_rate": 1.978963970159698e-06, "loss": 3.1739, "step": 55765 }, { "epoch": 0.56732177734375, "grad_norm": 11.562711715698242, "learning_rate": 1.9785728770251948e-06, "loss": 3.2535, "step": 55770 }, { "epoch": 0.5673726399739584, "grad_norm": 15.536983489990234, "learning_rate": 1.9781817972312822e-06, "loss": 3.5746, "step": 55775 }, { "epoch": 0.5674235026041666, "grad_norm": 14.286041259765625, "learning_rate": 1.9777907307879647e-06, "loss": 3.2235, "step": 55780 }, { "epoch": 0.567474365234375, "grad_norm": 11.456912994384766, "learning_rate": 1.977399677705247e-06, "loss": 3.449, "step": 55785 }, { "epoch": 0.5675252278645834, "grad_norm": 15.62401294708252, "learning_rate": 1.977008637993135e-06, "loss": 3.2953, "step": 55790 }, { "epoch": 0.5675760904947916, "grad_norm": 7.052567958831787, "learning_rate": 1.976617611661633e-06, "loss": 3.1743, "step": 55795 }, { "epoch": 0.567626953125, "grad_norm": 8.342056274414062, "learning_rate": 1.976226598720746e-06, "loss": 3.3018, "step": 55800 }, { "epoch": 0.5676778157552084, "grad_norm": 16.19434928894043, "learning_rate": 1.9758355991804763e-06, "loss": 3.4816, "step": 55805 }, { "epoch": 0.5677286783854166, "grad_norm": 8.652542114257812, "learning_rate": 1.9754446130508285e-06, "loss": 3.4816, "step": 55810 }, { "epoch": 0.567779541015625, "grad_norm": 16.950336456298828, "learning_rate": 1.975053640341807e-06, "loss": 3.2507, "step": 55815 }, { "epoch": 0.5678304036458334, "grad_norm": 16.509111404418945, "learning_rate": 1.9746626810634127e-06, "loss": 3.8118, "step": 55820 }, { "epoch": 0.5678812662760416, "grad_norm": 11.515554428100586, "learning_rate": 1.974271735225649e-06, "loss": 3.4826, "step": 55825 }, { "epoch": 0.56793212890625, "grad_norm": 15.886150360107422, "learning_rate": 1.9738808028385197e-06, "loss": 3.0344, "step": 55830 }, { "epoch": 0.5679829915364584, "grad_norm": 9.29141902923584, "learning_rate": 1.9734898839120246e-06, "loss": 3.3004, "step": 55835 }, { "epoch": 0.5680338541666666, "grad_norm": 13.30876636505127, "learning_rate": 1.9730989784561663e-06, "loss": 3.1008, "step": 55840 }, { "epoch": 0.568084716796875, "grad_norm": 13.878016471862793, "learning_rate": 1.972708086480945e-06, "loss": 3.4856, "step": 55845 }, { "epoch": 0.5681355794270834, "grad_norm": 12.876764297485352, "learning_rate": 1.9723172079963627e-06, "loss": 3.0248, "step": 55850 }, { "epoch": 0.5681864420572916, "grad_norm": 10.323982238769531, "learning_rate": 1.97192634301242e-06, "loss": 3.6409, "step": 55855 }, { "epoch": 0.5682373046875, "grad_norm": 15.221038818359375, "learning_rate": 1.9715354915391163e-06, "loss": 3.2885, "step": 55860 }, { "epoch": 0.5682881673177084, "grad_norm": 11.07483959197998, "learning_rate": 1.971144653586451e-06, "loss": 3.5815, "step": 55865 }, { "epoch": 0.5683390299479166, "grad_norm": 14.276740074157715, "learning_rate": 1.970753829164426e-06, "loss": 2.9981, "step": 55870 }, { "epoch": 0.568389892578125, "grad_norm": 12.67236328125, "learning_rate": 1.970363018283038e-06, "loss": 3.3977, "step": 55875 }, { "epoch": 0.5684407552083334, "grad_norm": 9.223763465881348, "learning_rate": 1.969972220952287e-06, "loss": 3.5081, "step": 55880 }, { "epoch": 0.5684916178385416, "grad_norm": 12.528619766235352, "learning_rate": 1.96958143718217e-06, "loss": 3.328, "step": 55885 }, { "epoch": 0.56854248046875, "grad_norm": 13.376630783081055, "learning_rate": 1.9691906669826873e-06, "loss": 3.7906, "step": 55890 }, { "epoch": 0.5685933430989584, "grad_norm": 13.842061042785645, "learning_rate": 1.9687999103638354e-06, "loss": 3.4686, "step": 55895 }, { "epoch": 0.5686442057291666, "grad_norm": 12.943464279174805, "learning_rate": 1.9684091673356114e-06, "loss": 3.1778, "step": 55900 }, { "epoch": 0.568695068359375, "grad_norm": 12.769246101379395, "learning_rate": 1.968018437908013e-06, "loss": 3.3822, "step": 55905 }, { "epoch": 0.5687459309895834, "grad_norm": 8.439935684204102, "learning_rate": 1.9676277220910374e-06, "loss": 3.3736, "step": 55910 }, { "epoch": 0.5687967936197916, "grad_norm": 8.644288063049316, "learning_rate": 1.9672370198946795e-06, "loss": 3.2799, "step": 55915 }, { "epoch": 0.56884765625, "grad_norm": 9.586736679077148, "learning_rate": 1.966846331328936e-06, "loss": 2.8663, "step": 55920 }, { "epoch": 0.5688985188802084, "grad_norm": 14.994538307189941, "learning_rate": 1.966455656403804e-06, "loss": 3.4941, "step": 55925 }, { "epoch": 0.5689493815104166, "grad_norm": 10.81418514251709, "learning_rate": 1.9660649951292766e-06, "loss": 3.6868, "step": 55930 }, { "epoch": 0.569000244140625, "grad_norm": 13.596345901489258, "learning_rate": 1.9656743475153504e-06, "loss": 4.1722, "step": 55935 }, { "epoch": 0.5690511067708334, "grad_norm": 14.05886173248291, "learning_rate": 1.965283713572018e-06, "loss": 3.0352, "step": 55940 }, { "epoch": 0.5691019694010416, "grad_norm": 135.2580108642578, "learning_rate": 1.964893093309276e-06, "loss": 4.1592, "step": 55945 }, { "epoch": 0.56915283203125, "grad_norm": 11.427495002746582, "learning_rate": 1.9645024867371176e-06, "loss": 3.0614, "step": 55950 }, { "epoch": 0.5692036946614584, "grad_norm": 8.255717277526855, "learning_rate": 1.9641118938655366e-06, "loss": 3.7999, "step": 55955 }, { "epoch": 0.5692545572916666, "grad_norm": 12.217011451721191, "learning_rate": 1.9637213147045243e-06, "loss": 3.2522, "step": 55960 }, { "epoch": 0.569305419921875, "grad_norm": 14.885222434997559, "learning_rate": 1.963330749264076e-06, "loss": 3.0997, "step": 55965 }, { "epoch": 0.5693562825520834, "grad_norm": 16.567792892456055, "learning_rate": 1.9629401975541835e-06, "loss": 3.2213, "step": 55970 }, { "epoch": 0.5694071451822916, "grad_norm": 12.30616569519043, "learning_rate": 1.9625496595848382e-06, "loss": 3.568, "step": 55975 }, { "epoch": 0.5694580078125, "grad_norm": 11.718070983886719, "learning_rate": 1.9621591353660323e-06, "loss": 3.0996, "step": 55980 }, { "epoch": 0.5695088704427084, "grad_norm": 10.797274589538574, "learning_rate": 1.961768624907758e-06, "loss": 3.0879, "step": 55985 }, { "epoch": 0.5695597330729166, "grad_norm": 9.172492980957031, "learning_rate": 1.961378128220006e-06, "loss": 3.91, "step": 55990 }, { "epoch": 0.569610595703125, "grad_norm": 10.033970832824707, "learning_rate": 1.960987645312767e-06, "loss": 3.1516, "step": 55995 }, { "epoch": 0.5696614583333334, "grad_norm": 14.512290954589844, "learning_rate": 1.9605971761960304e-06, "loss": 3.1986, "step": 56000 }, { "epoch": 0.5697123209635416, "grad_norm": 19.83005714416504, "learning_rate": 1.9602067208797874e-06, "loss": 3.3876, "step": 56005 }, { "epoch": 0.56976318359375, "grad_norm": 10.733776092529297, "learning_rate": 1.9598162793740283e-06, "loss": 3.4806, "step": 56010 }, { "epoch": 0.5698140462239584, "grad_norm": 14.168524742126465, "learning_rate": 1.9594258516887404e-06, "loss": 3.7829, "step": 56015 }, { "epoch": 0.5698649088541666, "grad_norm": 13.03543758392334, "learning_rate": 1.959035437833915e-06, "loss": 3.2386, "step": 56020 }, { "epoch": 0.569915771484375, "grad_norm": 14.626461029052734, "learning_rate": 1.95864503781954e-06, "loss": 3.1362, "step": 56025 }, { "epoch": 0.5699666341145834, "grad_norm": 7.632985591888428, "learning_rate": 1.9582546516556025e-06, "loss": 3.2632, "step": 56030 }, { "epoch": 0.5700174967447916, "grad_norm": 18.721630096435547, "learning_rate": 1.957864279352091e-06, "loss": 3.1491, "step": 56035 }, { "epoch": 0.570068359375, "grad_norm": 12.163301467895508, "learning_rate": 1.9574739209189945e-06, "loss": 3.4391, "step": 56040 }, { "epoch": 0.5701192220052084, "grad_norm": 9.6724214553833, "learning_rate": 1.9570835763662986e-06, "loss": 3.2104, "step": 56045 }, { "epoch": 0.5701700846354166, "grad_norm": 12.953228950500488, "learning_rate": 1.9566932457039913e-06, "loss": 3.1921, "step": 56050 }, { "epoch": 0.570220947265625, "grad_norm": 17.458709716796875, "learning_rate": 1.956302928942057e-06, "loss": 3.563, "step": 56055 }, { "epoch": 0.5702718098958334, "grad_norm": 10.399765968322754, "learning_rate": 1.9559126260904848e-06, "loss": 3.6487, "step": 56060 }, { "epoch": 0.5703226725260416, "grad_norm": 17.193531036376953, "learning_rate": 1.955522337159259e-06, "loss": 3.3863, "step": 56065 }, { "epoch": 0.57037353515625, "grad_norm": 7.868402481079102, "learning_rate": 1.955132062158365e-06, "loss": 3.3566, "step": 56070 }, { "epoch": 0.5704243977864584, "grad_norm": 14.431535720825195, "learning_rate": 1.9547418010977874e-06, "loss": 3.1291, "step": 56075 }, { "epoch": 0.5704752604166666, "grad_norm": 10.154860496520996, "learning_rate": 1.9543515539875123e-06, "loss": 3.4842, "step": 56080 }, { "epoch": 0.570526123046875, "grad_norm": 12.6719970703125, "learning_rate": 1.9539613208375234e-06, "loss": 3.5905, "step": 56085 }, { "epoch": 0.5705769856770834, "grad_norm": 9.200603485107422, "learning_rate": 1.953571101657805e-06, "loss": 3.9783, "step": 56090 }, { "epoch": 0.5706278483072916, "grad_norm": 10.441458702087402, "learning_rate": 1.9531808964583388e-06, "loss": 3.2192, "step": 56095 }, { "epoch": 0.5706787109375, "grad_norm": 10.30545425415039, "learning_rate": 1.952790705249111e-06, "loss": 3.6149, "step": 56100 }, { "epoch": 0.5707295735677084, "grad_norm": 14.64566421508789, "learning_rate": 1.952400528040103e-06, "loss": 3.5059, "step": 56105 }, { "epoch": 0.5707804361979166, "grad_norm": 10.314763069152832, "learning_rate": 1.9520103648412973e-06, "loss": 3.3058, "step": 56110 }, { "epoch": 0.570831298828125, "grad_norm": 12.125476837158203, "learning_rate": 1.9516202156626777e-06, "loss": 3.2173, "step": 56115 }, { "epoch": 0.5708821614583334, "grad_norm": 15.160707473754883, "learning_rate": 1.9512300805142243e-06, "loss": 3.1716, "step": 56120 }, { "epoch": 0.5709330240885416, "grad_norm": 7.939380168914795, "learning_rate": 1.9508399594059196e-06, "loss": 3.5609, "step": 56125 }, { "epoch": 0.57098388671875, "grad_norm": 12.273107528686523, "learning_rate": 1.9504498523477434e-06, "loss": 3.3506, "step": 56130 }, { "epoch": 0.5710347493489584, "grad_norm": 14.89732837677002, "learning_rate": 1.9500597593496777e-06, "loss": 3.7025, "step": 56135 }, { "epoch": 0.5710856119791666, "grad_norm": 11.694847106933594, "learning_rate": 1.9496696804217037e-06, "loss": 3.2454, "step": 56140 }, { "epoch": 0.571136474609375, "grad_norm": 8.561016082763672, "learning_rate": 1.9492796155737996e-06, "loss": 3.0889, "step": 56145 }, { "epoch": 0.5711873372395834, "grad_norm": 15.127306938171387, "learning_rate": 1.9488895648159455e-06, "loss": 3.1905, "step": 56150 }, { "epoch": 0.5712381998697916, "grad_norm": 9.506645202636719, "learning_rate": 1.9484995281581225e-06, "loss": 3.2974, "step": 56155 }, { "epoch": 0.5712890625, "grad_norm": 11.450814247131348, "learning_rate": 1.9481095056103077e-06, "loss": 3.4543, "step": 56160 }, { "epoch": 0.5713399251302084, "grad_norm": 15.476284980773926, "learning_rate": 1.9477194971824813e-06, "loss": 3.536, "step": 56165 }, { "epoch": 0.5713907877604166, "grad_norm": 12.819311141967773, "learning_rate": 1.9473295028846192e-06, "loss": 3.0641, "step": 56170 }, { "epoch": 0.571441650390625, "grad_norm": 13.76785945892334, "learning_rate": 1.9469395227267013e-06, "loss": 3.399, "step": 56175 }, { "epoch": 0.5714925130208334, "grad_norm": 15.641027450561523, "learning_rate": 1.9465495567187054e-06, "loss": 3.4786, "step": 56180 }, { "epoch": 0.5715433756510416, "grad_norm": 12.041227340698242, "learning_rate": 1.9461596048706072e-06, "loss": 3.3719, "step": 56185 }, { "epoch": 0.57159423828125, "grad_norm": 10.25402545928955, "learning_rate": 1.945769667192384e-06, "loss": 3.2183, "step": 56190 }, { "epoch": 0.5716451009114584, "grad_norm": 10.055646896362305, "learning_rate": 1.9453797436940126e-06, "loss": 3.6629, "step": 56195 }, { "epoch": 0.5716959635416666, "grad_norm": 14.09347915649414, "learning_rate": 1.9449898343854696e-06, "loss": 3.3642, "step": 56200 }, { "epoch": 0.571746826171875, "grad_norm": 10.3724365234375, "learning_rate": 1.944599939276729e-06, "loss": 3.6581, "step": 56205 }, { "epoch": 0.5717976888020834, "grad_norm": 10.909431457519531, "learning_rate": 1.9442100583777685e-06, "loss": 3.2881, "step": 56210 }, { "epoch": 0.5718485514322916, "grad_norm": 14.096423149108887, "learning_rate": 1.943820191698561e-06, "loss": 3.788, "step": 56215 }, { "epoch": 0.5718994140625, "grad_norm": 16.65086555480957, "learning_rate": 1.943430339249083e-06, "loss": 3.8615, "step": 56220 }, { "epoch": 0.5719502766927084, "grad_norm": 11.530363082885742, "learning_rate": 1.943040501039307e-06, "loss": 3.282, "step": 56225 }, { "epoch": 0.5720011393229166, "grad_norm": 7.3054680824279785, "learning_rate": 1.9426506770792083e-06, "loss": 2.9748, "step": 56230 }, { "epoch": 0.572052001953125, "grad_norm": 13.849099159240723, "learning_rate": 1.94226086737876e-06, "loss": 2.9996, "step": 56235 }, { "epoch": 0.5721028645833334, "grad_norm": 17.799570083618164, "learning_rate": 1.941871071947935e-06, "loss": 3.4202, "step": 56240 }, { "epoch": 0.5721537272135416, "grad_norm": 15.279867172241211, "learning_rate": 1.941481290796706e-06, "loss": 3.335, "step": 56245 }, { "epoch": 0.57220458984375, "grad_norm": 17.308347702026367, "learning_rate": 1.941091523935046e-06, "loss": 3.4459, "step": 56250 }, { "epoch": 0.5722554524739584, "grad_norm": 7.215347766876221, "learning_rate": 1.9407017713729275e-06, "loss": 3.1686, "step": 56255 }, { "epoch": 0.5723063151041666, "grad_norm": 13.225153923034668, "learning_rate": 1.940312033120321e-06, "loss": 3.8314, "step": 56260 }, { "epoch": 0.572357177734375, "grad_norm": 14.776530265808105, "learning_rate": 1.939922309187198e-06, "loss": 3.7128, "step": 56265 }, { "epoch": 0.5724080403645834, "grad_norm": 9.53740119934082, "learning_rate": 1.939532599583531e-06, "loss": 3.0098, "step": 56270 }, { "epoch": 0.5724589029947916, "grad_norm": 10.148475646972656, "learning_rate": 1.939142904319289e-06, "loss": 3.2187, "step": 56275 }, { "epoch": 0.572509765625, "grad_norm": 14.827524185180664, "learning_rate": 1.9387532234044436e-06, "loss": 3.4579, "step": 56280 }, { "epoch": 0.5725606282552084, "grad_norm": 12.451027870178223, "learning_rate": 1.9383635568489624e-06, "loss": 3.3303, "step": 56285 }, { "epoch": 0.5726114908854166, "grad_norm": 11.020612716674805, "learning_rate": 1.937973904662818e-06, "loss": 3.4904, "step": 56290 }, { "epoch": 0.572662353515625, "grad_norm": 15.075807571411133, "learning_rate": 1.9375842668559774e-06, "loss": 3.0847, "step": 56295 }, { "epoch": 0.5727132161458334, "grad_norm": 14.280423164367676, "learning_rate": 1.9371946434384103e-06, "loss": 3.183, "step": 56300 }, { "epoch": 0.5727640787760416, "grad_norm": 9.974885940551758, "learning_rate": 1.9368050344200835e-06, "loss": 3.4728, "step": 56305 }, { "epoch": 0.57281494140625, "grad_norm": 11.663572311401367, "learning_rate": 1.9364154398109684e-06, "loss": 3.5697, "step": 56310 }, { "epoch": 0.5728658040364584, "grad_norm": 7.344516277313232, "learning_rate": 1.93602585962103e-06, "loss": 3.3228, "step": 56315 }, { "epoch": 0.5729166666666666, "grad_norm": 14.893961906433105, "learning_rate": 1.9356362938602352e-06, "loss": 3.9728, "step": 56320 }, { "epoch": 0.572967529296875, "grad_norm": 8.951798439025879, "learning_rate": 1.9352467425385534e-06, "loss": 3.2949, "step": 56325 }, { "epoch": 0.5730183919270834, "grad_norm": 8.843398094177246, "learning_rate": 1.934857205665949e-06, "loss": 3.1706, "step": 56330 }, { "epoch": 0.5730692545572916, "grad_norm": 8.021584510803223, "learning_rate": 1.93446768325239e-06, "loss": 3.1792, "step": 56335 }, { "epoch": 0.5731201171875, "grad_norm": 15.649279594421387, "learning_rate": 1.93407817530784e-06, "loss": 3.6829, "step": 56340 }, { "epoch": 0.5731709798177084, "grad_norm": 10.430551528930664, "learning_rate": 1.933688681842267e-06, "loss": 3.3624, "step": 56345 }, { "epoch": 0.5732218424479166, "grad_norm": 10.106148719787598, "learning_rate": 1.9332992028656343e-06, "loss": 3.4437, "step": 56350 }, { "epoch": 0.573272705078125, "grad_norm": 15.482913970947266, "learning_rate": 1.9329097383879073e-06, "loss": 3.6239, "step": 56355 }, { "epoch": 0.5733235677083334, "grad_norm": 8.75622272491455, "learning_rate": 1.9325202884190497e-06, "loss": 2.8706, "step": 56360 }, { "epoch": 0.5733744303385416, "grad_norm": 15.633439064025879, "learning_rate": 1.932130852969027e-06, "loss": 3.7574, "step": 56365 }, { "epoch": 0.57342529296875, "grad_norm": 10.85609245300293, "learning_rate": 1.931741432047801e-06, "loss": 3.5615, "step": 56370 }, { "epoch": 0.5734761555989584, "grad_norm": 8.461934089660645, "learning_rate": 1.9313520256653365e-06, "loss": 3.0443, "step": 56375 }, { "epoch": 0.5735270182291666, "grad_norm": 15.088605880737305, "learning_rate": 1.9309626338315946e-06, "loss": 3.663, "step": 56380 }, { "epoch": 0.573577880859375, "grad_norm": 14.905712127685547, "learning_rate": 1.9305732565565394e-06, "loss": 3.1529, "step": 56385 }, { "epoch": 0.5736287434895834, "grad_norm": 12.533961296081543, "learning_rate": 1.9301838938501327e-06, "loss": 3.118, "step": 56390 }, { "epoch": 0.5736796061197916, "grad_norm": 15.248723983764648, "learning_rate": 1.929794545722336e-06, "loss": 3.8423, "step": 56395 }, { "epoch": 0.57373046875, "grad_norm": 14.34217357635498, "learning_rate": 1.92940521218311e-06, "loss": 3.1248, "step": 56400 }, { "epoch": 0.5737813313802084, "grad_norm": 15.595789909362793, "learning_rate": 1.929015893242416e-06, "loss": 2.9282, "step": 56405 }, { "epoch": 0.5738321940104166, "grad_norm": 15.70177936553955, "learning_rate": 1.928626588910217e-06, "loss": 3.1065, "step": 56410 }, { "epoch": 0.573883056640625, "grad_norm": 8.46838092803955, "learning_rate": 1.928237299196469e-06, "loss": 3.2156, "step": 56415 }, { "epoch": 0.5739339192708334, "grad_norm": 12.688814163208008, "learning_rate": 1.927848024111136e-06, "loss": 3.8477, "step": 56420 }, { "epoch": 0.5739847819010416, "grad_norm": 13.845072746276855, "learning_rate": 1.9274587636641753e-06, "loss": 3.2232, "step": 56425 }, { "epoch": 0.57403564453125, "grad_norm": 10.488356590270996, "learning_rate": 1.927069517865546e-06, "loss": 3.4556, "step": 56430 }, { "epoch": 0.5740865071614584, "grad_norm": 16.517683029174805, "learning_rate": 1.926680286725207e-06, "loss": 3.3017, "step": 56435 }, { "epoch": 0.5741373697916666, "grad_norm": 9.872268676757812, "learning_rate": 1.926291070253118e-06, "loss": 3.2667, "step": 56440 }, { "epoch": 0.574188232421875, "grad_norm": 16.947826385498047, "learning_rate": 1.925901868459235e-06, "loss": 3.4922, "step": 56445 }, { "epoch": 0.5742390950520834, "grad_norm": 10.253779411315918, "learning_rate": 1.925512681353518e-06, "loss": 3.1615, "step": 56450 }, { "epoch": 0.5742899576822916, "grad_norm": 11.655627250671387, "learning_rate": 1.9251235089459212e-06, "loss": 3.8761, "step": 56455 }, { "epoch": 0.5743408203125, "grad_norm": 12.3087158203125, "learning_rate": 1.9247343512464037e-06, "loss": 3.5111, "step": 56460 }, { "epoch": 0.5743916829427084, "grad_norm": 8.526979446411133, "learning_rate": 1.9243452082649223e-06, "loss": 3.3005, "step": 56465 }, { "epoch": 0.5744425455729166, "grad_norm": 14.355345726013184, "learning_rate": 1.9239560800114316e-06, "loss": 2.8323, "step": 56470 }, { "epoch": 0.574493408203125, "grad_norm": 14.259971618652344, "learning_rate": 1.9235669664958873e-06, "loss": 3.7159, "step": 56475 }, { "epoch": 0.5745442708333334, "grad_norm": 9.394563674926758, "learning_rate": 1.923177867728247e-06, "loss": 3.1292, "step": 56480 }, { "epoch": 0.5745951334635416, "grad_norm": 14.726558685302734, "learning_rate": 1.922788783718463e-06, "loss": 3.0501, "step": 56485 }, { "epoch": 0.57464599609375, "grad_norm": 13.66686725616455, "learning_rate": 1.922399714476493e-06, "loss": 3.2185, "step": 56490 }, { "epoch": 0.5746968587239584, "grad_norm": 12.804473876953125, "learning_rate": 1.922010660012287e-06, "loss": 3.6264, "step": 56495 }, { "epoch": 0.5747477213541666, "grad_norm": 7.820730686187744, "learning_rate": 1.9216216203358024e-06, "loss": 3.4058, "step": 56500 }, { "epoch": 0.574798583984375, "grad_norm": 11.180335998535156, "learning_rate": 1.921232595456992e-06, "loss": 2.8898, "step": 56505 }, { "epoch": 0.5748494466145834, "grad_norm": 14.343398094177246, "learning_rate": 1.9208435853858078e-06, "loss": 3.0695, "step": 56510 }, { "epoch": 0.5749003092447916, "grad_norm": 16.27067756652832, "learning_rate": 1.920454590132203e-06, "loss": 3.4296, "step": 56515 }, { "epoch": 0.574951171875, "grad_norm": 14.813397407531738, "learning_rate": 1.920065609706131e-06, "loss": 3.0937, "step": 56520 }, { "epoch": 0.5750020345052084, "grad_norm": 9.876043319702148, "learning_rate": 1.9196766441175423e-06, "loss": 3.4687, "step": 56525 }, { "epoch": 0.5750528971354166, "grad_norm": 10.726964950561523, "learning_rate": 1.919287693376389e-06, "loss": 3.1954, "step": 56530 }, { "epoch": 0.575103759765625, "grad_norm": 11.442978858947754, "learning_rate": 1.9188987574926234e-06, "loss": 3.281, "step": 56535 }, { "epoch": 0.5751546223958334, "grad_norm": 13.518041610717773, "learning_rate": 1.9185098364761943e-06, "loss": 3.3092, "step": 56540 }, { "epoch": 0.5752054850260416, "grad_norm": 11.82832145690918, "learning_rate": 1.918120930337054e-06, "loss": 3.6624, "step": 56545 }, { "epoch": 0.57525634765625, "grad_norm": 15.283867835998535, "learning_rate": 1.91773203908515e-06, "loss": 3.3876, "step": 56550 }, { "epoch": 0.5753072102864584, "grad_norm": 10.985995292663574, "learning_rate": 1.917343162730436e-06, "loss": 3.4584, "step": 56555 }, { "epoch": 0.5753580729166666, "grad_norm": 12.337750434875488, "learning_rate": 1.916954301282858e-06, "loss": 3.4907, "step": 56560 }, { "epoch": 0.575408935546875, "grad_norm": 13.062437057495117, "learning_rate": 1.9165654547523667e-06, "loss": 3.5146, "step": 56565 }, { "epoch": 0.5754597981770834, "grad_norm": 12.77077579498291, "learning_rate": 1.916176623148909e-06, "loss": 3.7536, "step": 56570 }, { "epoch": 0.5755106608072916, "grad_norm": 11.67747974395752, "learning_rate": 1.915787806482434e-06, "loss": 3.3842, "step": 56575 }, { "epoch": 0.5755615234375, "grad_norm": 12.839545249938965, "learning_rate": 1.9153990047628905e-06, "loss": 3.2524, "step": 56580 }, { "epoch": 0.5756123860677084, "grad_norm": 8.737242698669434, "learning_rate": 1.9150102180002243e-06, "loss": 3.2188, "step": 56585 }, { "epoch": 0.5756632486979166, "grad_norm": 10.488981246948242, "learning_rate": 1.9146214462043826e-06, "loss": 3.7352, "step": 56590 }, { "epoch": 0.575714111328125, "grad_norm": 10.820968627929688, "learning_rate": 1.914232689385313e-06, "loss": 3.4999, "step": 56595 }, { "epoch": 0.5757649739583334, "grad_norm": 12.983179092407227, "learning_rate": 1.9138439475529614e-06, "loss": 3.1547, "step": 56600 }, { "epoch": 0.5758158365885416, "grad_norm": 8.708410263061523, "learning_rate": 1.9134552207172723e-06, "loss": 3.0745, "step": 56605 }, { "epoch": 0.57586669921875, "grad_norm": 11.97068977355957, "learning_rate": 1.913066508888194e-06, "loss": 3.314, "step": 56610 }, { "epoch": 0.5759175618489584, "grad_norm": 11.596902847290039, "learning_rate": 1.9126778120756687e-06, "loss": 3.3621, "step": 56615 }, { "epoch": 0.5759684244791666, "grad_norm": 14.041169166564941, "learning_rate": 1.9122891302896434e-06, "loss": 3.4463, "step": 56620 }, { "epoch": 0.576019287109375, "grad_norm": 10.68602466583252, "learning_rate": 1.9119004635400605e-06, "loss": 3.302, "step": 56625 }, { "epoch": 0.5760701497395834, "grad_norm": 13.853493690490723, "learning_rate": 1.911511811836865e-06, "loss": 3.4612, "step": 56630 }, { "epoch": 0.5761210123697916, "grad_norm": 16.029159545898438, "learning_rate": 1.911123175190001e-06, "loss": 3.5562, "step": 56635 }, { "epoch": 0.576171875, "grad_norm": 8.260101318359375, "learning_rate": 1.9107345536094106e-06, "loss": 3.0825, "step": 56640 }, { "epoch": 0.5762227376302084, "grad_norm": 9.413946151733398, "learning_rate": 1.9103459471050364e-06, "loss": 3.0764, "step": 56645 }, { "epoch": 0.5762736002604166, "grad_norm": 10.229106903076172, "learning_rate": 1.909957355686822e-06, "loss": 3.8249, "step": 56650 }, { "epoch": 0.576324462890625, "grad_norm": 16.245317459106445, "learning_rate": 1.9095687793647085e-06, "loss": 3.0807, "step": 56655 }, { "epoch": 0.5763753255208334, "grad_norm": 16.102489471435547, "learning_rate": 1.9091802181486385e-06, "loss": 3.6931, "step": 56660 }, { "epoch": 0.5764261881510416, "grad_norm": 10.535594940185547, "learning_rate": 1.908791672048551e-06, "loss": 2.9715, "step": 56665 }, { "epoch": 0.57647705078125, "grad_norm": 9.063885688781738, "learning_rate": 1.90840314107439e-06, "loss": 3.2165, "step": 56670 }, { "epoch": 0.5765279134114584, "grad_norm": 14.134736061096191, "learning_rate": 1.908014625236094e-06, "loss": 3.3692, "step": 56675 }, { "epoch": 0.5765787760416666, "grad_norm": 11.198437690734863, "learning_rate": 1.9076261245436034e-06, "loss": 3.6974, "step": 56680 }, { "epoch": 0.576629638671875, "grad_norm": 11.785255432128906, "learning_rate": 1.9072376390068576e-06, "loss": 3.3939, "step": 56685 }, { "epoch": 0.5766805013020834, "grad_norm": 8.374520301818848, "learning_rate": 1.906849168635797e-06, "loss": 3.2321, "step": 56690 }, { "epoch": 0.5767313639322916, "grad_norm": 9.535998344421387, "learning_rate": 1.9064607134403595e-06, "loss": 3.0934, "step": 56695 }, { "epoch": 0.5767822265625, "grad_norm": 13.945857048034668, "learning_rate": 1.9060722734304835e-06, "loss": 3.2331, "step": 56700 }, { "epoch": 0.5768330891927084, "grad_norm": 7.848199844360352, "learning_rate": 1.9056838486161081e-06, "loss": 3.391, "step": 56705 }, { "epoch": 0.5768839518229166, "grad_norm": 9.019949913024902, "learning_rate": 1.905295439007171e-06, "loss": 3.2544, "step": 56710 }, { "epoch": 0.576934814453125, "grad_norm": 8.967193603515625, "learning_rate": 1.9049070446136092e-06, "loss": 3.4716, "step": 56715 }, { "epoch": 0.5769856770833334, "grad_norm": 9.631693840026855, "learning_rate": 1.9045186654453578e-06, "loss": 2.999, "step": 56720 }, { "epoch": 0.5770365397135416, "grad_norm": 14.838756561279297, "learning_rate": 1.904130301512357e-06, "loss": 3.102, "step": 56725 }, { "epoch": 0.57708740234375, "grad_norm": 9.814398765563965, "learning_rate": 1.9037419528245406e-06, "loss": 3.1221, "step": 56730 }, { "epoch": 0.5771382649739584, "grad_norm": 10.964478492736816, "learning_rate": 1.9033536193918455e-06, "loss": 3.5277, "step": 56735 }, { "epoch": 0.5771891276041666, "grad_norm": 12.982027053833008, "learning_rate": 1.9029653012242059e-06, "loss": 3.4367, "step": 56740 }, { "epoch": 0.577239990234375, "grad_norm": 14.798869132995605, "learning_rate": 1.902576998331558e-06, "loss": 3.433, "step": 56745 }, { "epoch": 0.5772908528645834, "grad_norm": 13.125004768371582, "learning_rate": 1.902188710723836e-06, "loss": 3.7781, "step": 56750 }, { "epoch": 0.5773417154947916, "grad_norm": 9.113572120666504, "learning_rate": 1.9018004384109739e-06, "loss": 3.17, "step": 56755 }, { "epoch": 0.577392578125, "grad_norm": 13.796917915344238, "learning_rate": 1.9014121814029054e-06, "loss": 3.983, "step": 56760 }, { "epoch": 0.5774434407552084, "grad_norm": 10.384062767028809, "learning_rate": 1.9010239397095654e-06, "loss": 3.5382, "step": 56765 }, { "epoch": 0.5774943033854166, "grad_norm": 12.019784927368164, "learning_rate": 1.9006357133408854e-06, "loss": 3.4912, "step": 56770 }, { "epoch": 0.577545166015625, "grad_norm": 11.359028816223145, "learning_rate": 1.900247502306799e-06, "loss": 3.1923, "step": 56775 }, { "epoch": 0.5775960286458334, "grad_norm": 7.998235702514648, "learning_rate": 1.8998593066172371e-06, "loss": 3.2979, "step": 56780 }, { "epoch": 0.5776468912760416, "grad_norm": 10.577157020568848, "learning_rate": 1.899471126282133e-06, "loss": 3.2425, "step": 56785 }, { "epoch": 0.57769775390625, "grad_norm": 14.997776985168457, "learning_rate": 1.8990829613114187e-06, "loss": 3.3457, "step": 56790 }, { "epoch": 0.5777486165364584, "grad_norm": 11.988247871398926, "learning_rate": 1.8986948117150231e-06, "loss": 3.2524, "step": 56795 }, { "epoch": 0.5777994791666666, "grad_norm": 11.470266342163086, "learning_rate": 1.8983066775028789e-06, "loss": 3.3788, "step": 56800 }, { "epoch": 0.577850341796875, "grad_norm": 15.55947208404541, "learning_rate": 1.8979185586849158e-06, "loss": 3.3545, "step": 56805 }, { "epoch": 0.5779012044270834, "grad_norm": 12.32003402709961, "learning_rate": 1.8975304552710635e-06, "loss": 3.0713, "step": 56810 }, { "epoch": 0.5779520670572916, "grad_norm": 51.92470932006836, "learning_rate": 1.8971423672712514e-06, "loss": 3.3542, "step": 56815 }, { "epoch": 0.5780029296875, "grad_norm": 10.97673511505127, "learning_rate": 1.8967542946954096e-06, "loss": 3.5778, "step": 56820 }, { "epoch": 0.5780537923177084, "grad_norm": 13.67583179473877, "learning_rate": 1.896366237553466e-06, "loss": 3.6961, "step": 56825 }, { "epoch": 0.5781046549479166, "grad_norm": 10.543595314025879, "learning_rate": 1.895978195855349e-06, "loss": 3.6835, "step": 56830 }, { "epoch": 0.578155517578125, "grad_norm": 10.750131607055664, "learning_rate": 1.8955901696109864e-06, "loss": 3.2903, "step": 56835 }, { "epoch": 0.5782063802083334, "grad_norm": 11.75191879272461, "learning_rate": 1.8952021588303062e-06, "loss": 3.6327, "step": 56840 }, { "epoch": 0.5782572428385416, "grad_norm": 6.788650989532471, "learning_rate": 1.8948141635232358e-06, "loss": 3.3047, "step": 56845 }, { "epoch": 0.57830810546875, "grad_norm": 11.891761779785156, "learning_rate": 1.8944261836997013e-06, "loss": 3.1144, "step": 56850 }, { "epoch": 0.5783589680989584, "grad_norm": 13.748327255249023, "learning_rate": 1.894038219369629e-06, "loss": 2.9591, "step": 56855 }, { "epoch": 0.5784098307291666, "grad_norm": 14.535821914672852, "learning_rate": 1.893650270542946e-06, "loss": 3.0379, "step": 56860 }, { "epoch": 0.578460693359375, "grad_norm": 11.83864688873291, "learning_rate": 1.8932623372295766e-06, "loss": 2.9729, "step": 56865 }, { "epoch": 0.5785115559895834, "grad_norm": 11.101030349731445, "learning_rate": 1.8928744194394472e-06, "loss": 3.4374, "step": 56870 }, { "epoch": 0.5785624186197916, "grad_norm": 9.511262893676758, "learning_rate": 1.8924865171824803e-06, "loss": 3.2521, "step": 56875 }, { "epoch": 0.57861328125, "grad_norm": 26.6956787109375, "learning_rate": 1.8920986304686032e-06, "loss": 3.4854, "step": 56880 }, { "epoch": 0.5786641438802084, "grad_norm": 9.508420944213867, "learning_rate": 1.891710759307738e-06, "loss": 3.2311, "step": 56885 }, { "epoch": 0.5787150065104166, "grad_norm": 9.723649978637695, "learning_rate": 1.8913229037098096e-06, "loss": 3.2118, "step": 56890 }, { "epoch": 0.578765869140625, "grad_norm": 14.52519702911377, "learning_rate": 1.8909350636847391e-06, "loss": 3.654, "step": 56895 }, { "epoch": 0.5788167317708334, "grad_norm": 11.78041934967041, "learning_rate": 1.8905472392424514e-06, "loss": 3.1973, "step": 56900 }, { "epoch": 0.5788675944010416, "grad_norm": 8.476494789123535, "learning_rate": 1.8901594303928686e-06, "loss": 3.4497, "step": 56905 }, { "epoch": 0.57891845703125, "grad_norm": 14.352686882019043, "learning_rate": 1.8897716371459112e-06, "loss": 3.5338, "step": 56910 }, { "epoch": 0.5789693196614584, "grad_norm": 8.161144256591797, "learning_rate": 1.889383859511502e-06, "loss": 3.333, "step": 56915 }, { "epoch": 0.5790201822916666, "grad_norm": 15.801276206970215, "learning_rate": 1.888996097499563e-06, "loss": 3.31, "step": 56920 }, { "epoch": 0.579071044921875, "grad_norm": 11.01284122467041, "learning_rate": 1.8886083511200135e-06, "loss": 3.6326, "step": 56925 }, { "epoch": 0.5791219075520834, "grad_norm": 8.402137756347656, "learning_rate": 1.8882206203827738e-06, "loss": 3.0483, "step": 56930 }, { "epoch": 0.5791727701822916, "grad_norm": 16.430620193481445, "learning_rate": 1.8878329052977657e-06, "loss": 3.2311, "step": 56935 }, { "epoch": 0.5792236328125, "grad_norm": 14.661127090454102, "learning_rate": 1.8874452058749066e-06, "loss": 3.5519, "step": 56940 }, { "epoch": 0.5792744954427084, "grad_norm": 9.709014892578125, "learning_rate": 1.8870575221241178e-06, "loss": 3.7096, "step": 56945 }, { "epoch": 0.5793253580729166, "grad_norm": 11.967632293701172, "learning_rate": 1.8866698540553155e-06, "loss": 3.5083, "step": 56950 }, { "epoch": 0.579376220703125, "grad_norm": 14.899423599243164, "learning_rate": 1.8862822016784204e-06, "loss": 3.1922, "step": 56955 }, { "epoch": 0.5794270833333334, "grad_norm": 10.922184944152832, "learning_rate": 1.8858945650033503e-06, "loss": 3.6517, "step": 56960 }, { "epoch": 0.5794779459635416, "grad_norm": 12.147903442382812, "learning_rate": 1.8855069440400214e-06, "loss": 3.2416, "step": 56965 }, { "epoch": 0.57952880859375, "grad_norm": 12.775498390197754, "learning_rate": 1.8851193387983512e-06, "loss": 3.2935, "step": 56970 }, { "epoch": 0.5795796712239584, "grad_norm": 9.618669509887695, "learning_rate": 1.8847317492882582e-06, "loss": 3.8583, "step": 56975 }, { "epoch": 0.5796305338541666, "grad_norm": 11.85013484954834, "learning_rate": 1.884344175519657e-06, "loss": 3.1993, "step": 56980 }, { "epoch": 0.579681396484375, "grad_norm": 11.201725959777832, "learning_rate": 1.8839566175024648e-06, "loss": 3.2567, "step": 56985 }, { "epoch": 0.5797322591145834, "grad_norm": 8.808295249938965, "learning_rate": 1.8835690752465952e-06, "loss": 3.3521, "step": 56990 }, { "epoch": 0.5797831217447916, "grad_norm": 11.396921157836914, "learning_rate": 1.8831815487619654e-06, "loss": 3.1966, "step": 56995 }, { "epoch": 0.579833984375, "grad_norm": 13.508956909179688, "learning_rate": 1.88279403805849e-06, "loss": 3.545, "step": 57000 }, { "epoch": 0.5798848470052084, "grad_norm": 17.08087158203125, "learning_rate": 1.8824065431460817e-06, "loss": 3.224, "step": 57005 }, { "epoch": 0.5799357096354166, "grad_norm": 9.624190330505371, "learning_rate": 1.8820190640346562e-06, "loss": 3.4903, "step": 57010 }, { "epoch": 0.579986572265625, "grad_norm": 13.430679321289062, "learning_rate": 1.8816316007341268e-06, "loss": 3.0292, "step": 57015 }, { "epoch": 0.5800374348958334, "grad_norm": 9.54922103881836, "learning_rate": 1.881244153254406e-06, "loss": 3.5934, "step": 57020 }, { "epoch": 0.5800882975260416, "grad_norm": 9.288108825683594, "learning_rate": 1.8808567216054063e-06, "loss": 3.1301, "step": 57025 }, { "epoch": 0.58013916015625, "grad_norm": 11.815570831298828, "learning_rate": 1.880469305797041e-06, "loss": 3.2464, "step": 57030 }, { "epoch": 0.5801900227864584, "grad_norm": 9.707662582397461, "learning_rate": 1.8800819058392226e-06, "loss": 3.1669, "step": 57035 }, { "epoch": 0.5802408854166666, "grad_norm": 10.359423637390137, "learning_rate": 1.8796945217418608e-06, "loss": 3.3614, "step": 57040 }, { "epoch": 0.580291748046875, "grad_norm": 10.065942764282227, "learning_rate": 1.8793071535148672e-06, "loss": 3.1885, "step": 57045 }, { "epoch": 0.5803426106770834, "grad_norm": 11.11904239654541, "learning_rate": 1.878919801168154e-06, "loss": 3.1507, "step": 57050 }, { "epoch": 0.5803934733072916, "grad_norm": 15.753347396850586, "learning_rate": 1.8785324647116302e-06, "loss": 3.1128, "step": 57055 }, { "epoch": 0.5804443359375, "grad_norm": 20.453039169311523, "learning_rate": 1.8781451441552062e-06, "loss": 3.387, "step": 57060 }, { "epoch": 0.5804951985677084, "grad_norm": 16.42840576171875, "learning_rate": 1.8777578395087903e-06, "loss": 3.292, "step": 57065 }, { "epoch": 0.5805460611979166, "grad_norm": 15.925436973571777, "learning_rate": 1.877370550782293e-06, "loss": 3.1068, "step": 57070 }, { "epoch": 0.580596923828125, "grad_norm": 15.755450248718262, "learning_rate": 1.8769832779856234e-06, "loss": 3.462, "step": 57075 }, { "epoch": 0.5806477864583334, "grad_norm": 10.200871467590332, "learning_rate": 1.8765960211286885e-06, "loss": 3.6675, "step": 57080 }, { "epoch": 0.5806986490885416, "grad_norm": 9.75241756439209, "learning_rate": 1.8762087802213958e-06, "loss": 3.2287, "step": 57085 }, { "epoch": 0.58074951171875, "grad_norm": 14.130892753601074, "learning_rate": 1.8758215552736548e-06, "loss": 3.7924, "step": 57090 }, { "epoch": 0.5808003743489584, "grad_norm": 8.34168529510498, "learning_rate": 1.875434346295371e-06, "loss": 3.3221, "step": 57095 }, { "epoch": 0.5808512369791666, "grad_norm": 10.972088813781738, "learning_rate": 1.8750471532964504e-06, "loss": 3.3444, "step": 57100 }, { "epoch": 0.580902099609375, "grad_norm": 10.499613761901855, "learning_rate": 1.8746599762868016e-06, "loss": 3.2314, "step": 57105 }, { "epoch": 0.5809529622395834, "grad_norm": 18.19532585144043, "learning_rate": 1.8742728152763284e-06, "loss": 3.5773, "step": 57110 }, { "epoch": 0.5810038248697916, "grad_norm": 10.94426155090332, "learning_rate": 1.8738856702749379e-06, "loss": 3.2399, "step": 57115 }, { "epoch": 0.5810546875, "grad_norm": 10.755924224853516, "learning_rate": 1.8734985412925328e-06, "loss": 3.3212, "step": 57120 }, { "epoch": 0.5811055501302084, "grad_norm": 10.946020126342773, "learning_rate": 1.8731114283390195e-06, "loss": 3.8101, "step": 57125 }, { "epoch": 0.5811564127604166, "grad_norm": 7.830922603607178, "learning_rate": 1.872724331424302e-06, "loss": 3.2401, "step": 57130 }, { "epoch": 0.581207275390625, "grad_norm": 9.367474555969238, "learning_rate": 1.8723372505582837e-06, "loss": 3.4203, "step": 57135 }, { "epoch": 0.5812581380208334, "grad_norm": 12.73338794708252, "learning_rate": 1.8719501857508672e-06, "loss": 3.2724, "step": 57140 }, { "epoch": 0.5813090006510416, "grad_norm": 10.9208345413208, "learning_rate": 1.8715631370119574e-06, "loss": 3.503, "step": 57145 }, { "epoch": 0.58135986328125, "grad_norm": 15.710588455200195, "learning_rate": 1.8711761043514557e-06, "loss": 3.4525, "step": 57150 }, { "epoch": 0.5814107259114584, "grad_norm": 12.172246932983398, "learning_rate": 1.8707890877792644e-06, "loss": 3.1944, "step": 57155 }, { "epoch": 0.5814615885416666, "grad_norm": 15.046089172363281, "learning_rate": 1.8704020873052842e-06, "loss": 3.1853, "step": 57160 }, { "epoch": 0.581512451171875, "grad_norm": 9.864462852478027, "learning_rate": 1.8700151029394182e-06, "loss": 4.1923, "step": 57165 }, { "epoch": 0.5815633138020834, "grad_norm": 7.275952339172363, "learning_rate": 1.8696281346915669e-06, "loss": 3.5304, "step": 57170 }, { "epoch": 0.5816141764322916, "grad_norm": 9.70826244354248, "learning_rate": 1.8692411825716294e-06, "loss": 3.5269, "step": 57175 }, { "epoch": 0.5816650390625, "grad_norm": 12.5178804397583, "learning_rate": 1.8688542465895066e-06, "loss": 3.5193, "step": 57180 }, { "epoch": 0.5817159016927084, "grad_norm": 13.794378280639648, "learning_rate": 1.8684673267550984e-06, "loss": 3.3048, "step": 57185 }, { "epoch": 0.5817667643229166, "grad_norm": 12.371304512023926, "learning_rate": 1.868080423078305e-06, "loss": 3.3683, "step": 57190 }, { "epoch": 0.581817626953125, "grad_norm": 16.101579666137695, "learning_rate": 1.8676935355690228e-06, "loss": 3.2473, "step": 57195 }, { "epoch": 0.5818684895833334, "grad_norm": 12.373351097106934, "learning_rate": 1.8673066642371524e-06, "loss": 2.9094, "step": 57200 }, { "epoch": 0.5819193522135416, "grad_norm": 10.716297149658203, "learning_rate": 1.8669198090925915e-06, "loss": 3.1835, "step": 57205 }, { "epoch": 0.58197021484375, "grad_norm": 12.337839126586914, "learning_rate": 1.8665329701452367e-06, "loss": 3.3965, "step": 57210 }, { "epoch": 0.5820210774739584, "grad_norm": 7.9632978439331055, "learning_rate": 1.8661461474049853e-06, "loss": 3.2287, "step": 57215 }, { "epoch": 0.5820719401041666, "grad_norm": 13.325944900512695, "learning_rate": 1.8657593408817351e-06, "loss": 3.543, "step": 57220 }, { "epoch": 0.582122802734375, "grad_norm": 8.320006370544434, "learning_rate": 1.8653725505853814e-06, "loss": 3.4453, "step": 57225 }, { "epoch": 0.5821736653645834, "grad_norm": 21.190378189086914, "learning_rate": 1.8649857765258213e-06, "loss": 3.3959, "step": 57230 }, { "epoch": 0.5822245279947916, "grad_norm": 11.889701843261719, "learning_rate": 1.8645990187129486e-06, "loss": 3.0289, "step": 57235 }, { "epoch": 0.582275390625, "grad_norm": 9.687341690063477, "learning_rate": 1.8642122771566598e-06, "loss": 3.5415, "step": 57240 }, { "epoch": 0.5823262532552084, "grad_norm": 9.341906547546387, "learning_rate": 1.8638255518668496e-06, "loss": 2.9307, "step": 57245 }, { "epoch": 0.5823771158854166, "grad_norm": 12.794500350952148, "learning_rate": 1.8634388428534116e-06, "loss": 3.4308, "step": 57250 }, { "epoch": 0.582427978515625, "grad_norm": 13.471244812011719, "learning_rate": 1.8630521501262393e-06, "loss": 2.9125, "step": 57255 }, { "epoch": 0.5824788411458334, "grad_norm": 11.451852798461914, "learning_rate": 1.8626654736952279e-06, "loss": 3.3624, "step": 57260 }, { "epoch": 0.5825297037760416, "grad_norm": 9.186808586120605, "learning_rate": 1.8622788135702686e-06, "loss": 3.2651, "step": 57265 }, { "epoch": 0.58258056640625, "grad_norm": 7.854951858520508, "learning_rate": 1.8618921697612552e-06, "loss": 2.9606, "step": 57270 }, { "epoch": 0.5826314290364584, "grad_norm": 8.688505172729492, "learning_rate": 1.8615055422780787e-06, "loss": 3.2603, "step": 57275 }, { "epoch": 0.5826822916666666, "grad_norm": 15.499544143676758, "learning_rate": 1.8611189311306316e-06, "loss": 3.137, "step": 57280 }, { "epoch": 0.582733154296875, "grad_norm": 13.060683250427246, "learning_rate": 1.860732336328806e-06, "loss": 3.1386, "step": 57285 }, { "epoch": 0.5827840169270834, "grad_norm": 9.663434028625488, "learning_rate": 1.860345757882491e-06, "loss": 3.2919, "step": 57290 }, { "epoch": 0.5828348795572916, "grad_norm": 10.981467247009277, "learning_rate": 1.8599591958015787e-06, "loss": 3.4003, "step": 57295 }, { "epoch": 0.5828857421875, "grad_norm": 10.95694351196289, "learning_rate": 1.8595726500959591e-06, "loss": 3.4265, "step": 57300 }, { "epoch": 0.5829366048177084, "grad_norm": 13.341103553771973, "learning_rate": 1.859186120775521e-06, "loss": 3.1631, "step": 57305 }, { "epoch": 0.5829874674479166, "grad_norm": 10.463839530944824, "learning_rate": 1.8587996078501535e-06, "loss": 3.4325, "step": 57310 }, { "epoch": 0.583038330078125, "grad_norm": 11.585979461669922, "learning_rate": 1.858413111329747e-06, "loss": 3.0571, "step": 57315 }, { "epoch": 0.5830891927083334, "grad_norm": 12.85406494140625, "learning_rate": 1.8580266312241888e-06, "loss": 2.9965, "step": 57320 }, { "epoch": 0.5831400553385416, "grad_norm": 12.769957542419434, "learning_rate": 1.8576401675433675e-06, "loss": 3.3309, "step": 57325 }, { "epoch": 0.58319091796875, "grad_norm": 11.573023796081543, "learning_rate": 1.8572537202971686e-06, "loss": 3.1622, "step": 57330 }, { "epoch": 0.5832417805989584, "grad_norm": 13.39962100982666, "learning_rate": 1.8568672894954823e-06, "loss": 3.3914, "step": 57335 }, { "epoch": 0.5832926432291666, "grad_norm": 10.790485382080078, "learning_rate": 1.8564808751481938e-06, "loss": 3.508, "step": 57340 }, { "epoch": 0.583343505859375, "grad_norm": 17.249465942382812, "learning_rate": 1.8560944772651898e-06, "loss": 3.2018, "step": 57345 }, { "epoch": 0.5833943684895834, "grad_norm": 12.020594596862793, "learning_rate": 1.855708095856355e-06, "loss": 3.1479, "step": 57350 }, { "epoch": 0.5834452311197916, "grad_norm": 15.753055572509766, "learning_rate": 1.8553217309315763e-06, "loss": 3.2538, "step": 57355 }, { "epoch": 0.58349609375, "grad_norm": 13.768061637878418, "learning_rate": 1.854935382500739e-06, "loss": 3.3499, "step": 57360 }, { "epoch": 0.5835469563802084, "grad_norm": 16.716020584106445, "learning_rate": 1.8545490505737266e-06, "loss": 3.6358, "step": 57365 }, { "epoch": 0.5835978190104166, "grad_norm": 15.221572875976562, "learning_rate": 1.854162735160423e-06, "loss": 3.1542, "step": 57370 }, { "epoch": 0.583648681640625, "grad_norm": 10.232190132141113, "learning_rate": 1.8537764362707139e-06, "loss": 3.1022, "step": 57375 }, { "epoch": 0.5836995442708334, "grad_norm": 10.174397468566895, "learning_rate": 1.853390153914481e-06, "loss": 3.1219, "step": 57380 }, { "epoch": 0.5837504069010416, "grad_norm": 8.714889526367188, "learning_rate": 1.8530038881016082e-06, "loss": 3.5887, "step": 57385 }, { "epoch": 0.58380126953125, "grad_norm": 10.971238136291504, "learning_rate": 1.8526176388419767e-06, "loss": 3.0598, "step": 57390 }, { "epoch": 0.5838521321614584, "grad_norm": 7.573197364807129, "learning_rate": 1.8522314061454699e-06, "loss": 3.1088, "step": 57395 }, { "epoch": 0.5839029947916666, "grad_norm": 11.627606391906738, "learning_rate": 1.8518451900219695e-06, "loss": 3.3229, "step": 57400 }, { "epoch": 0.583953857421875, "grad_norm": 13.206156730651855, "learning_rate": 1.8514589904813551e-06, "loss": 2.9272, "step": 57405 }, { "epoch": 0.5840047200520834, "grad_norm": 14.942124366760254, "learning_rate": 1.8510728075335093e-06, "loss": 3.0356, "step": 57410 }, { "epoch": 0.5840555826822916, "grad_norm": 7.587666988372803, "learning_rate": 1.8506866411883128e-06, "loss": 3.02, "step": 57415 }, { "epoch": 0.5841064453125, "grad_norm": 7.650280475616455, "learning_rate": 1.8503004914556439e-06, "loss": 3.3738, "step": 57420 }, { "epoch": 0.5841573079427084, "grad_norm": 8.10651683807373, "learning_rate": 1.8499143583453822e-06, "loss": 3.0114, "step": 57425 }, { "epoch": 0.5842081705729166, "grad_norm": 9.125300407409668, "learning_rate": 1.8495282418674088e-06, "loss": 3.2324, "step": 57430 }, { "epoch": 0.584259033203125, "grad_norm": 11.46408748626709, "learning_rate": 1.8491421420316009e-06, "loss": 3.0857, "step": 57435 }, { "epoch": 0.5843098958333334, "grad_norm": 11.1484956741333, "learning_rate": 1.848756058847837e-06, "loss": 3.1192, "step": 57440 }, { "epoch": 0.5843607584635416, "grad_norm": 10.936185836791992, "learning_rate": 1.848369992325994e-06, "loss": 3.6811, "step": 57445 }, { "epoch": 0.58441162109375, "grad_norm": 16.094820022583008, "learning_rate": 1.8479839424759511e-06, "loss": 3.5156, "step": 57450 }, { "epoch": 0.5844624837239584, "grad_norm": 10.838696479797363, "learning_rate": 1.847597909307585e-06, "loss": 3.5173, "step": 57455 }, { "epoch": 0.5845133463541666, "grad_norm": 13.69144058227539, "learning_rate": 1.8472118928307712e-06, "loss": 3.2802, "step": 57460 }, { "epoch": 0.584564208984375, "grad_norm": 19.055984497070312, "learning_rate": 1.8468258930553856e-06, "loss": 3.3391, "step": 57465 }, { "epoch": 0.5846150716145834, "grad_norm": 14.424437522888184, "learning_rate": 1.846439909991306e-06, "loss": 3.3721, "step": 57470 }, { "epoch": 0.5846659342447916, "grad_norm": 12.521187782287598, "learning_rate": 1.846053943648406e-06, "loss": 3.2019, "step": 57475 }, { "epoch": 0.584716796875, "grad_norm": 16.133342742919922, "learning_rate": 1.8456679940365612e-06, "loss": 3.5034, "step": 57480 }, { "epoch": 0.5847676595052084, "grad_norm": 13.612138748168945, "learning_rate": 1.845282061165644e-06, "loss": 3.3122, "step": 57485 }, { "epoch": 0.5848185221354166, "grad_norm": 11.59680461883545, "learning_rate": 1.8448961450455318e-06, "loss": 3.8087, "step": 57490 }, { "epoch": 0.584869384765625, "grad_norm": 14.264731407165527, "learning_rate": 1.844510245686096e-06, "loss": 3.065, "step": 57495 }, { "epoch": 0.5849202473958334, "grad_norm": 11.807320594787598, "learning_rate": 1.8441243630972093e-06, "loss": 3.3935, "step": 57500 }, { "epoch": 0.5849711100260416, "grad_norm": 19.176963806152344, "learning_rate": 1.8437384972887468e-06, "loss": 3.2933, "step": 57505 }, { "epoch": 0.58502197265625, "grad_norm": 11.13174819946289, "learning_rate": 1.8433526482705787e-06, "loss": 3.4016, "step": 57510 }, { "epoch": 0.5850728352864584, "grad_norm": 8.262331008911133, "learning_rate": 1.8429668160525778e-06, "loss": 3.1822, "step": 57515 }, { "epoch": 0.5851236979166666, "grad_norm": 12.877121925354004, "learning_rate": 1.8425810006446144e-06, "loss": 3.4957, "step": 57520 }, { "epoch": 0.585174560546875, "grad_norm": 18.290891647338867, "learning_rate": 1.8421952020565603e-06, "loss": 3.334, "step": 57525 }, { "epoch": 0.5852254231770834, "grad_norm": 14.739322662353516, "learning_rate": 1.841809420298287e-06, "loss": 3.3009, "step": 57530 }, { "epoch": 0.5852762858072916, "grad_norm": 10.935076713562012, "learning_rate": 1.841423655379663e-06, "loss": 3.2133, "step": 57535 }, { "epoch": 0.5853271484375, "grad_norm": 14.294183731079102, "learning_rate": 1.8410379073105583e-06, "loss": 3.6313, "step": 57540 }, { "epoch": 0.5853780110677084, "grad_norm": 14.829392433166504, "learning_rate": 1.8406521761008433e-06, "loss": 3.2714, "step": 57545 }, { "epoch": 0.5854288736979166, "grad_norm": 14.30656909942627, "learning_rate": 1.8402664617603857e-06, "loss": 3.5128, "step": 57550 }, { "epoch": 0.585479736328125, "grad_norm": 8.534845352172852, "learning_rate": 1.8398807642990548e-06, "loss": 3.4336, "step": 57555 }, { "epoch": 0.5855305989583334, "grad_norm": 9.407914161682129, "learning_rate": 1.8394950837267172e-06, "loss": 3.5546, "step": 57560 }, { "epoch": 0.5855814615885416, "grad_norm": 13.708868026733398, "learning_rate": 1.8391094200532417e-06, "loss": 3.1824, "step": 57565 }, { "epoch": 0.58563232421875, "grad_norm": 9.58594799041748, "learning_rate": 1.8387237732884954e-06, "loss": 3.2787, "step": 57570 }, { "epoch": 0.5856831868489584, "grad_norm": 12.1710844039917, "learning_rate": 1.8383381434423448e-06, "loss": 3.3865, "step": 57575 }, { "epoch": 0.5857340494791666, "grad_norm": 14.10545825958252, "learning_rate": 1.8379525305246547e-06, "loss": 3.26, "step": 57580 }, { "epoch": 0.585784912109375, "grad_norm": 8.860973358154297, "learning_rate": 1.8375669345452935e-06, "loss": 3.3786, "step": 57585 }, { "epoch": 0.5858357747395834, "grad_norm": 15.432395935058594, "learning_rate": 1.8371813555141246e-06, "loss": 3.4559, "step": 57590 }, { "epoch": 0.5858866373697916, "grad_norm": 11.764700889587402, "learning_rate": 1.8367957934410132e-06, "loss": 3.223, "step": 57595 }, { "epoch": 0.5859375, "grad_norm": 10.494485855102539, "learning_rate": 1.8364102483358253e-06, "loss": 3.4047, "step": 57600 }, { "epoch": 0.5859883626302084, "grad_norm": 7.719424247741699, "learning_rate": 1.8360247202084236e-06, "loss": 3.2869, "step": 57605 }, { "epoch": 0.5860392252604166, "grad_norm": 11.962519645690918, "learning_rate": 1.8356392090686722e-06, "loss": 3.3633, "step": 57610 }, { "epoch": 0.586090087890625, "grad_norm": 12.523601531982422, "learning_rate": 1.8352537149264332e-06, "loss": 3.1762, "step": 57615 }, { "epoch": 0.5861409505208334, "grad_norm": 14.770298957824707, "learning_rate": 1.834868237791571e-06, "loss": 3.7998, "step": 57620 }, { "epoch": 0.5861918131510416, "grad_norm": 13.773069381713867, "learning_rate": 1.8344827776739478e-06, "loss": 3.3806, "step": 57625 }, { "epoch": 0.58624267578125, "grad_norm": 7.39944314956665, "learning_rate": 1.8340973345834244e-06, "loss": 3.0848, "step": 57630 }, { "epoch": 0.5862935384114584, "grad_norm": 11.790326118469238, "learning_rate": 1.8337119085298622e-06, "loss": 3.1497, "step": 57635 }, { "epoch": 0.5863444010416666, "grad_norm": 16.520463943481445, "learning_rate": 1.8333264995231237e-06, "loss": 3.4205, "step": 57640 }, { "epoch": 0.586395263671875, "grad_norm": 10.090576171875, "learning_rate": 1.832941107573069e-06, "loss": 3.6189, "step": 57645 }, { "epoch": 0.5864461263020834, "grad_norm": 10.001154899597168, "learning_rate": 1.8325557326895576e-06, "loss": 3.2779, "step": 57650 }, { "epoch": 0.5864969889322916, "grad_norm": 10.244816780090332, "learning_rate": 1.8321703748824488e-06, "loss": 3.094, "step": 57655 }, { "epoch": 0.5865478515625, "grad_norm": 9.945894241333008, "learning_rate": 1.8317850341616038e-06, "loss": 3.3676, "step": 57660 }, { "epoch": 0.5865987141927084, "grad_norm": 13.106073379516602, "learning_rate": 1.83139971053688e-06, "loss": 3.7474, "step": 57665 }, { "epoch": 0.5866495768229166, "grad_norm": 10.941320419311523, "learning_rate": 1.8310144040181365e-06, "loss": 3.1596, "step": 57670 }, { "epoch": 0.586700439453125, "grad_norm": 11.458958625793457, "learning_rate": 1.83062911461523e-06, "loss": 3.3828, "step": 57675 }, { "epoch": 0.5867513020833334, "grad_norm": 16.086597442626953, "learning_rate": 1.8302438423380192e-06, "loss": 3.5009, "step": 57680 }, { "epoch": 0.5868021647135416, "grad_norm": 10.989362716674805, "learning_rate": 1.8298585871963614e-06, "loss": 3.2763, "step": 57685 }, { "epoch": 0.58685302734375, "grad_norm": 13.481184005737305, "learning_rate": 1.8294733492001122e-06, "loss": 3.7339, "step": 57690 }, { "epoch": 0.5869038899739584, "grad_norm": 14.661421775817871, "learning_rate": 1.829088128359129e-06, "loss": 3.2107, "step": 57695 }, { "epoch": 0.5869547526041666, "grad_norm": 10.293462753295898, "learning_rate": 1.828702924683267e-06, "loss": 3.1729, "step": 57700 }, { "epoch": 0.587005615234375, "grad_norm": 12.358012199401855, "learning_rate": 1.8283177381823813e-06, "loss": 3.8046, "step": 57705 }, { "epoch": 0.5870564778645834, "grad_norm": 10.661593437194824, "learning_rate": 1.8279325688663263e-06, "loss": 3.9206, "step": 57710 }, { "epoch": 0.5871073404947916, "grad_norm": 10.541525840759277, "learning_rate": 1.8275474167449586e-06, "loss": 3.9404, "step": 57715 }, { "epoch": 0.587158203125, "grad_norm": 7.5321526527404785, "learning_rate": 1.8271622818281305e-06, "loss": 3.0435, "step": 57720 }, { "epoch": 0.5872090657552084, "grad_norm": 9.740371704101562, "learning_rate": 1.8267771641256959e-06, "loss": 3.4587, "step": 57725 }, { "epoch": 0.5872599283854166, "grad_norm": 12.067994117736816, "learning_rate": 1.8263920636475075e-06, "loss": 3.479, "step": 57730 }, { "epoch": 0.587310791015625, "grad_norm": 9.233492851257324, "learning_rate": 1.8260069804034186e-06, "loss": 3.1598, "step": 57735 }, { "epoch": 0.5873616536458334, "grad_norm": 12.713920593261719, "learning_rate": 1.825621914403282e-06, "loss": 3.2037, "step": 57740 }, { "epoch": 0.5874125162760416, "grad_norm": 14.358120918273926, "learning_rate": 1.8252368656569483e-06, "loss": 3.373, "step": 57745 }, { "epoch": 0.58746337890625, "grad_norm": 8.86528491973877, "learning_rate": 1.8248518341742688e-06, "loss": 3.1788, "step": 57750 }, { "epoch": 0.5875142415364584, "grad_norm": 11.562697410583496, "learning_rate": 1.8244668199650967e-06, "loss": 3.4024, "step": 57755 }, { "epoch": 0.5875651041666666, "grad_norm": 15.956350326538086, "learning_rate": 1.8240818230392797e-06, "loss": 3.8836, "step": 57760 }, { "epoch": 0.587615966796875, "grad_norm": 11.165114402770996, "learning_rate": 1.8236968434066701e-06, "loss": 3.3067, "step": 57765 }, { "epoch": 0.5876668294270834, "grad_norm": 9.314546585083008, "learning_rate": 1.823311881077115e-06, "loss": 3.6705, "step": 57770 }, { "epoch": 0.5877176920572916, "grad_norm": 9.98790168762207, "learning_rate": 1.822926936060466e-06, "loss": 3.1524, "step": 57775 }, { "epoch": 0.5877685546875, "grad_norm": 12.551997184753418, "learning_rate": 1.8225420083665713e-06, "loss": 3.7721, "step": 57780 }, { "epoch": 0.5878194173177084, "grad_norm": 14.371289253234863, "learning_rate": 1.8221570980052767e-06, "loss": 3.3989, "step": 57785 }, { "epoch": 0.5878702799479166, "grad_norm": 12.440781593322754, "learning_rate": 1.8217722049864339e-06, "loss": 3.0281, "step": 57790 }, { "epoch": 0.587921142578125, "grad_norm": 12.106391906738281, "learning_rate": 1.821387329319888e-06, "loss": 3.229, "step": 57795 }, { "epoch": 0.5879720052083334, "grad_norm": 13.718605041503906, "learning_rate": 1.8210024710154871e-06, "loss": 2.9594, "step": 57800 }, { "epoch": 0.5880228678385416, "grad_norm": 12.633403778076172, "learning_rate": 1.8206176300830757e-06, "loss": 3.2798, "step": 57805 }, { "epoch": 0.58807373046875, "grad_norm": 11.287311553955078, "learning_rate": 1.820232806532502e-06, "loss": 3.5343, "step": 57810 }, { "epoch": 0.5881245930989584, "grad_norm": 16.78968620300293, "learning_rate": 1.8198480003736114e-06, "loss": 3.252, "step": 57815 }, { "epoch": 0.5881754557291666, "grad_norm": 15.007670402526855, "learning_rate": 1.8194632116162476e-06, "loss": 4.1343, "step": 57820 }, { "epoch": 0.588226318359375, "grad_norm": 15.56428337097168, "learning_rate": 1.819078440270256e-06, "loss": 3.3209, "step": 57825 }, { "epoch": 0.5882771809895834, "grad_norm": 9.358656883239746, "learning_rate": 1.8186936863454819e-06, "loss": 3.3396, "step": 57830 }, { "epoch": 0.5883280436197916, "grad_norm": 11.954632759094238, "learning_rate": 1.818308949851768e-06, "loss": 3.9432, "step": 57835 }, { "epoch": 0.58837890625, "grad_norm": 10.74793815612793, "learning_rate": 1.8179242307989585e-06, "loss": 3.2117, "step": 57840 }, { "epoch": 0.5884297688802084, "grad_norm": 15.294000625610352, "learning_rate": 1.817539529196895e-06, "loss": 3.9292, "step": 57845 }, { "epoch": 0.5884806315104166, "grad_norm": 15.180642127990723, "learning_rate": 1.8171548450554213e-06, "loss": 3.2566, "step": 57850 }, { "epoch": 0.588531494140625, "grad_norm": 16.156967163085938, "learning_rate": 1.8167701783843795e-06, "loss": 3.7234, "step": 57855 }, { "epoch": 0.5885823567708334, "grad_norm": 15.802820205688477, "learning_rate": 1.8163855291936104e-06, "loss": 3.2445, "step": 57860 }, { "epoch": 0.5886332194010416, "grad_norm": 11.530492782592773, "learning_rate": 1.816000897492955e-06, "loss": 3.3812, "step": 57865 }, { "epoch": 0.58868408203125, "grad_norm": 10.436324119567871, "learning_rate": 1.8156162832922553e-06, "loss": 3.1804, "step": 57870 }, { "epoch": 0.5887349446614584, "grad_norm": 10.296968460083008, "learning_rate": 1.8152316866013504e-06, "loss": 3.2323, "step": 57875 }, { "epoch": 0.5887858072916666, "grad_norm": 8.679858207702637, "learning_rate": 1.81484710743008e-06, "loss": 2.959, "step": 57880 }, { "epoch": 0.588836669921875, "grad_norm": 10.867081642150879, "learning_rate": 1.8144625457882848e-06, "loss": 3.1545, "step": 57885 }, { "epoch": 0.5888875325520834, "grad_norm": 10.677846908569336, "learning_rate": 1.8140780016858022e-06, "loss": 3.3374, "step": 57890 }, { "epoch": 0.5889383951822916, "grad_norm": 7.242464065551758, "learning_rate": 1.8136934751324726e-06, "loss": 3.5794, "step": 57895 }, { "epoch": 0.5889892578125, "grad_norm": 11.086483001708984, "learning_rate": 1.813308966138131e-06, "loss": 3.9205, "step": 57900 }, { "epoch": 0.5890401204427084, "grad_norm": 15.184897422790527, "learning_rate": 1.8129244747126176e-06, "loss": 3.2889, "step": 57905 }, { "epoch": 0.5890909830729166, "grad_norm": 14.707979202270508, "learning_rate": 1.812540000865769e-06, "loss": 2.9598, "step": 57910 }, { "epoch": 0.589141845703125, "grad_norm": 14.677459716796875, "learning_rate": 1.8121555446074212e-06, "loss": 3.4606, "step": 57915 }, { "epoch": 0.5891927083333334, "grad_norm": 10.218809127807617, "learning_rate": 1.8117711059474102e-06, "loss": 3.0858, "step": 57920 }, { "epoch": 0.5892435709635416, "grad_norm": 12.145784378051758, "learning_rate": 1.8113866848955733e-06, "loss": 3.6013, "step": 57925 }, { "epoch": 0.58929443359375, "grad_norm": 9.029804229736328, "learning_rate": 1.8110022814617446e-06, "loss": 2.7677, "step": 57930 }, { "epoch": 0.5893452962239584, "grad_norm": 15.142792701721191, "learning_rate": 1.810617895655759e-06, "loss": 3.2275, "step": 57935 }, { "epoch": 0.5893961588541666, "grad_norm": 14.707941055297852, "learning_rate": 1.8102335274874503e-06, "loss": 3.5628, "step": 57940 }, { "epoch": 0.589447021484375, "grad_norm": 13.977078437805176, "learning_rate": 1.8098491769666549e-06, "loss": 3.6672, "step": 57945 }, { "epoch": 0.5894978841145834, "grad_norm": 11.963973045349121, "learning_rate": 1.8094648441032037e-06, "loss": 3.4277, "step": 57950 }, { "epoch": 0.5895487467447916, "grad_norm": 13.8197021484375, "learning_rate": 1.8090805289069316e-06, "loss": 3.0472, "step": 57955 }, { "epoch": 0.589599609375, "grad_norm": 8.149839401245117, "learning_rate": 1.8086962313876693e-06, "loss": 3.2089, "step": 57960 }, { "epoch": 0.5896504720052084, "grad_norm": 15.027643203735352, "learning_rate": 1.8083119515552506e-06, "loss": 3.4859, "step": 57965 }, { "epoch": 0.5897013346354166, "grad_norm": 10.01371955871582, "learning_rate": 1.807927689419507e-06, "loss": 3.4033, "step": 57970 }, { "epoch": 0.589752197265625, "grad_norm": 16.41242218017578, "learning_rate": 1.8075434449902693e-06, "loss": 3.3512, "step": 57975 }, { "epoch": 0.5898030598958334, "grad_norm": 7.639359474182129, "learning_rate": 1.8071592182773676e-06, "loss": 3.275, "step": 57980 }, { "epoch": 0.5898539225260416, "grad_norm": 17.695276260375977, "learning_rate": 1.8067750092906345e-06, "loss": 3.1948, "step": 57985 }, { "epoch": 0.58990478515625, "grad_norm": 16.86260223388672, "learning_rate": 1.8063908180398975e-06, "loss": 3.304, "step": 57990 }, { "epoch": 0.5899556477864584, "grad_norm": 11.927443504333496, "learning_rate": 1.8060066445349866e-06, "loss": 3.3504, "step": 57995 }, { "epoch": 0.5900065104166666, "grad_norm": 14.542708396911621, "learning_rate": 1.8056224887857323e-06, "loss": 3.6553, "step": 58000 }, { "epoch": 0.590057373046875, "grad_norm": 16.340160369873047, "learning_rate": 1.8052383508019616e-06, "loss": 2.9317, "step": 58005 }, { "epoch": 0.5901082356770834, "grad_norm": 15.192581176757812, "learning_rate": 1.8048542305935036e-06, "loss": 3.1567, "step": 58010 }, { "epoch": 0.5901590983072916, "grad_norm": 12.118131637573242, "learning_rate": 1.804470128170184e-06, "loss": 3.0627, "step": 58015 }, { "epoch": 0.5902099609375, "grad_norm": 12.99780559539795, "learning_rate": 1.804086043541832e-06, "loss": 3.3608, "step": 58020 }, { "epoch": 0.5902608235677084, "grad_norm": 9.27899169921875, "learning_rate": 1.8037019767182742e-06, "loss": 3.183, "step": 58025 }, { "epoch": 0.5903116861979166, "grad_norm": 10.601043701171875, "learning_rate": 1.8033179277093358e-06, "loss": 2.9006, "step": 58030 }, { "epoch": 0.590362548828125, "grad_norm": 10.075181007385254, "learning_rate": 1.8029338965248424e-06, "loss": 3.3884, "step": 58035 }, { "epoch": 0.5904134114583334, "grad_norm": 10.405427932739258, "learning_rate": 1.8025498831746212e-06, "loss": 2.99, "step": 58040 }, { "epoch": 0.5904642740885416, "grad_norm": 13.567709922790527, "learning_rate": 1.8021658876684955e-06, "loss": 3.2124, "step": 58045 }, { "epoch": 0.59051513671875, "grad_norm": 14.217094421386719, "learning_rate": 1.8017819100162904e-06, "loss": 3.4022, "step": 58050 }, { "epoch": 0.5905659993489584, "grad_norm": 12.847613334655762, "learning_rate": 1.801397950227829e-06, "loss": 3.3816, "step": 58055 }, { "epoch": 0.5906168619791666, "grad_norm": 8.856525421142578, "learning_rate": 1.8010140083129355e-06, "loss": 3.4207, "step": 58060 }, { "epoch": 0.590667724609375, "grad_norm": 9.142597198486328, "learning_rate": 1.8006300842814337e-06, "loss": 3.2907, "step": 58065 }, { "epoch": 0.5907185872395834, "grad_norm": 12.949610710144043, "learning_rate": 1.8002461781431447e-06, "loss": 3.5367, "step": 58070 }, { "epoch": 0.5907694498697916, "grad_norm": 11.678544044494629, "learning_rate": 1.799862289907891e-06, "loss": 3.0713, "step": 58075 }, { "epoch": 0.5908203125, "grad_norm": 9.619078636169434, "learning_rate": 1.799478419585496e-06, "loss": 3.4691, "step": 58080 }, { "epoch": 0.5908711751302084, "grad_norm": 12.252608299255371, "learning_rate": 1.7990945671857784e-06, "loss": 3.4869, "step": 58085 }, { "epoch": 0.5909220377604166, "grad_norm": 11.362506866455078, "learning_rate": 1.7987107327185605e-06, "loss": 3.1936, "step": 58090 }, { "epoch": 0.590972900390625, "grad_norm": 14.540589332580566, "learning_rate": 1.7983269161936622e-06, "loss": 3.2106, "step": 58095 }, { "epoch": 0.5910237630208334, "grad_norm": 9.324612617492676, "learning_rate": 1.797943117620904e-06, "loss": 3.0983, "step": 58100 }, { "epoch": 0.5910746256510416, "grad_norm": 10.476067543029785, "learning_rate": 1.7975593370101044e-06, "loss": 3.0299, "step": 58105 }, { "epoch": 0.59112548828125, "grad_norm": 11.967348098754883, "learning_rate": 1.797175574371082e-06, "loss": 3.1494, "step": 58110 }, { "epoch": 0.5911763509114584, "grad_norm": 10.870805740356445, "learning_rate": 1.7967918297136572e-06, "loss": 3.3598, "step": 58115 }, { "epoch": 0.5912272135416666, "grad_norm": 14.618064880371094, "learning_rate": 1.796408103047646e-06, "loss": 3.1634, "step": 58120 }, { "epoch": 0.591278076171875, "grad_norm": 13.103531837463379, "learning_rate": 1.796024394382867e-06, "loss": 3.3876, "step": 58125 }, { "epoch": 0.5913289388020834, "grad_norm": 9.35886287689209, "learning_rate": 1.7956407037291365e-06, "loss": 3.4578, "step": 58130 }, { "epoch": 0.5913798014322916, "grad_norm": 7.379724979400635, "learning_rate": 1.795257031096272e-06, "loss": 3.3323, "step": 58135 }, { "epoch": 0.5914306640625, "grad_norm": 9.186336517333984, "learning_rate": 1.7948733764940895e-06, "loss": 3.2934, "step": 58140 }, { "epoch": 0.5914815266927084, "grad_norm": 13.264702796936035, "learning_rate": 1.7944897399324046e-06, "loss": 3.083, "step": 58145 }, { "epoch": 0.5915323893229166, "grad_norm": 18.051586151123047, "learning_rate": 1.7941061214210315e-06, "loss": 3.3851, "step": 58150 }, { "epoch": 0.591583251953125, "grad_norm": 13.872904777526855, "learning_rate": 1.7937225209697873e-06, "loss": 2.9398, "step": 58155 }, { "epoch": 0.5916341145833334, "grad_norm": 13.26317024230957, "learning_rate": 1.7933389385884842e-06, "loss": 3.2565, "step": 58160 }, { "epoch": 0.5916849772135416, "grad_norm": 16.90981101989746, "learning_rate": 1.7929553742869376e-06, "loss": 3.3974, "step": 58165 }, { "epoch": 0.59173583984375, "grad_norm": 15.267029762268066, "learning_rate": 1.7925718280749587e-06, "loss": 3.343, "step": 58170 }, { "epoch": 0.5917867024739584, "grad_norm": 11.082977294921875, "learning_rate": 1.792188299962363e-06, "loss": 2.9795, "step": 58175 }, { "epoch": 0.5918375651041666, "grad_norm": 9.177314758300781, "learning_rate": 1.7918047899589623e-06, "loss": 3.4705, "step": 58180 }, { "epoch": 0.591888427734375, "grad_norm": 14.937149047851562, "learning_rate": 1.7914212980745667e-06, "loss": 3.7697, "step": 58185 }, { "epoch": 0.5919392903645834, "grad_norm": 10.546945571899414, "learning_rate": 1.7910378243189901e-06, "loss": 3.4776, "step": 58190 }, { "epoch": 0.5919901529947916, "grad_norm": 7.730606555938721, "learning_rate": 1.7906543687020433e-06, "loss": 3.4102, "step": 58195 }, { "epoch": 0.592041015625, "grad_norm": 12.9700345993042, "learning_rate": 1.7902709312335356e-06, "loss": 3.1313, "step": 58200 }, { "epoch": 0.5920918782552084, "grad_norm": 8.473440170288086, "learning_rate": 1.7898875119232775e-06, "loss": 3.4309, "step": 58205 }, { "epoch": 0.5921427408854166, "grad_norm": 11.529534339904785, "learning_rate": 1.78950411078108e-06, "loss": 3.8647, "step": 58210 }, { "epoch": 0.592193603515625, "grad_norm": 15.359539031982422, "learning_rate": 1.7891207278167512e-06, "loss": 3.3153, "step": 58215 }, { "epoch": 0.5922444661458334, "grad_norm": 16.164695739746094, "learning_rate": 1.7887373630401007e-06, "loss": 3.2186, "step": 58220 }, { "epoch": 0.5922953287760416, "grad_norm": 7.963503360748291, "learning_rate": 1.7883540164609347e-06, "loss": 3.54, "step": 58225 }, { "epoch": 0.59234619140625, "grad_norm": 13.482510566711426, "learning_rate": 1.787970688089063e-06, "loss": 3.3673, "step": 58230 }, { "epoch": 0.5923970540364584, "grad_norm": 12.424481391906738, "learning_rate": 1.7875873779342933e-06, "loss": 3.7461, "step": 58235 }, { "epoch": 0.5924479166666666, "grad_norm": 21.41114616394043, "learning_rate": 1.7872040860064311e-06, "loss": 3.9624, "step": 58240 }, { "epoch": 0.592498779296875, "grad_norm": 14.386809349060059, "learning_rate": 1.7868208123152827e-06, "loss": 3.4405, "step": 58245 }, { "epoch": 0.5925496419270834, "grad_norm": 12.011004447937012, "learning_rate": 1.7864375568706555e-06, "loss": 3.0951, "step": 58250 }, { "epoch": 0.5926005045572916, "grad_norm": 12.4212646484375, "learning_rate": 1.7860543196823548e-06, "loss": 3.3755, "step": 58255 }, { "epoch": 0.5926513671875, "grad_norm": 13.907829284667969, "learning_rate": 1.7856711007601846e-06, "loss": 3.2868, "step": 58260 }, { "epoch": 0.5927022298177084, "grad_norm": 11.430810928344727, "learning_rate": 1.7852879001139493e-06, "loss": 3.2822, "step": 58265 }, { "epoch": 0.5927530924479166, "grad_norm": 11.203187942504883, "learning_rate": 1.7849047177534546e-06, "loss": 3.7246, "step": 58270 }, { "epoch": 0.592803955078125, "grad_norm": 11.078207015991211, "learning_rate": 1.784521553688503e-06, "loss": 3.2157, "step": 58275 }, { "epoch": 0.5928548177083334, "grad_norm": 14.518149375915527, "learning_rate": 1.784138407928897e-06, "loss": 3.5838, "step": 58280 }, { "epoch": 0.5929056803385416, "grad_norm": 13.477997779846191, "learning_rate": 1.7837552804844416e-06, "loss": 3.2663, "step": 58285 }, { "epoch": 0.59295654296875, "grad_norm": 14.6279296875, "learning_rate": 1.7833721713649368e-06, "loss": 3.341, "step": 58290 }, { "epoch": 0.5930074055989584, "grad_norm": 16.762426376342773, "learning_rate": 1.782989080580186e-06, "loss": 3.3536, "step": 58295 }, { "epoch": 0.5930582682291666, "grad_norm": 9.366729736328125, "learning_rate": 1.7826060081399882e-06, "loss": 3.3015, "step": 58300 }, { "epoch": 0.593109130859375, "grad_norm": 12.638140678405762, "learning_rate": 1.782222954054146e-06, "loss": 3.3911, "step": 58305 }, { "epoch": 0.5931599934895834, "grad_norm": 12.419198989868164, "learning_rate": 1.7818399183324602e-06, "loss": 3.2025, "step": 58310 }, { "epoch": 0.5932108561197916, "grad_norm": 13.538446426391602, "learning_rate": 1.7814569009847293e-06, "loss": 3.3975, "step": 58315 }, { "epoch": 0.59326171875, "grad_norm": 9.20753288269043, "learning_rate": 1.781073902020753e-06, "loss": 3.3517, "step": 58320 }, { "epoch": 0.5933125813802084, "grad_norm": 12.255483627319336, "learning_rate": 1.780690921450331e-06, "loss": 3.1009, "step": 58325 }, { "epoch": 0.5933634440104166, "grad_norm": 12.996417999267578, "learning_rate": 1.7803079592832613e-06, "loss": 3.3092, "step": 58330 }, { "epoch": 0.593414306640625, "grad_norm": 10.153072357177734, "learning_rate": 1.7799250155293421e-06, "loss": 3.3059, "step": 58335 }, { "epoch": 0.5934651692708334, "grad_norm": 9.475560188293457, "learning_rate": 1.7795420901983695e-06, "loss": 3.3699, "step": 58340 }, { "epoch": 0.5935160319010416, "grad_norm": 13.278281211853027, "learning_rate": 1.7791591833001426e-06, "loss": 3.1001, "step": 58345 }, { "epoch": 0.59356689453125, "grad_norm": 11.277542114257812, "learning_rate": 1.7787762948444575e-06, "loss": 3.0136, "step": 58350 }, { "epoch": 0.5936177571614584, "grad_norm": 9.570847511291504, "learning_rate": 1.7783934248411097e-06, "loss": 3.2451, "step": 58355 }, { "epoch": 0.5936686197916666, "grad_norm": 14.030230522155762, "learning_rate": 1.7780105732998942e-06, "loss": 3.2859, "step": 58360 }, { "epoch": 0.593719482421875, "grad_norm": 7.317997932434082, "learning_rate": 1.7776277402306085e-06, "loss": 3.2868, "step": 58365 }, { "epoch": 0.5937703450520834, "grad_norm": 12.152804374694824, "learning_rate": 1.777244925643045e-06, "loss": 3.1848, "step": 58370 }, { "epoch": 0.5938212076822916, "grad_norm": 7.249147891998291, "learning_rate": 1.7768621295469985e-06, "loss": 3.2292, "step": 58375 }, { "epoch": 0.5938720703125, "grad_norm": 12.727730751037598, "learning_rate": 1.7764793519522639e-06, "loss": 3.2557, "step": 58380 }, { "epoch": 0.5939229329427084, "grad_norm": 10.969575881958008, "learning_rate": 1.776096592868633e-06, "loss": 3.5441, "step": 58385 }, { "epoch": 0.5939737955729166, "grad_norm": 8.714305877685547, "learning_rate": 1.7757138523058999e-06, "loss": 3.072, "step": 58390 }, { "epoch": 0.594024658203125, "grad_norm": 12.414077758789062, "learning_rate": 1.775331130273854e-06, "loss": 3.6681, "step": 58395 }, { "epoch": 0.5940755208333334, "grad_norm": 13.802921295166016, "learning_rate": 1.7749484267822917e-06, "loss": 3.3932, "step": 58400 }, { "epoch": 0.5941263834635416, "grad_norm": 12.4661226272583, "learning_rate": 1.774565741841001e-06, "loss": 3.008, "step": 58405 }, { "epoch": 0.59417724609375, "grad_norm": 8.011397361755371, "learning_rate": 1.774183075459775e-06, "loss": 3.5852, "step": 58410 }, { "epoch": 0.5942281087239584, "grad_norm": 9.829503059387207, "learning_rate": 1.7738004276484014e-06, "loss": 3.2902, "step": 58415 }, { "epoch": 0.5942789713541666, "grad_norm": 8.826567649841309, "learning_rate": 1.7734177984166722e-06, "loss": 3.3655, "step": 58420 }, { "epoch": 0.594329833984375, "grad_norm": 14.86225414276123, "learning_rate": 1.7730351877743773e-06, "loss": 3.5861, "step": 58425 }, { "epoch": 0.5943806966145834, "grad_norm": 18.58818244934082, "learning_rate": 1.772652595731304e-06, "loss": 3.9893, "step": 58430 }, { "epoch": 0.5944315592447916, "grad_norm": 16.581823348999023, "learning_rate": 1.7722700222972413e-06, "loss": 3.4196, "step": 58435 }, { "epoch": 0.594482421875, "grad_norm": 7.3273115158081055, "learning_rate": 1.7718874674819786e-06, "loss": 3.3065, "step": 58440 }, { "epoch": 0.5945332845052084, "grad_norm": 11.707185745239258, "learning_rate": 1.771504931295302e-06, "loss": 3.5165, "step": 58445 }, { "epoch": 0.5945841471354166, "grad_norm": 17.30613899230957, "learning_rate": 1.7711224137469996e-06, "loss": 3.2353, "step": 58450 }, { "epoch": 0.594635009765625, "grad_norm": 13.378973960876465, "learning_rate": 1.7707399148468563e-06, "loss": 3.0759, "step": 58455 }, { "epoch": 0.5946858723958334, "grad_norm": 7.2831621170043945, "learning_rate": 1.7703574346046598e-06, "loss": 3.0799, "step": 58460 }, { "epoch": 0.5947367350260416, "grad_norm": 10.333426475524902, "learning_rate": 1.7699749730301962e-06, "loss": 3.1861, "step": 58465 }, { "epoch": 0.59478759765625, "grad_norm": 6.475285530090332, "learning_rate": 1.7695925301332495e-06, "loss": 3.4412, "step": 58470 }, { "epoch": 0.5948384602864584, "grad_norm": 15.218671798706055, "learning_rate": 1.769210105923604e-06, "loss": 3.0268, "step": 58475 }, { "epoch": 0.5948893229166666, "grad_norm": 11.751965522766113, "learning_rate": 1.7688277004110458e-06, "loss": 2.8304, "step": 58480 }, { "epoch": 0.594940185546875, "grad_norm": 9.582796096801758, "learning_rate": 1.768445313605357e-06, "loss": 3.4982, "step": 58485 }, { "epoch": 0.5949910481770834, "grad_norm": 14.962919235229492, "learning_rate": 1.768062945516321e-06, "loss": 3.5315, "step": 58490 }, { "epoch": 0.5950419108072916, "grad_norm": 10.981086730957031, "learning_rate": 1.7676805961537218e-06, "loss": 3.3454, "step": 58495 }, { "epoch": 0.5950927734375, "grad_norm": 11.686980247497559, "learning_rate": 1.7672982655273406e-06, "loss": 3.2746, "step": 58500 }, { "epoch": 0.5951436360677084, "grad_norm": 11.312081336975098, "learning_rate": 1.7669159536469605e-06, "loss": 4.051, "step": 58505 }, { "epoch": 0.5951944986979166, "grad_norm": 14.038021087646484, "learning_rate": 1.766533660522361e-06, "loss": 3.3311, "step": 58510 }, { "epoch": 0.595245361328125, "grad_norm": 16.62759780883789, "learning_rate": 1.766151386163324e-06, "loss": 3.4934, "step": 58515 }, { "epoch": 0.5952962239583334, "grad_norm": 11.089447021484375, "learning_rate": 1.7657691305796303e-06, "loss": 3.3098, "step": 58520 }, { "epoch": 0.5953470865885416, "grad_norm": 9.269929885864258, "learning_rate": 1.7653868937810593e-06, "loss": 3.1054, "step": 58525 }, { "epoch": 0.59539794921875, "grad_norm": 14.380538940429688, "learning_rate": 1.7650046757773899e-06, "loss": 2.8936, "step": 58530 }, { "epoch": 0.5954488118489584, "grad_norm": 12.806015968322754, "learning_rate": 1.7646224765784024e-06, "loss": 3.3982, "step": 58535 }, { "epoch": 0.5954996744791666, "grad_norm": 9.387150764465332, "learning_rate": 1.764240296193874e-06, "loss": 3.5869, "step": 58540 }, { "epoch": 0.595550537109375, "grad_norm": 10.369173049926758, "learning_rate": 1.7638581346335837e-06, "loss": 3.205, "step": 58545 }, { "epoch": 0.5956013997395834, "grad_norm": 13.991856575012207, "learning_rate": 1.7634759919073074e-06, "loss": 3.215, "step": 58550 }, { "epoch": 0.5956522623697916, "grad_norm": 10.374763488769531, "learning_rate": 1.7630938680248243e-06, "loss": 3.3059, "step": 58555 }, { "epoch": 0.595703125, "grad_norm": 12.562834739685059, "learning_rate": 1.76271176299591e-06, "loss": 3.0095, "step": 58560 }, { "epoch": 0.5957539876302084, "grad_norm": 11.885716438293457, "learning_rate": 1.7623296768303406e-06, "loss": 3.4797, "step": 58565 }, { "epoch": 0.5958048502604166, "grad_norm": 8.960821151733398, "learning_rate": 1.7619476095378907e-06, "loss": 3.5235, "step": 58570 }, { "epoch": 0.595855712890625, "grad_norm": 11.510443687438965, "learning_rate": 1.7615655611283366e-06, "loss": 3.3977, "step": 58575 }, { "epoch": 0.5959065755208334, "grad_norm": 15.014580726623535, "learning_rate": 1.7611835316114534e-06, "loss": 3.2362, "step": 58580 }, { "epoch": 0.5959574381510416, "grad_norm": 10.226090431213379, "learning_rate": 1.760801520997013e-06, "loss": 3.1782, "step": 58585 }, { "epoch": 0.59600830078125, "grad_norm": 11.057794570922852, "learning_rate": 1.7604195292947915e-06, "loss": 3.224, "step": 58590 }, { "epoch": 0.5960591634114584, "grad_norm": 7.222981929779053, "learning_rate": 1.760037556514561e-06, "loss": 3.656, "step": 58595 }, { "epoch": 0.5961100260416666, "grad_norm": 9.445796966552734, "learning_rate": 1.7596556026660944e-06, "loss": 3.1746, "step": 58600 }, { "epoch": 0.596160888671875, "grad_norm": 14.64661979675293, "learning_rate": 1.7592736677591627e-06, "loss": 3.1428, "step": 58605 }, { "epoch": 0.5962117513020834, "grad_norm": 11.342286109924316, "learning_rate": 1.7588917518035398e-06, "loss": 3.1805, "step": 58610 }, { "epoch": 0.5962626139322916, "grad_norm": 14.933340072631836, "learning_rate": 1.7585098548089952e-06, "loss": 3.0741, "step": 58615 }, { "epoch": 0.5963134765625, "grad_norm": 15.432071685791016, "learning_rate": 1.7581279767853008e-06, "loss": 3.2039, "step": 58620 }, { "epoch": 0.5963643391927084, "grad_norm": 9.950495719909668, "learning_rate": 1.7577461177422251e-06, "loss": 3.5878, "step": 58625 }, { "epoch": 0.5964152018229166, "grad_norm": 8.13646411895752, "learning_rate": 1.7573642776895397e-06, "loss": 3.3971, "step": 58630 }, { "epoch": 0.596466064453125, "grad_norm": 12.200660705566406, "learning_rate": 1.7569824566370136e-06, "loss": 3.2983, "step": 58635 }, { "epoch": 0.5965169270833334, "grad_norm": 14.329954147338867, "learning_rate": 1.7566006545944148e-06, "loss": 3.2166, "step": 58640 }, { "epoch": 0.5965677897135416, "grad_norm": 10.474040031433105, "learning_rate": 1.7562188715715117e-06, "loss": 3.3249, "step": 58645 }, { "epoch": 0.59661865234375, "grad_norm": 8.579286575317383, "learning_rate": 1.7558371075780733e-06, "loss": 3.5359, "step": 58650 }, { "epoch": 0.5966695149739584, "grad_norm": 10.491554260253906, "learning_rate": 1.7554553626238656e-06, "loss": 3.0442, "step": 58655 }, { "epoch": 0.5967203776041666, "grad_norm": 16.097248077392578, "learning_rate": 1.7550736367186566e-06, "loss": 3.2724, "step": 58660 }, { "epoch": 0.596771240234375, "grad_norm": 11.073954582214355, "learning_rate": 1.754691929872211e-06, "loss": 3.2875, "step": 58665 }, { "epoch": 0.5968221028645834, "grad_norm": 16.58736228942871, "learning_rate": 1.7543102420942961e-06, "loss": 3.5692, "step": 58670 }, { "epoch": 0.5968729654947916, "grad_norm": 13.786785125732422, "learning_rate": 1.7539285733946776e-06, "loss": 3.7272, "step": 58675 }, { "epoch": 0.596923828125, "grad_norm": 8.079156875610352, "learning_rate": 1.7535469237831185e-06, "loss": 4.6149, "step": 58680 }, { "epoch": 0.5969746907552084, "grad_norm": 14.249360084533691, "learning_rate": 1.7531652932693848e-06, "loss": 3.1452, "step": 58685 }, { "epoch": 0.5970255533854166, "grad_norm": 11.673576354980469, "learning_rate": 1.752783681863241e-06, "loss": 3.3092, "step": 58690 }, { "epoch": 0.597076416015625, "grad_norm": 13.817118644714355, "learning_rate": 1.752402089574449e-06, "loss": 3.1523, "step": 58695 }, { "epoch": 0.5971272786458334, "grad_norm": 15.398924827575684, "learning_rate": 1.7520205164127719e-06, "loss": 3.3453, "step": 58700 }, { "epoch": 0.5971781412760416, "grad_norm": 12.843722343444824, "learning_rate": 1.7516389623879733e-06, "loss": 3.78, "step": 58705 }, { "epoch": 0.59722900390625, "grad_norm": 10.914924621582031, "learning_rate": 1.7512574275098138e-06, "loss": 3.0658, "step": 58710 }, { "epoch": 0.5972798665364584, "grad_norm": 13.276107788085938, "learning_rate": 1.7508759117880564e-06, "loss": 3.3682, "step": 58715 }, { "epoch": 0.5973307291666666, "grad_norm": 7.376313209533691, "learning_rate": 1.75049441523246e-06, "loss": 3.1761, "step": 58720 }, { "epoch": 0.597381591796875, "grad_norm": 12.459508895874023, "learning_rate": 1.750112937852787e-06, "loss": 3.7262, "step": 58725 }, { "epoch": 0.5974324544270834, "grad_norm": 6.936720848083496, "learning_rate": 1.749731479658797e-06, "loss": 3.2802, "step": 58730 }, { "epoch": 0.5974833170572916, "grad_norm": 13.150242805480957, "learning_rate": 1.7493500406602492e-06, "loss": 3.2631, "step": 58735 }, { "epoch": 0.5975341796875, "grad_norm": 8.150609970092773, "learning_rate": 1.7489686208669022e-06, "loss": 3.1906, "step": 58740 }, { "epoch": 0.5975850423177084, "grad_norm": 12.01266860961914, "learning_rate": 1.7485872202885152e-06, "loss": 3.2785, "step": 58745 }, { "epoch": 0.5976359049479166, "grad_norm": 9.509927749633789, "learning_rate": 1.7482058389348466e-06, "loss": 3.0776, "step": 58750 }, { "epoch": 0.597686767578125, "grad_norm": 10.995317459106445, "learning_rate": 1.7478244768156532e-06, "loss": 3.038, "step": 58755 }, { "epoch": 0.5977376302083334, "grad_norm": 15.866677284240723, "learning_rate": 1.7474431339406916e-06, "loss": 3.3283, "step": 58760 }, { "epoch": 0.5977884928385416, "grad_norm": 10.797828674316406, "learning_rate": 1.7470618103197201e-06, "loss": 3.4482, "step": 58765 }, { "epoch": 0.59783935546875, "grad_norm": 10.114940643310547, "learning_rate": 1.7466805059624931e-06, "loss": 3.336, "step": 58770 }, { "epoch": 0.5978902180989584, "grad_norm": 10.19825553894043, "learning_rate": 1.746299220878766e-06, "loss": 3.0855, "step": 58775 }, { "epoch": 0.5979410807291666, "grad_norm": 15.008298873901367, "learning_rate": 1.7459179550782962e-06, "loss": 3.244, "step": 58780 }, { "epoch": 0.597991943359375, "grad_norm": 13.538183212280273, "learning_rate": 1.745536708570836e-06, "loss": 3.6771, "step": 58785 }, { "epoch": 0.5980428059895834, "grad_norm": 11.229158401489258, "learning_rate": 1.7451554813661408e-06, "loss": 3.2748, "step": 58790 }, { "epoch": 0.5980936686197916, "grad_norm": 12.117464065551758, "learning_rate": 1.7447742734739628e-06, "loss": 3.3054, "step": 58795 }, { "epoch": 0.59814453125, "grad_norm": 8.106527328491211, "learning_rate": 1.7443930849040564e-06, "loss": 3.099, "step": 58800 }, { "epoch": 0.5981953938802084, "grad_norm": 17.27057456970215, "learning_rate": 1.7440119156661745e-06, "loss": 3.2644, "step": 58805 }, { "epoch": 0.5982462565104166, "grad_norm": 15.627812385559082, "learning_rate": 1.743630765770068e-06, "loss": 3.1907, "step": 58810 }, { "epoch": 0.598297119140625, "grad_norm": 14.384893417358398, "learning_rate": 1.7432496352254885e-06, "loss": 3.2499, "step": 58815 }, { "epoch": 0.5983479817708334, "grad_norm": 15.350054740905762, "learning_rate": 1.7428685240421888e-06, "loss": 2.977, "step": 58820 }, { "epoch": 0.5983988444010416, "grad_norm": 12.908646583557129, "learning_rate": 1.742487432229918e-06, "loss": 3.2672, "step": 58825 }, { "epoch": 0.59844970703125, "grad_norm": 16.558061599731445, "learning_rate": 1.742106359798427e-06, "loss": 3.231, "step": 58830 }, { "epoch": 0.5985005696614584, "grad_norm": 11.36466121673584, "learning_rate": 1.7417253067574642e-06, "loss": 3.5004, "step": 58835 }, { "epoch": 0.5985514322916666, "grad_norm": 16.1599178314209, "learning_rate": 1.7413442731167802e-06, "loss": 3.2495, "step": 58840 }, { "epoch": 0.598602294921875, "grad_norm": 14.043704986572266, "learning_rate": 1.7409632588861236e-06, "loss": 3.5283, "step": 58845 }, { "epoch": 0.5986531575520834, "grad_norm": 15.785425186157227, "learning_rate": 1.7405822640752419e-06, "loss": 3.4809, "step": 58850 }, { "epoch": 0.5987040201822916, "grad_norm": 16.065847396850586, "learning_rate": 1.7402012886938819e-06, "loss": 3.4675, "step": 58855 }, { "epoch": 0.5987548828125, "grad_norm": 13.364585876464844, "learning_rate": 1.739820332751793e-06, "loss": 3.2538, "step": 58860 }, { "epoch": 0.5988057454427084, "grad_norm": 11.64784049987793, "learning_rate": 1.739439396258721e-06, "loss": 3.1564, "step": 58865 }, { "epoch": 0.5988566080729166, "grad_norm": 10.144237518310547, "learning_rate": 1.7390584792244102e-06, "loss": 3.3792, "step": 58870 }, { "epoch": 0.598907470703125, "grad_norm": 14.06118392944336, "learning_rate": 1.7386775816586088e-06, "loss": 3.2614, "step": 58875 }, { "epoch": 0.5989583333333334, "grad_norm": 10.689396858215332, "learning_rate": 1.738296703571062e-06, "loss": 3.1, "step": 58880 }, { "epoch": 0.5990091959635416, "grad_norm": 14.174532890319824, "learning_rate": 1.7379158449715123e-06, "loss": 3.6983, "step": 58885 }, { "epoch": 0.59906005859375, "grad_norm": 15.426204681396484, "learning_rate": 1.737535005869705e-06, "loss": 3.5526, "step": 58890 }, { "epoch": 0.5991109212239584, "grad_norm": 14.208455085754395, "learning_rate": 1.7371541862753843e-06, "loss": 3.8607, "step": 58895 }, { "epoch": 0.5991617838541666, "grad_norm": 11.174375534057617, "learning_rate": 1.7367733861982928e-06, "loss": 3.4787, "step": 58900 }, { "epoch": 0.599212646484375, "grad_norm": 11.884801864624023, "learning_rate": 1.736392605648174e-06, "loss": 3.0598, "step": 58905 }, { "epoch": 0.5992635091145834, "grad_norm": 6.930401802062988, "learning_rate": 1.736011844634768e-06, "loss": 2.9284, "step": 58910 }, { "epoch": 0.5993143717447916, "grad_norm": 10.723294258117676, "learning_rate": 1.7356311031678186e-06, "loss": 3.403, "step": 58915 }, { "epoch": 0.599365234375, "grad_norm": 12.299421310424805, "learning_rate": 1.7352503812570667e-06, "loss": 4.1806, "step": 58920 }, { "epoch": 0.5994160970052084, "grad_norm": 10.311161994934082, "learning_rate": 1.7348696789122522e-06, "loss": 2.729, "step": 58925 }, { "epoch": 0.5994669596354166, "grad_norm": 7.91664981842041, "learning_rate": 1.7344889961431149e-06, "loss": 3.3468, "step": 58930 }, { "epoch": 0.599517822265625, "grad_norm": 13.23727798461914, "learning_rate": 1.734108332959396e-06, "loss": 3.2103, "step": 58935 }, { "epoch": 0.5995686848958334, "grad_norm": 13.324912071228027, "learning_rate": 1.7337276893708338e-06, "loss": 3.4046, "step": 58940 }, { "epoch": 0.5996195475260416, "grad_norm": 14.138757705688477, "learning_rate": 1.7333470653871673e-06, "loss": 2.9858, "step": 58945 }, { "epoch": 0.59967041015625, "grad_norm": 9.890541076660156, "learning_rate": 1.7329664610181335e-06, "loss": 3.2769, "step": 58950 }, { "epoch": 0.5997212727864584, "grad_norm": 15.262154579162598, "learning_rate": 1.7325858762734715e-06, "loss": 3.7433, "step": 58955 }, { "epoch": 0.5997721354166666, "grad_norm": 13.031238555908203, "learning_rate": 1.7322053111629183e-06, "loss": 3.5493, "step": 58960 }, { "epoch": 0.599822998046875, "grad_norm": 12.975473403930664, "learning_rate": 1.7318247656962092e-06, "loss": 3.1998, "step": 58965 }, { "epoch": 0.5998738606770834, "grad_norm": 15.250924110412598, "learning_rate": 1.7314442398830822e-06, "loss": 3.7708, "step": 58970 }, { "epoch": 0.5999247233072916, "grad_norm": 16.053945541381836, "learning_rate": 1.7310637337332725e-06, "loss": 3.3696, "step": 58975 }, { "epoch": 0.5999755859375, "grad_norm": 13.52525806427002, "learning_rate": 1.7306832472565143e-06, "loss": 3.1525, "step": 58980 }, { "epoch": 0.6000264485677084, "grad_norm": 11.047719955444336, "learning_rate": 1.7303027804625426e-06, "loss": 2.9954, "step": 58985 }, { "epoch": 0.6000773111979166, "grad_norm": 8.911846160888672, "learning_rate": 1.7299223333610926e-06, "loss": 3.5218, "step": 58990 }, { "epoch": 0.600128173828125, "grad_norm": 11.748200416564941, "learning_rate": 1.7295419059618967e-06, "loss": 3.2972, "step": 58995 }, { "epoch": 0.6001790364583334, "grad_norm": 8.499863624572754, "learning_rate": 1.729161498274689e-06, "loss": 3.4798, "step": 59000 }, { "epoch": 0.6002298990885416, "grad_norm": 8.420709609985352, "learning_rate": 1.7287811103092006e-06, "loss": 3.5517, "step": 59005 }, { "epoch": 0.60028076171875, "grad_norm": 16.08116340637207, "learning_rate": 1.7284007420751653e-06, "loss": 3.7896, "step": 59010 }, { "epoch": 0.6003316243489584, "grad_norm": 9.745408058166504, "learning_rate": 1.7280203935823148e-06, "loss": 3.6144, "step": 59015 }, { "epoch": 0.6003824869791666, "grad_norm": 7.625930309295654, "learning_rate": 1.7276400648403791e-06, "loss": 3.4174, "step": 59020 }, { "epoch": 0.600433349609375, "grad_norm": 10.471306800842285, "learning_rate": 1.7272597558590883e-06, "loss": 3.1803, "step": 59025 }, { "epoch": 0.6004842122395834, "grad_norm": 12.285701751708984, "learning_rate": 1.7268794666481742e-06, "loss": 3.3564, "step": 59030 }, { "epoch": 0.6005350748697916, "grad_norm": 11.852290153503418, "learning_rate": 1.7264991972173665e-06, "loss": 3.1323, "step": 59035 }, { "epoch": 0.6005859375, "grad_norm": 9.487098693847656, "learning_rate": 1.7261189475763929e-06, "loss": 3.249, "step": 59040 }, { "epoch": 0.6006368001302084, "grad_norm": 8.560872077941895, "learning_rate": 1.7257387177349815e-06, "loss": 3.7592, "step": 59045 }, { "epoch": 0.6006876627604166, "grad_norm": 13.85609245300293, "learning_rate": 1.725358507702863e-06, "loss": 3.0362, "step": 59050 }, { "epoch": 0.600738525390625, "grad_norm": 11.601663589477539, "learning_rate": 1.724978317489763e-06, "loss": 3.2711, "step": 59055 }, { "epoch": 0.6007893880208334, "grad_norm": 10.587480545043945, "learning_rate": 1.7245981471054096e-06, "loss": 3.4282, "step": 59060 }, { "epoch": 0.6008402506510416, "grad_norm": 14.014335632324219, "learning_rate": 1.7242179965595273e-06, "loss": 3.5206, "step": 59065 }, { "epoch": 0.60089111328125, "grad_norm": 13.849995613098145, "learning_rate": 1.723837865861845e-06, "loss": 3.8606, "step": 59070 }, { "epoch": 0.6009419759114584, "grad_norm": 12.70814037322998, "learning_rate": 1.7234577550220869e-06, "loss": 3.443, "step": 59075 }, { "epoch": 0.6009928385416666, "grad_norm": 16.227697372436523, "learning_rate": 1.723077664049977e-06, "loss": 3.3012, "step": 59080 }, { "epoch": 0.601043701171875, "grad_norm": 14.024761199951172, "learning_rate": 1.7226975929552414e-06, "loss": 3.3245, "step": 59085 }, { "epoch": 0.6010945638020834, "grad_norm": 8.826449394226074, "learning_rate": 1.7223175417476046e-06, "loss": 3.0356, "step": 59090 }, { "epoch": 0.6011454264322916, "grad_norm": 14.546638488769531, "learning_rate": 1.7219375104367883e-06, "loss": 3.1104, "step": 59095 }, { "epoch": 0.6011962890625, "grad_norm": 9.69225788116455, "learning_rate": 1.721557499032516e-06, "loss": 3.8635, "step": 59100 }, { "epoch": 0.6012471516927084, "grad_norm": 10.164959907531738, "learning_rate": 1.7211775075445115e-06, "loss": 3.1768, "step": 59105 }, { "epoch": 0.6012980143229166, "grad_norm": 18.523700714111328, "learning_rate": 1.7207975359824953e-06, "loss": 3.2138, "step": 59110 }, { "epoch": 0.601348876953125, "grad_norm": 10.2969331741333, "learning_rate": 1.7204175843561902e-06, "loss": 3.3226, "step": 59115 }, { "epoch": 0.6013997395833334, "grad_norm": 9.768226623535156, "learning_rate": 1.720037652675315e-06, "loss": 3.0602, "step": 59120 }, { "epoch": 0.6014506022135416, "grad_norm": 10.433574676513672, "learning_rate": 1.7196577409495924e-06, "loss": 3.1022, "step": 59125 }, { "epoch": 0.60150146484375, "grad_norm": 11.451906204223633, "learning_rate": 1.719277849188742e-06, "loss": 3.4766, "step": 59130 }, { "epoch": 0.6015523274739584, "grad_norm": 9.445000648498535, "learning_rate": 1.7188979774024825e-06, "loss": 3.3239, "step": 59135 }, { "epoch": 0.6016031901041666, "grad_norm": 12.806665420532227, "learning_rate": 1.7185181256005323e-06, "loss": 3.0114, "step": 59140 }, { "epoch": 0.601654052734375, "grad_norm": 16.7288761138916, "learning_rate": 1.7181382937926117e-06, "loss": 3.1627, "step": 59145 }, { "epoch": 0.6017049153645834, "grad_norm": 14.747552871704102, "learning_rate": 1.7177584819884368e-06, "loss": 3.3099, "step": 59150 }, { "epoch": 0.6017557779947916, "grad_norm": 15.10758113861084, "learning_rate": 1.7173786901977268e-06, "loss": 3.398, "step": 59155 }, { "epoch": 0.601806640625, "grad_norm": 16.61138916015625, "learning_rate": 1.716998918430196e-06, "loss": 2.9033, "step": 59160 }, { "epoch": 0.6018575032552084, "grad_norm": 9.121318817138672, "learning_rate": 1.7166191666955628e-06, "loss": 3.1971, "step": 59165 }, { "epoch": 0.6019083658854166, "grad_norm": 13.609682083129883, "learning_rate": 1.7162394350035433e-06, "loss": 3.3779, "step": 59170 }, { "epoch": 0.601959228515625, "grad_norm": 11.46147632598877, "learning_rate": 1.7158597233638505e-06, "loss": 3.5063, "step": 59175 }, { "epoch": 0.6020100911458334, "grad_norm": 13.441067695617676, "learning_rate": 1.7154800317862024e-06, "loss": 3.4049, "step": 59180 }, { "epoch": 0.6020609537760416, "grad_norm": 10.99342155456543, "learning_rate": 1.7151003602803113e-06, "loss": 3.6103, "step": 59185 }, { "epoch": 0.60211181640625, "grad_norm": 11.134747505187988, "learning_rate": 1.7147207088558914e-06, "loss": 3.2871, "step": 59190 }, { "epoch": 0.6021626790364584, "grad_norm": 17.46708869934082, "learning_rate": 1.7143410775226554e-06, "loss": 3.166, "step": 59195 }, { "epoch": 0.6022135416666666, "grad_norm": 12.266764640808105, "learning_rate": 1.7139614662903174e-06, "loss": 3.3268, "step": 59200 }, { "epoch": 0.602264404296875, "grad_norm": 13.35457992553711, "learning_rate": 1.7135818751685892e-06, "loss": 3.6107, "step": 59205 }, { "epoch": 0.6023152669270834, "grad_norm": 12.261884689331055, "learning_rate": 1.713202304167182e-06, "loss": 3.4055, "step": 59210 }, { "epoch": 0.6023661295572916, "grad_norm": 10.646519660949707, "learning_rate": 1.7128227532958068e-06, "loss": 3.4347, "step": 59215 }, { "epoch": 0.6024169921875, "grad_norm": 12.019163131713867, "learning_rate": 1.7124432225641758e-06, "loss": 3.1606, "step": 59220 }, { "epoch": 0.6024678548177084, "grad_norm": 12.81616497039795, "learning_rate": 1.7120637119819978e-06, "loss": 3.1076, "step": 59225 }, { "epoch": 0.6025187174479166, "grad_norm": 8.086276054382324, "learning_rate": 1.7116842215589834e-06, "loss": 3.4989, "step": 59230 }, { "epoch": 0.602569580078125, "grad_norm": 7.3298258781433105, "learning_rate": 1.7113047513048404e-06, "loss": 3.2682, "step": 59235 }, { "epoch": 0.6026204427083334, "grad_norm": 12.518247604370117, "learning_rate": 1.7109253012292789e-06, "loss": 3.5294, "step": 59240 }, { "epoch": 0.6026713053385416, "grad_norm": 11.548209190368652, "learning_rate": 1.710545871342007e-06, "loss": 3.6109, "step": 59245 }, { "epoch": 0.60272216796875, "grad_norm": 14.335244178771973, "learning_rate": 1.7101664616527314e-06, "loss": 3.3422, "step": 59250 }, { "epoch": 0.6027730305989584, "grad_norm": 10.248363494873047, "learning_rate": 1.7097870721711593e-06, "loss": 3.6576, "step": 59255 }, { "epoch": 0.6028238932291666, "grad_norm": 11.230707168579102, "learning_rate": 1.7094077029069985e-06, "loss": 3.2768, "step": 59260 }, { "epoch": 0.602874755859375, "grad_norm": 10.547666549682617, "learning_rate": 1.7090283538699538e-06, "loss": 3.4327, "step": 59265 }, { "epoch": 0.6029256184895834, "grad_norm": 7.769001483917236, "learning_rate": 1.7086490250697308e-06, "loss": 3.6755, "step": 59270 }, { "epoch": 0.6029764811197916, "grad_norm": 14.495406150817871, "learning_rate": 1.7082697165160356e-06, "loss": 2.927, "step": 59275 }, { "epoch": 0.60302734375, "grad_norm": 14.664689064025879, "learning_rate": 1.7078904282185716e-06, "loss": 3.6418, "step": 59280 }, { "epoch": 0.6030782063802084, "grad_norm": 11.937549591064453, "learning_rate": 1.7075111601870441e-06, "loss": 3.2874, "step": 59285 }, { "epoch": 0.6031290690104166, "grad_norm": 12.20949935913086, "learning_rate": 1.7071319124311544e-06, "loss": 3.0881, "step": 59290 }, { "epoch": 0.603179931640625, "grad_norm": 13.66655158996582, "learning_rate": 1.7067526849606076e-06, "loss": 3.1871, "step": 59295 }, { "epoch": 0.6032307942708334, "grad_norm": 9.82215404510498, "learning_rate": 1.7063734777851054e-06, "loss": 3.4314, "step": 59300 }, { "epoch": 0.6032816569010416, "grad_norm": 12.262903213500977, "learning_rate": 1.7059942909143495e-06, "loss": 3.1973, "step": 59305 }, { "epoch": 0.60333251953125, "grad_norm": 16.095964431762695, "learning_rate": 1.7056151243580407e-06, "loss": 3.4884, "step": 59310 }, { "epoch": 0.6033833821614584, "grad_norm": 12.30743408203125, "learning_rate": 1.7052359781258819e-06, "loss": 3.2251, "step": 59315 }, { "epoch": 0.6034342447916666, "grad_norm": 12.364432334899902, "learning_rate": 1.7048568522275717e-06, "loss": 3.1654, "step": 59320 }, { "epoch": 0.603485107421875, "grad_norm": 12.293103218078613, "learning_rate": 1.7044777466728107e-06, "loss": 3.495, "step": 59325 }, { "epoch": 0.6035359700520834, "grad_norm": 10.636536598205566, "learning_rate": 1.7040986614712968e-06, "loss": 3.2208, "step": 59330 }, { "epoch": 0.6035868326822916, "grad_norm": 7.765628337860107, "learning_rate": 1.7037195966327313e-06, "loss": 3.3043, "step": 59335 }, { "epoch": 0.6036376953125, "grad_norm": 14.77886962890625, "learning_rate": 1.7033405521668106e-06, "loss": 3.9433, "step": 59340 }, { "epoch": 0.6036885579427084, "grad_norm": 12.097742080688477, "learning_rate": 1.702961528083234e-06, "loss": 3.5081, "step": 59345 }, { "epoch": 0.6037394205729166, "grad_norm": 12.712528228759766, "learning_rate": 1.7025825243916963e-06, "loss": 3.5559, "step": 59350 }, { "epoch": 0.603790283203125, "grad_norm": 12.2130765914917, "learning_rate": 1.7022035411018964e-06, "loss": 3.4545, "step": 59355 }, { "epoch": 0.6038411458333334, "grad_norm": 10.991181373596191, "learning_rate": 1.7018245782235305e-06, "loss": 3.619, "step": 59360 }, { "epoch": 0.6038920084635416, "grad_norm": 9.631376266479492, "learning_rate": 1.7014456357662923e-06, "loss": 3.213, "step": 59365 }, { "epoch": 0.60394287109375, "grad_norm": 13.050620079040527, "learning_rate": 1.7010667137398784e-06, "loss": 3.0377, "step": 59370 }, { "epoch": 0.6039937337239584, "grad_norm": 15.846744537353516, "learning_rate": 1.7006878121539844e-06, "loss": 3.1607, "step": 59375 }, { "epoch": 0.6040445963541666, "grad_norm": 13.608674049377441, "learning_rate": 1.7003089310183024e-06, "loss": 3.54, "step": 59380 }, { "epoch": 0.604095458984375, "grad_norm": 13.009920120239258, "learning_rate": 1.6999300703425264e-06, "loss": 3.2227, "step": 59385 }, { "epoch": 0.6041463216145834, "grad_norm": 13.079833030700684, "learning_rate": 1.6995512301363512e-06, "loss": 3.22, "step": 59390 }, { "epoch": 0.6041971842447916, "grad_norm": 12.880718231201172, "learning_rate": 1.6991724104094672e-06, "loss": 3.2554, "step": 59395 }, { "epoch": 0.604248046875, "grad_norm": 9.71114444732666, "learning_rate": 1.698793611171568e-06, "loss": 3.1012, "step": 59400 }, { "epoch": 0.6042989095052084, "grad_norm": 13.014288902282715, "learning_rate": 1.6984148324323433e-06, "loss": 3.286, "step": 59405 }, { "epoch": 0.6043497721354166, "grad_norm": 7.333260536193848, "learning_rate": 1.6980360742014857e-06, "loss": 3.668, "step": 59410 }, { "epoch": 0.604400634765625, "grad_norm": 10.733609199523926, "learning_rate": 1.6976573364886854e-06, "loss": 3.2209, "step": 59415 }, { "epoch": 0.6044514973958334, "grad_norm": 8.100669860839844, "learning_rate": 1.6972786193036313e-06, "loss": 3.5353, "step": 59420 }, { "epoch": 0.6045023600260416, "grad_norm": 12.514671325683594, "learning_rate": 1.6968999226560132e-06, "loss": 3.2706, "step": 59425 }, { "epoch": 0.60455322265625, "grad_norm": 11.186192512512207, "learning_rate": 1.6965212465555214e-06, "loss": 3.4426, "step": 59430 }, { "epoch": 0.6046040852864584, "grad_norm": 13.176190376281738, "learning_rate": 1.6961425910118423e-06, "loss": 3.6039, "step": 59435 }, { "epoch": 0.6046549479166666, "grad_norm": 13.289362907409668, "learning_rate": 1.695763956034665e-06, "loss": 3.3651, "step": 59440 }, { "epoch": 0.604705810546875, "grad_norm": 12.703752517700195, "learning_rate": 1.6953853416336753e-06, "loss": 3.1128, "step": 59445 }, { "epoch": 0.6047566731770834, "grad_norm": 15.119664192199707, "learning_rate": 1.695006747818561e-06, "loss": 3.4101, "step": 59450 }, { "epoch": 0.6048075358072916, "grad_norm": 8.316143035888672, "learning_rate": 1.6946281745990096e-06, "loss": 3.0702, "step": 59455 }, { "epoch": 0.6048583984375, "grad_norm": 11.337885856628418, "learning_rate": 1.6942496219847037e-06, "loss": 3.1695, "step": 59460 }, { "epoch": 0.6049092610677084, "grad_norm": 15.670044898986816, "learning_rate": 1.693871089985331e-06, "loss": 3.3852, "step": 59465 }, { "epoch": 0.6049601236979166, "grad_norm": 14.897687911987305, "learning_rate": 1.693492578610576e-06, "loss": 3.5343, "step": 59470 }, { "epoch": 0.605010986328125, "grad_norm": 13.250404357910156, "learning_rate": 1.693114087870122e-06, "loss": 3.0307, "step": 59475 }, { "epoch": 0.6050618489583334, "grad_norm": 13.780698776245117, "learning_rate": 1.6927356177736516e-06, "loss": 3.7522, "step": 59480 }, { "epoch": 0.6051127115885416, "grad_norm": 12.571311950683594, "learning_rate": 1.6923571683308497e-06, "loss": 3.1342, "step": 59485 }, { "epoch": 0.60516357421875, "grad_norm": 15.983752250671387, "learning_rate": 1.6919787395513993e-06, "loss": 3.1635, "step": 59490 }, { "epoch": 0.6052144368489584, "grad_norm": 10.591636657714844, "learning_rate": 1.6916003314449804e-06, "loss": 3.5172, "step": 59495 }, { "epoch": 0.6052652994791666, "grad_norm": 15.415030479431152, "learning_rate": 1.6912219440212751e-06, "loss": 3.251, "step": 59500 }, { "epoch": 0.605316162109375, "grad_norm": 16.442806243896484, "learning_rate": 1.6908435772899656e-06, "loss": 3.2447, "step": 59505 }, { "epoch": 0.6053670247395834, "grad_norm": 12.691445350646973, "learning_rate": 1.6904652312607306e-06, "loss": 2.9315, "step": 59510 }, { "epoch": 0.6054178873697916, "grad_norm": 10.97580623626709, "learning_rate": 1.6900869059432518e-06, "loss": 3.3459, "step": 59515 }, { "epoch": 0.60546875, "grad_norm": 7.406706809997559, "learning_rate": 1.689708601347206e-06, "loss": 3.1818, "step": 59520 }, { "epoch": 0.6055196126302084, "grad_norm": 13.156481742858887, "learning_rate": 1.6893303174822746e-06, "loss": 3.1799, "step": 59525 }, { "epoch": 0.6055704752604166, "grad_norm": 15.111701011657715, "learning_rate": 1.688952054358135e-06, "loss": 3.2728, "step": 59530 }, { "epoch": 0.605621337890625, "grad_norm": 14.210783004760742, "learning_rate": 1.688573811984464e-06, "loss": 3.6025, "step": 59535 }, { "epoch": 0.6056722005208334, "grad_norm": 12.524040222167969, "learning_rate": 1.68819559037094e-06, "loss": 3.5204, "step": 59540 }, { "epoch": 0.6057230631510416, "grad_norm": 12.803829193115234, "learning_rate": 1.68781738952724e-06, "loss": 3.8352, "step": 59545 }, { "epoch": 0.60577392578125, "grad_norm": 12.3107328414917, "learning_rate": 1.6874392094630392e-06, "loss": 3.5257, "step": 59550 }, { "epoch": 0.6058247884114584, "grad_norm": 10.546566009521484, "learning_rate": 1.687061050188013e-06, "loss": 3.1221, "step": 59555 }, { "epoch": 0.6058756510416666, "grad_norm": 9.176051139831543, "learning_rate": 1.6866829117118383e-06, "loss": 3.1705, "step": 59560 }, { "epoch": 0.605926513671875, "grad_norm": 10.537932395935059, "learning_rate": 1.6863047940441879e-06, "loss": 3.1709, "step": 59565 }, { "epoch": 0.6059773763020834, "grad_norm": 10.292647361755371, "learning_rate": 1.6859266971947369e-06, "loss": 3.133, "step": 59570 }, { "epoch": 0.6060282389322916, "grad_norm": 10.263216018676758, "learning_rate": 1.6855486211731575e-06, "loss": 3.3067, "step": 59575 }, { "epoch": 0.6060791015625, "grad_norm": 8.906332015991211, "learning_rate": 1.685170565989124e-06, "loss": 3.4641, "step": 59580 }, { "epoch": 0.6061299641927084, "grad_norm": 11.056873321533203, "learning_rate": 1.684792531652309e-06, "loss": 3.4226, "step": 59585 }, { "epoch": 0.6061808268229166, "grad_norm": 12.264915466308594, "learning_rate": 1.6844145181723833e-06, "loss": 3.3957, "step": 59590 }, { "epoch": 0.606231689453125, "grad_norm": 12.649701118469238, "learning_rate": 1.6840365255590186e-06, "loss": 3.1921, "step": 59595 }, { "epoch": 0.6062825520833334, "grad_norm": 13.983546257019043, "learning_rate": 1.6836585538218868e-06, "loss": 3.0141, "step": 59600 }, { "epoch": 0.6063334147135416, "grad_norm": 8.407326698303223, "learning_rate": 1.6832806029706568e-06, "loss": 3.5029, "step": 59605 }, { "epoch": 0.60638427734375, "grad_norm": 14.59919548034668, "learning_rate": 1.6829026730149993e-06, "loss": 3.2456, "step": 59610 }, { "epoch": 0.6064351399739584, "grad_norm": 6.537646293640137, "learning_rate": 1.6825247639645828e-06, "loss": 3.3567, "step": 59615 }, { "epoch": 0.6064860026041666, "grad_norm": 8.798364639282227, "learning_rate": 1.6821468758290765e-06, "loss": 3.3308, "step": 59620 }, { "epoch": 0.606536865234375, "grad_norm": 13.769247055053711, "learning_rate": 1.6817690086181494e-06, "loss": 3.1139, "step": 59625 }, { "epoch": 0.6065877278645834, "grad_norm": 14.075472831726074, "learning_rate": 1.6813911623414674e-06, "loss": 3.2553, "step": 59630 }, { "epoch": 0.6066385904947916, "grad_norm": 16.200557708740234, "learning_rate": 1.6810133370086985e-06, "loss": 3.3608, "step": 59635 }, { "epoch": 0.606689453125, "grad_norm": 8.004645347595215, "learning_rate": 1.6806355326295094e-06, "loss": 2.9995, "step": 59640 }, { "epoch": 0.6067403157552084, "grad_norm": 14.007003784179688, "learning_rate": 1.6802577492135664e-06, "loss": 3.5005, "step": 59645 }, { "epoch": 0.6067911783854166, "grad_norm": 13.227628707885742, "learning_rate": 1.679879986770534e-06, "loss": 3.1238, "step": 59650 }, { "epoch": 0.606842041015625, "grad_norm": 16.293874740600586, "learning_rate": 1.6795022453100773e-06, "loss": 3.0458, "step": 59655 }, { "epoch": 0.6068929036458334, "grad_norm": 10.60593032836914, "learning_rate": 1.6791245248418624e-06, "loss": 3.5369, "step": 59660 }, { "epoch": 0.6069437662760416, "grad_norm": 12.795389175415039, "learning_rate": 1.6787468253755516e-06, "loss": 3.1973, "step": 59665 }, { "epoch": 0.60699462890625, "grad_norm": 7.493161201477051, "learning_rate": 1.6783691469208078e-06, "loss": 3.3322, "step": 59670 }, { "epoch": 0.6070454915364584, "grad_norm": 13.887099266052246, "learning_rate": 1.6779914894872955e-06, "loss": 3.2289, "step": 59675 }, { "epoch": 0.6070963541666666, "grad_norm": 13.77320384979248, "learning_rate": 1.6776138530846758e-06, "loss": 3.4333, "step": 59680 }, { "epoch": 0.607147216796875, "grad_norm": 6.699323654174805, "learning_rate": 1.6772362377226115e-06, "loss": 2.8962, "step": 59685 }, { "epoch": 0.6071980794270834, "grad_norm": 16.819921493530273, "learning_rate": 1.6768586434107616e-06, "loss": 3.3299, "step": 59690 }, { "epoch": 0.6072489420572916, "grad_norm": 7.822608470916748, "learning_rate": 1.6764810701587891e-06, "loss": 3.3881, "step": 59695 }, { "epoch": 0.6072998046875, "grad_norm": 13.360980987548828, "learning_rate": 1.6761035179763536e-06, "loss": 3.5471, "step": 59700 }, { "epoch": 0.6073506673177084, "grad_norm": 8.949075698852539, "learning_rate": 1.6757259868731134e-06, "loss": 3.1237, "step": 59705 }, { "epoch": 0.6074015299479166, "grad_norm": 13.684633255004883, "learning_rate": 1.6753484768587285e-06, "loss": 3.2771, "step": 59710 }, { "epoch": 0.607452392578125, "grad_norm": 9.120360374450684, "learning_rate": 1.6749709879428584e-06, "loss": 3.329, "step": 59715 }, { "epoch": 0.6075032552083334, "grad_norm": 11.726250648498535, "learning_rate": 1.6745935201351587e-06, "loss": 3.0295, "step": 59720 }, { "epoch": 0.6075541178385416, "grad_norm": 7.57747220993042, "learning_rate": 1.6742160734452894e-06, "loss": 3.4097, "step": 59725 }, { "epoch": 0.60760498046875, "grad_norm": 7.077104568481445, "learning_rate": 1.6738386478829049e-06, "loss": 3.1283, "step": 59730 }, { "epoch": 0.6076558430989584, "grad_norm": 12.774635314941406, "learning_rate": 1.6734612434576636e-06, "loss": 3.305, "step": 59735 }, { "epoch": 0.6077067057291666, "grad_norm": 8.077064514160156, "learning_rate": 1.6730838601792203e-06, "loss": 3.3254, "step": 59740 }, { "epoch": 0.607757568359375, "grad_norm": 9.806227684020996, "learning_rate": 1.6727064980572306e-06, "loss": 2.9541, "step": 59745 }, { "epoch": 0.6078084309895834, "grad_norm": 14.675581932067871, "learning_rate": 1.672329157101348e-06, "loss": 3.3303, "step": 59750 }, { "epoch": 0.6078592936197916, "grad_norm": 9.640061378479004, "learning_rate": 1.671951837321229e-06, "loss": 3.2153, "step": 59755 }, { "epoch": 0.60791015625, "grad_norm": 14.155195236206055, "learning_rate": 1.6715745387265255e-06, "loss": 3.1934, "step": 59760 }, { "epoch": 0.6079610188802084, "grad_norm": 13.437745094299316, "learning_rate": 1.6711972613268907e-06, "loss": 3.4795, "step": 59765 }, { "epoch": 0.6080118815104166, "grad_norm": 13.996333122253418, "learning_rate": 1.6708200051319783e-06, "loss": 3.5947, "step": 59770 }, { "epoch": 0.608062744140625, "grad_norm": 11.20358943939209, "learning_rate": 1.6704427701514392e-06, "loss": 3.2206, "step": 59775 }, { "epoch": 0.6081136067708334, "grad_norm": 9.134904861450195, "learning_rate": 1.6700655563949256e-06, "loss": 3.3026, "step": 59780 }, { "epoch": 0.6081644694010416, "grad_norm": 12.813766479492188, "learning_rate": 1.6696883638720864e-06, "loss": 3.1282, "step": 59785 }, { "epoch": 0.60821533203125, "grad_norm": 11.599610328674316, "learning_rate": 1.6693111925925753e-06, "loss": 3.6992, "step": 59790 }, { "epoch": 0.6082661946614584, "grad_norm": 11.954835891723633, "learning_rate": 1.66893404256604e-06, "loss": 3.4668, "step": 59795 }, { "epoch": 0.6083170572916666, "grad_norm": 12.370587348937988, "learning_rate": 1.6685569138021308e-06, "loss": 3.5618, "step": 59800 }, { "epoch": 0.608367919921875, "grad_norm": 9.70938777923584, "learning_rate": 1.6681798063104943e-06, "loss": 3.1108, "step": 59805 }, { "epoch": 0.6084187825520834, "grad_norm": 9.000341415405273, "learning_rate": 1.6678027201007813e-06, "loss": 3.4236, "step": 59810 }, { "epoch": 0.6084696451822916, "grad_norm": 11.784370422363281, "learning_rate": 1.667425655182639e-06, "loss": 3.5309, "step": 59815 }, { "epoch": 0.6085205078125, "grad_norm": 11.995101928710938, "learning_rate": 1.6670486115657131e-06, "loss": 2.7516, "step": 59820 }, { "epoch": 0.6085713704427084, "grad_norm": 12.232709884643555, "learning_rate": 1.6666715892596508e-06, "loss": 3.4496, "step": 59825 }, { "epoch": 0.6086222330729166, "grad_norm": 11.079641342163086, "learning_rate": 1.6662945882740993e-06, "loss": 3.1395, "step": 59830 }, { "epoch": 0.608673095703125, "grad_norm": 12.308189392089844, "learning_rate": 1.6659176086187029e-06, "loss": 3.4337, "step": 59835 }, { "epoch": 0.6087239583333334, "grad_norm": 11.949128150939941, "learning_rate": 1.6655406503031068e-06, "loss": 3.4379, "step": 59840 }, { "epoch": 0.6087748209635416, "grad_norm": 13.886360168457031, "learning_rate": 1.6651637133369547e-06, "loss": 3.0902, "step": 59845 }, { "epoch": 0.60882568359375, "grad_norm": 7.020474433898926, "learning_rate": 1.6647867977298915e-06, "loss": 3.1472, "step": 59850 }, { "epoch": 0.6088765462239584, "grad_norm": 13.490387916564941, "learning_rate": 1.6644099034915607e-06, "loss": 3.1536, "step": 59855 }, { "epoch": 0.6089274088541666, "grad_norm": 12.121207237243652, "learning_rate": 1.6640330306316033e-06, "loss": 3.1876, "step": 59860 }, { "epoch": 0.608978271484375, "grad_norm": 8.730725288391113, "learning_rate": 1.6636561791596634e-06, "loss": 3.4631, "step": 59865 }, { "epoch": 0.6090291341145834, "grad_norm": 10.086065292358398, "learning_rate": 1.6632793490853822e-06, "loss": 3.2787, "step": 59870 }, { "epoch": 0.6090799967447916, "grad_norm": 13.390612602233887, "learning_rate": 1.6629025404184e-06, "loss": 3.4593, "step": 59875 }, { "epoch": 0.609130859375, "grad_norm": 9.88414192199707, "learning_rate": 1.6625257531683579e-06, "loss": 3.0767, "step": 59880 }, { "epoch": 0.6091817220052084, "grad_norm": 8.802709579467773, "learning_rate": 1.6621489873448964e-06, "loss": 3.3471, "step": 59885 }, { "epoch": 0.6092325846354166, "grad_norm": 11.346463203430176, "learning_rate": 1.6617722429576542e-06, "loss": 3.3545, "step": 59890 }, { "epoch": 0.609283447265625, "grad_norm": 12.869057655334473, "learning_rate": 1.6613955200162706e-06, "loss": 3.1323, "step": 59895 }, { "epoch": 0.6093343098958334, "grad_norm": 10.93761157989502, "learning_rate": 1.6610188185303834e-06, "loss": 3.3741, "step": 59900 }, { "epoch": 0.6093851725260416, "grad_norm": 7.27028751373291, "learning_rate": 1.6606421385096311e-06, "loss": 3.0519, "step": 59905 }, { "epoch": 0.60943603515625, "grad_norm": 11.998244285583496, "learning_rate": 1.6602654799636512e-06, "loss": 3.3283, "step": 59910 }, { "epoch": 0.6094868977864584, "grad_norm": 13.576044082641602, "learning_rate": 1.6598888429020792e-06, "loss": 3.683, "step": 59915 }, { "epoch": 0.6095377604166666, "grad_norm": 13.00714111328125, "learning_rate": 1.6595122273345521e-06, "loss": 3.1331, "step": 59920 }, { "epoch": 0.609588623046875, "grad_norm": 10.230525970458984, "learning_rate": 1.659135633270706e-06, "loss": 3.209, "step": 59925 }, { "epoch": 0.6096394856770834, "grad_norm": 8.578835487365723, "learning_rate": 1.6587590607201754e-06, "loss": 3.1594, "step": 59930 }, { "epoch": 0.6096903483072916, "grad_norm": 7.010080814361572, "learning_rate": 1.6583825096925948e-06, "loss": 3.7019, "step": 59935 }, { "epoch": 0.6097412109375, "grad_norm": 9.297857284545898, "learning_rate": 1.6580059801975968e-06, "loss": 3.7343, "step": 59940 }, { "epoch": 0.6097920735677084, "grad_norm": 14.203421592712402, "learning_rate": 1.657629472244818e-06, "loss": 3.1271, "step": 59945 }, { "epoch": 0.6098429361979166, "grad_norm": 7.7891998291015625, "learning_rate": 1.6572529858438888e-06, "loss": 2.9507, "step": 59950 }, { "epoch": 0.609893798828125, "grad_norm": 9.288692474365234, "learning_rate": 1.6568765210044419e-06, "loss": 3.1269, "step": 59955 }, { "epoch": 0.6099446614583334, "grad_norm": 23.345260620117188, "learning_rate": 1.65650007773611e-06, "loss": 3.6945, "step": 59960 }, { "epoch": 0.6099955240885416, "grad_norm": 12.625548362731934, "learning_rate": 1.6561236560485233e-06, "loss": 3.3782, "step": 59965 }, { "epoch": 0.61004638671875, "grad_norm": 9.033101081848145, "learning_rate": 1.6557472559513136e-06, "loss": 3.4724, "step": 59970 }, { "epoch": 0.6100972493489584, "grad_norm": 14.025511741638184, "learning_rate": 1.6553708774541087e-06, "loss": 3.849, "step": 59975 }, { "epoch": 0.6101481119791666, "grad_norm": 12.046526908874512, "learning_rate": 1.6549945205665408e-06, "loss": 3.3072, "step": 59980 }, { "epoch": 0.610198974609375, "grad_norm": 10.726386070251465, "learning_rate": 1.654618185298238e-06, "loss": 3.3468, "step": 59985 }, { "epoch": 0.6102498372395834, "grad_norm": 11.036408424377441, "learning_rate": 1.6542418716588283e-06, "loss": 3.3872, "step": 59990 }, { "epoch": 0.6103006998697916, "grad_norm": 16.50437355041504, "learning_rate": 1.653865579657939e-06, "loss": 3.1342, "step": 59995 }, { "epoch": 0.6103515625, "grad_norm": 9.462359428405762, "learning_rate": 1.6534893093051995e-06, "loss": 3.4811, "step": 60000 }, { "epoch": 0.6104024251302084, "grad_norm": 11.049467086791992, "learning_rate": 1.653113060610235e-06, "loss": 3.3115, "step": 60005 }, { "epoch": 0.6104532877604166, "grad_norm": 13.344293594360352, "learning_rate": 1.6527368335826724e-06, "loss": 3.6994, "step": 60010 }, { "epoch": 0.610504150390625, "grad_norm": 12.55429458618164, "learning_rate": 1.6523606282321361e-06, "loss": 3.4279, "step": 60015 }, { "epoch": 0.6105550130208334, "grad_norm": 14.450660705566406, "learning_rate": 1.6519844445682525e-06, "loss": 3.2444, "step": 60020 }, { "epoch": 0.6106058756510416, "grad_norm": 12.68309497833252, "learning_rate": 1.651608282600647e-06, "loss": 3.4776, "step": 60025 }, { "epoch": 0.61065673828125, "grad_norm": 15.468415260314941, "learning_rate": 1.6512321423389416e-06, "loss": 3.2383, "step": 60030 }, { "epoch": 0.6107076009114584, "grad_norm": 9.671670913696289, "learning_rate": 1.6508560237927601e-06, "loss": 3.4282, "step": 60035 }, { "epoch": 0.6107584635416666, "grad_norm": 11.182158470153809, "learning_rate": 1.650479926971727e-06, "loss": 3.5716, "step": 60040 }, { "epoch": 0.610809326171875, "grad_norm": 10.060457229614258, "learning_rate": 1.6501038518854634e-06, "loss": 3.1253, "step": 60045 }, { "epoch": 0.6108601888020834, "grad_norm": 11.251843452453613, "learning_rate": 1.6497277985435903e-06, "loss": 3.1022, "step": 60050 }, { "epoch": 0.6109110514322916, "grad_norm": 12.302145957946777, "learning_rate": 1.6493517669557314e-06, "loss": 3.2956, "step": 60055 }, { "epoch": 0.6109619140625, "grad_norm": 9.519197463989258, "learning_rate": 1.648975757131505e-06, "loss": 3.225, "step": 60060 }, { "epoch": 0.6110127766927084, "grad_norm": 13.209206581115723, "learning_rate": 1.6485997690805328e-06, "loss": 3.0118, "step": 60065 }, { "epoch": 0.6110636393229166, "grad_norm": 12.686041831970215, "learning_rate": 1.6482238028124328e-06, "loss": 3.8128, "step": 60070 }, { "epoch": 0.611114501953125, "grad_norm": 14.71212100982666, "learning_rate": 1.6478478583368255e-06, "loss": 3.4088, "step": 60075 }, { "epoch": 0.6111653645833334, "grad_norm": 8.9414644241333, "learning_rate": 1.647471935663329e-06, "loss": 3.508, "step": 60080 }, { "epoch": 0.6112162272135416, "grad_norm": 11.188956260681152, "learning_rate": 1.6470960348015607e-06, "loss": 3.1362, "step": 60085 }, { "epoch": 0.61126708984375, "grad_norm": 11.71552848815918, "learning_rate": 1.6467201557611377e-06, "loss": 3.0803, "step": 60090 }, { "epoch": 0.6113179524739584, "grad_norm": 15.764275550842285, "learning_rate": 1.6463442985516776e-06, "loss": 3.8125, "step": 60095 }, { "epoch": 0.6113688151041666, "grad_norm": 9.769700050354004, "learning_rate": 1.6459684631827965e-06, "loss": 3.257, "step": 60100 }, { "epoch": 0.611419677734375, "grad_norm": 9.204610824584961, "learning_rate": 1.6455926496641101e-06, "loss": 3.2443, "step": 60105 }, { "epoch": 0.6114705403645834, "grad_norm": 10.023176193237305, "learning_rate": 1.6452168580052318e-06, "loss": 3.2031, "step": 60110 }, { "epoch": 0.6115214029947916, "grad_norm": 16.631004333496094, "learning_rate": 1.6448410882157794e-06, "loss": 3.0563, "step": 60115 }, { "epoch": 0.611572265625, "grad_norm": 17.92494010925293, "learning_rate": 1.6444653403053642e-06, "loss": 3.433, "step": 60120 }, { "epoch": 0.6116231282552084, "grad_norm": 11.909153938293457, "learning_rate": 1.6440896142836011e-06, "loss": 3.4044, "step": 60125 }, { "epoch": 0.6116739908854166, "grad_norm": 11.371234893798828, "learning_rate": 1.6437139101601014e-06, "loss": 3.6105, "step": 60130 }, { "epoch": 0.611724853515625, "grad_norm": 10.851405143737793, "learning_rate": 1.6433382279444787e-06, "loss": 3.6112, "step": 60135 }, { "epoch": 0.6117757161458334, "grad_norm": 13.315210342407227, "learning_rate": 1.6429625676463448e-06, "loss": 3.4852, "step": 60140 }, { "epoch": 0.6118265787760416, "grad_norm": 10.062915802001953, "learning_rate": 1.6425869292753101e-06, "loss": 3.215, "step": 60145 }, { "epoch": 0.61187744140625, "grad_norm": 15.468650817871094, "learning_rate": 1.642211312840985e-06, "loss": 3.4406, "step": 60150 }, { "epoch": 0.6119283040364584, "grad_norm": 8.585348129272461, "learning_rate": 1.6418357183529816e-06, "loss": 3.0856, "step": 60155 }, { "epoch": 0.6119791666666666, "grad_norm": 12.80462646484375, "learning_rate": 1.6414601458209067e-06, "loss": 3.8671, "step": 60160 }, { "epoch": 0.612030029296875, "grad_norm": 10.727526664733887, "learning_rate": 1.6410845952543703e-06, "loss": 3.5206, "step": 60165 }, { "epoch": 0.6120808919270834, "grad_norm": 13.365429878234863, "learning_rate": 1.640709066662982e-06, "loss": 4.123, "step": 60170 }, { "epoch": 0.6121317545572916, "grad_norm": 9.198284149169922, "learning_rate": 1.640333560056348e-06, "loss": 3.0953, "step": 60175 }, { "epoch": 0.6121826171875, "grad_norm": 10.258539199829102, "learning_rate": 1.639958075444077e-06, "loss": 3.3011, "step": 60180 }, { "epoch": 0.6122334798177084, "grad_norm": 10.84228229522705, "learning_rate": 1.6395826128357733e-06, "loss": 3.4246, "step": 60185 }, { "epoch": 0.6122843424479166, "grad_norm": 12.045772552490234, "learning_rate": 1.639207172241045e-06, "loss": 3.2353, "step": 60190 }, { "epoch": 0.612335205078125, "grad_norm": 13.82982063293457, "learning_rate": 1.6388317536694978e-06, "loss": 3.5885, "step": 60195 }, { "epoch": 0.6123860677083334, "grad_norm": 12.485939025878906, "learning_rate": 1.6384563571307356e-06, "loss": 3.0635, "step": 60200 }, { "epoch": 0.6124369303385416, "grad_norm": 8.347084045410156, "learning_rate": 1.6380809826343628e-06, "loss": 3.2999, "step": 60205 }, { "epoch": 0.61248779296875, "grad_norm": 8.247750282287598, "learning_rate": 1.6377056301899847e-06, "loss": 3.24, "step": 60210 }, { "epoch": 0.6125386555989584, "grad_norm": 8.546690940856934, "learning_rate": 1.6373302998072032e-06, "loss": 3.3027, "step": 60215 }, { "epoch": 0.6125895182291666, "grad_norm": 10.810529708862305, "learning_rate": 1.6369549914956217e-06, "loss": 3.2058, "step": 60220 }, { "epoch": 0.612640380859375, "grad_norm": 11.5653657913208, "learning_rate": 1.6365797052648417e-06, "loss": 3.596, "step": 60225 }, { "epoch": 0.6126912434895834, "grad_norm": 11.945663452148438, "learning_rate": 1.6362044411244654e-06, "loss": 2.9793, "step": 60230 }, { "epoch": 0.6127421061197916, "grad_norm": 11.178726196289062, "learning_rate": 1.6358291990840947e-06, "loss": 3.3166, "step": 60235 }, { "epoch": 0.61279296875, "grad_norm": 11.82205867767334, "learning_rate": 1.635453979153328e-06, "loss": 3.1893, "step": 60240 }, { "epoch": 0.6128438313802084, "grad_norm": 12.103434562683105, "learning_rate": 1.6350787813417662e-06, "loss": 3.2575, "step": 60245 }, { "epoch": 0.6128946940104166, "grad_norm": 10.711541175842285, "learning_rate": 1.6347036056590093e-06, "loss": 3.4353, "step": 60250 }, { "epoch": 0.612945556640625, "grad_norm": 12.123007774353027, "learning_rate": 1.6343284521146557e-06, "loss": 3.0564, "step": 60255 }, { "epoch": 0.6129964192708334, "grad_norm": 12.458571434020996, "learning_rate": 1.6339533207183028e-06, "loss": 3.5443, "step": 60260 }, { "epoch": 0.6130472819010416, "grad_norm": 12.762242317199707, "learning_rate": 1.6335782114795494e-06, "loss": 3.1803, "step": 60265 }, { "epoch": 0.61309814453125, "grad_norm": 13.09855842590332, "learning_rate": 1.6332031244079925e-06, "loss": 3.6583, "step": 60270 }, { "epoch": 0.6131490071614584, "grad_norm": 8.688170433044434, "learning_rate": 1.632828059513228e-06, "loss": 3.4282, "step": 60275 }, { "epoch": 0.6131998697916666, "grad_norm": 10.610824584960938, "learning_rate": 1.6324530168048508e-06, "loss": 3.2545, "step": 60280 }, { "epoch": 0.613250732421875, "grad_norm": 13.22155475616455, "learning_rate": 1.6320779962924593e-06, "loss": 3.0439, "step": 60285 }, { "epoch": 0.6133015950520834, "grad_norm": 9.175676345825195, "learning_rate": 1.6317029979856458e-06, "loss": 3.5291, "step": 60290 }, { "epoch": 0.6133524576822916, "grad_norm": 10.860220909118652, "learning_rate": 1.6313280218940056e-06, "loss": 3.6229, "step": 60295 }, { "epoch": 0.6134033203125, "grad_norm": 14.858694076538086, "learning_rate": 1.6309530680271312e-06, "loss": 3.1163, "step": 60300 }, { "epoch": 0.6134541829427084, "grad_norm": 8.63109302520752, "learning_rate": 1.6305781363946172e-06, "loss": 2.9914, "step": 60305 }, { "epoch": 0.6135050455729166, "grad_norm": 9.039706230163574, "learning_rate": 1.630203227006056e-06, "loss": 3.4852, "step": 60310 }, { "epoch": 0.613555908203125, "grad_norm": 13.092826843261719, "learning_rate": 1.6298283398710383e-06, "loss": 3.4819, "step": 60315 }, { "epoch": 0.6136067708333334, "grad_norm": 10.986658096313477, "learning_rate": 1.6294534749991559e-06, "loss": 3.3921, "step": 60320 }, { "epoch": 0.6136576334635416, "grad_norm": 12.920578002929688, "learning_rate": 1.6290786324000007e-06, "loss": 3.2766, "step": 60325 }, { "epoch": 0.61370849609375, "grad_norm": 12.116365432739258, "learning_rate": 1.6287038120831621e-06, "loss": 3.1988, "step": 60330 }, { "epoch": 0.6137593587239584, "grad_norm": 8.024656295776367, "learning_rate": 1.6283290140582305e-06, "loss": 3.0811, "step": 60335 }, { "epoch": 0.6138102213541666, "grad_norm": 16.094438552856445, "learning_rate": 1.6279542383347935e-06, "loss": 3.1802, "step": 60340 }, { "epoch": 0.613861083984375, "grad_norm": 9.648401260375977, "learning_rate": 1.6275794849224411e-06, "loss": 3.434, "step": 60345 }, { "epoch": 0.6139119466145834, "grad_norm": 10.579446792602539, "learning_rate": 1.627204753830761e-06, "loss": 3.2385, "step": 60350 }, { "epoch": 0.6139628092447916, "grad_norm": 11.142621040344238, "learning_rate": 1.6268300450693397e-06, "loss": 3.5142, "step": 60355 }, { "epoch": 0.614013671875, "grad_norm": 8.502201080322266, "learning_rate": 1.6264553586477651e-06, "loss": 3.7343, "step": 60360 }, { "epoch": 0.6140645345052084, "grad_norm": 12.512798309326172, "learning_rate": 1.6260806945756235e-06, "loss": 4.0635, "step": 60365 }, { "epoch": 0.6141153971354166, "grad_norm": 8.338419914245605, "learning_rate": 1.6257060528625e-06, "loss": 3.2704, "step": 60370 }, { "epoch": 0.614166259765625, "grad_norm": 13.648222923278809, "learning_rate": 1.6253314335179792e-06, "loss": 3.3203, "step": 60375 }, { "epoch": 0.6142171223958334, "grad_norm": 10.438228607177734, "learning_rate": 1.6249568365516474e-06, "loss": 3.4015, "step": 60380 }, { "epoch": 0.6142679850260416, "grad_norm": 12.06532096862793, "learning_rate": 1.6245822619730871e-06, "loss": 3.1786, "step": 60385 }, { "epoch": 0.61431884765625, "grad_norm": 9.721664428710938, "learning_rate": 1.6242077097918824e-06, "loss": 3.1364, "step": 60390 }, { "epoch": 0.6143697102864584, "grad_norm": 13.711265563964844, "learning_rate": 1.6238331800176144e-06, "loss": 3.8421, "step": 60395 }, { "epoch": 0.6144205729166666, "grad_norm": 12.896185874938965, "learning_rate": 1.6234586726598685e-06, "loss": 3.2728, "step": 60400 }, { "epoch": 0.614471435546875, "grad_norm": 6.6597418785095215, "learning_rate": 1.6230841877282242e-06, "loss": 3.3412, "step": 60405 }, { "epoch": 0.6145222981770834, "grad_norm": 10.515288352966309, "learning_rate": 1.6227097252322636e-06, "loss": 3.2781, "step": 60410 }, { "epoch": 0.6145731608072916, "grad_norm": 12.3347749710083, "learning_rate": 1.6223352851815654e-06, "loss": 3.3966, "step": 60415 }, { "epoch": 0.6146240234375, "grad_norm": 9.619572639465332, "learning_rate": 1.6219608675857118e-06, "loss": 3.4378, "step": 60420 }, { "epoch": 0.6146748860677084, "grad_norm": 11.642086029052734, "learning_rate": 1.6215864724542819e-06, "loss": 3.2912, "step": 60425 }, { "epoch": 0.6147257486979166, "grad_norm": 19.23407554626465, "learning_rate": 1.6212120997968528e-06, "loss": 3.6397, "step": 60430 }, { "epoch": 0.614776611328125, "grad_norm": 7.65911865234375, "learning_rate": 1.6208377496230038e-06, "loss": 3.3781, "step": 60435 }, { "epoch": 0.6148274739583334, "grad_norm": 17.415040969848633, "learning_rate": 1.620463421942313e-06, "loss": 3.3809, "step": 60440 }, { "epoch": 0.6148783365885416, "grad_norm": 9.991722106933594, "learning_rate": 1.6200891167643573e-06, "loss": 3.0369, "step": 60445 }, { "epoch": 0.61492919921875, "grad_norm": 10.868343353271484, "learning_rate": 1.6197148340987123e-06, "loss": 3.4453, "step": 60450 }, { "epoch": 0.6149800618489584, "grad_norm": 11.543356895446777, "learning_rate": 1.6193405739549555e-06, "loss": 3.2694, "step": 60455 }, { "epoch": 0.6150309244791666, "grad_norm": 15.196809768676758, "learning_rate": 1.6189663363426612e-06, "loss": 3.1897, "step": 60460 }, { "epoch": 0.615081787109375, "grad_norm": 7.814451217651367, "learning_rate": 1.6185921212714046e-06, "loss": 3.5805, "step": 60465 }, { "epoch": 0.6151326497395834, "grad_norm": 14.216017723083496, "learning_rate": 1.6182179287507582e-06, "loss": 3.2458, "step": 60470 }, { "epoch": 0.6151835123697916, "grad_norm": 13.001861572265625, "learning_rate": 1.6178437587902984e-06, "loss": 3.0432, "step": 60475 }, { "epoch": 0.615234375, "grad_norm": 8.819528579711914, "learning_rate": 1.6174696113995969e-06, "loss": 3.4943, "step": 60480 }, { "epoch": 0.6152852376302084, "grad_norm": 9.95112133026123, "learning_rate": 1.617095486588226e-06, "loss": 3.1944, "step": 60485 }, { "epoch": 0.6153361002604166, "grad_norm": 8.025426864624023, "learning_rate": 1.616721384365757e-06, "loss": 2.825, "step": 60490 }, { "epoch": 0.615386962890625, "grad_norm": 14.088895797729492, "learning_rate": 1.6163473047417632e-06, "loss": 3.2174, "step": 60495 }, { "epoch": 0.6154378255208334, "grad_norm": 11.933631896972656, "learning_rate": 1.615973247725814e-06, "loss": 3.9773, "step": 60500 }, { "epoch": 0.6154886881510416, "grad_norm": 11.40555191040039, "learning_rate": 1.6155992133274795e-06, "loss": 3.2435, "step": 60505 }, { "epoch": 0.61553955078125, "grad_norm": 10.640403747558594, "learning_rate": 1.6152252015563292e-06, "loss": 3.3319, "step": 60510 }, { "epoch": 0.6155904134114584, "grad_norm": 8.765336990356445, "learning_rate": 1.6148512124219324e-06, "loss": 3.2578, "step": 60515 }, { "epoch": 0.6156412760416666, "grad_norm": 9.339484214782715, "learning_rate": 1.6144772459338582e-06, "loss": 3.4517, "step": 60520 }, { "epoch": 0.615692138671875, "grad_norm": 13.520297050476074, "learning_rate": 1.6141033021016732e-06, "loss": 3.8159, "step": 60525 }, { "epoch": 0.6157430013020834, "grad_norm": 8.561175346374512, "learning_rate": 1.6137293809349448e-06, "loss": 3.4511, "step": 60530 }, { "epoch": 0.6157938639322916, "grad_norm": 13.042895317077637, "learning_rate": 1.6133554824432413e-06, "loss": 2.9227, "step": 60535 }, { "epoch": 0.6158447265625, "grad_norm": 16.622398376464844, "learning_rate": 1.6129816066361267e-06, "loss": 3.5857, "step": 60540 }, { "epoch": 0.6158955891927084, "grad_norm": 13.52224349975586, "learning_rate": 1.612607753523167e-06, "loss": 3.1323, "step": 60545 }, { "epoch": 0.6159464518229166, "grad_norm": 12.300966262817383, "learning_rate": 1.6122339231139286e-06, "loss": 3.8293, "step": 60550 }, { "epoch": 0.615997314453125, "grad_norm": 11.415060043334961, "learning_rate": 1.611860115417975e-06, "loss": 3.1111, "step": 60555 }, { "epoch": 0.6160481770833334, "grad_norm": 8.763971328735352, "learning_rate": 1.6114863304448699e-06, "loss": 3.3285, "step": 60560 }, { "epoch": 0.6160990397135416, "grad_norm": 8.566849708557129, "learning_rate": 1.6111125682041752e-06, "loss": 3.5791, "step": 60565 }, { "epoch": 0.61614990234375, "grad_norm": 11.191141128540039, "learning_rate": 1.6107388287054558e-06, "loss": 2.918, "step": 60570 }, { "epoch": 0.6162007649739584, "grad_norm": 6.991872310638428, "learning_rate": 1.6103651119582721e-06, "loss": 3.2489, "step": 60575 }, { "epoch": 0.6162516276041666, "grad_norm": 11.497028350830078, "learning_rate": 1.609991417972187e-06, "loss": 3.2849, "step": 60580 }, { "epoch": 0.616302490234375, "grad_norm": 12.913334846496582, "learning_rate": 1.6096177467567594e-06, "loss": 3.4357, "step": 60585 }, { "epoch": 0.6163533528645834, "grad_norm": 13.08057975769043, "learning_rate": 1.6092440983215514e-06, "loss": 3.703, "step": 60590 }, { "epoch": 0.6164042154947916, "grad_norm": 8.695859909057617, "learning_rate": 1.6088704726761222e-06, "loss": 3.5304, "step": 60595 }, { "epoch": 0.616455078125, "grad_norm": 9.729327201843262, "learning_rate": 1.6084968698300304e-06, "loss": 3.4571, "step": 60600 }, { "epoch": 0.6165059407552084, "grad_norm": 13.946184158325195, "learning_rate": 1.6081232897928345e-06, "loss": 3.5135, "step": 60605 }, { "epoch": 0.6165568033854166, "grad_norm": 14.043551445007324, "learning_rate": 1.6077497325740938e-06, "loss": 3.1448, "step": 60610 }, { "epoch": 0.616607666015625, "grad_norm": 7.511965751647949, "learning_rate": 1.607376198183364e-06, "loss": 3.381, "step": 60615 }, { "epoch": 0.6166585286458334, "grad_norm": 12.47742748260498, "learning_rate": 1.607002686630203e-06, "loss": 3.7181, "step": 60620 }, { "epoch": 0.6167093912760416, "grad_norm": 8.424934387207031, "learning_rate": 1.6066291979241658e-06, "loss": 3.6728, "step": 60625 }, { "epoch": 0.61676025390625, "grad_norm": 15.62203311920166, "learning_rate": 1.6062557320748092e-06, "loss": 3.1998, "step": 60630 }, { "epoch": 0.6168111165364584, "grad_norm": 12.289361000061035, "learning_rate": 1.6058822890916887e-06, "loss": 3.3507, "step": 60635 }, { "epoch": 0.6168619791666666, "grad_norm": 10.98160171508789, "learning_rate": 1.6055088689843566e-06, "loss": 3.4108, "step": 60640 }, { "epoch": 0.616912841796875, "grad_norm": 12.248900413513184, "learning_rate": 1.6051354717623685e-06, "loss": 3.5467, "step": 60645 }, { "epoch": 0.6169637044270834, "grad_norm": 11.305987358093262, "learning_rate": 1.6047620974352779e-06, "loss": 2.991, "step": 60650 }, { "epoch": 0.6170145670572916, "grad_norm": 11.323430061340332, "learning_rate": 1.6043887460126361e-06, "loss": 3.7423, "step": 60655 }, { "epoch": 0.6170654296875, "grad_norm": 9.055760383605957, "learning_rate": 1.6040154175039958e-06, "loss": 2.8189, "step": 60660 }, { "epoch": 0.6171162923177084, "grad_norm": 14.46419620513916, "learning_rate": 1.6036421119189096e-06, "loss": 3.2592, "step": 60665 }, { "epoch": 0.6171671549479166, "grad_norm": 10.725924491882324, "learning_rate": 1.6032688292669268e-06, "loss": 3.2105, "step": 60670 }, { "epoch": 0.617218017578125, "grad_norm": 9.541210174560547, "learning_rate": 1.6028955695575993e-06, "loss": 3.4469, "step": 60675 }, { "epoch": 0.6172688802083334, "grad_norm": 9.244621276855469, "learning_rate": 1.6025223328004747e-06, "loss": 3.2099, "step": 60680 }, { "epoch": 0.6173197428385416, "grad_norm": 18.29025650024414, "learning_rate": 1.602149119005104e-06, "loss": 4.1906, "step": 60685 }, { "epoch": 0.61737060546875, "grad_norm": 12.4437894821167, "learning_rate": 1.6017759281810361e-06, "loss": 3.2861, "step": 60690 }, { "epoch": 0.6174214680989584, "grad_norm": 13.091300010681152, "learning_rate": 1.6014027603378174e-06, "loss": 3.1759, "step": 60695 }, { "epoch": 0.6174723307291666, "grad_norm": 10.310749053955078, "learning_rate": 1.6010296154849958e-06, "loss": 3.1665, "step": 60700 }, { "epoch": 0.617523193359375, "grad_norm": 12.083423614501953, "learning_rate": 1.6006564936321193e-06, "loss": 3.1931, "step": 60705 }, { "epoch": 0.6175740559895834, "grad_norm": 13.541406631469727, "learning_rate": 1.6002833947887326e-06, "loss": 3.6198, "step": 60710 }, { "epoch": 0.6176249186197916, "grad_norm": 7.324504375457764, "learning_rate": 1.5999103189643827e-06, "loss": 3.2634, "step": 60715 }, { "epoch": 0.61767578125, "grad_norm": 12.332198143005371, "learning_rate": 1.599537266168612e-06, "loss": 3.1276, "step": 60720 }, { "epoch": 0.6177266438802084, "grad_norm": 8.473389625549316, "learning_rate": 1.5991642364109689e-06, "loss": 3.4168, "step": 60725 }, { "epoch": 0.6177775065104166, "grad_norm": 17.08176612854004, "learning_rate": 1.5987912297009944e-06, "loss": 3.3005, "step": 60730 }, { "epoch": 0.617828369140625, "grad_norm": 17.63117027282715, "learning_rate": 1.5984182460482336e-06, "loss": 3.0557, "step": 60735 }, { "epoch": 0.6178792317708334, "grad_norm": 17.080352783203125, "learning_rate": 1.598045285462227e-06, "loss": 3.3817, "step": 60740 }, { "epoch": 0.6179300944010416, "grad_norm": 15.415255546569824, "learning_rate": 1.5976723479525185e-06, "loss": 3.4583, "step": 60745 }, { "epoch": 0.61798095703125, "grad_norm": 13.57775592803955, "learning_rate": 1.5972994335286499e-06, "loss": 3.5549, "step": 60750 }, { "epoch": 0.6180318196614584, "grad_norm": 9.898155212402344, "learning_rate": 1.59692654220016e-06, "loss": 3.1014, "step": 60755 }, { "epoch": 0.6180826822916666, "grad_norm": 10.535991668701172, "learning_rate": 1.5965536739765914e-06, "loss": 3.6444, "step": 60760 }, { "epoch": 0.618133544921875, "grad_norm": 11.3397216796875, "learning_rate": 1.5961808288674828e-06, "loss": 3.128, "step": 60765 }, { "epoch": 0.6181844075520834, "grad_norm": 11.788433074951172, "learning_rate": 1.5958080068823733e-06, "loss": 3.14, "step": 60770 }, { "epoch": 0.6182352701822916, "grad_norm": 11.378423690795898, "learning_rate": 1.5954352080308011e-06, "loss": 3.5258, "step": 60775 }, { "epoch": 0.6182861328125, "grad_norm": 16.231063842773438, "learning_rate": 1.5950624323223056e-06, "loss": 3.1571, "step": 60780 }, { "epoch": 0.6183369954427084, "grad_norm": 10.170050621032715, "learning_rate": 1.594689679766423e-06, "loss": 3.3047, "step": 60785 }, { "epoch": 0.6183878580729166, "grad_norm": 10.96178913116455, "learning_rate": 1.5943169503726907e-06, "loss": 3.3667, "step": 60790 }, { "epoch": 0.618438720703125, "grad_norm": 12.257305145263672, "learning_rate": 1.5939442441506434e-06, "loss": 3.4634, "step": 60795 }, { "epoch": 0.6184895833333334, "grad_norm": 10.836377143859863, "learning_rate": 1.5935715611098184e-06, "loss": 3.0399, "step": 60800 }, { "epoch": 0.6185404459635416, "grad_norm": 7.719210147857666, "learning_rate": 1.593198901259751e-06, "loss": 3.208, "step": 60805 }, { "epoch": 0.61859130859375, "grad_norm": 11.608940124511719, "learning_rate": 1.592826264609974e-06, "loss": 3.5864, "step": 60810 }, { "epoch": 0.6186421712239584, "grad_norm": 12.18756103515625, "learning_rate": 1.5924536511700216e-06, "loss": 3.1128, "step": 60815 }, { "epoch": 0.6186930338541666, "grad_norm": 14.461941719055176, "learning_rate": 1.5920810609494283e-06, "loss": 3.4594, "step": 60820 }, { "epoch": 0.618743896484375, "grad_norm": 12.70378303527832, "learning_rate": 1.5917084939577255e-06, "loss": 3.5058, "step": 60825 }, { "epoch": 0.6187947591145834, "grad_norm": 10.086445808410645, "learning_rate": 1.5913359502044459e-06, "loss": 3.3497, "step": 60830 }, { "epoch": 0.6188456217447916, "grad_norm": 13.142423629760742, "learning_rate": 1.59096342969912e-06, "loss": 3.1421, "step": 60835 }, { "epoch": 0.618896484375, "grad_norm": 11.093779563903809, "learning_rate": 1.5905909324512792e-06, "loss": 3.5211, "step": 60840 }, { "epoch": 0.6189473470052084, "grad_norm": 13.340781211853027, "learning_rate": 1.5902184584704549e-06, "loss": 3.4532, "step": 60845 }, { "epoch": 0.6189982096354166, "grad_norm": 11.851021766662598, "learning_rate": 1.5898460077661743e-06, "loss": 3.3527, "step": 60850 }, { "epoch": 0.619049072265625, "grad_norm": 10.32907485961914, "learning_rate": 1.5894735803479688e-06, "loss": 3.1641, "step": 60855 }, { "epoch": 0.6190999348958334, "grad_norm": 13.037763595581055, "learning_rate": 1.589101176225366e-06, "loss": 3.2318, "step": 60860 }, { "epoch": 0.6191507975260416, "grad_norm": 14.834087371826172, "learning_rate": 1.5887287954078935e-06, "loss": 3.5543, "step": 60865 }, { "epoch": 0.61920166015625, "grad_norm": 14.938041687011719, "learning_rate": 1.5883564379050782e-06, "loss": 3.3169, "step": 60870 }, { "epoch": 0.6192525227864584, "grad_norm": 8.798954963684082, "learning_rate": 1.5879841037264476e-06, "loss": 3.2308, "step": 60875 }, { "epoch": 0.6193033854166666, "grad_norm": 11.63089370727539, "learning_rate": 1.5876117928815282e-06, "loss": 3.1783, "step": 60880 }, { "epoch": 0.619354248046875, "grad_norm": 13.736751556396484, "learning_rate": 1.5872395053798446e-06, "loss": 3.2713, "step": 60885 }, { "epoch": 0.6194051106770834, "grad_norm": 9.929327964782715, "learning_rate": 1.5868672412309212e-06, "loss": 5.0198, "step": 60890 }, { "epoch": 0.6194559733072916, "grad_norm": 10.958555221557617, "learning_rate": 1.5864950004442836e-06, "loss": 3.6732, "step": 60895 }, { "epoch": 0.6195068359375, "grad_norm": 13.88362979888916, "learning_rate": 1.5861227830294548e-06, "loss": 3.5338, "step": 60900 }, { "epoch": 0.6195576985677084, "grad_norm": 16.07509422302246, "learning_rate": 1.5857505889959585e-06, "loss": 3.236, "step": 60905 }, { "epoch": 0.6196085611979166, "grad_norm": 9.3330717086792, "learning_rate": 1.5853784183533156e-06, "loss": 3.0099, "step": 60910 }, { "epoch": 0.619659423828125, "grad_norm": 12.54224967956543, "learning_rate": 1.5850062711110497e-06, "loss": 3.1137, "step": 60915 }, { "epoch": 0.6197102864583334, "grad_norm": 10.101669311523438, "learning_rate": 1.584634147278682e-06, "loss": 3.0886, "step": 60920 }, { "epoch": 0.6197611490885416, "grad_norm": 8.978915214538574, "learning_rate": 1.5842620468657316e-06, "loss": 3.1704, "step": 60925 }, { "epoch": 0.61981201171875, "grad_norm": 17.443098068237305, "learning_rate": 1.5838899698817197e-06, "loss": 3.2783, "step": 60930 }, { "epoch": 0.6198628743489584, "grad_norm": 8.883197784423828, "learning_rate": 1.583517916336167e-06, "loss": 3.483, "step": 60935 }, { "epoch": 0.6199137369791666, "grad_norm": 13.600460052490234, "learning_rate": 1.58314588623859e-06, "loss": 3.2715, "step": 60940 }, { "epoch": 0.619964599609375, "grad_norm": 12.828145027160645, "learning_rate": 1.582773879598508e-06, "loss": 3.33, "step": 60945 }, { "epoch": 0.6200154622395834, "grad_norm": 10.351181983947754, "learning_rate": 1.58240189642544e-06, "loss": 3.2824, "step": 60950 }, { "epoch": 0.6200663248697916, "grad_norm": 11.74328327178955, "learning_rate": 1.5820299367289015e-06, "loss": 2.9625, "step": 60955 }, { "epoch": 0.6201171875, "grad_norm": 8.31732177734375, "learning_rate": 1.5816580005184101e-06, "loss": 4.0897, "step": 60960 }, { "epoch": 0.6201680501302084, "grad_norm": 10.640729904174805, "learning_rate": 1.5812860878034796e-06, "loss": 3.3504, "step": 60965 }, { "epoch": 0.6202189127604166, "grad_norm": 12.188005447387695, "learning_rate": 1.5809141985936278e-06, "loss": 2.9415, "step": 60970 }, { "epoch": 0.620269775390625, "grad_norm": 14.315089225769043, "learning_rate": 1.5805423328983688e-06, "loss": 3.2181, "step": 60975 }, { "epoch": 0.6203206380208334, "grad_norm": 13.620156288146973, "learning_rate": 1.5801704907272155e-06, "loss": 3.3906, "step": 60980 }, { "epoch": 0.6203715006510416, "grad_norm": 13.081335067749023, "learning_rate": 1.5797986720896819e-06, "loss": 3.2793, "step": 60985 }, { "epoch": 0.62042236328125, "grad_norm": 14.394508361816406, "learning_rate": 1.5794268769952819e-06, "loss": 3.253, "step": 60990 }, { "epoch": 0.6204732259114584, "grad_norm": 11.010272979736328, "learning_rate": 1.579055105453527e-06, "loss": 3.2714, "step": 60995 }, { "epoch": 0.6205240885416666, "grad_norm": 17.308883666992188, "learning_rate": 1.578683357473929e-06, "loss": 3.7297, "step": 61000 }, { "epoch": 0.620574951171875, "grad_norm": 16.127193450927734, "learning_rate": 1.5783116330659982e-06, "loss": 4.0288, "step": 61005 }, { "epoch": 0.6206258138020834, "grad_norm": 10.189144134521484, "learning_rate": 1.5779399322392461e-06, "loss": 3.0836, "step": 61010 }, { "epoch": 0.6206766764322916, "grad_norm": 12.883061408996582, "learning_rate": 1.5775682550031828e-06, "loss": 3.2443, "step": 61015 }, { "epoch": 0.6207275390625, "grad_norm": 11.590263366699219, "learning_rate": 1.5771966013673169e-06, "loss": 3.6211, "step": 61020 }, { "epoch": 0.6207784016927084, "grad_norm": 12.362700462341309, "learning_rate": 1.5768249713411562e-06, "loss": 3.5006, "step": 61025 }, { "epoch": 0.6208292643229166, "grad_norm": 13.575629234313965, "learning_rate": 1.5764533649342107e-06, "loss": 3.2972, "step": 61030 }, { "epoch": 0.620880126953125, "grad_norm": 10.85159683227539, "learning_rate": 1.576081782155987e-06, "loss": 2.9801, "step": 61035 }, { "epoch": 0.6209309895833334, "grad_norm": 8.414695739746094, "learning_rate": 1.5757102230159915e-06, "loss": 3.2545, "step": 61040 }, { "epoch": 0.6209818522135416, "grad_norm": 81.48870849609375, "learning_rate": 1.5753386875237309e-06, "loss": 3.462, "step": 61045 }, { "epoch": 0.62103271484375, "grad_norm": 10.37614917755127, "learning_rate": 1.5749671756887117e-06, "loss": 3.5636, "step": 61050 }, { "epoch": 0.6210835774739584, "grad_norm": 14.355762481689453, "learning_rate": 1.5745956875204372e-06, "loss": 3.9059, "step": 61055 }, { "epoch": 0.6211344401041666, "grad_norm": 12.319256782531738, "learning_rate": 1.5742242230284122e-06, "loss": 3.194, "step": 61060 }, { "epoch": 0.621185302734375, "grad_norm": 7.4418134689331055, "learning_rate": 1.5738527822221421e-06, "loss": 3.1786, "step": 61065 }, { "epoch": 0.6212361653645834, "grad_norm": 14.95451545715332, "learning_rate": 1.573481365111129e-06, "loss": 3.5189, "step": 61070 }, { "epoch": 0.6212870279947916, "grad_norm": 13.600105285644531, "learning_rate": 1.5731099717048753e-06, "loss": 3.0263, "step": 61075 }, { "epoch": 0.621337890625, "grad_norm": 12.841672897338867, "learning_rate": 1.5727386020128826e-06, "loss": 3.1362, "step": 61080 }, { "epoch": 0.6213887532552084, "grad_norm": 8.517480850219727, "learning_rate": 1.5723672560446537e-06, "loss": 3.2489, "step": 61085 }, { "epoch": 0.6214396158854166, "grad_norm": 11.729193687438965, "learning_rate": 1.571995933809689e-06, "loss": 3.0913, "step": 61090 }, { "epoch": 0.621490478515625, "grad_norm": 7.707211971282959, "learning_rate": 1.571624635317488e-06, "loss": 3.1272, "step": 61095 }, { "epoch": 0.6215413411458334, "grad_norm": 13.470673561096191, "learning_rate": 1.57125336057755e-06, "loss": 3.1542, "step": 61100 }, { "epoch": 0.6215922037760416, "grad_norm": 11.42409610748291, "learning_rate": 1.5708821095993756e-06, "loss": 3.6853, "step": 61105 }, { "epoch": 0.62164306640625, "grad_norm": 15.191719055175781, "learning_rate": 1.570510882392462e-06, "loss": 3.4534, "step": 61110 }, { "epoch": 0.6216939290364584, "grad_norm": 14.386995315551758, "learning_rate": 1.5701396789663074e-06, "loss": 3.2562, "step": 61115 }, { "epoch": 0.6217447916666666, "grad_norm": 15.53824234008789, "learning_rate": 1.5697684993304074e-06, "loss": 3.5162, "step": 61120 }, { "epoch": 0.621795654296875, "grad_norm": 12.227032661437988, "learning_rate": 1.5693973434942608e-06, "loss": 3.472, "step": 61125 }, { "epoch": 0.6218465169270834, "grad_norm": 14.022648811340332, "learning_rate": 1.5690262114673632e-06, "loss": 3.3509, "step": 61130 }, { "epoch": 0.6218973795572916, "grad_norm": 15.221781730651855, "learning_rate": 1.568655103259208e-06, "loss": 3.6707, "step": 61135 }, { "epoch": 0.6219482421875, "grad_norm": 16.67391014099121, "learning_rate": 1.568284018879292e-06, "loss": 3.6313, "step": 61140 }, { "epoch": 0.6219991048177084, "grad_norm": 10.213263511657715, "learning_rate": 1.5679129583371086e-06, "loss": 3.448, "step": 61145 }, { "epoch": 0.6220499674479166, "grad_norm": 12.233593940734863, "learning_rate": 1.567541921642151e-06, "loss": 2.8903, "step": 61150 }, { "epoch": 0.622100830078125, "grad_norm": 14.135449409484863, "learning_rate": 1.567170908803912e-06, "loss": 3.3162, "step": 61155 }, { "epoch": 0.6221516927083334, "grad_norm": 12.742118835449219, "learning_rate": 1.566799919831885e-06, "loss": 3.0368, "step": 61160 }, { "epoch": 0.6222025553385416, "grad_norm": 13.452081680297852, "learning_rate": 1.5664289547355604e-06, "loss": 3.3572, "step": 61165 }, { "epoch": 0.62225341796875, "grad_norm": 14.116361618041992, "learning_rate": 1.5660580135244303e-06, "loss": 3.6128, "step": 61170 }, { "epoch": 0.6223042805989584, "grad_norm": 12.684968948364258, "learning_rate": 1.5656870962079827e-06, "loss": 3.5763, "step": 61175 }, { "epoch": 0.6223551432291666, "grad_norm": 14.246038436889648, "learning_rate": 1.5653162027957108e-06, "loss": 2.9694, "step": 61180 }, { "epoch": 0.622406005859375, "grad_norm": 18.166240692138672, "learning_rate": 1.5649453332971022e-06, "loss": 3.5386, "step": 61185 }, { "epoch": 0.6224568684895834, "grad_norm": 9.938048362731934, "learning_rate": 1.564574487721646e-06, "loss": 3.2802, "step": 61190 }, { "epoch": 0.6225077311197916, "grad_norm": 9.44188117980957, "learning_rate": 1.5642036660788286e-06, "loss": 3.5396, "step": 61195 }, { "epoch": 0.62255859375, "grad_norm": 15.443852424621582, "learning_rate": 1.5638328683781396e-06, "loss": 3.5264, "step": 61200 }, { "epoch": 0.6226094563802084, "grad_norm": 11.711338996887207, "learning_rate": 1.5634620946290646e-06, "loss": 3.3401, "step": 61205 }, { "epoch": 0.6226603190104166, "grad_norm": 18.48138427734375, "learning_rate": 1.56309134484109e-06, "loss": 3.2839, "step": 61210 }, { "epoch": 0.622711181640625, "grad_norm": 14.25977897644043, "learning_rate": 1.5627206190237004e-06, "loss": 3.3824, "step": 61215 }, { "epoch": 0.6227620442708334, "grad_norm": 10.02924919128418, "learning_rate": 1.5623499171863826e-06, "loss": 3.4369, "step": 61220 }, { "epoch": 0.6228129069010416, "grad_norm": 12.48977279663086, "learning_rate": 1.5619792393386196e-06, "loss": 3.9375, "step": 61225 }, { "epoch": 0.62286376953125, "grad_norm": 9.716508865356445, "learning_rate": 1.561608585489896e-06, "loss": 3.3135, "step": 61230 }, { "epoch": 0.6229146321614584, "grad_norm": 10.432272911071777, "learning_rate": 1.5612379556496932e-06, "loss": 3.3268, "step": 61235 }, { "epoch": 0.6229654947916666, "grad_norm": 12.925854682922363, "learning_rate": 1.560867349827495e-06, "loss": 3.3902, "step": 61240 }, { "epoch": 0.623016357421875, "grad_norm": 12.435482025146484, "learning_rate": 1.560496768032784e-06, "loss": 3.2661, "step": 61245 }, { "epoch": 0.6230672200520834, "grad_norm": 8.915007591247559, "learning_rate": 1.5601262102750391e-06, "loss": 3.4743, "step": 61250 }, { "epoch": 0.6231180826822916, "grad_norm": 8.147088050842285, "learning_rate": 1.5597556765637434e-06, "loss": 3.2753, "step": 61255 }, { "epoch": 0.6231689453125, "grad_norm": 12.007842063903809, "learning_rate": 1.559385166908376e-06, "loss": 3.073, "step": 61260 }, { "epoch": 0.6232198079427084, "grad_norm": 10.465039253234863, "learning_rate": 1.5590146813184156e-06, "loss": 3.2077, "step": 61265 }, { "epoch": 0.6232706705729166, "grad_norm": 10.362761497497559, "learning_rate": 1.558644219803341e-06, "loss": 3.4545, "step": 61270 }, { "epoch": 0.623321533203125, "grad_norm": 11.860074043273926, "learning_rate": 1.5582737823726322e-06, "loss": 3.2515, "step": 61275 }, { "epoch": 0.6233723958333334, "grad_norm": 8.5953950881958, "learning_rate": 1.5579033690357647e-06, "loss": 3.0689, "step": 61280 }, { "epoch": 0.6234232584635416, "grad_norm": 15.707984924316406, "learning_rate": 1.557532979802217e-06, "loss": 3.8959, "step": 61285 }, { "epoch": 0.62347412109375, "grad_norm": 15.859342575073242, "learning_rate": 1.5571626146814634e-06, "loss": 3.5281, "step": 61290 }, { "epoch": 0.6235249837239584, "grad_norm": 15.086223602294922, "learning_rate": 1.556792273682981e-06, "loss": 3.5218, "step": 61295 }, { "epoch": 0.6235758463541666, "grad_norm": 10.326348304748535, "learning_rate": 1.556421956816246e-06, "loss": 3.0658, "step": 61300 }, { "epoch": 0.623626708984375, "grad_norm": 16.39034652709961, "learning_rate": 1.5560516640907304e-06, "loss": 3.3312, "step": 61305 }, { "epoch": 0.6236775716145834, "grad_norm": 10.670428276062012, "learning_rate": 1.5556813955159089e-06, "loss": 3.3061, "step": 61310 }, { "epoch": 0.6237284342447916, "grad_norm": 10.530497550964355, "learning_rate": 1.5553111511012565e-06, "loss": 3.2695, "step": 61315 }, { "epoch": 0.623779296875, "grad_norm": 10.719157218933105, "learning_rate": 1.5549409308562433e-06, "loss": 3.2204, "step": 61320 }, { "epoch": 0.6238301595052084, "grad_norm": 9.842597007751465, "learning_rate": 1.5545707347903432e-06, "loss": 3.0986, "step": 61325 }, { "epoch": 0.6238810221354166, "grad_norm": 12.339625358581543, "learning_rate": 1.5542005629130253e-06, "loss": 3.1297, "step": 61330 }, { "epoch": 0.623931884765625, "grad_norm": 14.002964973449707, "learning_rate": 1.553830415233763e-06, "loss": 3.5682, "step": 61335 }, { "epoch": 0.6239827473958334, "grad_norm": 10.0540771484375, "learning_rate": 1.553460291762025e-06, "loss": 3.4962, "step": 61340 }, { "epoch": 0.6240336100260416, "grad_norm": 14.774270057678223, "learning_rate": 1.5530901925072806e-06, "loss": 3.4707, "step": 61345 }, { "epoch": 0.62408447265625, "grad_norm": 7.532827854156494, "learning_rate": 1.5527201174789999e-06, "loss": 3.2857, "step": 61350 }, { "epoch": 0.6241353352864584, "grad_norm": 16.660783767700195, "learning_rate": 1.5523500666866503e-06, "loss": 3.2688, "step": 61355 }, { "epoch": 0.6241861979166666, "grad_norm": 7.445517539978027, "learning_rate": 1.5519800401397e-06, "loss": 3.1697, "step": 61360 }, { "epoch": 0.624237060546875, "grad_norm": 8.375680923461914, "learning_rate": 1.5516100378476146e-06, "loss": 3.0433, "step": 61365 }, { "epoch": 0.6242879231770834, "grad_norm": 14.068017959594727, "learning_rate": 1.551240059819862e-06, "loss": 3.3664, "step": 61370 }, { "epoch": 0.6243387858072916, "grad_norm": 16.048603057861328, "learning_rate": 1.550870106065908e-06, "loss": 3.3437, "step": 61375 }, { "epoch": 0.6243896484375, "grad_norm": 12.818937301635742, "learning_rate": 1.5505001765952175e-06, "loss": 3.2911, "step": 61380 }, { "epoch": 0.6244405110677084, "grad_norm": 10.751648902893066, "learning_rate": 1.550130271417254e-06, "loss": 3.1494, "step": 61385 }, { "epoch": 0.6244913736979166, "grad_norm": 8.600159645080566, "learning_rate": 1.5497603905414832e-06, "loss": 3.7665, "step": 61390 }, { "epoch": 0.624542236328125, "grad_norm": 9.184415817260742, "learning_rate": 1.5493905339773674e-06, "loss": 3.1591, "step": 61395 }, { "epoch": 0.6245930989583334, "grad_norm": 11.268553733825684, "learning_rate": 1.54902070173437e-06, "loss": 3.1523, "step": 61400 }, { "epoch": 0.6246439615885416, "grad_norm": 8.070582389831543, "learning_rate": 1.5486508938219514e-06, "loss": 3.2748, "step": 61405 }, { "epoch": 0.62469482421875, "grad_norm": 13.604533195495605, "learning_rate": 1.5482811102495746e-06, "loss": 3.5694, "step": 61410 }, { "epoch": 0.6247456868489584, "grad_norm": 14.422013282775879, "learning_rate": 1.5479113510267008e-06, "loss": 3.2941, "step": 61415 }, { "epoch": 0.6247965494791666, "grad_norm": 12.898037910461426, "learning_rate": 1.5475416161627885e-06, "loss": 3.9211, "step": 61420 }, { "epoch": 0.624847412109375, "grad_norm": 11.304436683654785, "learning_rate": 1.5471719056672979e-06, "loss": 3.0899, "step": 61425 }, { "epoch": 0.6248982747395834, "grad_norm": 13.482966423034668, "learning_rate": 1.546802219549689e-06, "loss": 2.9373, "step": 61430 }, { "epoch": 0.6249491373697916, "grad_norm": 8.581000328063965, "learning_rate": 1.5464325578194194e-06, "loss": 3.0325, "step": 61435 }, { "epoch": 0.625, "grad_norm": 8.412178039550781, "learning_rate": 1.5460629204859458e-06, "loss": 3.5235, "step": 61440 }, { "epoch": 0.6250508626302084, "grad_norm": 12.63332748413086, "learning_rate": 1.5456933075587278e-06, "loss": 3.7609, "step": 61445 }, { "epoch": 0.6251017252604166, "grad_norm": 10.920650482177734, "learning_rate": 1.5453237190472191e-06, "loss": 3.3654, "step": 61450 }, { "epoch": 0.625152587890625, "grad_norm": 11.684036254882812, "learning_rate": 1.544954154960878e-06, "loss": 3.0327, "step": 61455 }, { "epoch": 0.6252034505208334, "grad_norm": 180.89102172851562, "learning_rate": 1.544584615309157e-06, "loss": 4.0488, "step": 61460 }, { "epoch": 0.6252543131510416, "grad_norm": 9.9097318649292, "learning_rate": 1.5442151001015126e-06, "loss": 3.1162, "step": 61465 }, { "epoch": 0.62530517578125, "grad_norm": 13.314180374145508, "learning_rate": 1.5438456093473992e-06, "loss": 3.2771, "step": 61470 }, { "epoch": 0.6253560384114584, "grad_norm": 6.927476406097412, "learning_rate": 1.5434761430562687e-06, "loss": 3.0689, "step": 61475 }, { "epoch": 0.6254069010416666, "grad_norm": 9.061094284057617, "learning_rate": 1.5431067012375736e-06, "loss": 3.4548, "step": 61480 }, { "epoch": 0.625457763671875, "grad_norm": 9.196162223815918, "learning_rate": 1.5427372839007676e-06, "loss": 3.1121, "step": 61485 }, { "epoch": 0.6255086263020834, "grad_norm": 10.728259086608887, "learning_rate": 1.5423678910553014e-06, "loss": 3.279, "step": 61490 }, { "epoch": 0.6255594889322916, "grad_norm": 14.14704704284668, "learning_rate": 1.5419985227106254e-06, "loss": 3.7505, "step": 61495 }, { "epoch": 0.6256103515625, "grad_norm": 13.941288948059082, "learning_rate": 1.5416291788761895e-06, "loss": 3.2193, "step": 61500 }, { "epoch": 0.6256612141927084, "grad_norm": 10.623527526855469, "learning_rate": 1.5412598595614453e-06, "loss": 3.2586, "step": 61505 }, { "epoch": 0.6257120768229166, "grad_norm": 12.52288818359375, "learning_rate": 1.5408905647758394e-06, "loss": 3.1298, "step": 61510 }, { "epoch": 0.625762939453125, "grad_norm": 8.349310874938965, "learning_rate": 1.5405212945288218e-06, "loss": 3.1393, "step": 61515 }, { "epoch": 0.6258138020833334, "grad_norm": 9.99730110168457, "learning_rate": 1.5401520488298382e-06, "loss": 3.5043, "step": 61520 }, { "epoch": 0.6258646647135416, "grad_norm": 9.792665481567383, "learning_rate": 1.5397828276883379e-06, "loss": 3.4502, "step": 61525 }, { "epoch": 0.62591552734375, "grad_norm": 15.315463066101074, "learning_rate": 1.5394136311137663e-06, "loss": 3.7809, "step": 61530 }, { "epoch": 0.6259663899739584, "grad_norm": 9.428129196166992, "learning_rate": 1.5390444591155688e-06, "loss": 3.388, "step": 61535 }, { "epoch": 0.6260172526041666, "grad_norm": 18.923006057739258, "learning_rate": 1.538675311703191e-06, "loss": 3.0328, "step": 61540 }, { "epoch": 0.626068115234375, "grad_norm": 8.522552490234375, "learning_rate": 1.5383061888860784e-06, "loss": 3.3867, "step": 61545 }, { "epoch": 0.6261189778645834, "grad_norm": 14.090705871582031, "learning_rate": 1.5379370906736735e-06, "loss": 3.239, "step": 61550 }, { "epoch": 0.6261698404947916, "grad_norm": 11.4149808883667, "learning_rate": 1.5375680170754192e-06, "loss": 3.5858, "step": 61555 }, { "epoch": 0.626220703125, "grad_norm": 14.478057861328125, "learning_rate": 1.5371989681007604e-06, "loss": 3.4361, "step": 61560 }, { "epoch": 0.6262715657552084, "grad_norm": 14.236669540405273, "learning_rate": 1.5368299437591371e-06, "loss": 3.6861, "step": 61565 }, { "epoch": 0.6263224283854166, "grad_norm": 12.363887786865234, "learning_rate": 1.5364609440599924e-06, "loss": 3.1573, "step": 61570 }, { "epoch": 0.626373291015625, "grad_norm": 12.806648254394531, "learning_rate": 1.5360919690127646e-06, "loss": 3.0565, "step": 61575 }, { "epoch": 0.6264241536458334, "grad_norm": 10.38432502746582, "learning_rate": 1.5357230186268959e-06, "loss": 3.375, "step": 61580 }, { "epoch": 0.6264750162760416, "grad_norm": 13.700526237487793, "learning_rate": 1.5353540929118257e-06, "loss": 3.3333, "step": 61585 }, { "epoch": 0.62652587890625, "grad_norm": 12.815266609191895, "learning_rate": 1.534985191876992e-06, "loss": 3.2802, "step": 61590 }, { "epoch": 0.6265767415364584, "grad_norm": 15.226441383361816, "learning_rate": 1.5346163155318328e-06, "loss": 3.2804, "step": 61595 }, { "epoch": 0.6266276041666666, "grad_norm": 20.004838943481445, "learning_rate": 1.5342474638857874e-06, "loss": 3.2972, "step": 61600 }, { "epoch": 0.626678466796875, "grad_norm": 16.466358184814453, "learning_rate": 1.5338786369482911e-06, "loss": 3.2316, "step": 61605 }, { "epoch": 0.6267293294270834, "grad_norm": 11.442941665649414, "learning_rate": 1.5335098347287813e-06, "loss": 3.0558, "step": 61610 }, { "epoch": 0.6267801920572916, "grad_norm": 9.389209747314453, "learning_rate": 1.5331410572366927e-06, "loss": 3.4597, "step": 61615 }, { "epoch": 0.6268310546875, "grad_norm": 15.952877044677734, "learning_rate": 1.5327723044814612e-06, "loss": 3.5438, "step": 61620 }, { "epoch": 0.6268819173177084, "grad_norm": 15.153554916381836, "learning_rate": 1.5324035764725216e-06, "loss": 3.6297, "step": 61625 }, { "epoch": 0.6269327799479166, "grad_norm": 10.831584930419922, "learning_rate": 1.5320348732193058e-06, "loss": 3.3283, "step": 61630 }, { "epoch": 0.626983642578125, "grad_norm": 10.518292427062988, "learning_rate": 1.5316661947312495e-06, "loss": 3.2956, "step": 61635 }, { "epoch": 0.6270345052083334, "grad_norm": 8.727978706359863, "learning_rate": 1.5312975410177838e-06, "loss": 4.0958, "step": 61640 }, { "epoch": 0.6270853678385416, "grad_norm": 13.804841995239258, "learning_rate": 1.5309289120883414e-06, "loss": 3.0639, "step": 61645 }, { "epoch": 0.62713623046875, "grad_norm": 13.853920936584473, "learning_rate": 1.5305603079523519e-06, "loss": 3.2488, "step": 61650 }, { "epoch": 0.6271870930989584, "grad_norm": 13.531047821044922, "learning_rate": 1.5301917286192478e-06, "loss": 3.1688, "step": 61655 }, { "epoch": 0.6272379557291666, "grad_norm": 13.21961498260498, "learning_rate": 1.529823174098459e-06, "loss": 3.1852, "step": 61660 }, { "epoch": 0.627288818359375, "grad_norm": 13.482869148254395, "learning_rate": 1.5294546443994139e-06, "loss": 3.5621, "step": 61665 }, { "epoch": 0.6273396809895834, "grad_norm": 13.146780014038086, "learning_rate": 1.529086139531541e-06, "loss": 3.6089, "step": 61670 }, { "epoch": 0.6273905436197916, "grad_norm": 8.145610809326172, "learning_rate": 1.5287176595042701e-06, "loss": 3.1591, "step": 61675 }, { "epoch": 0.62744140625, "grad_norm": 11.181539535522461, "learning_rate": 1.528349204327027e-06, "loss": 3.2037, "step": 61680 }, { "epoch": 0.6274922688802084, "grad_norm": 8.060432434082031, "learning_rate": 1.52798077400924e-06, "loss": 3.4265, "step": 61685 }, { "epoch": 0.6275431315104166, "grad_norm": 8.361469268798828, "learning_rate": 1.5276123685603334e-06, "loss": 3.7025, "step": 61690 }, { "epoch": 0.627593994140625, "grad_norm": 10.280891418457031, "learning_rate": 1.5272439879897345e-06, "loss": 3.2371, "step": 61695 }, { "epoch": 0.6276448567708334, "grad_norm": 8.784655570983887, "learning_rate": 1.5268756323068678e-06, "loss": 3.1614, "step": 61700 }, { "epoch": 0.6276957194010416, "grad_norm": 13.677227020263672, "learning_rate": 1.5265073015211574e-06, "loss": 3.5891, "step": 61705 }, { "epoch": 0.62774658203125, "grad_norm": 10.98469352722168, "learning_rate": 1.5261389956420258e-06, "loss": 3.4069, "step": 61710 }, { "epoch": 0.6277974446614584, "grad_norm": 13.143728256225586, "learning_rate": 1.5257707146788985e-06, "loss": 3.427, "step": 61715 }, { "epoch": 0.6278483072916666, "grad_norm": 8.646453857421875, "learning_rate": 1.5254024586411962e-06, "loss": 3.2642, "step": 61720 }, { "epoch": 0.627899169921875, "grad_norm": 8.369274139404297, "learning_rate": 1.52503422753834e-06, "loss": 3.2025, "step": 61725 }, { "epoch": 0.6279500325520834, "grad_norm": 12.068510055541992, "learning_rate": 1.5246660213797537e-06, "loss": 3.5439, "step": 61730 }, { "epoch": 0.6280008951822916, "grad_norm": 9.908411979675293, "learning_rate": 1.5242978401748549e-06, "loss": 3.1221, "step": 61735 }, { "epoch": 0.6280517578125, "grad_norm": 13.45252513885498, "learning_rate": 1.5239296839330656e-06, "loss": 3.3488, "step": 61740 }, { "epoch": 0.6281026204427084, "grad_norm": 12.97950553894043, "learning_rate": 1.5235615526638026e-06, "loss": 3.1984, "step": 61745 }, { "epoch": 0.6281534830729166, "grad_norm": 14.618078231811523, "learning_rate": 1.5231934463764863e-06, "loss": 3.3464, "step": 61750 }, { "epoch": 0.628204345703125, "grad_norm": 16.660869598388672, "learning_rate": 1.5228253650805353e-06, "loss": 3.3367, "step": 61755 }, { "epoch": 0.6282552083333334, "grad_norm": 9.507746696472168, "learning_rate": 1.5224573087853645e-06, "loss": 3.1135, "step": 61760 }, { "epoch": 0.6283060709635416, "grad_norm": 15.523812294006348, "learning_rate": 1.5220892775003915e-06, "loss": 3.5686, "step": 61765 }, { "epoch": 0.62835693359375, "grad_norm": 15.808150291442871, "learning_rate": 1.5217212712350338e-06, "loss": 3.2818, "step": 61770 }, { "epoch": 0.6284077962239584, "grad_norm": 12.82918643951416, "learning_rate": 1.5213532899987044e-06, "loss": 3.3855, "step": 61775 }, { "epoch": 0.6284586588541666, "grad_norm": 8.895464897155762, "learning_rate": 1.52098533380082e-06, "loss": 3.0504, "step": 61780 }, { "epoch": 0.628509521484375, "grad_norm": 8.048547744750977, "learning_rate": 1.5206174026507925e-06, "loss": 3.0557, "step": 61785 }, { "epoch": 0.6285603841145834, "grad_norm": 12.804614067077637, "learning_rate": 1.5202494965580378e-06, "loss": 3.738, "step": 61790 }, { "epoch": 0.6286112467447916, "grad_norm": 14.638296127319336, "learning_rate": 1.5198816155319667e-06, "loss": 3.0572, "step": 61795 }, { "epoch": 0.628662109375, "grad_norm": 12.839740753173828, "learning_rate": 1.519513759581993e-06, "loss": 3.1828, "step": 61800 }, { "epoch": 0.6287129720052084, "grad_norm": 11.563626289367676, "learning_rate": 1.5191459287175266e-06, "loss": 3.7413, "step": 61805 }, { "epoch": 0.6287638346354166, "grad_norm": 12.180401802062988, "learning_rate": 1.5187781229479796e-06, "loss": 3.4383, "step": 61810 }, { "epoch": 0.628814697265625, "grad_norm": 9.479646682739258, "learning_rate": 1.5184103422827618e-06, "loss": 3.2573, "step": 61815 }, { "epoch": 0.6288655598958334, "grad_norm": 13.48900032043457, "learning_rate": 1.5180425867312825e-06, "loss": 3.4651, "step": 61820 }, { "epoch": 0.6289164225260416, "grad_norm": 12.358738899230957, "learning_rate": 1.5176748563029504e-06, "loss": 3.4829, "step": 61825 }, { "epoch": 0.62896728515625, "grad_norm": 10.56752872467041, "learning_rate": 1.5173071510071753e-06, "loss": 3.4634, "step": 61830 }, { "epoch": 0.6290181477864584, "grad_norm": 10.471076965332031, "learning_rate": 1.516939470853363e-06, "loss": 2.9963, "step": 61835 }, { "epoch": 0.6290690104166666, "grad_norm": 13.142584800720215, "learning_rate": 1.5165718158509212e-06, "loss": 3.2467, "step": 61840 }, { "epoch": 0.629119873046875, "grad_norm": 10.433077812194824, "learning_rate": 1.5162041860092574e-06, "loss": 3.4712, "step": 61845 }, { "epoch": 0.6291707356770834, "grad_norm": 11.209397315979004, "learning_rate": 1.5158365813377758e-06, "loss": 3.2425, "step": 61850 }, { "epoch": 0.6292215983072916, "grad_norm": 10.223564147949219, "learning_rate": 1.5154690018458823e-06, "loss": 3.2572, "step": 61855 }, { "epoch": 0.6292724609375, "grad_norm": 18.375215530395508, "learning_rate": 1.5151014475429798e-06, "loss": 3.4334, "step": 61860 }, { "epoch": 0.6293233235677084, "grad_norm": 10.725737571716309, "learning_rate": 1.5147339184384741e-06, "loss": 3.3111, "step": 61865 }, { "epoch": 0.6293741861979166, "grad_norm": 15.086240768432617, "learning_rate": 1.514366414541768e-06, "loss": 3.3796, "step": 61870 }, { "epoch": 0.629425048828125, "grad_norm": 13.658660888671875, "learning_rate": 1.5139989358622633e-06, "loss": 3.2555, "step": 61875 }, { "epoch": 0.6294759114583334, "grad_norm": 11.644039154052734, "learning_rate": 1.5136314824093616e-06, "loss": 3.7394, "step": 61880 }, { "epoch": 0.6295267740885416, "grad_norm": 9.223989486694336, "learning_rate": 1.5132640541924654e-06, "loss": 3.5052, "step": 61885 }, { "epoch": 0.62957763671875, "grad_norm": 9.818203926086426, "learning_rate": 1.5128966512209741e-06, "loss": 3.4412, "step": 61890 }, { "epoch": 0.6296284993489584, "grad_norm": 9.734807014465332, "learning_rate": 1.5125292735042884e-06, "loss": 3.2482, "step": 61895 }, { "epoch": 0.6296793619791666, "grad_norm": 11.073369026184082, "learning_rate": 1.5121619210518062e-06, "loss": 3.3511, "step": 61900 }, { "epoch": 0.629730224609375, "grad_norm": 11.540410995483398, "learning_rate": 1.5117945938729279e-06, "loss": 3.3419, "step": 61905 }, { "epoch": 0.6297810872395834, "grad_norm": 10.681864738464355, "learning_rate": 1.5114272919770506e-06, "loss": 3.4348, "step": 61910 }, { "epoch": 0.6298319498697916, "grad_norm": 14.877259254455566, "learning_rate": 1.5110600153735715e-06, "loss": 3.6201, "step": 61915 }, { "epoch": 0.6298828125, "grad_norm": 12.818219184875488, "learning_rate": 1.510692764071887e-06, "loss": 2.9859, "step": 61920 }, { "epoch": 0.6299336751302084, "grad_norm": 15.593382835388184, "learning_rate": 1.5103255380813953e-06, "loss": 2.9853, "step": 61925 }, { "epoch": 0.6299845377604166, "grad_norm": 11.005517959594727, "learning_rate": 1.509958337411489e-06, "loss": 3.2757, "step": 61930 }, { "epoch": 0.630035400390625, "grad_norm": 17.521547317504883, "learning_rate": 1.5095911620715633e-06, "loss": 3.0168, "step": 61935 }, { "epoch": 0.6300862630208334, "grad_norm": 13.582290649414062, "learning_rate": 1.509224012071014e-06, "loss": 3.0386, "step": 61940 }, { "epoch": 0.6301371256510416, "grad_norm": 13.638851165771484, "learning_rate": 1.5088568874192333e-06, "loss": 3.4342, "step": 61945 }, { "epoch": 0.63018798828125, "grad_norm": 11.255894660949707, "learning_rate": 1.5084897881256142e-06, "loss": 3.3469, "step": 61950 }, { "epoch": 0.6302388509114584, "grad_norm": 11.319867134094238, "learning_rate": 1.5081227141995483e-06, "loss": 3.3897, "step": 61955 }, { "epoch": 0.6302897135416666, "grad_norm": 10.453210830688477, "learning_rate": 1.5077556656504285e-06, "loss": 3.5842, "step": 61960 }, { "epoch": 0.630340576171875, "grad_norm": 11.986652374267578, "learning_rate": 1.5073886424876443e-06, "loss": 3.0917, "step": 61965 }, { "epoch": 0.6303914388020834, "grad_norm": 13.140625953674316, "learning_rate": 1.5070216447205871e-06, "loss": 3.1504, "step": 61970 }, { "epoch": 0.6304423014322916, "grad_norm": 11.39574146270752, "learning_rate": 1.5066546723586448e-06, "loss": 3.3914, "step": 61975 }, { "epoch": 0.6304931640625, "grad_norm": 9.115983963012695, "learning_rate": 1.5062877254112077e-06, "loss": 3.6619, "step": 61980 }, { "epoch": 0.6305440266927084, "grad_norm": 10.484349250793457, "learning_rate": 1.5059208038876639e-06, "loss": 3.2941, "step": 61985 }, { "epoch": 0.6305948893229166, "grad_norm": 11.486682891845703, "learning_rate": 1.5055539077974004e-06, "loss": 3.3043, "step": 61990 }, { "epoch": 0.630645751953125, "grad_norm": 10.346829414367676, "learning_rate": 1.505187037149804e-06, "loss": 3.28, "step": 61995 }, { "epoch": 0.6306966145833334, "grad_norm": 9.820465087890625, "learning_rate": 1.5048201919542623e-06, "loss": 3.5778, "step": 62000 }, { "epoch": 0.6307474772135416, "grad_norm": 12.007710456848145, "learning_rate": 1.50445337222016e-06, "loss": 3.1602, "step": 62005 }, { "epoch": 0.63079833984375, "grad_norm": 8.000428199768066, "learning_rate": 1.5040865779568822e-06, "loss": 3.3569, "step": 62010 }, { "epoch": 0.6308492024739584, "grad_norm": 8.897919654846191, "learning_rate": 1.5037198091738125e-06, "loss": 3.1817, "step": 62015 }, { "epoch": 0.6309000651041666, "grad_norm": 15.00073528289795, "learning_rate": 1.5033530658803357e-06, "loss": 3.3084, "step": 62020 }, { "epoch": 0.630950927734375, "grad_norm": 12.301580429077148, "learning_rate": 1.502986348085835e-06, "loss": 3.1554, "step": 62025 }, { "epoch": 0.6310017903645834, "grad_norm": 12.632869720458984, "learning_rate": 1.5026196557996913e-06, "loss": 3.0949, "step": 62030 }, { "epoch": 0.6310526529947916, "grad_norm": 16.767946243286133, "learning_rate": 1.502252989031288e-06, "loss": 3.4847, "step": 62035 }, { "epoch": 0.631103515625, "grad_norm": 13.190328598022461, "learning_rate": 1.501886347790006e-06, "loss": 3.3136, "step": 62040 }, { "epoch": 0.6311543782552084, "grad_norm": 10.962267875671387, "learning_rate": 1.5015197320852244e-06, "loss": 3.6856, "step": 62045 }, { "epoch": 0.6312052408854166, "grad_norm": 8.856995582580566, "learning_rate": 1.5011531419263231e-06, "loss": 3.206, "step": 62050 }, { "epoch": 0.631256103515625, "grad_norm": 14.306882858276367, "learning_rate": 1.5007865773226831e-06, "loss": 3.2216, "step": 62055 }, { "epoch": 0.6313069661458334, "grad_norm": 9.836094856262207, "learning_rate": 1.5004200382836814e-06, "loss": 3.1486, "step": 62060 }, { "epoch": 0.6313578287760416, "grad_norm": 10.92822551727295, "learning_rate": 1.5000535248186965e-06, "loss": 3.336, "step": 62065 }, { "epoch": 0.63140869140625, "grad_norm": 16.33973503112793, "learning_rate": 1.499687036937104e-06, "loss": 3.3093, "step": 62070 }, { "epoch": 0.6314595540364584, "grad_norm": 10.504921913146973, "learning_rate": 1.4993205746482825e-06, "loss": 2.9332, "step": 62075 }, { "epoch": 0.6315104166666666, "grad_norm": 8.555520057678223, "learning_rate": 1.498954137961607e-06, "loss": 3.3795, "step": 62080 }, { "epoch": 0.631561279296875, "grad_norm": 17.40530776977539, "learning_rate": 1.4985877268864525e-06, "loss": 3.3138, "step": 62085 }, { "epoch": 0.6316121419270834, "grad_norm": 11.477150917053223, "learning_rate": 1.4982213414321928e-06, "loss": 3.4017, "step": 62090 }, { "epoch": 0.6316630045572916, "grad_norm": 16.35191535949707, "learning_rate": 1.497854981608203e-06, "loss": 3.5194, "step": 62095 }, { "epoch": 0.6317138671875, "grad_norm": 13.10997486114502, "learning_rate": 1.4974886474238568e-06, "loss": 3.3317, "step": 62100 }, { "epoch": 0.6317647298177084, "grad_norm": 9.912026405334473, "learning_rate": 1.4971223388885252e-06, "loss": 3.213, "step": 62105 }, { "epoch": 0.6318155924479166, "grad_norm": 8.097939491271973, "learning_rate": 1.4967560560115807e-06, "loss": 3.5305, "step": 62110 }, { "epoch": 0.631866455078125, "grad_norm": 12.29067611694336, "learning_rate": 1.4963897988023952e-06, "loss": 3.3737, "step": 62115 }, { "epoch": 0.6319173177083334, "grad_norm": 9.925728797912598, "learning_rate": 1.4960235672703388e-06, "loss": 2.929, "step": 62120 }, { "epoch": 0.6319681803385416, "grad_norm": 13.718255996704102, "learning_rate": 1.4956573614247804e-06, "loss": 3.0586, "step": 62125 }, { "epoch": 0.63201904296875, "grad_norm": 12.421064376831055, "learning_rate": 1.495291181275092e-06, "loss": 3.0733, "step": 62130 }, { "epoch": 0.6320699055989584, "grad_norm": 13.674421310424805, "learning_rate": 1.4949250268306397e-06, "loss": 3.7793, "step": 62135 }, { "epoch": 0.6321207682291666, "grad_norm": 10.796311378479004, "learning_rate": 1.494558898100793e-06, "loss": 3.4028, "step": 62140 }, { "epoch": 0.632171630859375, "grad_norm": 9.48852252960205, "learning_rate": 1.4941927950949175e-06, "loss": 3.5249, "step": 62145 }, { "epoch": 0.6322224934895834, "grad_norm": 10.705741882324219, "learning_rate": 1.4938267178223814e-06, "loss": 3.1707, "step": 62150 }, { "epoch": 0.6322733561197916, "grad_norm": 13.889814376831055, "learning_rate": 1.4934606662925508e-06, "loss": 3.3301, "step": 62155 }, { "epoch": 0.63232421875, "grad_norm": 15.42905044555664, "learning_rate": 1.49309464051479e-06, "loss": 3.2366, "step": 62160 }, { "epoch": 0.6323750813802084, "grad_norm": 12.578567504882812, "learning_rate": 1.4927286404984637e-06, "loss": 3.4207, "step": 62165 }, { "epoch": 0.6324259440104166, "grad_norm": 11.376194953918457, "learning_rate": 1.4923626662529372e-06, "loss": 3.1807, "step": 62170 }, { "epoch": 0.632476806640625, "grad_norm": 13.187199592590332, "learning_rate": 1.4919967177875726e-06, "loss": 3.7483, "step": 62175 }, { "epoch": 0.6325276692708334, "grad_norm": 15.951757431030273, "learning_rate": 1.4916307951117336e-06, "loss": 3.2735, "step": 62180 }, { "epoch": 0.6325785319010416, "grad_norm": 15.15351676940918, "learning_rate": 1.4912648982347805e-06, "loss": 3.0444, "step": 62185 }, { "epoch": 0.63262939453125, "grad_norm": 12.16561508178711, "learning_rate": 1.4908990271660766e-06, "loss": 3.69, "step": 62190 }, { "epoch": 0.6326802571614584, "grad_norm": 9.109246253967285, "learning_rate": 1.4905331819149827e-06, "loss": 3.342, "step": 62195 }, { "epoch": 0.6327311197916666, "grad_norm": 16.777318954467773, "learning_rate": 1.490167362490857e-06, "loss": 3.448, "step": 62200 }, { "epoch": 0.632781982421875, "grad_norm": 11.811336517333984, "learning_rate": 1.4898015689030598e-06, "loss": 3.1386, "step": 62205 }, { "epoch": 0.6328328450520834, "grad_norm": 7.091428279876709, "learning_rate": 1.4894358011609511e-06, "loss": 3.0787, "step": 62210 }, { "epoch": 0.6328837076822916, "grad_norm": 9.410467147827148, "learning_rate": 1.4890700592738872e-06, "loss": 3.7048, "step": 62215 }, { "epoch": 0.6329345703125, "grad_norm": 15.245011329650879, "learning_rate": 1.4887043432512258e-06, "loss": 3.2783, "step": 62220 }, { "epoch": 0.6329854329427084, "grad_norm": 11.711705207824707, "learning_rate": 1.4883386531023251e-06, "loss": 3.6483, "step": 62225 }, { "epoch": 0.6330362955729166, "grad_norm": 9.617206573486328, "learning_rate": 1.4879729888365395e-06, "loss": 3.2444, "step": 62230 }, { "epoch": 0.633087158203125, "grad_norm": 17.462865829467773, "learning_rate": 1.4876073504632257e-06, "loss": 3.5213, "step": 62235 }, { "epoch": 0.6331380208333334, "grad_norm": 13.961889266967773, "learning_rate": 1.4872417379917365e-06, "loss": 3.2226, "step": 62240 }, { "epoch": 0.6331888834635416, "grad_norm": 9.276631355285645, "learning_rate": 1.4868761514314287e-06, "loss": 3.3173, "step": 62245 }, { "epoch": 0.63323974609375, "grad_norm": 15.21993350982666, "learning_rate": 1.4865105907916539e-06, "loss": 3.2775, "step": 62250 }, { "epoch": 0.6332906087239584, "grad_norm": 12.131950378417969, "learning_rate": 1.4861450560817662e-06, "loss": 3.5868, "step": 62255 }, { "epoch": 0.6333414713541666, "grad_norm": 13.67236042022705, "learning_rate": 1.4857795473111155e-06, "loss": 3.2668, "step": 62260 }, { "epoch": 0.633392333984375, "grad_norm": 13.7980318069458, "learning_rate": 1.4854140644890554e-06, "loss": 3.7161, "step": 62265 }, { "epoch": 0.6334431966145834, "grad_norm": 10.002974510192871, "learning_rate": 1.4850486076249365e-06, "loss": 3.2517, "step": 62270 }, { "epoch": 0.6334940592447916, "grad_norm": 17.057403564453125, "learning_rate": 1.4846831767281082e-06, "loss": 3.239, "step": 62275 }, { "epoch": 0.633544921875, "grad_norm": 13.9342622756958, "learning_rate": 1.4843177718079194e-06, "loss": 3.1106, "step": 62280 }, { "epoch": 0.6335957845052084, "grad_norm": 9.62298583984375, "learning_rate": 1.4839523928737204e-06, "loss": 2.8991, "step": 62285 }, { "epoch": 0.6336466471354166, "grad_norm": 12.052846908569336, "learning_rate": 1.4835870399348584e-06, "loss": 3.4887, "step": 62290 }, { "epoch": 0.633697509765625, "grad_norm": 8.109212875366211, "learning_rate": 1.4832217130006816e-06, "loss": 3.2346, "step": 62295 }, { "epoch": 0.6337483723958334, "grad_norm": 13.58726692199707, "learning_rate": 1.482856412080535e-06, "loss": 3.0532, "step": 62300 }, { "epoch": 0.6337992350260416, "grad_norm": 10.953584671020508, "learning_rate": 1.482491137183767e-06, "loss": 3.2216, "step": 62305 }, { "epoch": 0.63385009765625, "grad_norm": 14.744987487792969, "learning_rate": 1.482125888319722e-06, "loss": 3.1238, "step": 62310 }, { "epoch": 0.6339009602864584, "grad_norm": 16.00750160217285, "learning_rate": 1.4817606654977446e-06, "loss": 3.6184, "step": 62315 }, { "epoch": 0.6339518229166666, "grad_norm": 11.541016578674316, "learning_rate": 1.4813954687271788e-06, "loss": 3.4754, "step": 62320 }, { "epoch": 0.634002685546875, "grad_norm": 14.393070220947266, "learning_rate": 1.4810302980173696e-06, "loss": 3.2028, "step": 62325 }, { "epoch": 0.6340535481770834, "grad_norm": 7.236337184906006, "learning_rate": 1.480665153377658e-06, "loss": 3.0415, "step": 62330 }, { "epoch": 0.6341044108072916, "grad_norm": 11.734339714050293, "learning_rate": 1.4803000348173863e-06, "loss": 3.3178, "step": 62335 }, { "epoch": 0.6341552734375, "grad_norm": 12.241242408752441, "learning_rate": 1.4799349423458979e-06, "loss": 3.2239, "step": 62340 }, { "epoch": 0.6342061360677084, "grad_norm": 10.239745140075684, "learning_rate": 1.4795698759725313e-06, "loss": 3.3292, "step": 62345 }, { "epoch": 0.6342569986979166, "grad_norm": 10.468929290771484, "learning_rate": 1.4792048357066282e-06, "loss": 3.1149, "step": 62350 }, { "epoch": 0.634307861328125, "grad_norm": 10.030734062194824, "learning_rate": 1.4788398215575265e-06, "loss": 3.2583, "step": 62355 }, { "epoch": 0.6343587239583334, "grad_norm": 8.35238265991211, "learning_rate": 1.4784748335345663e-06, "loss": 3.4598, "step": 62360 }, { "epoch": 0.6344095865885416, "grad_norm": 12.03402042388916, "learning_rate": 1.478109871647086e-06, "loss": 3.1848, "step": 62365 }, { "epoch": 0.63446044921875, "grad_norm": 12.839322090148926, "learning_rate": 1.4777449359044217e-06, "loss": 3.3202, "step": 62370 }, { "epoch": 0.6345113118489584, "grad_norm": 11.045984268188477, "learning_rate": 1.4773800263159105e-06, "loss": 3.1569, "step": 62375 }, { "epoch": 0.6345621744791666, "grad_norm": 9.864595413208008, "learning_rate": 1.4770151428908903e-06, "loss": 3.1199, "step": 62380 }, { "epoch": 0.634613037109375, "grad_norm": 9.273886680603027, "learning_rate": 1.4766502856386941e-06, "loss": 3.3802, "step": 62385 }, { "epoch": 0.6346638997395834, "grad_norm": 14.827187538146973, "learning_rate": 1.4762854545686587e-06, "loss": 3.652, "step": 62390 }, { "epoch": 0.6347147623697916, "grad_norm": 11.817782402038574, "learning_rate": 1.4759206496901158e-06, "loss": 3.09, "step": 62395 }, { "epoch": 0.634765625, "grad_norm": 12.936819076538086, "learning_rate": 1.4755558710124012e-06, "loss": 3.312, "step": 62400 }, { "epoch": 0.6348164876302084, "grad_norm": 8.248379707336426, "learning_rate": 1.4751911185448476e-06, "loss": 3.3594, "step": 62405 }, { "epoch": 0.6348673502604166, "grad_norm": 17.511751174926758, "learning_rate": 1.4748263922967854e-06, "loss": 3.2554, "step": 62410 }, { "epoch": 0.634918212890625, "grad_norm": 14.318390846252441, "learning_rate": 1.4744616922775468e-06, "loss": 3.3874, "step": 62415 }, { "epoch": 0.6349690755208334, "grad_norm": 16.752254486083984, "learning_rate": 1.4740970184964631e-06, "loss": 3.4619, "step": 62420 }, { "epoch": 0.6350199381510416, "grad_norm": 12.39830493927002, "learning_rate": 1.4737323709628644e-06, "loss": 3.1976, "step": 62425 }, { "epoch": 0.63507080078125, "grad_norm": 17.892972946166992, "learning_rate": 1.4733677496860789e-06, "loss": 3.1798, "step": 62430 }, { "epoch": 0.6351216634114584, "grad_norm": 10.058704376220703, "learning_rate": 1.4730031546754365e-06, "loss": 3.2515, "step": 62435 }, { "epoch": 0.6351725260416666, "grad_norm": 7.284869194030762, "learning_rate": 1.4726385859402652e-06, "loss": 3.6694, "step": 62440 }, { "epoch": 0.635223388671875, "grad_norm": 14.861515045166016, "learning_rate": 1.4722740434898921e-06, "loss": 3.1816, "step": 62445 }, { "epoch": 0.6352742513020834, "grad_norm": 13.92569351196289, "learning_rate": 1.4719095273336437e-06, "loss": 3.2837, "step": 62450 }, { "epoch": 0.6353251139322916, "grad_norm": 15.121036529541016, "learning_rate": 1.4715450374808469e-06, "loss": 3.4223, "step": 62455 }, { "epoch": 0.6353759765625, "grad_norm": 10.16884994506836, "learning_rate": 1.4711805739408261e-06, "loss": 3.3786, "step": 62460 }, { "epoch": 0.6354268391927084, "grad_norm": 7.572779178619385, "learning_rate": 1.470816136722907e-06, "loss": 3.2864, "step": 62465 }, { "epoch": 0.6354777018229166, "grad_norm": 14.944366455078125, "learning_rate": 1.4704517258364123e-06, "loss": 3.4903, "step": 62470 }, { "epoch": 0.635528564453125, "grad_norm": 10.901159286499023, "learning_rate": 1.4700873412906664e-06, "loss": 3.2218, "step": 62475 }, { "epoch": 0.6355794270833334, "grad_norm": 14.795011520385742, "learning_rate": 1.4697229830949926e-06, "loss": 3.0696, "step": 62480 }, { "epoch": 0.6356302897135416, "grad_norm": 11.672063827514648, "learning_rate": 1.4693586512587113e-06, "loss": 3.3608, "step": 62485 }, { "epoch": 0.63568115234375, "grad_norm": 11.651619911193848, "learning_rate": 1.4689943457911444e-06, "loss": 2.9439, "step": 62490 }, { "epoch": 0.6357320149739584, "grad_norm": 7.681040287017822, "learning_rate": 1.4686300667016135e-06, "loss": 3.2055, "step": 62495 }, { "epoch": 0.6357828776041666, "grad_norm": 7.397478103637695, "learning_rate": 1.4682658139994372e-06, "loss": 4.086, "step": 62500 }, { "epoch": 0.635833740234375, "grad_norm": 10.454808235168457, "learning_rate": 1.4679015876939363e-06, "loss": 3.1367, "step": 62505 }, { "epoch": 0.6358846028645834, "grad_norm": 15.2985200881958, "learning_rate": 1.4675373877944277e-06, "loss": 3.6327, "step": 62510 }, { "epoch": 0.6359354654947916, "grad_norm": 12.302929878234863, "learning_rate": 1.4671732143102308e-06, "loss": 3.391, "step": 62515 }, { "epoch": 0.635986328125, "grad_norm": 20.90006446838379, "learning_rate": 1.4668090672506625e-06, "loss": 3.3075, "step": 62520 }, { "epoch": 0.6360371907552084, "grad_norm": 14.097554206848145, "learning_rate": 1.4664449466250386e-06, "loss": 3.181, "step": 62525 }, { "epoch": 0.6360880533854166, "grad_norm": 12.83116626739502, "learning_rate": 1.4660808524426764e-06, "loss": 3.224, "step": 62530 }, { "epoch": 0.636138916015625, "grad_norm": 12.021821022033691, "learning_rate": 1.4657167847128908e-06, "loss": 3.0381, "step": 62535 }, { "epoch": 0.6361897786458334, "grad_norm": 9.282414436340332, "learning_rate": 1.465352743444996e-06, "loss": 2.8872, "step": 62540 }, { "epoch": 0.6362406412760416, "grad_norm": 13.512335777282715, "learning_rate": 1.464988728648305e-06, "loss": 3.324, "step": 62545 }, { "epoch": 0.63629150390625, "grad_norm": 8.797607421875, "learning_rate": 1.4646247403321334e-06, "loss": 3.1153, "step": 62550 }, { "epoch": 0.6363423665364584, "grad_norm": 8.574508666992188, "learning_rate": 1.4642607785057917e-06, "loss": 3.4983, "step": 62555 }, { "epoch": 0.6363932291666666, "grad_norm": 9.874283790588379, "learning_rate": 1.4638968431785933e-06, "loss": 3.7679, "step": 62560 }, { "epoch": 0.636444091796875, "grad_norm": 13.190855979919434, "learning_rate": 1.4635329343598468e-06, "loss": 3.4084, "step": 62565 }, { "epoch": 0.6364949544270834, "grad_norm": 9.361830711364746, "learning_rate": 1.463169052058866e-06, "loss": 3.0899, "step": 62570 }, { "epoch": 0.6365458170572916, "grad_norm": 14.177390098571777, "learning_rate": 1.4628051962849588e-06, "loss": 3.3523, "step": 62575 }, { "epoch": 0.6365966796875, "grad_norm": 12.679844856262207, "learning_rate": 1.4624413670474355e-06, "loss": 3.4776, "step": 62580 }, { "epoch": 0.6366475423177084, "grad_norm": 14.361136436462402, "learning_rate": 1.4620775643556025e-06, "loss": 3.2976, "step": 62585 }, { "epoch": 0.6366984049479166, "grad_norm": 10.473655700683594, "learning_rate": 1.46171378821877e-06, "loss": 3.6659, "step": 62590 }, { "epoch": 0.636749267578125, "grad_norm": 8.326841354370117, "learning_rate": 1.4613500386462443e-06, "loss": 4.0207, "step": 62595 }, { "epoch": 0.6368001302083334, "grad_norm": 14.48426342010498, "learning_rate": 1.4609863156473315e-06, "loss": 2.9788, "step": 62600 }, { "epoch": 0.6368509928385416, "grad_norm": 15.910515785217285, "learning_rate": 1.4606226192313366e-06, "loss": 3.2576, "step": 62605 }, { "epoch": 0.63690185546875, "grad_norm": 12.90970230102539, "learning_rate": 1.4602589494075667e-06, "loss": 3.4674, "step": 62610 }, { "epoch": 0.6369527180989584, "grad_norm": 14.598609924316406, "learning_rate": 1.459895306185325e-06, "loss": 3.3849, "step": 62615 }, { "epoch": 0.6370035807291666, "grad_norm": 11.820744514465332, "learning_rate": 1.4595316895739148e-06, "loss": 3.2215, "step": 62620 }, { "epoch": 0.637054443359375, "grad_norm": 13.531217575073242, "learning_rate": 1.4591680995826396e-06, "loss": 2.81, "step": 62625 }, { "epoch": 0.6371053059895834, "grad_norm": 13.425281524658203, "learning_rate": 1.4588045362208025e-06, "loss": 3.4903, "step": 62630 }, { "epoch": 0.6371561686197916, "grad_norm": 13.802509307861328, "learning_rate": 1.458440999497705e-06, "loss": 3.2356, "step": 62635 }, { "epoch": 0.63720703125, "grad_norm": 11.911323547363281, "learning_rate": 1.4580774894226466e-06, "loss": 3.3578, "step": 62640 }, { "epoch": 0.6372578938802084, "grad_norm": 12.430571556091309, "learning_rate": 1.4577140060049293e-06, "loss": 3.3196, "step": 62645 }, { "epoch": 0.6373087565104166, "grad_norm": 9.14650821685791, "learning_rate": 1.4573505492538514e-06, "loss": 3.2798, "step": 62650 }, { "epoch": 0.637359619140625, "grad_norm": 15.641694068908691, "learning_rate": 1.4569871191787137e-06, "loss": 3.869, "step": 62655 }, { "epoch": 0.6374104817708334, "grad_norm": 8.274557113647461, "learning_rate": 1.4566237157888122e-06, "loss": 3.3859, "step": 62660 }, { "epoch": 0.6374613444010416, "grad_norm": 10.886077880859375, "learning_rate": 1.4562603390934465e-06, "loss": 3.1028, "step": 62665 }, { "epoch": 0.63751220703125, "grad_norm": 13.126774787902832, "learning_rate": 1.4558969891019126e-06, "loss": 3.1486, "step": 62670 }, { "epoch": 0.6375630696614584, "grad_norm": 16.401315689086914, "learning_rate": 1.4555336658235059e-06, "loss": 3.8196, "step": 62675 }, { "epoch": 0.6376139322916666, "grad_norm": 7.959024429321289, "learning_rate": 1.455170369267523e-06, "loss": 3.3189, "step": 62680 }, { "epoch": 0.637664794921875, "grad_norm": 7.147455215454102, "learning_rate": 1.454807099443259e-06, "loss": 3.5084, "step": 62685 }, { "epoch": 0.6377156575520834, "grad_norm": 13.789383888244629, "learning_rate": 1.454443856360008e-06, "loss": 3.4256, "step": 62690 }, { "epoch": 0.6377665201822916, "grad_norm": 13.630207061767578, "learning_rate": 1.454080640027063e-06, "loss": 3.6908, "step": 62695 }, { "epoch": 0.6378173828125, "grad_norm": 14.849143028259277, "learning_rate": 1.453717450453716e-06, "loss": 3.2571, "step": 62700 }, { "epoch": 0.6378682454427084, "grad_norm": 8.7376127243042, "learning_rate": 1.45335428764926e-06, "loss": 3.1046, "step": 62705 }, { "epoch": 0.6379191080729166, "grad_norm": 7.985586166381836, "learning_rate": 1.4529911516229875e-06, "loss": 3.0926, "step": 62710 }, { "epoch": 0.637969970703125, "grad_norm": 7.017002582550049, "learning_rate": 1.452628042384187e-06, "loss": 3.2291, "step": 62715 }, { "epoch": 0.6380208333333334, "grad_norm": 14.071551322937012, "learning_rate": 1.4522649599421513e-06, "loss": 3.2681, "step": 62720 }, { "epoch": 0.6380716959635416, "grad_norm": 7.781976699829102, "learning_rate": 1.4519019043061666e-06, "loss": 3.4062, "step": 62725 }, { "epoch": 0.63812255859375, "grad_norm": 9.860779762268066, "learning_rate": 1.4515388754855247e-06, "loss": 2.7846, "step": 62730 }, { "epoch": 0.6381734212239584, "grad_norm": 8.453152656555176, "learning_rate": 1.451175873489511e-06, "loss": 3.3673, "step": 62735 }, { "epoch": 0.6382242838541666, "grad_norm": 12.840231895446777, "learning_rate": 1.450812898327415e-06, "loss": 3.158, "step": 62740 }, { "epoch": 0.638275146484375, "grad_norm": 15.269471168518066, "learning_rate": 1.4504499500085223e-06, "loss": 3.1418, "step": 62745 }, { "epoch": 0.6383260091145834, "grad_norm": 10.599361419677734, "learning_rate": 1.4500870285421179e-06, "loss": 3.9911, "step": 62750 }, { "epoch": 0.6383768717447916, "grad_norm": 7.777949333190918, "learning_rate": 1.4497241339374879e-06, "loss": 3.1717, "step": 62755 }, { "epoch": 0.638427734375, "grad_norm": 11.030150413513184, "learning_rate": 1.449361266203918e-06, "loss": 3.3687, "step": 62760 }, { "epoch": 0.6384785970052084, "grad_norm": 8.590954780578613, "learning_rate": 1.4489984253506915e-06, "loss": 2.9457, "step": 62765 }, { "epoch": 0.6385294596354166, "grad_norm": 12.137688636779785, "learning_rate": 1.4486356113870908e-06, "loss": 3.5217, "step": 62770 }, { "epoch": 0.638580322265625, "grad_norm": 13.380548477172852, "learning_rate": 1.4482728243223975e-06, "loss": 3.3363, "step": 62775 }, { "epoch": 0.6386311848958334, "grad_norm": 13.236571311950684, "learning_rate": 1.4479100641658952e-06, "loss": 3.1293, "step": 62780 }, { "epoch": 0.6386820475260416, "grad_norm": 12.542268753051758, "learning_rate": 1.4475473309268652e-06, "loss": 3.6334, "step": 62785 }, { "epoch": 0.63873291015625, "grad_norm": 16.519969940185547, "learning_rate": 1.4471846246145876e-06, "loss": 3.5497, "step": 62790 }, { "epoch": 0.6387837727864584, "grad_norm": 14.062509536743164, "learning_rate": 1.4468219452383408e-06, "loss": 3.1123, "step": 62795 }, { "epoch": 0.6388346354166666, "grad_norm": 10.798090934753418, "learning_rate": 1.4464592928074055e-06, "loss": 3.3044, "step": 62800 }, { "epoch": 0.638885498046875, "grad_norm": 16.738853454589844, "learning_rate": 1.446096667331059e-06, "loss": 3.1467, "step": 62805 }, { "epoch": 0.6389363606770834, "grad_norm": 10.667718887329102, "learning_rate": 1.4457340688185789e-06, "loss": 3.2462, "step": 62810 }, { "epoch": 0.6389872233072916, "grad_norm": 11.143059730529785, "learning_rate": 1.4453714972792443e-06, "loss": 3.7527, "step": 62815 }, { "epoch": 0.6390380859375, "grad_norm": 15.1730375289917, "learning_rate": 1.4450089527223298e-06, "loss": 3.5277, "step": 62820 }, { "epoch": 0.6390889485677084, "grad_norm": 10.914835929870605, "learning_rate": 1.444646435157111e-06, "loss": 3.1531, "step": 62825 }, { "epoch": 0.6391398111979166, "grad_norm": 7.6904144287109375, "learning_rate": 1.4442839445928622e-06, "loss": 3.3885, "step": 62830 }, { "epoch": 0.639190673828125, "grad_norm": 16.357162475585938, "learning_rate": 1.4439214810388582e-06, "loss": 3.2952, "step": 62835 }, { "epoch": 0.6392415364583334, "grad_norm": 14.014001846313477, "learning_rate": 1.443559044504374e-06, "loss": 3.3613, "step": 62840 }, { "epoch": 0.6392923990885416, "grad_norm": 14.548418045043945, "learning_rate": 1.4431966349986814e-06, "loss": 3.969, "step": 62845 }, { "epoch": 0.63934326171875, "grad_norm": 12.191168785095215, "learning_rate": 1.4428342525310513e-06, "loss": 3.3481, "step": 62850 }, { "epoch": 0.6393941243489584, "grad_norm": 11.43539047241211, "learning_rate": 1.4424718971107572e-06, "loss": 3.4122, "step": 62855 }, { "epoch": 0.6394449869791666, "grad_norm": 13.015048027038574, "learning_rate": 1.4421095687470677e-06, "loss": 3.1428, "step": 62860 }, { "epoch": 0.639495849609375, "grad_norm": 12.808711051940918, "learning_rate": 1.4417472674492556e-06, "loss": 3.4874, "step": 62865 }, { "epoch": 0.6395467122395834, "grad_norm": 11.39872932434082, "learning_rate": 1.4413849932265872e-06, "loss": 3.9441, "step": 62870 }, { "epoch": 0.6395975748697916, "grad_norm": 9.288200378417969, "learning_rate": 1.4410227460883345e-06, "loss": 3.1887, "step": 62875 }, { "epoch": 0.6396484375, "grad_norm": 20.272361755371094, "learning_rate": 1.4406605260437626e-06, "loss": 3.3801, "step": 62880 }, { "epoch": 0.6396993001302084, "grad_norm": 9.169677734375, "learning_rate": 1.440298333102141e-06, "loss": 3.3967, "step": 62885 }, { "epoch": 0.6397501627604166, "grad_norm": 10.119462013244629, "learning_rate": 1.4399361672727342e-06, "loss": 3.4271, "step": 62890 }, { "epoch": 0.639801025390625, "grad_norm": 10.71320629119873, "learning_rate": 1.4395740285648109e-06, "loss": 3.3772, "step": 62895 }, { "epoch": 0.6398518880208334, "grad_norm": 12.625038146972656, "learning_rate": 1.439211916987634e-06, "loss": 3.303, "step": 62900 }, { "epoch": 0.6399027506510416, "grad_norm": 17.40066909790039, "learning_rate": 1.4388498325504679e-06, "loss": 3.3495, "step": 62905 }, { "epoch": 0.63995361328125, "grad_norm": 12.901152610778809, "learning_rate": 1.4384877752625776e-06, "loss": 3.5543, "step": 62910 }, { "epoch": 0.6400044759114584, "grad_norm": 13.61598014831543, "learning_rate": 1.438125745133227e-06, "loss": 3.5211, "step": 62915 }, { "epoch": 0.6400553385416666, "grad_norm": 12.479560852050781, "learning_rate": 1.4377637421716773e-06, "loss": 3.2052, "step": 62920 }, { "epoch": 0.640106201171875, "grad_norm": 13.060637474060059, "learning_rate": 1.4374017663871898e-06, "loss": 3.3348, "step": 62925 }, { "epoch": 0.6401570638020834, "grad_norm": 11.65615463256836, "learning_rate": 1.437039817789027e-06, "loss": 3.2979, "step": 62930 }, { "epoch": 0.6402079264322916, "grad_norm": 11.959769248962402, "learning_rate": 1.4366778963864476e-06, "loss": 3.5626, "step": 62935 }, { "epoch": 0.6402587890625, "grad_norm": 11.341804504394531, "learning_rate": 1.436316002188713e-06, "loss": 3.4241, "step": 62940 }, { "epoch": 0.6403096516927084, "grad_norm": 10.488006591796875, "learning_rate": 1.4359541352050805e-06, "loss": 3.2683, "step": 62945 }, { "epoch": 0.6403605143229166, "grad_norm": 15.101091384887695, "learning_rate": 1.4355922954448098e-06, "loss": 3.1145, "step": 62950 }, { "epoch": 0.640411376953125, "grad_norm": 10.951150894165039, "learning_rate": 1.4352304829171587e-06, "loss": 3.1962, "step": 62955 }, { "epoch": 0.6404622395833334, "grad_norm": 12.33436107635498, "learning_rate": 1.4348686976313818e-06, "loss": 3.5271, "step": 62960 }, { "epoch": 0.6405131022135416, "grad_norm": 11.635231971740723, "learning_rate": 1.4345069395967371e-06, "loss": 3.8374, "step": 62965 }, { "epoch": 0.64056396484375, "grad_norm": 11.820862770080566, "learning_rate": 1.4341452088224805e-06, "loss": 3.3047, "step": 62970 }, { "epoch": 0.6406148274739584, "grad_norm": 9.315909385681152, "learning_rate": 1.4337835053178662e-06, "loss": 3.003, "step": 62975 }, { "epoch": 0.6406656901041666, "grad_norm": 7.3973612785339355, "learning_rate": 1.4334218290921483e-06, "loss": 3.3779, "step": 62980 }, { "epoch": 0.640716552734375, "grad_norm": 9.989119529724121, "learning_rate": 1.4330601801545791e-06, "loss": 3.3835, "step": 62985 }, { "epoch": 0.6407674153645834, "grad_norm": 11.238359451293945, "learning_rate": 1.4326985585144121e-06, "loss": 2.8967, "step": 62990 }, { "epoch": 0.6408182779947916, "grad_norm": 12.609733581542969, "learning_rate": 1.432336964180901e-06, "loss": 3.661, "step": 62995 }, { "epoch": 0.640869140625, "grad_norm": 9.334088325500488, "learning_rate": 1.4319753971632948e-06, "loss": 3.31, "step": 63000 }, { "epoch": 0.6409200032552084, "grad_norm": 15.808244705200195, "learning_rate": 1.4316138574708446e-06, "loss": 3.6069, "step": 63005 }, { "epoch": 0.6409708658854166, "grad_norm": 10.603769302368164, "learning_rate": 1.4312523451128013e-06, "loss": 3.3133, "step": 63010 }, { "epoch": 0.641021728515625, "grad_norm": 10.078536033630371, "learning_rate": 1.4308908600984128e-06, "loss": 3.278, "step": 63015 }, { "epoch": 0.6410725911458334, "grad_norm": 7.120779514312744, "learning_rate": 1.4305294024369278e-06, "loss": 3.7759, "step": 63020 }, { "epoch": 0.6411234537760416, "grad_norm": 13.637188911437988, "learning_rate": 1.4301679721375958e-06, "loss": 3.2534, "step": 63025 }, { "epoch": 0.64117431640625, "grad_norm": 15.969144821166992, "learning_rate": 1.4298065692096625e-06, "loss": 3.2703, "step": 63030 }, { "epoch": 0.6412251790364584, "grad_norm": 14.17587947845459, "learning_rate": 1.4294451936623738e-06, "loss": 3.5892, "step": 63035 }, { "epoch": 0.6412760416666666, "grad_norm": 14.112336158752441, "learning_rate": 1.4290838455049754e-06, "loss": 3.3573, "step": 63040 }, { "epoch": 0.641326904296875, "grad_norm": 9.915677070617676, "learning_rate": 1.4287225247467145e-06, "loss": 3.4702, "step": 63045 }, { "epoch": 0.6413777669270834, "grad_norm": 10.888671875, "learning_rate": 1.4283612313968337e-06, "loss": 3.2467, "step": 63050 }, { "epoch": 0.6414286295572916, "grad_norm": 12.389484405517578, "learning_rate": 1.4279999654645771e-06, "loss": 3.2647, "step": 63055 }, { "epoch": 0.6414794921875, "grad_norm": 11.430819511413574, "learning_rate": 1.427638726959186e-06, "loss": 3.3085, "step": 63060 }, { "epoch": 0.6415303548177084, "grad_norm": 13.8035888671875, "learning_rate": 1.4272775158899038e-06, "loss": 3.635, "step": 63065 }, { "epoch": 0.6415812174479166, "grad_norm": 10.649247169494629, "learning_rate": 1.4269163322659734e-06, "loss": 3.379, "step": 63070 }, { "epoch": 0.641632080078125, "grad_norm": 9.171544075012207, "learning_rate": 1.426555176096634e-06, "loss": 3.2024, "step": 63075 }, { "epoch": 0.6416829427083334, "grad_norm": 8.184263229370117, "learning_rate": 1.4261940473911252e-06, "loss": 3.0368, "step": 63080 }, { "epoch": 0.6417338053385416, "grad_norm": 10.136472702026367, "learning_rate": 1.4258329461586879e-06, "loss": 3.0165, "step": 63085 }, { "epoch": 0.64178466796875, "grad_norm": 17.416135787963867, "learning_rate": 1.4254718724085591e-06, "loss": 3.9392, "step": 63090 }, { "epoch": 0.6418355305989584, "grad_norm": 12.152560234069824, "learning_rate": 1.425110826149979e-06, "loss": 3.1957, "step": 63095 }, { "epoch": 0.6418863932291666, "grad_norm": 223.0073699951172, "learning_rate": 1.4247498073921829e-06, "loss": 3.7556, "step": 63100 }, { "epoch": 0.641937255859375, "grad_norm": 9.165891647338867, "learning_rate": 1.4243888161444087e-06, "loss": 2.9984, "step": 63105 }, { "epoch": 0.6419881184895834, "grad_norm": 10.189545631408691, "learning_rate": 1.4240278524158916e-06, "loss": 3.3366, "step": 63110 }, { "epoch": 0.6420389811197916, "grad_norm": 8.695596694946289, "learning_rate": 1.4236669162158662e-06, "loss": 3.4958, "step": 63115 }, { "epoch": 0.64208984375, "grad_norm": 13.895856857299805, "learning_rate": 1.423306007553567e-06, "loss": 3.8767, "step": 63120 }, { "epoch": 0.6421407063802084, "grad_norm": 14.967219352722168, "learning_rate": 1.4229451264382298e-06, "loss": 3.4166, "step": 63125 }, { "epoch": 0.6421915690104166, "grad_norm": 10.004438400268555, "learning_rate": 1.4225842728790866e-06, "loss": 3.3435, "step": 63130 }, { "epoch": 0.642242431640625, "grad_norm": 15.684747695922852, "learning_rate": 1.422223446885368e-06, "loss": 3.3918, "step": 63135 }, { "epoch": 0.6422932942708334, "grad_norm": 13.166836738586426, "learning_rate": 1.421862648466308e-06, "loss": 3.2962, "step": 63140 }, { "epoch": 0.6423441569010416, "grad_norm": 12.41301441192627, "learning_rate": 1.4215018776311359e-06, "loss": 3.0552, "step": 63145 }, { "epoch": 0.64239501953125, "grad_norm": 13.288503646850586, "learning_rate": 1.4211411343890835e-06, "loss": 3.0028, "step": 63150 }, { "epoch": 0.6424458821614584, "grad_norm": 7.562656879425049, "learning_rate": 1.4207804187493784e-06, "loss": 3.4307, "step": 63155 }, { "epoch": 0.6424967447916666, "grad_norm": 9.266556739807129, "learning_rate": 1.4204197307212513e-06, "loss": 3.4913, "step": 63160 }, { "epoch": 0.642547607421875, "grad_norm": 14.564390182495117, "learning_rate": 1.42005907031393e-06, "loss": 2.9874, "step": 63165 }, { "epoch": 0.6425984700520834, "grad_norm": 12.410235404968262, "learning_rate": 1.4196984375366401e-06, "loss": 3.2584, "step": 63170 }, { "epoch": 0.6426493326822916, "grad_norm": 11.363146781921387, "learning_rate": 1.4193378323986096e-06, "loss": 3.5505, "step": 63175 }, { "epoch": 0.6427001953125, "grad_norm": 14.288817405700684, "learning_rate": 1.4189772549090651e-06, "loss": 3.0511, "step": 63180 }, { "epoch": 0.6427510579427084, "grad_norm": 16.43682098388672, "learning_rate": 1.418616705077232e-06, "loss": 3.503, "step": 63185 }, { "epoch": 0.6428019205729166, "grad_norm": 9.374382972717285, "learning_rate": 1.4182561829123332e-06, "loss": 3.7444, "step": 63190 }, { "epoch": 0.642852783203125, "grad_norm": 7.787618160247803, "learning_rate": 1.4178956884235932e-06, "loss": 3.3986, "step": 63195 }, { "epoch": 0.6429036458333334, "grad_norm": 8.526206970214844, "learning_rate": 1.4175352216202364e-06, "loss": 3.1476, "step": 63200 }, { "epoch": 0.6429545084635416, "grad_norm": 13.395658493041992, "learning_rate": 1.4171747825114846e-06, "loss": 3.3565, "step": 63205 }, { "epoch": 0.64300537109375, "grad_norm": 10.183991432189941, "learning_rate": 1.4168143711065585e-06, "loss": 3.6162, "step": 63210 }, { "epoch": 0.6430562337239584, "grad_norm": 12.942578315734863, "learning_rate": 1.4164539874146806e-06, "loss": 3.4341, "step": 63215 }, { "epoch": 0.6431070963541666, "grad_norm": 14.537596702575684, "learning_rate": 1.4160936314450698e-06, "loss": 3.3841, "step": 63220 }, { "epoch": 0.643157958984375, "grad_norm": 11.549388885498047, "learning_rate": 1.4157333032069476e-06, "loss": 3.1836, "step": 63225 }, { "epoch": 0.6432088216145834, "grad_norm": 16.362728118896484, "learning_rate": 1.4153730027095308e-06, "loss": 3.3485, "step": 63230 }, { "epoch": 0.6432596842447916, "grad_norm": 9.83456802368164, "learning_rate": 1.4150127299620398e-06, "loss": 3.5327, "step": 63235 }, { "epoch": 0.643310546875, "grad_norm": 12.874783515930176, "learning_rate": 1.4146524849736906e-06, "loss": 3.2493, "step": 63240 }, { "epoch": 0.6433614095052084, "grad_norm": 11.884406089782715, "learning_rate": 1.4142922677536994e-06, "loss": 2.9893, "step": 63245 }, { "epoch": 0.6434122721354166, "grad_norm": 7.375535011291504, "learning_rate": 1.4139320783112832e-06, "loss": 3.9192, "step": 63250 }, { "epoch": 0.643463134765625, "grad_norm": 13.060131072998047, "learning_rate": 1.4135719166556582e-06, "loss": 3.1543, "step": 63255 }, { "epoch": 0.6435139973958334, "grad_norm": 11.191900253295898, "learning_rate": 1.4132117827960386e-06, "loss": 3.5979, "step": 63260 }, { "epoch": 0.6435648600260416, "grad_norm": 14.281481742858887, "learning_rate": 1.4128516767416378e-06, "loss": 3.3245, "step": 63265 }, { "epoch": 0.64361572265625, "grad_norm": 12.863600730895996, "learning_rate": 1.412491598501668e-06, "loss": 3.3724, "step": 63270 }, { "epoch": 0.6436665852864584, "grad_norm": 19.69635581970215, "learning_rate": 1.412131548085343e-06, "loss": 3.481, "step": 63275 }, { "epoch": 0.6437174479166666, "grad_norm": 10.664237022399902, "learning_rate": 1.4117715255018754e-06, "loss": 3.3954, "step": 63280 }, { "epoch": 0.643768310546875, "grad_norm": 10.354385375976562, "learning_rate": 1.4114115307604756e-06, "loss": 3.1319, "step": 63285 }, { "epoch": 0.6438191731770834, "grad_norm": 11.787322998046875, "learning_rate": 1.4110515638703525e-06, "loss": 3.2131, "step": 63290 }, { "epoch": 0.6438700358072916, "grad_norm": 9.325935363769531, "learning_rate": 1.4106916248407181e-06, "loss": 3.0139, "step": 63295 }, { "epoch": 0.6439208984375, "grad_norm": 9.582632064819336, "learning_rate": 1.4103317136807795e-06, "loss": 3.5877, "step": 63300 }, { "epoch": 0.6439717610677084, "grad_norm": 8.88012981414795, "learning_rate": 1.4099718303997457e-06, "loss": 3.0274, "step": 63305 }, { "epoch": 0.6440226236979166, "grad_norm": 14.654858589172363, "learning_rate": 1.409611975006825e-06, "loss": 3.2661, "step": 63310 }, { "epoch": 0.644073486328125, "grad_norm": 10.071319580078125, "learning_rate": 1.4092521475112237e-06, "loss": 3.4717, "step": 63315 }, { "epoch": 0.6441243489583334, "grad_norm": 10.545663833618164, "learning_rate": 1.4088923479221472e-06, "loss": 2.9251, "step": 63320 }, { "epoch": 0.6441752115885416, "grad_norm": 10.301362991333008, "learning_rate": 1.4085325762488e-06, "loss": 3.5179, "step": 63325 }, { "epoch": 0.64422607421875, "grad_norm": 15.441143989562988, "learning_rate": 1.4081728325003896e-06, "loss": 3.718, "step": 63330 }, { "epoch": 0.6442769368489584, "grad_norm": 8.218626976013184, "learning_rate": 1.407813116686119e-06, "loss": 3.2409, "step": 63335 }, { "epoch": 0.6443277994791666, "grad_norm": 12.70429515838623, "learning_rate": 1.4074534288151906e-06, "loss": 3.4641, "step": 63340 }, { "epoch": 0.644378662109375, "grad_norm": 10.418517112731934, "learning_rate": 1.4070937688968062e-06, "loss": 3.0988, "step": 63345 }, { "epoch": 0.6444295247395834, "grad_norm": 16.651914596557617, "learning_rate": 1.4067341369401688e-06, "loss": 3.7684, "step": 63350 }, { "epoch": 0.6444803873697916, "grad_norm": 15.939814567565918, "learning_rate": 1.4063745329544803e-06, "loss": 3.2531, "step": 63355 }, { "epoch": 0.64453125, "grad_norm": 13.09463119506836, "learning_rate": 1.4060149569489405e-06, "loss": 3.3047, "step": 63360 }, { "epoch": 0.6445821126302084, "grad_norm": 11.609050750732422, "learning_rate": 1.4056554089327474e-06, "loss": 3.5869, "step": 63365 }, { "epoch": 0.6446329752604166, "grad_norm": 9.671092987060547, "learning_rate": 1.4052958889151025e-06, "loss": 3.2309, "step": 63370 }, { "epoch": 0.644683837890625, "grad_norm": 13.178396224975586, "learning_rate": 1.4049363969052016e-06, "loss": 3.9849, "step": 63375 }, { "epoch": 0.6447347005208334, "grad_norm": 14.983819007873535, "learning_rate": 1.4045769329122444e-06, "loss": 3.6724, "step": 63380 }, { "epoch": 0.6447855631510416, "grad_norm": 8.321818351745605, "learning_rate": 1.4042174969454258e-06, "loss": 3.1741, "step": 63385 }, { "epoch": 0.64483642578125, "grad_norm": 14.03845500946045, "learning_rate": 1.4038580890139436e-06, "loss": 3.4564, "step": 63390 }, { "epoch": 0.6448872884114584, "grad_norm": 16.891016006469727, "learning_rate": 1.4034987091269928e-06, "loss": 3.4601, "step": 63395 }, { "epoch": 0.6449381510416666, "grad_norm": 14.487154960632324, "learning_rate": 1.4031393572937662e-06, "loss": 3.8728, "step": 63400 }, { "epoch": 0.644989013671875, "grad_norm": 9.721774101257324, "learning_rate": 1.4027800335234596e-06, "loss": 3.5094, "step": 63405 }, { "epoch": 0.6450398763020834, "grad_norm": 12.373363494873047, "learning_rate": 1.4024207378252664e-06, "loss": 3.3832, "step": 63410 }, { "epoch": 0.6450907389322916, "grad_norm": 12.942902565002441, "learning_rate": 1.4020614702083785e-06, "loss": 3.5001, "step": 63415 }, { "epoch": 0.6451416015625, "grad_norm": 9.149224281311035, "learning_rate": 1.401702230681987e-06, "loss": 3.3465, "step": 63420 }, { "epoch": 0.6451924641927084, "grad_norm": 7.111023426055908, "learning_rate": 1.4013430192552844e-06, "loss": 3.4166, "step": 63425 }, { "epoch": 0.6452433268229166, "grad_norm": 12.62641716003418, "learning_rate": 1.400983835937459e-06, "loss": 3.365, "step": 63430 }, { "epoch": 0.645294189453125, "grad_norm": 15.175331115722656, "learning_rate": 1.4006246807377027e-06, "loss": 3.4815, "step": 63435 }, { "epoch": 0.6453450520833334, "grad_norm": 10.579191207885742, "learning_rate": 1.4002655536652028e-06, "loss": 3.4229, "step": 63440 }, { "epoch": 0.6453959147135416, "grad_norm": 11.43730640411377, "learning_rate": 1.3999064547291485e-06, "loss": 3.5058, "step": 63445 }, { "epoch": 0.64544677734375, "grad_norm": 11.279643058776855, "learning_rate": 1.3995473839387274e-06, "loss": 3.1227, "step": 63450 }, { "epoch": 0.6454976399739584, "grad_norm": 9.88045883178711, "learning_rate": 1.3991883413031243e-06, "loss": 3.4518, "step": 63455 }, { "epoch": 0.6455485026041666, "grad_norm": 12.620511054992676, "learning_rate": 1.3988293268315267e-06, "loss": 3.3055, "step": 63460 }, { "epoch": 0.645599365234375, "grad_norm": 10.935976028442383, "learning_rate": 1.3984703405331204e-06, "loss": 3.2843, "step": 63465 }, { "epoch": 0.6456502278645834, "grad_norm": 12.42562198638916, "learning_rate": 1.3981113824170896e-06, "loss": 3.1433, "step": 63470 }, { "epoch": 0.6457010904947916, "grad_norm": 8.379152297973633, "learning_rate": 1.3977524524926177e-06, "loss": 3.2988, "step": 63475 }, { "epoch": 0.645751953125, "grad_norm": 10.612476348876953, "learning_rate": 1.3973935507688866e-06, "loss": 2.9124, "step": 63480 }, { "epoch": 0.6458028157552084, "grad_norm": 13.055747032165527, "learning_rate": 1.3970346772550819e-06, "loss": 3.6181, "step": 63485 }, { "epoch": 0.6458536783854166, "grad_norm": 15.127704620361328, "learning_rate": 1.3966758319603835e-06, "loss": 3.6793, "step": 63490 }, { "epoch": 0.645904541015625, "grad_norm": 14.916153907775879, "learning_rate": 1.3963170148939719e-06, "loss": 3.3365, "step": 63495 }, { "epoch": 0.6459554036458334, "grad_norm": 13.672470092773438, "learning_rate": 1.395958226065028e-06, "loss": 3.5576, "step": 63500 }, { "epoch": 0.6460062662760416, "grad_norm": 10.827059745788574, "learning_rate": 1.3955994654827303e-06, "loss": 3.5075, "step": 63505 }, { "epoch": 0.64605712890625, "grad_norm": 12.494633674621582, "learning_rate": 1.3952407331562594e-06, "loss": 3.3687, "step": 63510 }, { "epoch": 0.6461079915364584, "grad_norm": 7.683717727661133, "learning_rate": 1.394882029094792e-06, "loss": 3.5266, "step": 63515 }, { "epoch": 0.6461588541666666, "grad_norm": 10.356765747070312, "learning_rate": 1.394523353307507e-06, "loss": 3.0642, "step": 63520 }, { "epoch": 0.646209716796875, "grad_norm": 9.716863632202148, "learning_rate": 1.3941647058035796e-06, "loss": 3.4504, "step": 63525 }, { "epoch": 0.6462605794270834, "grad_norm": 14.333846092224121, "learning_rate": 1.3938060865921855e-06, "loss": 3.1182, "step": 63530 }, { "epoch": 0.6463114420572916, "grad_norm": 12.775233268737793, "learning_rate": 1.3934474956825e-06, "loss": 3.0615, "step": 63535 }, { "epoch": 0.6463623046875, "grad_norm": 11.847692489624023, "learning_rate": 1.3930889330836994e-06, "loss": 3.3087, "step": 63540 }, { "epoch": 0.6464131673177084, "grad_norm": 13.588580131530762, "learning_rate": 1.392730398804956e-06, "loss": 3.6494, "step": 63545 }, { "epoch": 0.6464640299479166, "grad_norm": 9.224186897277832, "learning_rate": 1.3923718928554428e-06, "loss": 3.1022, "step": 63550 }, { "epoch": 0.646514892578125, "grad_norm": 8.812080383300781, "learning_rate": 1.392013415244331e-06, "loss": 3.3728, "step": 63555 }, { "epoch": 0.6465657552083334, "grad_norm": 15.281486511230469, "learning_rate": 1.3916549659807938e-06, "loss": 3.4459, "step": 63560 }, { "epoch": 0.6466166178385416, "grad_norm": 12.427238464355469, "learning_rate": 1.3912965450740023e-06, "loss": 3.3156, "step": 63565 }, { "epoch": 0.64666748046875, "grad_norm": 15.172652244567871, "learning_rate": 1.3909381525331261e-06, "loss": 3.3442, "step": 63570 }, { "epoch": 0.6467183430989584, "grad_norm": 14.54151725769043, "learning_rate": 1.3905797883673332e-06, "loss": 3.1957, "step": 63575 }, { "epoch": 0.6467692057291666, "grad_norm": 14.314568519592285, "learning_rate": 1.3902214525857944e-06, "loss": 3.444, "step": 63580 }, { "epoch": 0.646820068359375, "grad_norm": 11.108024597167969, "learning_rate": 1.3898631451976757e-06, "loss": 3.1377, "step": 63585 }, { "epoch": 0.6468709309895834, "grad_norm": 13.669203758239746, "learning_rate": 1.3895048662121467e-06, "loss": 3.4922, "step": 63590 }, { "epoch": 0.6469217936197916, "grad_norm": 11.712994575500488, "learning_rate": 1.389146615638371e-06, "loss": 3.1179, "step": 63595 }, { "epoch": 0.64697265625, "grad_norm": 12.378658294677734, "learning_rate": 1.3887883934855173e-06, "loss": 3.9974, "step": 63600 }, { "epoch": 0.6470235188802084, "grad_norm": 10.653694152832031, "learning_rate": 1.3884301997627486e-06, "loss": 3.0061, "step": 63605 }, { "epoch": 0.6470743815104166, "grad_norm": 9.582548141479492, "learning_rate": 1.3880720344792293e-06, "loss": 3.8848, "step": 63610 }, { "epoch": 0.647125244140625, "grad_norm": 12.967094421386719, "learning_rate": 1.3877138976441228e-06, "loss": 3.3185, "step": 63615 }, { "epoch": 0.6471761067708334, "grad_norm": 11.003528594970703, "learning_rate": 1.3873557892665942e-06, "loss": 3.3306, "step": 63620 }, { "epoch": 0.6472269694010416, "grad_norm": 15.506131172180176, "learning_rate": 1.386997709355804e-06, "loss": 2.9387, "step": 63625 }, { "epoch": 0.64727783203125, "grad_norm": 9.115920066833496, "learning_rate": 1.3866396579209124e-06, "loss": 3.1316, "step": 63630 }, { "epoch": 0.6473286946614584, "grad_norm": 13.750835418701172, "learning_rate": 1.3862816349710812e-06, "loss": 3.2268, "step": 63635 }, { "epoch": 0.6473795572916666, "grad_norm": 14.21649169921875, "learning_rate": 1.3859236405154714e-06, "loss": 3.177, "step": 63640 }, { "epoch": 0.647430419921875, "grad_norm": 13.725008964538574, "learning_rate": 1.3855656745632412e-06, "loss": 3.3347, "step": 63645 }, { "epoch": 0.6474812825520834, "grad_norm": 11.765166282653809, "learning_rate": 1.3852077371235481e-06, "loss": 3.3822, "step": 63650 }, { "epoch": 0.6475321451822916, "grad_norm": 14.484723091125488, "learning_rate": 1.3848498282055518e-06, "loss": 3.5759, "step": 63655 }, { "epoch": 0.6475830078125, "grad_norm": 13.23550796508789, "learning_rate": 1.3844919478184072e-06, "loss": 3.4329, "step": 63660 }, { "epoch": 0.6476338704427084, "grad_norm": 13.231417655944824, "learning_rate": 1.384134095971273e-06, "loss": 3.3019, "step": 63665 }, { "epoch": 0.6476847330729166, "grad_norm": 8.883193016052246, "learning_rate": 1.3837762726733024e-06, "loss": 3.5572, "step": 63670 }, { "epoch": 0.647735595703125, "grad_norm": 15.052298545837402, "learning_rate": 1.383418477933652e-06, "loss": 3.5487, "step": 63675 }, { "epoch": 0.6477864583333334, "grad_norm": 13.665106773376465, "learning_rate": 1.3830607117614752e-06, "loss": 3.5822, "step": 63680 }, { "epoch": 0.6478373209635416, "grad_norm": 10.003432273864746, "learning_rate": 1.3827029741659248e-06, "loss": 3.4682, "step": 63685 }, { "epoch": 0.64788818359375, "grad_norm": 12.446817398071289, "learning_rate": 1.3823452651561536e-06, "loss": 3.4898, "step": 63690 }, { "epoch": 0.6479390462239584, "grad_norm": 8.958677291870117, "learning_rate": 1.381987584741315e-06, "loss": 3.4242, "step": 63695 }, { "epoch": 0.6479899088541666, "grad_norm": 9.530407905578613, "learning_rate": 1.381629932930559e-06, "loss": 3.4295, "step": 63700 }, { "epoch": 0.648040771484375, "grad_norm": 10.85719108581543, "learning_rate": 1.3812723097330353e-06, "loss": 3.2851, "step": 63705 }, { "epoch": 0.6480916341145834, "grad_norm": 10.73904037475586, "learning_rate": 1.380914715157895e-06, "loss": 3.4357, "step": 63710 }, { "epoch": 0.6481424967447916, "grad_norm": 12.596543312072754, "learning_rate": 1.3805571492142858e-06, "loss": 3.1528, "step": 63715 }, { "epoch": 0.648193359375, "grad_norm": 14.61168384552002, "learning_rate": 1.3801996119113576e-06, "loss": 3.0919, "step": 63720 }, { "epoch": 0.6482442220052084, "grad_norm": 10.773356437683105, "learning_rate": 1.3798421032582557e-06, "loss": 3.2884, "step": 63725 }, { "epoch": 0.6482950846354166, "grad_norm": 7.623642921447754, "learning_rate": 1.3794846232641291e-06, "loss": 3.6828, "step": 63730 }, { "epoch": 0.648345947265625, "grad_norm": 14.701728820800781, "learning_rate": 1.3791271719381227e-06, "loss": 3.4508, "step": 63735 }, { "epoch": 0.6483968098958334, "grad_norm": 14.30395221710205, "learning_rate": 1.3787697492893815e-06, "loss": 3.3275, "step": 63740 }, { "epoch": 0.6484476725260416, "grad_norm": 13.148125648498535, "learning_rate": 1.37841235532705e-06, "loss": 3.4249, "step": 63745 }, { "epoch": 0.64849853515625, "grad_norm": 17.876314163208008, "learning_rate": 1.3780549900602736e-06, "loss": 3.1441, "step": 63750 }, { "epoch": 0.6485493977864584, "grad_norm": 12.513371467590332, "learning_rate": 1.3776976534981945e-06, "loss": 3.1713, "step": 63755 }, { "epoch": 0.6486002604166666, "grad_norm": 10.822637557983398, "learning_rate": 1.3773403456499545e-06, "loss": 3.5461, "step": 63760 }, { "epoch": 0.648651123046875, "grad_norm": 12.970320701599121, "learning_rate": 1.3769830665246953e-06, "loss": 3.0942, "step": 63765 }, { "epoch": 0.6487019856770834, "grad_norm": 10.76432991027832, "learning_rate": 1.3766258161315577e-06, "loss": 2.9625, "step": 63770 }, { "epoch": 0.6487528483072916, "grad_norm": 8.161712646484375, "learning_rate": 1.3762685944796833e-06, "loss": 3.476, "step": 63775 }, { "epoch": 0.6488037109375, "grad_norm": 11.488871574401855, "learning_rate": 1.375911401578211e-06, "loss": 3.3814, "step": 63780 }, { "epoch": 0.6488545735677084, "grad_norm": 6.997217655181885, "learning_rate": 1.3755542374362776e-06, "loss": 3.1818, "step": 63785 }, { "epoch": 0.6489054361979166, "grad_norm": 14.117745399475098, "learning_rate": 1.375197102063023e-06, "loss": 3.3206, "step": 63790 }, { "epoch": 0.648956298828125, "grad_norm": 9.993836402893066, "learning_rate": 1.3748399954675845e-06, "loss": 3.5025, "step": 63795 }, { "epoch": 0.6490071614583334, "grad_norm": 11.988035202026367, "learning_rate": 1.374482917659097e-06, "loss": 3.3537, "step": 63800 }, { "epoch": 0.6490580240885416, "grad_norm": 10.683988571166992, "learning_rate": 1.3741258686466986e-06, "loss": 3.2564, "step": 63805 }, { "epoch": 0.64910888671875, "grad_norm": 13.883316040039062, "learning_rate": 1.3737688484395228e-06, "loss": 3.3515, "step": 63810 }, { "epoch": 0.6491597493489584, "grad_norm": 11.055418968200684, "learning_rate": 1.373411857046703e-06, "loss": 3.1519, "step": 63815 }, { "epoch": 0.6492106119791666, "grad_norm": 13.652414321899414, "learning_rate": 1.3730548944773739e-06, "loss": 3.3909, "step": 63820 }, { "epoch": 0.649261474609375, "grad_norm": 13.452717781066895, "learning_rate": 1.3726979607406693e-06, "loss": 3.365, "step": 63825 }, { "epoch": 0.6493123372395834, "grad_norm": 10.121183395385742, "learning_rate": 1.3723410558457202e-06, "loss": 3.2002, "step": 63830 }, { "epoch": 0.6493631998697916, "grad_norm": 13.718680381774902, "learning_rate": 1.3719841798016574e-06, "loss": 3.0125, "step": 63835 }, { "epoch": 0.6494140625, "grad_norm": 11.836580276489258, "learning_rate": 1.3716273326176117e-06, "loss": 3.7457, "step": 63840 }, { "epoch": 0.6494649251302084, "grad_norm": 9.903429985046387, "learning_rate": 1.3712705143027127e-06, "loss": 3.2571, "step": 63845 }, { "epoch": 0.6495157877604166, "grad_norm": 14.779627799987793, "learning_rate": 1.370913724866091e-06, "loss": 3.1684, "step": 63850 }, { "epoch": 0.649566650390625, "grad_norm": 7.905979156494141, "learning_rate": 1.3705569643168746e-06, "loss": 3.6488, "step": 63855 }, { "epoch": 0.6496175130208334, "grad_norm": 15.816743850708008, "learning_rate": 1.3702002326641893e-06, "loss": 3.2197, "step": 63860 }, { "epoch": 0.6496683756510416, "grad_norm": 10.404696464538574, "learning_rate": 1.3698435299171641e-06, "loss": 3.0273, "step": 63865 }, { "epoch": 0.64971923828125, "grad_norm": 13.25161361694336, "learning_rate": 1.3694868560849235e-06, "loss": 3.3951, "step": 63870 }, { "epoch": 0.6497701009114584, "grad_norm": 10.754583358764648, "learning_rate": 1.3691302111765945e-06, "loss": 3.5149, "step": 63875 }, { "epoch": 0.6498209635416666, "grad_norm": 10.602897644042969, "learning_rate": 1.3687735952013e-06, "loss": 2.9521, "step": 63880 }, { "epoch": 0.649871826171875, "grad_norm": 11.862292289733887, "learning_rate": 1.3684170081681663e-06, "loss": 3.2957, "step": 63885 }, { "epoch": 0.6499226888020834, "grad_norm": 8.14902400970459, "learning_rate": 1.3680604500863147e-06, "loss": 3.073, "step": 63890 }, { "epoch": 0.6499735514322916, "grad_norm": 9.84872817993164, "learning_rate": 1.3677039209648676e-06, "loss": 3.3011, "step": 63895 }, { "epoch": 0.6500244140625, "grad_norm": 10.788445472717285, "learning_rate": 1.3673474208129472e-06, "loss": 3.2758, "step": 63900 }, { "epoch": 0.6500752766927084, "grad_norm": 10.837233543395996, "learning_rate": 1.3669909496396756e-06, "loss": 3.5907, "step": 63905 }, { "epoch": 0.6501261393229166, "grad_norm": 7.292312145233154, "learning_rate": 1.366634507454172e-06, "loss": 3.4772, "step": 63910 }, { "epoch": 0.650177001953125, "grad_norm": 12.889204978942871, "learning_rate": 1.3662780942655549e-06, "loss": 3.3667, "step": 63915 }, { "epoch": 0.6502278645833334, "grad_norm": 11.986324310302734, "learning_rate": 1.3659217100829447e-06, "loss": 3.0141, "step": 63920 }, { "epoch": 0.6502787272135416, "grad_norm": 11.964778900146484, "learning_rate": 1.3655653549154582e-06, "loss": 2.9396, "step": 63925 }, { "epoch": 0.65032958984375, "grad_norm": 17.04004669189453, "learning_rate": 1.365209028772214e-06, "loss": 4.0778, "step": 63930 }, { "epoch": 0.6503804524739584, "grad_norm": 10.48777961730957, "learning_rate": 1.364852731662327e-06, "loss": 3.3832, "step": 63935 }, { "epoch": 0.6504313151041666, "grad_norm": 12.528505325317383, "learning_rate": 1.3644964635949147e-06, "loss": 3.5456, "step": 63940 }, { "epoch": 0.650482177734375, "grad_norm": 16.32647705078125, "learning_rate": 1.3641402245790902e-06, "loss": 3.1487, "step": 63945 }, { "epoch": 0.6505330403645834, "grad_norm": 13.028712272644043, "learning_rate": 1.3637840146239698e-06, "loss": 3.0298, "step": 63950 }, { "epoch": 0.6505839029947916, "grad_norm": 13.009231567382812, "learning_rate": 1.363427833738665e-06, "loss": 3.388, "step": 63955 }, { "epoch": 0.650634765625, "grad_norm": 11.50768756866455, "learning_rate": 1.3630716819322908e-06, "loss": 3.3371, "step": 63960 }, { "epoch": 0.6506856282552084, "grad_norm": 10.840215682983398, "learning_rate": 1.3627155592139579e-06, "loss": 3.0109, "step": 63965 }, { "epoch": 0.6507364908854166, "grad_norm": 8.058030128479004, "learning_rate": 1.3623594655927775e-06, "loss": 3.0849, "step": 63970 }, { "epoch": 0.650787353515625, "grad_norm": 12.560381889343262, "learning_rate": 1.36200340107786e-06, "loss": 3.0619, "step": 63975 }, { "epoch": 0.6508382161458334, "grad_norm": 11.920454025268555, "learning_rate": 1.3616473656783167e-06, "loss": 3.2486, "step": 63980 }, { "epoch": 0.6508890787760416, "grad_norm": 14.832955360412598, "learning_rate": 1.3612913594032555e-06, "loss": 3.0761, "step": 63985 }, { "epoch": 0.65093994140625, "grad_norm": 11.608518600463867, "learning_rate": 1.3609353822617855e-06, "loss": 3.1821, "step": 63990 }, { "epoch": 0.6509908040364584, "grad_norm": 9.848051071166992, "learning_rate": 1.3605794342630127e-06, "loss": 3.1642, "step": 63995 }, { "epoch": 0.6510416666666666, "grad_norm": 12.472249984741211, "learning_rate": 1.3602235154160448e-06, "loss": 3.4782, "step": 64000 }, { "epoch": 0.651092529296875, "grad_norm": 12.055642127990723, "learning_rate": 1.3598676257299892e-06, "loss": 3.3443, "step": 64005 }, { "epoch": 0.6511433919270834, "grad_norm": 11.859967231750488, "learning_rate": 1.3595117652139492e-06, "loss": 3.3312, "step": 64010 }, { "epoch": 0.6511942545572916, "grad_norm": 15.819765090942383, "learning_rate": 1.3591559338770312e-06, "loss": 3.5675, "step": 64015 }, { "epoch": 0.6512451171875, "grad_norm": 10.581780433654785, "learning_rate": 1.3588001317283384e-06, "loss": 3.2153, "step": 64020 }, { "epoch": 0.6512959798177084, "grad_norm": 12.730134963989258, "learning_rate": 1.3584443587769727e-06, "loss": 3.4063, "step": 64025 }, { "epoch": 0.6513468424479166, "grad_norm": 15.363131523132324, "learning_rate": 1.3580886150320374e-06, "loss": 3.0632, "step": 64030 }, { "epoch": 0.651397705078125, "grad_norm": 14.5416898727417, "learning_rate": 1.3577329005026351e-06, "loss": 3.3405, "step": 64035 }, { "epoch": 0.6514485677083334, "grad_norm": 15.410543441772461, "learning_rate": 1.3573772151978662e-06, "loss": 3.359, "step": 64040 }, { "epoch": 0.6514994303385416, "grad_norm": 12.796439170837402, "learning_rate": 1.3570215591268303e-06, "loss": 3.0914, "step": 64045 }, { "epoch": 0.65155029296875, "grad_norm": 10.722402572631836, "learning_rate": 1.3566659322986259e-06, "loss": 3.5775, "step": 64050 }, { "epoch": 0.6516011555989584, "grad_norm": 12.038302421569824, "learning_rate": 1.3563103347223523e-06, "loss": 3.0707, "step": 64055 }, { "epoch": 0.6516520182291666, "grad_norm": 14.053557395935059, "learning_rate": 1.3559547664071089e-06, "loss": 3.3114, "step": 64060 }, { "epoch": 0.651702880859375, "grad_norm": 11.177879333496094, "learning_rate": 1.3555992273619916e-06, "loss": 3.267, "step": 64065 }, { "epoch": 0.6517537434895834, "grad_norm": 12.664907455444336, "learning_rate": 1.3552437175960959e-06, "loss": 3.7841, "step": 64070 }, { "epoch": 0.6518046061197916, "grad_norm": 11.753214836120605, "learning_rate": 1.3548882371185195e-06, "loss": 3.5764, "step": 64075 }, { "epoch": 0.65185546875, "grad_norm": 10.01693344116211, "learning_rate": 1.354532785938355e-06, "loss": 3.4242, "step": 64080 }, { "epoch": 0.6519063313802084, "grad_norm": 10.688921928405762, "learning_rate": 1.3541773640646982e-06, "loss": 3.4629, "step": 64085 }, { "epoch": 0.6519571940104166, "grad_norm": 16.974910736083984, "learning_rate": 1.3538219715066414e-06, "loss": 3.2687, "step": 64090 }, { "epoch": 0.652008056640625, "grad_norm": 7.56576681137085, "learning_rate": 1.353466608273279e-06, "loss": 3.6427, "step": 64095 }, { "epoch": 0.6520589192708334, "grad_norm": 9.493888854980469, "learning_rate": 1.3531112743737002e-06, "loss": 3.6024, "step": 64100 }, { "epoch": 0.6521097819010416, "grad_norm": 8.88723373413086, "learning_rate": 1.3527559698169978e-06, "loss": 3.1188, "step": 64105 }, { "epoch": 0.65216064453125, "grad_norm": 9.895440101623535, "learning_rate": 1.352400694612263e-06, "loss": 3.0877, "step": 64110 }, { "epoch": 0.6522115071614584, "grad_norm": 8.68016529083252, "learning_rate": 1.3520454487685846e-06, "loss": 3.6815, "step": 64115 }, { "epoch": 0.6522623697916666, "grad_norm": 9.342655181884766, "learning_rate": 1.3516902322950508e-06, "loss": 3.3618, "step": 64120 }, { "epoch": 0.652313232421875, "grad_norm": 7.545049667358398, "learning_rate": 1.3513350452007498e-06, "loss": 3.3348, "step": 64125 }, { "epoch": 0.6523640950520834, "grad_norm": 12.7225341796875, "learning_rate": 1.3509798874947694e-06, "loss": 3.3068, "step": 64130 }, { "epoch": 0.6524149576822916, "grad_norm": 9.57697582244873, "learning_rate": 1.3506247591861969e-06, "loss": 3.4641, "step": 64135 }, { "epoch": 0.6524658203125, "grad_norm": 12.992835998535156, "learning_rate": 1.3502696602841176e-06, "loss": 3.3959, "step": 64140 }, { "epoch": 0.6525166829427084, "grad_norm": 9.261260986328125, "learning_rate": 1.3499145907976153e-06, "loss": 3.369, "step": 64145 }, { "epoch": 0.6525675455729166, "grad_norm": 14.930026054382324, "learning_rate": 1.3495595507357766e-06, "loss": 3.5845, "step": 64150 }, { "epoch": 0.652618408203125, "grad_norm": 7.567656993865967, "learning_rate": 1.3492045401076836e-06, "loss": 3.0222, "step": 64155 }, { "epoch": 0.6526692708333334, "grad_norm": 7.853155136108398, "learning_rate": 1.3488495589224205e-06, "loss": 3.2456, "step": 64160 }, { "epoch": 0.6527201334635416, "grad_norm": 10.342120170593262, "learning_rate": 1.3484946071890676e-06, "loss": 3.434, "step": 64165 }, { "epoch": 0.65277099609375, "grad_norm": 10.163643836975098, "learning_rate": 1.3481396849167078e-06, "loss": 3.0487, "step": 64170 }, { "epoch": 0.6528218587239584, "grad_norm": 11.490717887878418, "learning_rate": 1.3477847921144216e-06, "loss": 3.6839, "step": 64175 }, { "epoch": 0.6528727213541666, "grad_norm": 12.72451400756836, "learning_rate": 1.3474299287912873e-06, "loss": 3.5324, "step": 64180 }, { "epoch": 0.652923583984375, "grad_norm": 11.382487297058105, "learning_rate": 1.347075094956385e-06, "loss": 3.1754, "step": 64185 }, { "epoch": 0.6529744466145834, "grad_norm": 8.600659370422363, "learning_rate": 1.346720290618794e-06, "loss": 3.6986, "step": 64190 }, { "epoch": 0.6530253092447916, "grad_norm": 9.42199420928955, "learning_rate": 1.346365515787591e-06, "loss": 3.273, "step": 64195 }, { "epoch": 0.653076171875, "grad_norm": 11.763266563415527, "learning_rate": 1.3460107704718522e-06, "loss": 3.3125, "step": 64200 }, { "epoch": 0.6531270345052084, "grad_norm": 10.710270881652832, "learning_rate": 1.3456560546806553e-06, "loss": 3.0234, "step": 64205 }, { "epoch": 0.6531778971354166, "grad_norm": 8.050313949584961, "learning_rate": 1.3453013684230732e-06, "loss": 3.2202, "step": 64210 }, { "epoch": 0.653228759765625, "grad_norm": 10.442748069763184, "learning_rate": 1.3449467117081832e-06, "loss": 3.2625, "step": 64215 }, { "epoch": 0.6532796223958334, "grad_norm": 12.160253524780273, "learning_rate": 1.3445920845450567e-06, "loss": 3.2401, "step": 64220 }, { "epoch": 0.6533304850260416, "grad_norm": 7.322057247161865, "learning_rate": 1.3442374869427689e-06, "loss": 3.2435, "step": 64225 }, { "epoch": 0.65338134765625, "grad_norm": 13.100785255432129, "learning_rate": 1.343882918910391e-06, "loss": 3.2074, "step": 64230 }, { "epoch": 0.6534322102864584, "grad_norm": 11.584278106689453, "learning_rate": 1.3435283804569937e-06, "loss": 3.5782, "step": 64235 }, { "epoch": 0.6534830729166666, "grad_norm": 15.957377433776855, "learning_rate": 1.3431738715916488e-06, "loss": 2.9493, "step": 64240 }, { "epoch": 0.653533935546875, "grad_norm": 9.237360954284668, "learning_rate": 1.3428193923234267e-06, "loss": 3.064, "step": 64245 }, { "epoch": 0.6535847981770834, "grad_norm": 11.011441230773926, "learning_rate": 1.3424649426613967e-06, "loss": 3.4114, "step": 64250 }, { "epoch": 0.6536356608072916, "grad_norm": 11.149901390075684, "learning_rate": 1.3421105226146265e-06, "loss": 3.2155, "step": 64255 }, { "epoch": 0.6536865234375, "grad_norm": 10.68295955657959, "learning_rate": 1.3417561321921822e-06, "loss": 3.8012, "step": 64260 }, { "epoch": 0.6537373860677084, "grad_norm": 8.677666664123535, "learning_rate": 1.3414017714031347e-06, "loss": 3.3083, "step": 64265 }, { "epoch": 0.6537882486979166, "grad_norm": 7.5353569984436035, "learning_rate": 1.3410474402565482e-06, "loss": 3.1559, "step": 64270 }, { "epoch": 0.653839111328125, "grad_norm": 12.037880897521973, "learning_rate": 1.340693138761488e-06, "loss": 4.1406, "step": 64275 }, { "epoch": 0.6538899739583334, "grad_norm": 10.325486183166504, "learning_rate": 1.3403388669270183e-06, "loss": 3.5111, "step": 64280 }, { "epoch": 0.6539408365885416, "grad_norm": 9.48999309539795, "learning_rate": 1.3399846247622039e-06, "loss": 3.2343, "step": 64285 }, { "epoch": 0.65399169921875, "grad_norm": 9.181351661682129, "learning_rate": 1.3396304122761089e-06, "loss": 3.7118, "step": 64290 }, { "epoch": 0.6540425618489584, "grad_norm": 11.652165412902832, "learning_rate": 1.3392762294777938e-06, "loss": 3.2166, "step": 64295 }, { "epoch": 0.6540934244791666, "grad_norm": 8.631184577941895, "learning_rate": 1.3389220763763222e-06, "loss": 3.0276, "step": 64300 }, { "epoch": 0.654144287109375, "grad_norm": 12.19041633605957, "learning_rate": 1.3385679529807544e-06, "loss": 3.0959, "step": 64305 }, { "epoch": 0.6541951497395834, "grad_norm": 18.410381317138672, "learning_rate": 1.3382138593001486e-06, "loss": 3.5766, "step": 64310 }, { "epoch": 0.6542460123697916, "grad_norm": 16.024188995361328, "learning_rate": 1.3378597953435662e-06, "loss": 3.4945, "step": 64315 }, { "epoch": 0.654296875, "grad_norm": 11.737725257873535, "learning_rate": 1.3375057611200664e-06, "loss": 3.3308, "step": 64320 }, { "epoch": 0.6543477376302084, "grad_norm": 11.525810241699219, "learning_rate": 1.3371517566387062e-06, "loss": 3.2458, "step": 64325 }, { "epoch": 0.6543986002604166, "grad_norm": 13.881775856018066, "learning_rate": 1.3367977819085425e-06, "loss": 3.534, "step": 64330 }, { "epoch": 0.654449462890625, "grad_norm": 13.49406623840332, "learning_rate": 1.3364438369386313e-06, "loss": 3.2864, "step": 64335 }, { "epoch": 0.6545003255208334, "grad_norm": 15.493911743164062, "learning_rate": 1.3360899217380285e-06, "loss": 3.4563, "step": 64340 }, { "epoch": 0.6545511881510416, "grad_norm": 9.129322052001953, "learning_rate": 1.33573603631579e-06, "loss": 3.5747, "step": 64345 }, { "epoch": 0.65460205078125, "grad_norm": 15.619573593139648, "learning_rate": 1.335382180680969e-06, "loss": 3.0429, "step": 64350 }, { "epoch": 0.6546529134114584, "grad_norm": 10.902886390686035, "learning_rate": 1.3350283548426183e-06, "loss": 3.3387, "step": 64355 }, { "epoch": 0.6547037760416666, "grad_norm": 10.480283737182617, "learning_rate": 1.3346745588097918e-06, "loss": 3.66, "step": 64360 }, { "epoch": 0.654754638671875, "grad_norm": 15.958941459655762, "learning_rate": 1.3343207925915393e-06, "loss": 3.7825, "step": 64365 }, { "epoch": 0.6548055013020834, "grad_norm": 10.528629302978516, "learning_rate": 1.3339670561969142e-06, "loss": 3.2601, "step": 64370 }, { "epoch": 0.6548563639322916, "grad_norm": 11.030975341796875, "learning_rate": 1.3336133496349646e-06, "loss": 3.3519, "step": 64375 }, { "epoch": 0.6549072265625, "grad_norm": 11.710150718688965, "learning_rate": 1.3332596729147418e-06, "loss": 3.0692, "step": 64380 }, { "epoch": 0.6549580891927084, "grad_norm": 7.4101881980896, "learning_rate": 1.3329060260452938e-06, "loss": 3.3557, "step": 64385 }, { "epoch": 0.6550089518229166, "grad_norm": 10.991748809814453, "learning_rate": 1.3325524090356667e-06, "loss": 3.5383, "step": 64390 }, { "epoch": 0.655059814453125, "grad_norm": 15.911041259765625, "learning_rate": 1.3321988218949115e-06, "loss": 3.0189, "step": 64395 }, { "epoch": 0.6551106770833334, "grad_norm": 9.134468078613281, "learning_rate": 1.3318452646320723e-06, "loss": 3.0937, "step": 64400 }, { "epoch": 0.6551615397135416, "grad_norm": 16.44624900817871, "learning_rate": 1.3314917372561954e-06, "loss": 3.6389, "step": 64405 }, { "epoch": 0.65521240234375, "grad_norm": 12.228934288024902, "learning_rate": 1.3311382397763245e-06, "loss": 3.4447, "step": 64410 }, { "epoch": 0.6552632649739584, "grad_norm": 8.263579368591309, "learning_rate": 1.3307847722015043e-06, "loss": 3.2181, "step": 64415 }, { "epoch": 0.6553141276041666, "grad_norm": 14.126418113708496, "learning_rate": 1.3304313345407799e-06, "loss": 3.3393, "step": 64420 }, { "epoch": 0.655364990234375, "grad_norm": 7.036675453186035, "learning_rate": 1.3300779268031928e-06, "loss": 3.5008, "step": 64425 }, { "epoch": 0.6554158528645834, "grad_norm": 12.67174243927002, "learning_rate": 1.329724548997783e-06, "loss": 3.9012, "step": 64430 }, { "epoch": 0.6554667154947916, "grad_norm": 13.009026527404785, "learning_rate": 1.3293712011335946e-06, "loss": 3.0976, "step": 64435 }, { "epoch": 0.655517578125, "grad_norm": 9.975815773010254, "learning_rate": 1.3290178832196654e-06, "loss": 3.5631, "step": 64440 }, { "epoch": 0.6555684407552084, "grad_norm": 16.536361694335938, "learning_rate": 1.3286645952650373e-06, "loss": 3.4593, "step": 64445 }, { "epoch": 0.6556193033854166, "grad_norm": 9.64079475402832, "learning_rate": 1.328311337278747e-06, "loss": 3.235, "step": 64450 }, { "epoch": 0.655670166015625, "grad_norm": 11.996866226196289, "learning_rate": 1.327958109269834e-06, "loss": 3.7884, "step": 64455 }, { "epoch": 0.6557210286458334, "grad_norm": 14.872767448425293, "learning_rate": 1.3276049112473355e-06, "loss": 3.3653, "step": 64460 }, { "epoch": 0.6557718912760416, "grad_norm": 10.64519214630127, "learning_rate": 1.327251743220286e-06, "loss": 2.9918, "step": 64465 }, { "epoch": 0.65582275390625, "grad_norm": 12.196529388427734, "learning_rate": 1.3268986051977228e-06, "loss": 3.1684, "step": 64470 }, { "epoch": 0.6558736165364584, "grad_norm": 11.566841125488281, "learning_rate": 1.3265454971886818e-06, "loss": 3.5357, "step": 64475 }, { "epoch": 0.6559244791666666, "grad_norm": 11.732039451599121, "learning_rate": 1.3261924192021961e-06, "loss": 3.3351, "step": 64480 }, { "epoch": 0.655975341796875, "grad_norm": 11.234846115112305, "learning_rate": 1.3258393712472981e-06, "loss": 3.1402, "step": 64485 }, { "epoch": 0.6560262044270834, "grad_norm": 13.978314399719238, "learning_rate": 1.3254863533330226e-06, "loss": 3.3941, "step": 64490 }, { "epoch": 0.6560770670572916, "grad_norm": 8.966712951660156, "learning_rate": 1.325133365468399e-06, "loss": 3.417, "step": 64495 }, { "epoch": 0.6561279296875, "grad_norm": 10.272655487060547, "learning_rate": 1.324780407662461e-06, "loss": 3.2492, "step": 64500 }, { "epoch": 0.6561787923177084, "grad_norm": 16.021495819091797, "learning_rate": 1.3244274799242368e-06, "loss": 3.504, "step": 64505 }, { "epoch": 0.6562296549479166, "grad_norm": 13.629439353942871, "learning_rate": 1.324074582262758e-06, "loss": 3.47, "step": 64510 }, { "epoch": 0.656280517578125, "grad_norm": 12.70556354522705, "learning_rate": 1.323721714687052e-06, "loss": 3.1075, "step": 64515 }, { "epoch": 0.6563313802083334, "grad_norm": 10.02698040008545, "learning_rate": 1.3233688772061462e-06, "loss": 3.6611, "step": 64520 }, { "epoch": 0.6563822428385416, "grad_norm": 12.080476760864258, "learning_rate": 1.3230160698290689e-06, "loss": 3.3706, "step": 64525 }, { "epoch": 0.65643310546875, "grad_norm": 10.975835800170898, "learning_rate": 1.3226632925648474e-06, "loss": 3.0203, "step": 64530 }, { "epoch": 0.6564839680989584, "grad_norm": 12.529753684997559, "learning_rate": 1.3223105454225067e-06, "loss": 3.1196, "step": 64535 }, { "epoch": 0.6565348307291666, "grad_norm": 13.119550704956055, "learning_rate": 1.3219578284110711e-06, "loss": 3.1326, "step": 64540 }, { "epoch": 0.656585693359375, "grad_norm": 12.159619331359863, "learning_rate": 1.3216051415395645e-06, "loss": 3.1074, "step": 64545 }, { "epoch": 0.6566365559895834, "grad_norm": 17.395584106445312, "learning_rate": 1.321252484817011e-06, "loss": 3.4433, "step": 64550 }, { "epoch": 0.6566874186197916, "grad_norm": 7.426807880401611, "learning_rate": 1.3208998582524343e-06, "loss": 3.3032, "step": 64555 }, { "epoch": 0.65673828125, "grad_norm": 10.322322845458984, "learning_rate": 1.3205472618548554e-06, "loss": 3.3263, "step": 64560 }, { "epoch": 0.6567891438802084, "grad_norm": 14.927852630615234, "learning_rate": 1.320194695633294e-06, "loss": 3.6157, "step": 64565 }, { "epoch": 0.6568400065104166, "grad_norm": 12.767248153686523, "learning_rate": 1.3198421595967718e-06, "loss": 3.1727, "step": 64570 }, { "epoch": 0.656890869140625, "grad_norm": 15.87508487701416, "learning_rate": 1.319489653754309e-06, "loss": 3.2067, "step": 64575 }, { "epoch": 0.6569417317708334, "grad_norm": 12.932256698608398, "learning_rate": 1.3191371781149237e-06, "loss": 3.6383, "step": 64580 }, { "epoch": 0.6569925944010416, "grad_norm": 10.85139274597168, "learning_rate": 1.318784732687633e-06, "loss": 3.5736, "step": 64585 }, { "epoch": 0.65704345703125, "grad_norm": 12.298839569091797, "learning_rate": 1.3184323174814558e-06, "loss": 3.2548, "step": 64590 }, { "epoch": 0.6570943196614584, "grad_norm": 17.16855812072754, "learning_rate": 1.3180799325054067e-06, "loss": 3.4335, "step": 64595 }, { "epoch": 0.6571451822916666, "grad_norm": 13.399876594543457, "learning_rate": 1.3177275777685025e-06, "loss": 3.471, "step": 64600 }, { "epoch": 0.657196044921875, "grad_norm": 10.207135200500488, "learning_rate": 1.317375253279759e-06, "loss": 3.0937, "step": 64605 }, { "epoch": 0.6572469075520834, "grad_norm": 12.139558792114258, "learning_rate": 1.3170229590481892e-06, "loss": 3.4177, "step": 64610 }, { "epoch": 0.6572977701822916, "grad_norm": 12.48451042175293, "learning_rate": 1.3166706950828067e-06, "loss": 3.1147, "step": 64615 }, { "epoch": 0.6573486328125, "grad_norm": 12.349983215332031, "learning_rate": 1.3163184613926228e-06, "loss": 3.0056, "step": 64620 }, { "epoch": 0.6573994954427084, "grad_norm": 12.045706748962402, "learning_rate": 1.3159662579866505e-06, "loss": 3.3908, "step": 64625 }, { "epoch": 0.6574503580729166, "grad_norm": 10.621950149536133, "learning_rate": 1.315614084873902e-06, "loss": 3.4618, "step": 64630 }, { "epoch": 0.657501220703125, "grad_norm": 10.255273818969727, "learning_rate": 1.3152619420633864e-06, "loss": 3.2101, "step": 64635 }, { "epoch": 0.6575520833333334, "grad_norm": 17.442726135253906, "learning_rate": 1.3149098295641122e-06, "loss": 3.5172, "step": 64640 }, { "epoch": 0.6576029459635416, "grad_norm": 12.190653800964355, "learning_rate": 1.3145577473850902e-06, "loss": 3.4842, "step": 64645 }, { "epoch": 0.65765380859375, "grad_norm": 20.169771194458008, "learning_rate": 1.3142056955353262e-06, "loss": 3.3943, "step": 64650 }, { "epoch": 0.6577046712239584, "grad_norm": 9.705902099609375, "learning_rate": 1.3138536740238289e-06, "loss": 3.431, "step": 64655 }, { "epoch": 0.6577555338541666, "grad_norm": 15.668383598327637, "learning_rate": 1.3135016828596037e-06, "loss": 3.6356, "step": 64660 }, { "epoch": 0.657806396484375, "grad_norm": 12.712372779846191, "learning_rate": 1.3131497220516575e-06, "loss": 3.3855, "step": 64665 }, { "epoch": 0.6578572591145834, "grad_norm": 273.25811767578125, "learning_rate": 1.3127977916089948e-06, "loss": 3.7067, "step": 64670 }, { "epoch": 0.6579081217447916, "grad_norm": 11.827073097229004, "learning_rate": 1.312445891540618e-06, "loss": 3.6846, "step": 64675 }, { "epoch": 0.657958984375, "grad_norm": 14.099276542663574, "learning_rate": 1.3120940218555314e-06, "loss": 3.2874, "step": 64680 }, { "epoch": 0.6580098470052084, "grad_norm": 12.601127624511719, "learning_rate": 1.3117421825627388e-06, "loss": 3.2402, "step": 64685 }, { "epoch": 0.6580607096354166, "grad_norm": 9.837937355041504, "learning_rate": 1.3113903736712408e-06, "loss": 3.4378, "step": 64690 }, { "epoch": 0.658111572265625, "grad_norm": 17.775718688964844, "learning_rate": 1.3110385951900373e-06, "loss": 3.5461, "step": 64695 }, { "epoch": 0.6581624348958334, "grad_norm": 15.552910804748535, "learning_rate": 1.3106868471281305e-06, "loss": 3.7383, "step": 64700 }, { "epoch": 0.6582132975260416, "grad_norm": 11.443589210510254, "learning_rate": 1.310335129494518e-06, "loss": 3.5422, "step": 64705 }, { "epoch": 0.65826416015625, "grad_norm": 12.87744140625, "learning_rate": 1.3099834422981999e-06, "loss": 3.3954, "step": 64710 }, { "epoch": 0.6583150227864584, "grad_norm": 10.184366226196289, "learning_rate": 1.3096317855481727e-06, "loss": 3.0972, "step": 64715 }, { "epoch": 0.6583658854166666, "grad_norm": 13.616209030151367, "learning_rate": 1.3092801592534348e-06, "loss": 3.3293, "step": 64720 }, { "epoch": 0.658416748046875, "grad_norm": 12.936397552490234, "learning_rate": 1.3089285634229814e-06, "loss": 3.0804, "step": 64725 }, { "epoch": 0.6584676106770834, "grad_norm": 12.256488800048828, "learning_rate": 1.308576998065809e-06, "loss": 3.1282, "step": 64730 }, { "epoch": 0.6585184733072916, "grad_norm": 12.041582107543945, "learning_rate": 1.3082254631909106e-06, "loss": 3.386, "step": 64735 }, { "epoch": 0.6585693359375, "grad_norm": 14.85916519165039, "learning_rate": 1.3078739588072823e-06, "loss": 3.5176, "step": 64740 }, { "epoch": 0.6586201985677084, "grad_norm": 10.276881217956543, "learning_rate": 1.3075224849239162e-06, "loss": 3.3579, "step": 64745 }, { "epoch": 0.6586710611979166, "grad_norm": 14.437859535217285, "learning_rate": 1.3071710415498038e-06, "loss": 3.2329, "step": 64750 }, { "epoch": 0.658721923828125, "grad_norm": 10.961657524108887, "learning_rate": 1.3068196286939374e-06, "loss": 3.4126, "step": 64755 }, { "epoch": 0.6587727864583334, "grad_norm": 8.900579452514648, "learning_rate": 1.3064682463653091e-06, "loss": 3.3339, "step": 64760 }, { "epoch": 0.6588236490885416, "grad_norm": 14.60119915008545, "learning_rate": 1.3061168945729078e-06, "loss": 3.8722, "step": 64765 }, { "epoch": 0.65887451171875, "grad_norm": 13.560766220092773, "learning_rate": 1.3057655733257227e-06, "loss": 3.4208, "step": 64770 }, { "epoch": 0.6589253743489584, "grad_norm": 13.868523597717285, "learning_rate": 1.3054142826327414e-06, "loss": 3.4038, "step": 64775 }, { "epoch": 0.6589762369791666, "grad_norm": 9.550907135009766, "learning_rate": 1.3050630225029526e-06, "loss": 3.323, "step": 64780 }, { "epoch": 0.659027099609375, "grad_norm": 13.122942924499512, "learning_rate": 1.304711792945344e-06, "loss": 3.3643, "step": 64785 }, { "epoch": 0.6590779622395834, "grad_norm": 9.805845260620117, "learning_rate": 1.3043605939689e-06, "loss": 3.303, "step": 64790 }, { "epoch": 0.6591288248697916, "grad_norm": 10.915084838867188, "learning_rate": 1.3040094255826078e-06, "loss": 3.6199, "step": 64795 }, { "epoch": 0.6591796875, "grad_norm": 14.074462890625, "learning_rate": 1.3036582877954506e-06, "loss": 3.2834, "step": 64800 }, { "epoch": 0.6592305501302084, "grad_norm": 8.09543228149414, "learning_rate": 1.3033071806164122e-06, "loss": 3.2665, "step": 64805 }, { "epoch": 0.6592814127604166, "grad_norm": 6.727164268493652, "learning_rate": 1.3029561040544755e-06, "loss": 2.809, "step": 64810 }, { "epoch": 0.659332275390625, "grad_norm": 11.766030311584473, "learning_rate": 1.3026050581186242e-06, "loss": 3.7183, "step": 64815 }, { "epoch": 0.6593831380208334, "grad_norm": 12.66413402557373, "learning_rate": 1.3022540428178386e-06, "loss": 3.4647, "step": 64820 }, { "epoch": 0.6594340006510416, "grad_norm": 12.70841121673584, "learning_rate": 1.3019030581610998e-06, "loss": 3.4055, "step": 64825 }, { "epoch": 0.65948486328125, "grad_norm": 11.378216743469238, "learning_rate": 1.3015521041573858e-06, "loss": 3.2415, "step": 64830 }, { "epoch": 0.6595357259114584, "grad_norm": 9.645951271057129, "learning_rate": 1.3012011808156775e-06, "loss": 3.1352, "step": 64835 }, { "epoch": 0.6595865885416666, "grad_norm": 8.79885482788086, "learning_rate": 1.3008502881449536e-06, "loss": 3.1783, "step": 64840 }, { "epoch": 0.659637451171875, "grad_norm": 12.367588996887207, "learning_rate": 1.3004994261541908e-06, "loss": 3.4423, "step": 64845 }, { "epoch": 0.6596883138020834, "grad_norm": 19.567867279052734, "learning_rate": 1.3001485948523652e-06, "loss": 2.8524, "step": 64850 }, { "epoch": 0.6597391764322916, "grad_norm": 12.848206520080566, "learning_rate": 1.2997977942484541e-06, "loss": 3.3016, "step": 64855 }, { "epoch": 0.6597900390625, "grad_norm": 14.344475746154785, "learning_rate": 1.2994470243514311e-06, "loss": 3.3602, "step": 64860 }, { "epoch": 0.6598409016927084, "grad_norm": 16.25145149230957, "learning_rate": 1.2990962851702724e-06, "loss": 3.3631, "step": 64865 }, { "epoch": 0.6598917643229166, "grad_norm": 9.507192611694336, "learning_rate": 1.2987455767139496e-06, "loss": 3.2929, "step": 64870 }, { "epoch": 0.659942626953125, "grad_norm": 9.96194076538086, "learning_rate": 1.2983948989914375e-06, "loss": 3.2014, "step": 64875 }, { "epoch": 0.6599934895833334, "grad_norm": 14.200433731079102, "learning_rate": 1.298044252011706e-06, "loss": 3.3339, "step": 64880 }, { "epoch": 0.6600443522135416, "grad_norm": 11.347167015075684, "learning_rate": 1.2976936357837278e-06, "loss": 3.0248, "step": 64885 }, { "epoch": 0.66009521484375, "grad_norm": 12.991911888122559, "learning_rate": 1.2973430503164735e-06, "loss": 3.1184, "step": 64890 }, { "epoch": 0.6601460774739584, "grad_norm": 9.217988967895508, "learning_rate": 1.2969924956189122e-06, "loss": 3.4694, "step": 64895 }, { "epoch": 0.6601969401041666, "grad_norm": 15.204910278320312, "learning_rate": 1.296641971700013e-06, "loss": 3.0725, "step": 64900 }, { "epoch": 0.660247802734375, "grad_norm": 7.2170538902282715, "learning_rate": 1.2962914785687426e-06, "loss": 3.5342, "step": 64905 }, { "epoch": 0.6602986653645834, "grad_norm": 12.763792037963867, "learning_rate": 1.2959410162340694e-06, "loss": 3.3776, "step": 64910 }, { "epoch": 0.6603495279947916, "grad_norm": 10.381855010986328, "learning_rate": 1.295590584704961e-06, "loss": 3.1652, "step": 64915 }, { "epoch": 0.660400390625, "grad_norm": 14.777809143066406, "learning_rate": 1.295240183990382e-06, "loss": 3.2149, "step": 64920 }, { "epoch": 0.6604512532552084, "grad_norm": 17.232894897460938, "learning_rate": 1.294889814099296e-06, "loss": 3.5293, "step": 64925 }, { "epoch": 0.6605021158854166, "grad_norm": 15.255964279174805, "learning_rate": 1.2945394750406699e-06, "loss": 4.0128, "step": 64930 }, { "epoch": 0.660552978515625, "grad_norm": 17.207504272460938, "learning_rate": 1.2941891668234646e-06, "loss": 3.5108, "step": 64935 }, { "epoch": 0.6606038411458334, "grad_norm": 18.810527801513672, "learning_rate": 1.2938388894566445e-06, "loss": 3.5506, "step": 64940 }, { "epoch": 0.6606547037760416, "grad_norm": 17.15147590637207, "learning_rate": 1.2934886429491695e-06, "loss": 3.7975, "step": 64945 }, { "epoch": 0.66070556640625, "grad_norm": 14.673933982849121, "learning_rate": 1.2931384273100025e-06, "loss": 3.3426, "step": 64950 }, { "epoch": 0.6607564290364584, "grad_norm": 9.279642105102539, "learning_rate": 1.292788242548103e-06, "loss": 3.5146, "step": 64955 }, { "epoch": 0.6608072916666666, "grad_norm": 14.439388275146484, "learning_rate": 1.2924380886724292e-06, "loss": 3.5971, "step": 64960 }, { "epoch": 0.660858154296875, "grad_norm": 16.13722801208496, "learning_rate": 1.2920879656919405e-06, "loss": 3.2019, "step": 64965 }, { "epoch": 0.6609090169270834, "grad_norm": 13.01333999633789, "learning_rate": 1.2917378736155961e-06, "loss": 3.0868, "step": 64970 }, { "epoch": 0.6609598795572916, "grad_norm": 11.99376392364502, "learning_rate": 1.2913878124523515e-06, "loss": 3.2505, "step": 64975 }, { "epoch": 0.6610107421875, "grad_norm": 11.174510955810547, "learning_rate": 1.2910377822111624e-06, "loss": 3.5321, "step": 64980 }, { "epoch": 0.6610616048177084, "grad_norm": 11.970725059509277, "learning_rate": 1.2906877829009861e-06, "loss": 3.9141, "step": 64985 }, { "epoch": 0.6611124674479166, "grad_norm": 8.983936309814453, "learning_rate": 1.2903378145307755e-06, "loss": 3.2809, "step": 64990 }, { "epoch": 0.661163330078125, "grad_norm": 15.522686004638672, "learning_rate": 1.2899878771094857e-06, "loss": 3.2497, "step": 64995 }, { "epoch": 0.6612141927083334, "grad_norm": 10.832334518432617, "learning_rate": 1.2896379706460684e-06, "loss": 3.2215, "step": 65000 }, { "epoch": 0.6612650553385416, "grad_norm": 13.5785493850708, "learning_rate": 1.289288095149478e-06, "loss": 3.0875, "step": 65005 }, { "epoch": 0.66131591796875, "grad_norm": 14.808880805969238, "learning_rate": 1.2889382506286646e-06, "loss": 3.4202, "step": 65010 }, { "epoch": 0.6613667805989584, "grad_norm": 40.9049072265625, "learning_rate": 1.2885884370925777e-06, "loss": 3.6887, "step": 65015 }, { "epoch": 0.6614176432291666, "grad_norm": 8.365484237670898, "learning_rate": 1.2882386545501691e-06, "loss": 3.122, "step": 65020 }, { "epoch": 0.661468505859375, "grad_norm": 8.404397010803223, "learning_rate": 1.2878889030103879e-06, "loss": 3.0866, "step": 65025 }, { "epoch": 0.6615193684895834, "grad_norm": 8.957504272460938, "learning_rate": 1.2875391824821818e-06, "loss": 3.0308, "step": 65030 }, { "epoch": 0.6615702311197916, "grad_norm": 7.428070068359375, "learning_rate": 1.2871894929744971e-06, "loss": 3.4467, "step": 65035 }, { "epoch": 0.66162109375, "grad_norm": 10.792874336242676, "learning_rate": 1.2868398344962823e-06, "loss": 3.0817, "step": 65040 }, { "epoch": 0.6616719563802084, "grad_norm": 11.444332122802734, "learning_rate": 1.2864902070564834e-06, "loss": 3.0255, "step": 65045 }, { "epoch": 0.6617228190104166, "grad_norm": 8.817506790161133, "learning_rate": 1.286140610664045e-06, "loss": 3.2208, "step": 65050 }, { "epoch": 0.661773681640625, "grad_norm": 9.036532402038574, "learning_rate": 1.2857910453279115e-06, "loss": 3.2843, "step": 65055 }, { "epoch": 0.6618245442708334, "grad_norm": 14.336651802062988, "learning_rate": 1.2854415110570254e-06, "loss": 3.3392, "step": 65060 }, { "epoch": 0.6618754069010416, "grad_norm": 11.58194351196289, "learning_rate": 1.2850920078603302e-06, "loss": 3.5359, "step": 65065 }, { "epoch": 0.66192626953125, "grad_norm": 12.473958015441895, "learning_rate": 1.2847425357467688e-06, "loss": 3.5369, "step": 65070 }, { "epoch": 0.6619771321614584, "grad_norm": 18.386960983276367, "learning_rate": 1.284393094725282e-06, "loss": 3.4017, "step": 65075 }, { "epoch": 0.6620279947916666, "grad_norm": 11.408589363098145, "learning_rate": 1.2840436848048083e-06, "loss": 3.0546, "step": 65080 }, { "epoch": 0.662078857421875, "grad_norm": 15.049735069274902, "learning_rate": 1.28369430599429e-06, "loss": 3.1304, "step": 65085 }, { "epoch": 0.6621297200520834, "grad_norm": 11.646748542785645, "learning_rate": 1.2833449583026636e-06, "loss": 3.7416, "step": 65090 }, { "epoch": 0.6621805826822916, "grad_norm": 11.964768409729004, "learning_rate": 1.282995641738868e-06, "loss": 3.428, "step": 65095 }, { "epoch": 0.6622314453125, "grad_norm": 13.79321002960205, "learning_rate": 1.2826463563118414e-06, "loss": 3.1163, "step": 65100 }, { "epoch": 0.6622823079427084, "grad_norm": 12.12221622467041, "learning_rate": 1.282297102030519e-06, "loss": 3.9972, "step": 65105 }, { "epoch": 0.6623331705729166, "grad_norm": 7.613211154937744, "learning_rate": 1.281947878903837e-06, "loss": 3.2019, "step": 65110 }, { "epoch": 0.662384033203125, "grad_norm": 10.106460571289062, "learning_rate": 1.2815986869407288e-06, "loss": 3.0862, "step": 65115 }, { "epoch": 0.6624348958333334, "grad_norm": 7.456476211547852, "learning_rate": 1.281249526150129e-06, "loss": 3.1201, "step": 65120 }, { "epoch": 0.6624857584635416, "grad_norm": 13.168668746948242, "learning_rate": 1.2809003965409727e-06, "loss": 3.8123, "step": 65125 }, { "epoch": 0.66253662109375, "grad_norm": 15.872102737426758, "learning_rate": 1.2805512981221904e-06, "loss": 3.1302, "step": 65130 }, { "epoch": 0.6625874837239584, "grad_norm": 8.025379180908203, "learning_rate": 1.2802022309027134e-06, "loss": 2.9626, "step": 65135 }, { "epoch": 0.6626383463541666, "grad_norm": 10.427035331726074, "learning_rate": 1.2798531948914739e-06, "loss": 3.0251, "step": 65140 }, { "epoch": 0.662689208984375, "grad_norm": 13.071147918701172, "learning_rate": 1.2795041900974003e-06, "loss": 3.1159, "step": 65145 }, { "epoch": 0.6627400716145834, "grad_norm": 17.302522659301758, "learning_rate": 1.2791552165294241e-06, "loss": 3.2413, "step": 65150 }, { "epoch": 0.6627909342447916, "grad_norm": 13.62521743774414, "learning_rate": 1.2788062741964708e-06, "loss": 3.1752, "step": 65155 }, { "epoch": 0.662841796875, "grad_norm": 14.582704544067383, "learning_rate": 1.2784573631074708e-06, "loss": 3.2239, "step": 65160 }, { "epoch": 0.6628926595052084, "grad_norm": 9.311365127563477, "learning_rate": 1.2781084832713494e-06, "loss": 3.2253, "step": 65165 }, { "epoch": 0.6629435221354166, "grad_norm": 9.742881774902344, "learning_rate": 1.2777596346970323e-06, "loss": 3.5226, "step": 65170 }, { "epoch": 0.662994384765625, "grad_norm": 9.145849227905273, "learning_rate": 1.2774108173934447e-06, "loss": 3.7364, "step": 65175 }, { "epoch": 0.6630452473958334, "grad_norm": 10.365301132202148, "learning_rate": 1.2770620313695127e-06, "loss": 3.0558, "step": 65180 }, { "epoch": 0.6630961100260416, "grad_norm": 13.321115493774414, "learning_rate": 1.2767132766341588e-06, "loss": 3.2569, "step": 65185 }, { "epoch": 0.66314697265625, "grad_norm": 12.305617332458496, "learning_rate": 1.2763645531963048e-06, "loss": 3.1887, "step": 65190 }, { "epoch": 0.6631978352864584, "grad_norm": 15.394637107849121, "learning_rate": 1.2760158610648738e-06, "loss": 2.7999, "step": 65195 }, { "epoch": 0.6632486979166666, "grad_norm": 13.64035701751709, "learning_rate": 1.2756672002487877e-06, "loss": 3.4076, "step": 65200 }, { "epoch": 0.663299560546875, "grad_norm": 7.51201057434082, "learning_rate": 1.275318570756966e-06, "loss": 3.6468, "step": 65205 }, { "epoch": 0.6633504231770834, "grad_norm": 10.302927017211914, "learning_rate": 1.2749699725983278e-06, "loss": 3.2673, "step": 65210 }, { "epoch": 0.6634012858072916, "grad_norm": 8.08244514465332, "learning_rate": 1.2746214057817929e-06, "loss": 3.6082, "step": 65215 }, { "epoch": 0.6634521484375, "grad_norm": 12.379706382751465, "learning_rate": 1.2742728703162782e-06, "loss": 3.1576, "step": 65220 }, { "epoch": 0.6635030110677084, "grad_norm": 9.567678451538086, "learning_rate": 1.2739243662107025e-06, "loss": 3.1563, "step": 65225 }, { "epoch": 0.6635538736979166, "grad_norm": 10.015485763549805, "learning_rate": 1.2735758934739807e-06, "loss": 3.268, "step": 65230 }, { "epoch": 0.663604736328125, "grad_norm": 6.774351119995117, "learning_rate": 1.2732274521150296e-06, "loss": 3.3882, "step": 65235 }, { "epoch": 0.6636555989583334, "grad_norm": 7.824482440948486, "learning_rate": 1.2728790421427633e-06, "loss": 3.1738, "step": 65240 }, { "epoch": 0.6637064615885416, "grad_norm": 13.696547508239746, "learning_rate": 1.272530663566095e-06, "loss": 3.2887, "step": 65245 }, { "epoch": 0.66375732421875, "grad_norm": 11.98934268951416, "learning_rate": 1.2721823163939384e-06, "loss": 3.3336, "step": 65250 }, { "epoch": 0.6638081868489584, "grad_norm": 9.976804733276367, "learning_rate": 1.2718340006352075e-06, "loss": 3.6427, "step": 65255 }, { "epoch": 0.6638590494791666, "grad_norm": 11.145037651062012, "learning_rate": 1.2714857162988126e-06, "loss": 3.7012, "step": 65260 }, { "epoch": 0.663909912109375, "grad_norm": 11.77367877960205, "learning_rate": 1.2711374633936643e-06, "loss": 3.2743, "step": 65265 }, { "epoch": 0.6639607747395834, "grad_norm": 10.72303581237793, "learning_rate": 1.2707892419286721e-06, "loss": 3.3152, "step": 65270 }, { "epoch": 0.6640116373697916, "grad_norm": 13.609925270080566, "learning_rate": 1.2704410519127452e-06, "loss": 3.6111, "step": 65275 }, { "epoch": 0.6640625, "grad_norm": 11.802302360534668, "learning_rate": 1.270092893354794e-06, "loss": 4.0092, "step": 65280 }, { "epoch": 0.6641133626302084, "grad_norm": 9.450394630432129, "learning_rate": 1.2697447662637236e-06, "loss": 3.3385, "step": 65285 }, { "epoch": 0.6641642252604166, "grad_norm": 13.917665481567383, "learning_rate": 1.2693966706484425e-06, "loss": 3.2738, "step": 65290 }, { "epoch": 0.664215087890625, "grad_norm": 12.290202140808105, "learning_rate": 1.2690486065178562e-06, "loss": 3.377, "step": 65295 }, { "epoch": 0.6642659505208334, "grad_norm": 15.638297080993652, "learning_rate": 1.2687005738808684e-06, "loss": 3.3482, "step": 65300 }, { "epoch": 0.6643168131510416, "grad_norm": 18.216962814331055, "learning_rate": 1.2683525727463844e-06, "loss": 3.9138, "step": 65305 }, { "epoch": 0.66436767578125, "grad_norm": 9.221813201904297, "learning_rate": 1.268004603123309e-06, "loss": 3.3517, "step": 65310 }, { "epoch": 0.6644185384114584, "grad_norm": 13.317717552185059, "learning_rate": 1.2676566650205441e-06, "loss": 3.3212, "step": 65315 }, { "epoch": 0.6644694010416666, "grad_norm": 10.615367889404297, "learning_rate": 1.2673087584469909e-06, "loss": 3.8712, "step": 65320 }, { "epoch": 0.664520263671875, "grad_norm": 7.501321792602539, "learning_rate": 1.2669608834115492e-06, "loss": 3.2691, "step": 65325 }, { "epoch": 0.6645711263020834, "grad_norm": 16.984865188598633, "learning_rate": 1.266613039923123e-06, "loss": 3.2706, "step": 65330 }, { "epoch": 0.6646219889322916, "grad_norm": 9.495741844177246, "learning_rate": 1.2662652279906095e-06, "loss": 3.3808, "step": 65335 }, { "epoch": 0.6646728515625, "grad_norm": 12.779135704040527, "learning_rate": 1.265917447622908e-06, "loss": 3.5763, "step": 65340 }, { "epoch": 0.6647237141927084, "grad_norm": 12.259757995605469, "learning_rate": 1.2655696988289151e-06, "loss": 3.3813, "step": 65345 }, { "epoch": 0.6647745768229166, "grad_norm": 7.823262691497803, "learning_rate": 1.2652219816175287e-06, "loss": 3.591, "step": 65350 }, { "epoch": 0.664825439453125, "grad_norm": 12.372004508972168, "learning_rate": 1.2648742959976468e-06, "loss": 3.7868, "step": 65355 }, { "epoch": 0.6648763020833334, "grad_norm": 17.824193954467773, "learning_rate": 1.2645266419781626e-06, "loss": 3.3528, "step": 65360 }, { "epoch": 0.6649271647135416, "grad_norm": 13.596588134765625, "learning_rate": 1.2641790195679707e-06, "loss": 3.5585, "step": 65365 }, { "epoch": 0.66497802734375, "grad_norm": 10.611842155456543, "learning_rate": 1.2638314287759668e-06, "loss": 3.0753, "step": 65370 }, { "epoch": 0.6650288899739584, "grad_norm": 9.485777854919434, "learning_rate": 1.2634838696110414e-06, "loss": 3.1376, "step": 65375 }, { "epoch": 0.6650797526041666, "grad_norm": 10.109831809997559, "learning_rate": 1.2631363420820886e-06, "loss": 3.8677, "step": 65380 }, { "epoch": 0.665130615234375, "grad_norm": 14.614781379699707, "learning_rate": 1.2627888461979999e-06, "loss": 4.1296, "step": 65385 }, { "epoch": 0.6651814778645834, "grad_norm": 11.567127227783203, "learning_rate": 1.2624413819676657e-06, "loss": 3.334, "step": 65390 }, { "epoch": 0.6652323404947916, "grad_norm": 8.257408142089844, "learning_rate": 1.262093949399975e-06, "loss": 3.4889, "step": 65395 }, { "epoch": 0.665283203125, "grad_norm": 8.923105239868164, "learning_rate": 1.2617465485038163e-06, "loss": 4.0292, "step": 65400 }, { "epoch": 0.6653340657552084, "grad_norm": 8.343690872192383, "learning_rate": 1.2613991792880787e-06, "loss": 3.1172, "step": 65405 }, { "epoch": 0.6653849283854166, "grad_norm": 12.005006790161133, "learning_rate": 1.2610518417616507e-06, "loss": 3.5156, "step": 65410 }, { "epoch": 0.665435791015625, "grad_norm": 11.469767570495605, "learning_rate": 1.2607045359334175e-06, "loss": 3.4081, "step": 65415 }, { "epoch": 0.6654866536458334, "grad_norm": 10.728708267211914, "learning_rate": 1.2603572618122638e-06, "loss": 3.1886, "step": 65420 }, { "epoch": 0.6655375162760416, "grad_norm": 15.159509658813477, "learning_rate": 1.2600100194070769e-06, "loss": 3.2632, "step": 65425 }, { "epoch": 0.66558837890625, "grad_norm": 12.759864807128906, "learning_rate": 1.259662808726739e-06, "loss": 3.1454, "step": 65430 }, { "epoch": 0.6656392415364584, "grad_norm": 13.23811149597168, "learning_rate": 1.259315629780135e-06, "loss": 3.3728, "step": 65435 }, { "epoch": 0.6656901041666666, "grad_norm": 14.851045608520508, "learning_rate": 1.2589684825761456e-06, "loss": 3.0316, "step": 65440 }, { "epoch": 0.665740966796875, "grad_norm": 14.239526748657227, "learning_rate": 1.2586213671236541e-06, "loss": 3.3417, "step": 65445 }, { "epoch": 0.6657918294270834, "grad_norm": 12.545265197753906, "learning_rate": 1.258274283431541e-06, "loss": 3.4793, "step": 65450 }, { "epoch": 0.6658426920572916, "grad_norm": 16.657880783081055, "learning_rate": 1.2579272315086848e-06, "loss": 3.3026, "step": 65455 }, { "epoch": 0.6658935546875, "grad_norm": 12.7116117477417, "learning_rate": 1.2575802113639662e-06, "loss": 3.5199, "step": 65460 }, { "epoch": 0.6659444173177084, "grad_norm": 12.080648422241211, "learning_rate": 1.257233223006264e-06, "loss": 3.0086, "step": 65465 }, { "epoch": 0.6659952799479166, "grad_norm": 14.472729682922363, "learning_rate": 1.2568862664444557e-06, "loss": 3.0486, "step": 65470 }, { "epoch": 0.666046142578125, "grad_norm": 8.622594833374023, "learning_rate": 1.2565393416874164e-06, "loss": 3.0956, "step": 65475 }, { "epoch": 0.6660970052083334, "grad_norm": 10.788368225097656, "learning_rate": 1.2561924487440232e-06, "loss": 3.295, "step": 65480 }, { "epoch": 0.6661478678385416, "grad_norm": 15.795666694641113, "learning_rate": 1.2558455876231526e-06, "loss": 3.013, "step": 65485 }, { "epoch": 0.66619873046875, "grad_norm": 11.166401863098145, "learning_rate": 1.2554987583336779e-06, "loss": 3.1137, "step": 65490 }, { "epoch": 0.6662495930989584, "grad_norm": 9.57898998260498, "learning_rate": 1.2551519608844712e-06, "loss": 3.2857, "step": 65495 }, { "epoch": 0.6663004557291666, "grad_norm": 8.748499870300293, "learning_rate": 1.2548051952844075e-06, "loss": 3.0462, "step": 65500 }, { "epoch": 0.666351318359375, "grad_norm": 14.830123901367188, "learning_rate": 1.2544584615423569e-06, "loss": 3.1002, "step": 65505 }, { "epoch": 0.6664021809895834, "grad_norm": 14.823974609375, "learning_rate": 1.254111759667192e-06, "loss": 3.3103, "step": 65510 }, { "epoch": 0.6664530436197916, "grad_norm": 15.658598899841309, "learning_rate": 1.253765089667782e-06, "loss": 3.2588, "step": 65515 }, { "epoch": 0.66650390625, "grad_norm": 14.136911392211914, "learning_rate": 1.2534184515529974e-06, "loss": 3.39, "step": 65520 }, { "epoch": 0.6665547688802084, "grad_norm": 11.759278297424316, "learning_rate": 1.253071845331706e-06, "loss": 3.3365, "step": 65525 }, { "epoch": 0.6666056315104166, "grad_norm": 13.362621307373047, "learning_rate": 1.2527252710127756e-06, "loss": 3.4476, "step": 65530 }, { "epoch": 0.666656494140625, "grad_norm": 10.812849044799805, "learning_rate": 1.252378728605073e-06, "loss": 3.5454, "step": 65535 }, { "epoch": 0.6667073567708334, "grad_norm": 18.48908233642578, "learning_rate": 1.252032218117466e-06, "loss": 3.3882, "step": 65540 }, { "epoch": 0.6667582194010416, "grad_norm": 15.132722854614258, "learning_rate": 1.2516857395588188e-06, "loss": 3.2699, "step": 65545 }, { "epoch": 0.66680908203125, "grad_norm": 9.398686408996582, "learning_rate": 1.2513392929379965e-06, "loss": 3.2346, "step": 65550 }, { "epoch": 0.6668599446614584, "grad_norm": 9.966465950012207, "learning_rate": 1.2509928782638609e-06, "loss": 3.632, "step": 65555 }, { "epoch": 0.6669108072916666, "grad_norm": 19.779407501220703, "learning_rate": 1.2506464955452768e-06, "loss": 2.7112, "step": 65560 }, { "epoch": 0.666961669921875, "grad_norm": 10.571062088012695, "learning_rate": 1.2503001447911068e-06, "loss": 3.2902, "step": 65565 }, { "epoch": 0.6670125325520834, "grad_norm": 11.481815338134766, "learning_rate": 1.2499538260102105e-06, "loss": 4.1773, "step": 65570 }, { "epoch": 0.6670633951822916, "grad_norm": 10.567340850830078, "learning_rate": 1.2496075392114501e-06, "loss": 3.2373, "step": 65575 }, { "epoch": 0.6671142578125, "grad_norm": 14.8667631149292, "learning_rate": 1.2492612844036845e-06, "loss": 3.4899, "step": 65580 }, { "epoch": 0.6671651204427084, "grad_norm": 17.505634307861328, "learning_rate": 1.2489150615957715e-06, "loss": 3.4972, "step": 65585 }, { "epoch": 0.6672159830729166, "grad_norm": 10.369640350341797, "learning_rate": 1.24856887079657e-06, "loss": 3.08, "step": 65590 }, { "epoch": 0.667266845703125, "grad_norm": 9.872705459594727, "learning_rate": 1.2482227120149387e-06, "loss": 2.9665, "step": 65595 }, { "epoch": 0.6673177083333334, "grad_norm": 14.500247955322266, "learning_rate": 1.247876585259732e-06, "loss": 3.1538, "step": 65600 }, { "epoch": 0.6673685709635416, "grad_norm": 16.241443634033203, "learning_rate": 1.2475304905398067e-06, "loss": 3.0915, "step": 65605 }, { "epoch": 0.66741943359375, "grad_norm": 9.634785652160645, "learning_rate": 1.2471844278640156e-06, "loss": 3.2526, "step": 65610 }, { "epoch": 0.6674702962239584, "grad_norm": 15.240821838378906, "learning_rate": 1.246838397241214e-06, "loss": 3.0817, "step": 65615 }, { "epoch": 0.6675211588541666, "grad_norm": 8.462591171264648, "learning_rate": 1.246492398680256e-06, "loss": 3.1551, "step": 65620 }, { "epoch": 0.667572021484375, "grad_norm": 15.741432189941406, "learning_rate": 1.246146432189993e-06, "loss": 3.1494, "step": 65625 }, { "epoch": 0.6676228841145834, "grad_norm": 13.4065523147583, "learning_rate": 1.2458004977792752e-06, "loss": 3.5921, "step": 65630 }, { "epoch": 0.6676737467447916, "grad_norm": 12.74771785736084, "learning_rate": 1.2454545954569545e-06, "loss": 3.255, "step": 65635 }, { "epoch": 0.667724609375, "grad_norm": 12.701573371887207, "learning_rate": 1.2451087252318813e-06, "loss": 3.1132, "step": 65640 }, { "epoch": 0.6677754720052084, "grad_norm": 17.424406051635742, "learning_rate": 1.244762887112904e-06, "loss": 3.5127, "step": 65645 }, { "epoch": 0.6678263346354166, "grad_norm": 19.323753356933594, "learning_rate": 1.2444170811088701e-06, "loss": 2.8202, "step": 65650 }, { "epoch": 0.667877197265625, "grad_norm": 12.669976234436035, "learning_rate": 1.2440713072286285e-06, "loss": 3.2664, "step": 65655 }, { "epoch": 0.6679280598958334, "grad_norm": 11.021958351135254, "learning_rate": 1.2437255654810234e-06, "loss": 3.447, "step": 65660 }, { "epoch": 0.6679789225260416, "grad_norm": 15.243814468383789, "learning_rate": 1.2433798558749032e-06, "loss": 3.3269, "step": 65665 }, { "epoch": 0.66802978515625, "grad_norm": 12.763647079467773, "learning_rate": 1.2430341784191105e-06, "loss": 3.2928, "step": 65670 }, { "epoch": 0.6680806477864584, "grad_norm": 9.308406829833984, "learning_rate": 1.2426885331224913e-06, "loss": 3.4037, "step": 65675 }, { "epoch": 0.6681315104166666, "grad_norm": 14.168586730957031, "learning_rate": 1.2423429199938882e-06, "loss": 3.1222, "step": 65680 }, { "epoch": 0.668182373046875, "grad_norm": 12.823803901672363, "learning_rate": 1.2419973390421421e-06, "loss": 3.471, "step": 65685 }, { "epoch": 0.6682332356770834, "grad_norm": 9.29395580291748, "learning_rate": 1.2416517902760963e-06, "loss": 3.3921, "step": 65690 }, { "epoch": 0.6682840983072916, "grad_norm": 9.270599365234375, "learning_rate": 1.241306273704592e-06, "loss": 3.2971, "step": 65695 }, { "epoch": 0.6683349609375, "grad_norm": 10.670681953430176, "learning_rate": 1.2409607893364684e-06, "loss": 3.2714, "step": 65700 }, { "epoch": 0.6683858235677084, "grad_norm": 10.184364318847656, "learning_rate": 1.2406153371805636e-06, "loss": 3.5247, "step": 65705 }, { "epoch": 0.6684366861979166, "grad_norm": 15.072261810302734, "learning_rate": 1.2402699172457175e-06, "loss": 3.3025, "step": 65710 }, { "epoch": 0.668487548828125, "grad_norm": 14.222228050231934, "learning_rate": 1.2399245295407663e-06, "loss": 3.4413, "step": 65715 }, { "epoch": 0.6685384114583334, "grad_norm": 11.479125022888184, "learning_rate": 1.2395791740745482e-06, "loss": 3.1769, "step": 65720 }, { "epoch": 0.6685892740885416, "grad_norm": 11.094748497009277, "learning_rate": 1.2392338508558975e-06, "loss": 3.2729, "step": 65725 }, { "epoch": 0.66864013671875, "grad_norm": 13.094012260437012, "learning_rate": 1.2388885598936507e-06, "loss": 3.2583, "step": 65730 }, { "epoch": 0.6686909993489584, "grad_norm": 9.674267768859863, "learning_rate": 1.2385433011966412e-06, "loss": 3.5974, "step": 65735 }, { "epoch": 0.6687418619791666, "grad_norm": 11.32293701171875, "learning_rate": 1.238198074773701e-06, "loss": 3.2655, "step": 65740 }, { "epoch": 0.668792724609375, "grad_norm": 14.59813117980957, "learning_rate": 1.2378528806336643e-06, "loss": 3.2294, "step": 65745 }, { "epoch": 0.6688435872395834, "grad_norm": 13.594170570373535, "learning_rate": 1.2375077187853635e-06, "loss": 3.0215, "step": 65750 }, { "epoch": 0.6688944498697916, "grad_norm": 17.96681785583496, "learning_rate": 1.2371625892376284e-06, "loss": 3.6447, "step": 65755 }, { "epoch": 0.6689453125, "grad_norm": 14.56413459777832, "learning_rate": 1.2368174919992892e-06, "loss": 3.4442, "step": 65760 }, { "epoch": 0.6689961751302084, "grad_norm": 16.94887351989746, "learning_rate": 1.2364724270791738e-06, "loss": 3.2471, "step": 65765 }, { "epoch": 0.6690470377604166, "grad_norm": 7.452788829803467, "learning_rate": 1.2361273944861124e-06, "loss": 3.2806, "step": 65770 }, { "epoch": 0.669097900390625, "grad_norm": 13.136384010314941, "learning_rate": 1.2357823942289326e-06, "loss": 3.2754, "step": 65775 }, { "epoch": 0.6691487630208334, "grad_norm": 10.442337036132812, "learning_rate": 1.2354374263164599e-06, "loss": 3.3363, "step": 65780 }, { "epoch": 0.6691996256510416, "grad_norm": 10.758193016052246, "learning_rate": 1.2350924907575216e-06, "loss": 3.4805, "step": 65785 }, { "epoch": 0.66925048828125, "grad_norm": 7.44234037399292, "learning_rate": 1.2347475875609414e-06, "loss": 3.1424, "step": 65790 }, { "epoch": 0.6693013509114584, "grad_norm": 14.84299087524414, "learning_rate": 1.2344027167355449e-06, "loss": 3.5229, "step": 65795 }, { "epoch": 0.6693522135416666, "grad_norm": 14.938111305236816, "learning_rate": 1.2340578782901542e-06, "loss": 3.1912, "step": 65800 }, { "epoch": 0.669403076171875, "grad_norm": 8.341020584106445, "learning_rate": 1.2337130722335936e-06, "loss": 3.4417, "step": 65805 }, { "epoch": 0.6694539388020834, "grad_norm": 11.463062286376953, "learning_rate": 1.2333682985746836e-06, "loss": 3.202, "step": 65810 }, { "epoch": 0.6695048014322916, "grad_norm": 15.396414756774902, "learning_rate": 1.233023557322245e-06, "loss": 3.2796, "step": 65815 }, { "epoch": 0.6695556640625, "grad_norm": 8.394917488098145, "learning_rate": 1.2326788484850979e-06, "loss": 3.2157, "step": 65820 }, { "epoch": 0.6696065266927084, "grad_norm": 10.8167085647583, "learning_rate": 1.2323341720720634e-06, "loss": 3.7603, "step": 65825 }, { "epoch": 0.6696573893229166, "grad_norm": 16.594776153564453, "learning_rate": 1.2319895280919583e-06, "loss": 3.1545, "step": 65830 }, { "epoch": 0.669708251953125, "grad_norm": 12.708965301513672, "learning_rate": 1.2316449165536007e-06, "loss": 3.4222, "step": 65835 }, { "epoch": 0.6697591145833334, "grad_norm": 10.709924697875977, "learning_rate": 1.2313003374658063e-06, "loss": 3.4012, "step": 65840 }, { "epoch": 0.6698099772135416, "grad_norm": 11.355735778808594, "learning_rate": 1.2309557908373923e-06, "loss": 3.2987, "step": 65845 }, { "epoch": 0.66986083984375, "grad_norm": 13.788281440734863, "learning_rate": 1.2306112766771744e-06, "loss": 2.961, "step": 65850 }, { "epoch": 0.6699117024739584, "grad_norm": 13.849200248718262, "learning_rate": 1.2302667949939662e-06, "loss": 3.3111, "step": 65855 }, { "epoch": 0.6699625651041666, "grad_norm": 13.513203620910645, "learning_rate": 1.2299223457965798e-06, "loss": 3.6833, "step": 65860 }, { "epoch": 0.670013427734375, "grad_norm": 10.663004875183105, "learning_rate": 1.2295779290938304e-06, "loss": 3.1343, "step": 65865 }, { "epoch": 0.6700642903645834, "grad_norm": 16.419111251831055, "learning_rate": 1.2292335448945275e-06, "loss": 3.4315, "step": 65870 }, { "epoch": 0.6701151529947916, "grad_norm": 10.535104751586914, "learning_rate": 1.2288891932074831e-06, "loss": 3.0774, "step": 65875 }, { "epoch": 0.670166015625, "grad_norm": 10.039164543151855, "learning_rate": 1.2285448740415085e-06, "loss": 3.3215, "step": 65880 }, { "epoch": 0.6702168782552084, "grad_norm": 15.77993392944336, "learning_rate": 1.2282005874054118e-06, "loss": 3.4125, "step": 65885 }, { "epoch": 0.6702677408854166, "grad_norm": 11.289288520812988, "learning_rate": 1.2278563333080017e-06, "loss": 3.9813, "step": 65890 }, { "epoch": 0.670318603515625, "grad_norm": 9.770861625671387, "learning_rate": 1.2275121117580848e-06, "loss": 3.2592, "step": 65895 }, { "epoch": 0.6703694661458334, "grad_norm": 10.595755577087402, "learning_rate": 1.227167922764469e-06, "loss": 3.3272, "step": 65900 }, { "epoch": 0.6704203287760416, "grad_norm": 13.553621292114258, "learning_rate": 1.226823766335961e-06, "loss": 3.142, "step": 65905 }, { "epoch": 0.67047119140625, "grad_norm": 10.56498908996582, "learning_rate": 1.226479642481365e-06, "loss": 3.2443, "step": 65910 }, { "epoch": 0.6705220540364584, "grad_norm": 9.785940170288086, "learning_rate": 1.226135551209485e-06, "loss": 3.1709, "step": 65915 }, { "epoch": 0.6705729166666666, "grad_norm": 18.74580192565918, "learning_rate": 1.2257914925291258e-06, "loss": 3.6727, "step": 65920 }, { "epoch": 0.670623779296875, "grad_norm": 14.1307373046875, "learning_rate": 1.2254474664490882e-06, "loss": 3.3455, "step": 65925 }, { "epoch": 0.6706746419270834, "grad_norm": 13.096781730651855, "learning_rate": 1.2251034729781761e-06, "loss": 3.0302, "step": 65930 }, { "epoch": 0.6707255045572916, "grad_norm": 8.60185432434082, "learning_rate": 1.224759512125188e-06, "loss": 3.2431, "step": 65935 }, { "epoch": 0.6707763671875, "grad_norm": 13.76044750213623, "learning_rate": 1.2244155838989272e-06, "loss": 3.061, "step": 65940 }, { "epoch": 0.6708272298177084, "grad_norm": 14.920628547668457, "learning_rate": 1.2240716883081902e-06, "loss": 3.2226, "step": 65945 }, { "epoch": 0.6708780924479166, "grad_norm": 14.226794242858887, "learning_rate": 1.2237278253617774e-06, "loss": 3.6226, "step": 65950 }, { "epoch": 0.670928955078125, "grad_norm": 8.770042419433594, "learning_rate": 1.2233839950684848e-06, "loss": 3.4317, "step": 65955 }, { "epoch": 0.6709798177083334, "grad_norm": 11.76567554473877, "learning_rate": 1.223040197437111e-06, "loss": 3.2916, "step": 65960 }, { "epoch": 0.6710306803385416, "grad_norm": 16.681499481201172, "learning_rate": 1.2226964324764513e-06, "loss": 3.2829, "step": 65965 }, { "epoch": 0.67108154296875, "grad_norm": 14.340017318725586, "learning_rate": 1.2223527001952992e-06, "loss": 3.1534, "step": 65970 }, { "epoch": 0.6711324055989584, "grad_norm": 18.81924057006836, "learning_rate": 1.2220090006024506e-06, "loss": 3.2541, "step": 65975 }, { "epoch": 0.6711832682291666, "grad_norm": 13.129728317260742, "learning_rate": 1.2216653337067e-06, "loss": 3.2745, "step": 65980 }, { "epoch": 0.671234130859375, "grad_norm": 9.782035827636719, "learning_rate": 1.2213216995168386e-06, "loss": 3.3167, "step": 65985 }, { "epoch": 0.6712849934895834, "grad_norm": 16.59641456604004, "learning_rate": 1.2209780980416577e-06, "loss": 3.2621, "step": 65990 }, { "epoch": 0.6713358561197916, "grad_norm": 15.288775444030762, "learning_rate": 1.22063452928995e-06, "loss": 3.3434, "step": 65995 }, { "epoch": 0.67138671875, "grad_norm": 9.345989227294922, "learning_rate": 1.2202909932705034e-06, "loss": 3.2314, "step": 66000 }, { "epoch": 0.6714375813802084, "grad_norm": 14.896824836730957, "learning_rate": 1.2199474899921095e-06, "loss": 3.7613, "step": 66005 }, { "epoch": 0.6714884440104166, "grad_norm": 12.153127670288086, "learning_rate": 1.2196040194635547e-06, "loss": 2.748, "step": 66010 }, { "epoch": 0.671539306640625, "grad_norm": 9.977337837219238, "learning_rate": 1.2192605816936285e-06, "loss": 3.1902, "step": 66015 }, { "epoch": 0.6715901692708334, "grad_norm": 13.28049087524414, "learning_rate": 1.2189171766911168e-06, "loss": 3.3504, "step": 66020 }, { "epoch": 0.6716410319010416, "grad_norm": 13.772388458251953, "learning_rate": 1.2185738044648044e-06, "loss": 3.5055, "step": 66025 }, { "epoch": 0.67169189453125, "grad_norm": 16.815696716308594, "learning_rate": 1.2182304650234778e-06, "loss": 3.1191, "step": 66030 }, { "epoch": 0.6717427571614584, "grad_norm": 10.708442687988281, "learning_rate": 1.2178871583759213e-06, "loss": 3.3703, "step": 66035 }, { "epoch": 0.6717936197916666, "grad_norm": 12.045191764831543, "learning_rate": 1.2175438845309185e-06, "loss": 3.4816, "step": 66040 }, { "epoch": 0.671844482421875, "grad_norm": 13.626344680786133, "learning_rate": 1.2172006434972514e-06, "loss": 3.7009, "step": 66045 }, { "epoch": 0.6718953450520834, "grad_norm": 11.73372745513916, "learning_rate": 1.2168574352837006e-06, "loss": 2.9432, "step": 66050 }, { "epoch": 0.6719462076822916, "grad_norm": 12.726306915283203, "learning_rate": 1.216514259899048e-06, "loss": 2.8776, "step": 66055 }, { "epoch": 0.6719970703125, "grad_norm": 16.720542907714844, "learning_rate": 1.2161711173520754e-06, "loss": 3.4371, "step": 66060 }, { "epoch": 0.6720479329427084, "grad_norm": 13.193948745727539, "learning_rate": 1.215828007651559e-06, "loss": 3.7822, "step": 66065 }, { "epoch": 0.6720987955729166, "grad_norm": 9.035006523132324, "learning_rate": 1.2154849308062799e-06, "loss": 3.2921, "step": 66070 }, { "epoch": 0.672149658203125, "grad_norm": 17.360515594482422, "learning_rate": 1.2151418868250142e-06, "loss": 3.8383, "step": 66075 }, { "epoch": 0.6722005208333334, "grad_norm": 14.382808685302734, "learning_rate": 1.214798875716538e-06, "loss": 3.5088, "step": 66080 }, { "epoch": 0.6722513834635416, "grad_norm": 8.316353797912598, "learning_rate": 1.2144558974896278e-06, "loss": 2.9866, "step": 66085 }, { "epoch": 0.67230224609375, "grad_norm": 12.267139434814453, "learning_rate": 1.2141129521530598e-06, "loss": 3.2495, "step": 66090 }, { "epoch": 0.6723531087239584, "grad_norm": 13.435066223144531, "learning_rate": 1.2137700397156069e-06, "loss": 3.3119, "step": 66095 }, { "epoch": 0.6724039713541666, "grad_norm": 12.881227493286133, "learning_rate": 1.213427160186042e-06, "loss": 3.1359, "step": 66100 }, { "epoch": 0.672454833984375, "grad_norm": 11.653464317321777, "learning_rate": 1.2130843135731379e-06, "loss": 3.1652, "step": 66105 }, { "epoch": 0.6725056966145834, "grad_norm": 16.216772079467773, "learning_rate": 1.212741499885668e-06, "loss": 3.8288, "step": 66110 }, { "epoch": 0.6725565592447916, "grad_norm": 13.487979888916016, "learning_rate": 1.2123987191324016e-06, "loss": 3.4653, "step": 66115 }, { "epoch": 0.672607421875, "grad_norm": 10.186954498291016, "learning_rate": 1.2120559713221087e-06, "loss": 3.3901, "step": 66120 }, { "epoch": 0.6726582845052084, "grad_norm": 13.468384742736816, "learning_rate": 1.2117132564635572e-06, "loss": 3.4884, "step": 66125 }, { "epoch": 0.6727091471354166, "grad_norm": 8.447620391845703, "learning_rate": 1.2113705745655173e-06, "loss": 3.0469, "step": 66130 }, { "epoch": 0.672760009765625, "grad_norm": 14.793883323669434, "learning_rate": 1.2110279256367563e-06, "loss": 3.0333, "step": 66135 }, { "epoch": 0.6728108723958334, "grad_norm": 11.138039588928223, "learning_rate": 1.2106853096860404e-06, "loss": 3.3258, "step": 66140 }, { "epoch": 0.6728617350260416, "grad_norm": 31.979942321777344, "learning_rate": 1.2103427267221344e-06, "loss": 3.856, "step": 66145 }, { "epoch": 0.67291259765625, "grad_norm": 15.37503433227539, "learning_rate": 1.210000176753805e-06, "loss": 3.3685, "step": 66150 }, { "epoch": 0.6729634602864584, "grad_norm": 12.416085243225098, "learning_rate": 1.2096576597898145e-06, "loss": 3.3378, "step": 66155 }, { "epoch": 0.6730143229166666, "grad_norm": 15.238469123840332, "learning_rate": 1.2093151758389276e-06, "loss": 3.1085, "step": 66160 }, { "epoch": 0.673065185546875, "grad_norm": 9.534880638122559, "learning_rate": 1.2089727249099052e-06, "loss": 3.3571, "step": 66165 }, { "epoch": 0.6731160481770834, "grad_norm": 9.172733306884766, "learning_rate": 1.2086303070115104e-06, "loss": 3.7219, "step": 66170 }, { "epoch": 0.6731669108072916, "grad_norm": 12.223248481750488, "learning_rate": 1.2082879221525032e-06, "loss": 3.1753, "step": 66175 }, { "epoch": 0.6732177734375, "grad_norm": 13.02455997467041, "learning_rate": 1.2079455703416425e-06, "loss": 3.2062, "step": 66180 }, { "epoch": 0.6732686360677084, "grad_norm": 13.005733489990234, "learning_rate": 1.2076032515876881e-06, "loss": 3.0596, "step": 66185 }, { "epoch": 0.6733194986979166, "grad_norm": 12.467793464660645, "learning_rate": 1.207260965899399e-06, "loss": 3.3055, "step": 66190 }, { "epoch": 0.673370361328125, "grad_norm": 12.573270797729492, "learning_rate": 1.2069187132855317e-06, "loss": 3.7388, "step": 66195 }, { "epoch": 0.6734212239583334, "grad_norm": 13.337157249450684, "learning_rate": 1.2065764937548419e-06, "loss": 3.2291, "step": 66200 }, { "epoch": 0.6734720865885416, "grad_norm": 9.928634643554688, "learning_rate": 1.2062343073160867e-06, "loss": 3.0714, "step": 66205 }, { "epoch": 0.67352294921875, "grad_norm": 10.728508949279785, "learning_rate": 1.2058921539780192e-06, "loss": 4.0894, "step": 66210 }, { "epoch": 0.6735738118489584, "grad_norm": 12.563323020935059, "learning_rate": 1.205550033749395e-06, "loss": 3.1733, "step": 66215 }, { "epoch": 0.6736246744791666, "grad_norm": 9.485886573791504, "learning_rate": 1.2052079466389658e-06, "loss": 3.3199, "step": 66220 }, { "epoch": 0.673675537109375, "grad_norm": 8.653766632080078, "learning_rate": 1.204865892655485e-06, "loss": 4.2234, "step": 66225 }, { "epoch": 0.6737263997395834, "grad_norm": 14.788777351379395, "learning_rate": 1.2045238718077035e-06, "loss": 3.2298, "step": 66230 }, { "epoch": 0.6737772623697916, "grad_norm": 12.110572814941406, "learning_rate": 1.2041818841043707e-06, "loss": 3.0991, "step": 66235 }, { "epoch": 0.673828125, "grad_norm": 7.55382776260376, "learning_rate": 1.203839929554237e-06, "loss": 3.5065, "step": 66240 }, { "epoch": 0.6738789876302084, "grad_norm": 13.87610912322998, "learning_rate": 1.2034980081660527e-06, "loss": 3.0659, "step": 66245 }, { "epoch": 0.6739298502604166, "grad_norm": 13.413716316223145, "learning_rate": 1.2031561199485644e-06, "loss": 3.2979, "step": 66250 }, { "epoch": 0.673980712890625, "grad_norm": 8.117220878601074, "learning_rate": 1.2028142649105196e-06, "loss": 3.3326, "step": 66255 }, { "epoch": 0.6740315755208334, "grad_norm": 14.149643898010254, "learning_rate": 1.2024724430606624e-06, "loss": 2.9656, "step": 66260 }, { "epoch": 0.6740824381510416, "grad_norm": 12.142016410827637, "learning_rate": 1.202130654407742e-06, "loss": 2.9977, "step": 66265 }, { "epoch": 0.67413330078125, "grad_norm": 8.97005844116211, "learning_rate": 1.201788898960501e-06, "loss": 3.6632, "step": 66270 }, { "epoch": 0.6741841634114584, "grad_norm": 11.278656959533691, "learning_rate": 1.2014471767276827e-06, "loss": 3.1504, "step": 66275 }, { "epoch": 0.6742350260416666, "grad_norm": 11.183878898620605, "learning_rate": 1.2011054877180314e-06, "loss": 3.4126, "step": 66280 }, { "epoch": 0.674285888671875, "grad_norm": 13.596809387207031, "learning_rate": 1.2007638319402877e-06, "loss": 3.3145, "step": 66285 }, { "epoch": 0.6743367513020834, "grad_norm": 12.224353790283203, "learning_rate": 1.200422209403194e-06, "loss": 3.4264, "step": 66290 }, { "epoch": 0.6743876139322916, "grad_norm": 12.031774520874023, "learning_rate": 1.200080620115489e-06, "loss": 3.3783, "step": 66295 }, { "epoch": 0.6744384765625, "grad_norm": 11.176274299621582, "learning_rate": 1.1997390640859143e-06, "loss": 3.3811, "step": 66300 }, { "epoch": 0.6744893391927084, "grad_norm": 10.890926361083984, "learning_rate": 1.1993975413232076e-06, "loss": 3.2045, "step": 66305 }, { "epoch": 0.6745402018229166, "grad_norm": 11.679183006286621, "learning_rate": 1.1990560518361056e-06, "loss": 3.306, "step": 66310 }, { "epoch": 0.674591064453125, "grad_norm": 7.901872158050537, "learning_rate": 1.1987145956333458e-06, "loss": 3.4559, "step": 66315 }, { "epoch": 0.6746419270833334, "grad_norm": 12.083430290222168, "learning_rate": 1.198373172723666e-06, "loss": 2.959, "step": 66320 }, { "epoch": 0.6746927897135416, "grad_norm": 15.67698860168457, "learning_rate": 1.1980317831157998e-06, "loss": 3.7072, "step": 66325 }, { "epoch": 0.67474365234375, "grad_norm": 8.207538604736328, "learning_rate": 1.1976904268184816e-06, "loss": 3.5721, "step": 66330 }, { "epoch": 0.6747945149739584, "grad_norm": 7.630964279174805, "learning_rate": 1.1973491038404444e-06, "loss": 3.4813, "step": 66335 }, { "epoch": 0.6748453776041666, "grad_norm": 20.761432647705078, "learning_rate": 1.1970078141904218e-06, "loss": 3.6052, "step": 66340 }, { "epoch": 0.674896240234375, "grad_norm": 7.65514612197876, "learning_rate": 1.1966665578771463e-06, "loss": 3.295, "step": 66345 }, { "epoch": 0.6749471028645834, "grad_norm": 11.542274475097656, "learning_rate": 1.196325334909348e-06, "loss": 3.3361, "step": 66350 }, { "epoch": 0.6749979654947916, "grad_norm": 16.69321060180664, "learning_rate": 1.195984145295756e-06, "loss": 2.9858, "step": 66355 }, { "epoch": 0.675048828125, "grad_norm": 16.257966995239258, "learning_rate": 1.1956429890451016e-06, "loss": 3.1568, "step": 66360 }, { "epoch": 0.6750996907552084, "grad_norm": 13.692949295043945, "learning_rate": 1.1953018661661112e-06, "loss": 2.965, "step": 66365 }, { "epoch": 0.6751505533854166, "grad_norm": 15.775273323059082, "learning_rate": 1.194960776667513e-06, "loss": 3.5861, "step": 66370 }, { "epoch": 0.675201416015625, "grad_norm": 15.816407203674316, "learning_rate": 1.1946197205580354e-06, "loss": 3.4412, "step": 66375 }, { "epoch": 0.6752522786458334, "grad_norm": 8.514810562133789, "learning_rate": 1.1942786978464024e-06, "loss": 3.2944, "step": 66380 }, { "epoch": 0.6753031412760416, "grad_norm": 12.756802558898926, "learning_rate": 1.1939377085413397e-06, "loss": 3.3257, "step": 66385 }, { "epoch": 0.67535400390625, "grad_norm": 13.349703788757324, "learning_rate": 1.1935967526515701e-06, "loss": 3.1229, "step": 66390 }, { "epoch": 0.6754048665364584, "grad_norm": 11.398979187011719, "learning_rate": 1.1932558301858182e-06, "loss": 3.1223, "step": 66395 }, { "epoch": 0.6754557291666666, "grad_norm": 11.720551490783691, "learning_rate": 1.1929149411528068e-06, "loss": 3.0941, "step": 66400 }, { "epoch": 0.675506591796875, "grad_norm": 14.701722145080566, "learning_rate": 1.1925740855612569e-06, "loss": 3.6169, "step": 66405 }, { "epoch": 0.6755574544270834, "grad_norm": 15.986696243286133, "learning_rate": 1.1922332634198883e-06, "loss": 3.8688, "step": 66410 }, { "epoch": 0.6756083170572916, "grad_norm": 11.06942081451416, "learning_rate": 1.1918924747374214e-06, "loss": 3.0135, "step": 66415 }, { "epoch": 0.6756591796875, "grad_norm": 10.790997505187988, "learning_rate": 1.1915517195225768e-06, "loss": 3.078, "step": 66420 }, { "epoch": 0.6757100423177084, "grad_norm": 9.799771308898926, "learning_rate": 1.191210997784071e-06, "loss": 3.1423, "step": 66425 }, { "epoch": 0.6757609049479166, "grad_norm": 11.02479076385498, "learning_rate": 1.1908703095306207e-06, "loss": 3.755, "step": 66430 }, { "epoch": 0.675811767578125, "grad_norm": 13.578268051147461, "learning_rate": 1.1905296547709445e-06, "loss": 3.5172, "step": 66435 }, { "epoch": 0.6758626302083334, "grad_norm": 9.204440116882324, "learning_rate": 1.1901890335137553e-06, "loss": 3.8079, "step": 66440 }, { "epoch": 0.6759134928385416, "grad_norm": 14.687267303466797, "learning_rate": 1.1898484457677703e-06, "loss": 3.5726, "step": 66445 }, { "epoch": 0.67596435546875, "grad_norm": 15.788227081298828, "learning_rate": 1.1895078915417016e-06, "loss": 2.9446, "step": 66450 }, { "epoch": 0.6760152180989584, "grad_norm": 10.923992156982422, "learning_rate": 1.1891673708442636e-06, "loss": 3.3139, "step": 66455 }, { "epoch": 0.6760660807291666, "grad_norm": 12.538166999816895, "learning_rate": 1.1888268836841677e-06, "loss": 3.6844, "step": 66460 }, { "epoch": 0.676116943359375, "grad_norm": 15.767918586730957, "learning_rate": 1.1884864300701244e-06, "loss": 3.818, "step": 66465 }, { "epoch": 0.6761678059895834, "grad_norm": 14.704933166503906, "learning_rate": 1.1881460100108447e-06, "loss": 3.2788, "step": 66470 }, { "epoch": 0.6762186686197916, "grad_norm": 8.814567565917969, "learning_rate": 1.1878056235150395e-06, "loss": 3.4925, "step": 66475 }, { "epoch": 0.67626953125, "grad_norm": 13.270614624023438, "learning_rate": 1.1874652705914163e-06, "loss": 3.2426, "step": 66480 }, { "epoch": 0.6763203938802084, "grad_norm": 9.116535186767578, "learning_rate": 1.1871249512486821e-06, "loss": 3.1969, "step": 66485 }, { "epoch": 0.6763712565104166, "grad_norm": 15.07793140411377, "learning_rate": 1.1867846654955458e-06, "loss": 3.1105, "step": 66490 }, { "epoch": 0.676422119140625, "grad_norm": 13.533629417419434, "learning_rate": 1.1864444133407119e-06, "loss": 3.048, "step": 66495 }, { "epoch": 0.6764729817708334, "grad_norm": 11.392133712768555, "learning_rate": 1.186104194792887e-06, "loss": 3.2305, "step": 66500 }, { "epoch": 0.6765238444010416, "grad_norm": 9.979619026184082, "learning_rate": 1.1857640098607739e-06, "loss": 2.9928, "step": 66505 }, { "epoch": 0.67657470703125, "grad_norm": 7.802496433258057, "learning_rate": 1.185423858553078e-06, "loss": 3.1146, "step": 66510 }, { "epoch": 0.6766255696614584, "grad_norm": 15.561924934387207, "learning_rate": 1.1850837408785013e-06, "loss": 3.2807, "step": 66515 }, { "epoch": 0.6766764322916666, "grad_norm": 14.786020278930664, "learning_rate": 1.1847436568457444e-06, "loss": 3.3316, "step": 66520 }, { "epoch": 0.676727294921875, "grad_norm": 15.622934341430664, "learning_rate": 1.1844036064635091e-06, "loss": 3.1424, "step": 66525 }, { "epoch": 0.6767781575520834, "grad_norm": 15.41285228729248, "learning_rate": 1.1840635897404967e-06, "loss": 3.4605, "step": 66530 }, { "epoch": 0.6768290201822916, "grad_norm": 16.166288375854492, "learning_rate": 1.1837236066854058e-06, "loss": 3.2866, "step": 66535 }, { "epoch": 0.6768798828125, "grad_norm": 11.620336532592773, "learning_rate": 1.1833836573069341e-06, "loss": 3.0053, "step": 66540 }, { "epoch": 0.6769307454427084, "grad_norm": 21.284406661987305, "learning_rate": 1.1830437416137786e-06, "loss": 3.4245, "step": 66545 }, { "epoch": 0.6769816080729166, "grad_norm": 12.208685874938965, "learning_rate": 1.1827038596146368e-06, "loss": 3.4698, "step": 66550 }, { "epoch": 0.677032470703125, "grad_norm": 13.61749267578125, "learning_rate": 1.1823640113182052e-06, "loss": 3.2414, "step": 66555 }, { "epoch": 0.6770833333333334, "grad_norm": 6.914754390716553, "learning_rate": 1.1820241967331775e-06, "loss": 3.3038, "step": 66560 }, { "epoch": 0.6771341959635416, "grad_norm": 14.918646812438965, "learning_rate": 1.1816844158682492e-06, "loss": 3.3787, "step": 66565 }, { "epoch": 0.67718505859375, "grad_norm": 16.732187271118164, "learning_rate": 1.1813446687321118e-06, "loss": 3.1693, "step": 66570 }, { "epoch": 0.6772359212239584, "grad_norm": 7.794482231140137, "learning_rate": 1.181004955333459e-06, "loss": 3.1221, "step": 66575 }, { "epoch": 0.6772867838541666, "grad_norm": 15.513784408569336, "learning_rate": 1.1806652756809811e-06, "loss": 3.147, "step": 66580 }, { "epoch": 0.677337646484375, "grad_norm": 13.813277244567871, "learning_rate": 1.1803256297833704e-06, "loss": 3.3604, "step": 66585 }, { "epoch": 0.6773885091145834, "grad_norm": 11.876188278198242, "learning_rate": 1.1799860176493159e-06, "loss": 3.2065, "step": 66590 }, { "epoch": 0.6774393717447916, "grad_norm": 11.655075073242188, "learning_rate": 1.179646439287505e-06, "loss": 3.0779, "step": 66595 }, { "epoch": 0.677490234375, "grad_norm": 13.3148193359375, "learning_rate": 1.179306894706627e-06, "loss": 3.1405, "step": 66600 }, { "epoch": 0.6775410970052084, "grad_norm": 14.856046676635742, "learning_rate": 1.17896738391537e-06, "loss": 3.5807, "step": 66605 }, { "epoch": 0.6775919596354166, "grad_norm": 11.962355613708496, "learning_rate": 1.1786279069224193e-06, "loss": 3.4211, "step": 66610 }, { "epoch": 0.677642822265625, "grad_norm": 13.368785858154297, "learning_rate": 1.1782884637364606e-06, "loss": 3.3247, "step": 66615 }, { "epoch": 0.6776936848958334, "grad_norm": 9.692715644836426, "learning_rate": 1.1779490543661773e-06, "loss": 3.3903, "step": 66620 }, { "epoch": 0.6777445475260416, "grad_norm": 12.040973663330078, "learning_rate": 1.177609678820254e-06, "loss": 3.5649, "step": 66625 }, { "epoch": 0.67779541015625, "grad_norm": 7.997753143310547, "learning_rate": 1.1772703371073746e-06, "loss": 3.2475, "step": 66630 }, { "epoch": 0.6778462727864584, "grad_norm": 11.54784870147705, "learning_rate": 1.1769310292362202e-06, "loss": 3.3519, "step": 66635 }, { "epoch": 0.6778971354166666, "grad_norm": 10.893509864807129, "learning_rate": 1.1765917552154707e-06, "loss": 3.7834, "step": 66640 }, { "epoch": 0.677947998046875, "grad_norm": 17.195344924926758, "learning_rate": 1.1762525150538085e-06, "loss": 3.4025, "step": 66645 }, { "epoch": 0.6779988606770834, "grad_norm": 12.270585060119629, "learning_rate": 1.175913308759911e-06, "loss": 3.132, "step": 66650 }, { "epoch": 0.6780497233072916, "grad_norm": 15.355175971984863, "learning_rate": 1.1755741363424575e-06, "loss": 4.0733, "step": 66655 }, { "epoch": 0.6781005859375, "grad_norm": 11.925270080566406, "learning_rate": 1.1752349978101269e-06, "loss": 3.2936, "step": 66660 }, { "epoch": 0.6781514485677084, "grad_norm": 9.525686264038086, "learning_rate": 1.174895893171595e-06, "loss": 3.6208, "step": 66665 }, { "epoch": 0.6782023111979166, "grad_norm": 8.701624870300293, "learning_rate": 1.1745568224355376e-06, "loss": 3.2159, "step": 66670 }, { "epoch": 0.678253173828125, "grad_norm": 11.974249839782715, "learning_rate": 1.1742177856106285e-06, "loss": 3.3779, "step": 66675 }, { "epoch": 0.6783040364583334, "grad_norm": 9.864870071411133, "learning_rate": 1.173878782705543e-06, "loss": 3.2289, "step": 66680 }, { "epoch": 0.6783548990885416, "grad_norm": 12.554764747619629, "learning_rate": 1.1735398137289559e-06, "loss": 3.0567, "step": 66685 }, { "epoch": 0.67840576171875, "grad_norm": 9.354292869567871, "learning_rate": 1.173200878689538e-06, "loss": 3.3045, "step": 66690 }, { "epoch": 0.6784566243489584, "grad_norm": 13.207695960998535, "learning_rate": 1.1728619775959602e-06, "loss": 3.0783, "step": 66695 }, { "epoch": 0.6785074869791666, "grad_norm": 9.954910278320312, "learning_rate": 1.172523110456895e-06, "loss": 3.2347, "step": 66700 }, { "epoch": 0.678558349609375, "grad_norm": 11.203584671020508, "learning_rate": 1.1721842772810108e-06, "loss": 3.8233, "step": 66705 }, { "epoch": 0.6786092122395834, "grad_norm": 13.040945053100586, "learning_rate": 1.1718454780769779e-06, "loss": 3.5498, "step": 66710 }, { "epoch": 0.6786600748697916, "grad_norm": 8.814359664916992, "learning_rate": 1.1715067128534625e-06, "loss": 3.3726, "step": 66715 }, { "epoch": 0.6787109375, "grad_norm": 7.560615062713623, "learning_rate": 1.171167981619134e-06, "loss": 3.3403, "step": 66720 }, { "epoch": 0.6787618001302084, "grad_norm": 10.767315864562988, "learning_rate": 1.1708292843826569e-06, "loss": 3.4577, "step": 66725 }, { "epoch": 0.6788126627604166, "grad_norm": 14.322761535644531, "learning_rate": 1.1704906211526985e-06, "loss": 3.1533, "step": 66730 }, { "epoch": 0.678863525390625, "grad_norm": 9.371920585632324, "learning_rate": 1.1701519919379211e-06, "loss": 3.5488, "step": 66735 }, { "epoch": 0.6789143880208334, "grad_norm": 16.8697452545166, "learning_rate": 1.169813396746991e-06, "loss": 3.1652, "step": 66740 }, { "epoch": 0.6789652506510416, "grad_norm": 8.889442443847656, "learning_rate": 1.1694748355885698e-06, "loss": 3.3137, "step": 66745 }, { "epoch": 0.67901611328125, "grad_norm": 15.476335525512695, "learning_rate": 1.169136308471319e-06, "loss": 3.4765, "step": 66750 }, { "epoch": 0.6790669759114584, "grad_norm": 8.582023620605469, "learning_rate": 1.1687978154038998e-06, "loss": 3.2237, "step": 66755 }, { "epoch": 0.6791178385416666, "grad_norm": 8.053441047668457, "learning_rate": 1.1684593563949739e-06, "loss": 2.9329, "step": 66760 }, { "epoch": 0.679168701171875, "grad_norm": 10.91099739074707, "learning_rate": 1.1681209314532e-06, "loss": 3.3324, "step": 66765 }, { "epoch": 0.6792195638020834, "grad_norm": 16.067745208740234, "learning_rate": 1.1677825405872353e-06, "loss": 3.4072, "step": 66770 }, { "epoch": 0.6792704264322916, "grad_norm": 8.088774681091309, "learning_rate": 1.1674441838057396e-06, "loss": 3.0362, "step": 66775 }, { "epoch": 0.6793212890625, "grad_norm": 10.353334426879883, "learning_rate": 1.1671058611173676e-06, "loss": 3.1913, "step": 66780 }, { "epoch": 0.6793721516927084, "grad_norm": 9.857560157775879, "learning_rate": 1.1667675725307772e-06, "loss": 3.5657, "step": 66785 }, { "epoch": 0.6794230143229166, "grad_norm": 10.71377944946289, "learning_rate": 1.1664293180546216e-06, "loss": 3.1443, "step": 66790 }, { "epoch": 0.679473876953125, "grad_norm": 12.475671768188477, "learning_rate": 1.1660910976975566e-06, "loss": 3.2034, "step": 66795 }, { "epoch": 0.6795247395833334, "grad_norm": 12.8106050491333, "learning_rate": 1.1657529114682344e-06, "loss": 3.064, "step": 66800 }, { "epoch": 0.6795756022135416, "grad_norm": 12.289118766784668, "learning_rate": 1.1654147593753073e-06, "loss": 3.429, "step": 66805 }, { "epoch": 0.67962646484375, "grad_norm": 13.987550735473633, "learning_rate": 1.165076641427427e-06, "loss": 3.2674, "step": 66810 }, { "epoch": 0.6796773274739584, "grad_norm": 12.636734008789062, "learning_rate": 1.1647385576332455e-06, "loss": 3.2572, "step": 66815 }, { "epoch": 0.6797281901041666, "grad_norm": 9.804733276367188, "learning_rate": 1.1644005080014111e-06, "loss": 3.2244, "step": 66820 }, { "epoch": 0.679779052734375, "grad_norm": 11.967543601989746, "learning_rate": 1.1640624925405736e-06, "loss": 3.1633, "step": 66825 }, { "epoch": 0.6798299153645834, "grad_norm": 13.213789939880371, "learning_rate": 1.1637245112593793e-06, "loss": 3.2825, "step": 66830 }, { "epoch": 0.6798807779947916, "grad_norm": 14.687386512756348, "learning_rate": 1.1633865641664768e-06, "loss": 3.4654, "step": 66835 }, { "epoch": 0.679931640625, "grad_norm": 9.610382080078125, "learning_rate": 1.1630486512705132e-06, "loss": 2.8534, "step": 66840 }, { "epoch": 0.6799825032552084, "grad_norm": 12.395421981811523, "learning_rate": 1.1627107725801329e-06, "loss": 3.5905, "step": 66845 }, { "epoch": 0.6800333658854166, "grad_norm": 13.9788818359375, "learning_rate": 1.1623729281039798e-06, "loss": 3.6113, "step": 66850 }, { "epoch": 0.680084228515625, "grad_norm": 11.481358528137207, "learning_rate": 1.162035117850699e-06, "loss": 3.5347, "step": 66855 }, { "epoch": 0.6801350911458334, "grad_norm": 8.43794059753418, "learning_rate": 1.1616973418289319e-06, "loss": 3.1554, "step": 66860 }, { "epoch": 0.6801859537760416, "grad_norm": 9.162663459777832, "learning_rate": 1.1613596000473208e-06, "loss": 3.4241, "step": 66865 }, { "epoch": 0.68023681640625, "grad_norm": 17.194950103759766, "learning_rate": 1.161021892514508e-06, "loss": 3.2849, "step": 66870 }, { "epoch": 0.6802876790364584, "grad_norm": 12.317100524902344, "learning_rate": 1.1606842192391326e-06, "loss": 3.4843, "step": 66875 }, { "epoch": 0.6803385416666666, "grad_norm": 8.26466178894043, "learning_rate": 1.1603465802298334e-06, "loss": 3.1841, "step": 66880 }, { "epoch": 0.680389404296875, "grad_norm": 8.200136184692383, "learning_rate": 1.160008975495249e-06, "loss": 3.2368, "step": 66885 }, { "epoch": 0.6804402669270834, "grad_norm": 15.516468048095703, "learning_rate": 1.1596714050440186e-06, "loss": 3.2557, "step": 66890 }, { "epoch": 0.6804911295572916, "grad_norm": 15.397497177124023, "learning_rate": 1.1593338688847775e-06, "loss": 3.3697, "step": 66895 }, { "epoch": 0.6805419921875, "grad_norm": 11.23614501953125, "learning_rate": 1.1589963670261618e-06, "loss": 3.1352, "step": 66900 }, { "epoch": 0.6805928548177084, "grad_norm": 9.06709098815918, "learning_rate": 1.158658899476805e-06, "loss": 2.9887, "step": 66905 }, { "epoch": 0.6806437174479166, "grad_norm": 8.044218063354492, "learning_rate": 1.1583214662453424e-06, "loss": 3.2688, "step": 66910 }, { "epoch": 0.680694580078125, "grad_norm": 13.742280006408691, "learning_rate": 1.157984067340408e-06, "loss": 3.3677, "step": 66915 }, { "epoch": 0.6807454427083334, "grad_norm": 7.34171724319458, "learning_rate": 1.157646702770633e-06, "loss": 3.0535, "step": 66920 }, { "epoch": 0.6807963053385416, "grad_norm": 10.71430492401123, "learning_rate": 1.1573093725446484e-06, "loss": 3.3923, "step": 66925 }, { "epoch": 0.68084716796875, "grad_norm": 8.38379192352295, "learning_rate": 1.156972076671086e-06, "loss": 3.4441, "step": 66930 }, { "epoch": 0.6808980305989584, "grad_norm": 13.166692733764648, "learning_rate": 1.1566348151585735e-06, "loss": 3.2246, "step": 66935 }, { "epoch": 0.6809488932291666, "grad_norm": 12.907691955566406, "learning_rate": 1.156297588015742e-06, "loss": 3.3417, "step": 66940 }, { "epoch": 0.680999755859375, "grad_norm": 12.744908332824707, "learning_rate": 1.1559603952512175e-06, "loss": 3.2215, "step": 66945 }, { "epoch": 0.6810506184895834, "grad_norm": 15.622496604919434, "learning_rate": 1.1556232368736283e-06, "loss": 3.1477, "step": 66950 }, { "epoch": 0.6811014811197916, "grad_norm": 11.325515747070312, "learning_rate": 1.1552861128915998e-06, "loss": 3.243, "step": 66955 }, { "epoch": 0.68115234375, "grad_norm": 14.483476638793945, "learning_rate": 1.1549490233137569e-06, "loss": 3.4576, "step": 66960 }, { "epoch": 0.6812032063802084, "grad_norm": 12.413887977600098, "learning_rate": 1.154611968148724e-06, "loss": 2.9391, "step": 66965 }, { "epoch": 0.6812540690104166, "grad_norm": 12.793086051940918, "learning_rate": 1.1542749474051263e-06, "loss": 2.8667, "step": 66970 }, { "epoch": 0.681304931640625, "grad_norm": 8.617049217224121, "learning_rate": 1.1539379610915848e-06, "loss": 3.3601, "step": 66975 }, { "epoch": 0.6813557942708334, "grad_norm": 8.009932518005371, "learning_rate": 1.1536010092167208e-06, "loss": 3.2637, "step": 66980 }, { "epoch": 0.6814066569010416, "grad_norm": 9.720005989074707, "learning_rate": 1.1532640917891568e-06, "loss": 3.5085, "step": 66985 }, { "epoch": 0.68145751953125, "grad_norm": 8.442168235778809, "learning_rate": 1.152927208817511e-06, "loss": 2.8223, "step": 66990 }, { "epoch": 0.6815083821614584, "grad_norm": 9.75130558013916, "learning_rate": 1.1525903603104039e-06, "loss": 3.4401, "step": 66995 }, { "epoch": 0.6815592447916666, "grad_norm": 11.65346622467041, "learning_rate": 1.1522535462764519e-06, "loss": 3.1385, "step": 67000 }, { "epoch": 0.681610107421875, "grad_norm": 12.073299407958984, "learning_rate": 1.1519167667242748e-06, "loss": 3.1029, "step": 67005 }, { "epoch": 0.6816609700520834, "grad_norm": 10.075000762939453, "learning_rate": 1.1515800216624875e-06, "loss": 3.1759, "step": 67010 }, { "epoch": 0.6817118326822916, "grad_norm": 9.205047607421875, "learning_rate": 1.151243311099705e-06, "loss": 3.2337, "step": 67015 }, { "epoch": 0.6817626953125, "grad_norm": 11.416351318359375, "learning_rate": 1.1509066350445424e-06, "loss": 3.4893, "step": 67020 }, { "epoch": 0.6818135579427084, "grad_norm": 15.491963386535645, "learning_rate": 1.150569993505615e-06, "loss": 3.3777, "step": 67025 }, { "epoch": 0.6818644205729166, "grad_norm": 8.515227317810059, "learning_rate": 1.1502333864915343e-06, "loss": 3.179, "step": 67030 }, { "epoch": 0.681915283203125, "grad_norm": 9.592573165893555, "learning_rate": 1.1498968140109118e-06, "loss": 3.169, "step": 67035 }, { "epoch": 0.6819661458333334, "grad_norm": 10.023240089416504, "learning_rate": 1.149560276072359e-06, "loss": 3.1083, "step": 67040 }, { "epoch": 0.6820170084635416, "grad_norm": 10.103043556213379, "learning_rate": 1.1492237726844877e-06, "loss": 3.141, "step": 67045 }, { "epoch": 0.68206787109375, "grad_norm": 9.0494384765625, "learning_rate": 1.1488873038559055e-06, "loss": 3.1736, "step": 67050 }, { "epoch": 0.6821187337239584, "grad_norm": 14.495260238647461, "learning_rate": 1.1485508695952211e-06, "loss": 3.2705, "step": 67055 }, { "epoch": 0.6821695963541666, "grad_norm": 9.811225891113281, "learning_rate": 1.1482144699110427e-06, "loss": 3.1328, "step": 67060 }, { "epoch": 0.682220458984375, "grad_norm": 8.978487014770508, "learning_rate": 1.1478781048119762e-06, "loss": 3.7037, "step": 67065 }, { "epoch": 0.6822713216145834, "grad_norm": 10.287515640258789, "learning_rate": 1.1475417743066286e-06, "loss": 3.5078, "step": 67070 }, { "epoch": 0.6823221842447916, "grad_norm": 12.768781661987305, "learning_rate": 1.1472054784036033e-06, "loss": 3.256, "step": 67075 }, { "epoch": 0.682373046875, "grad_norm": 7.16929292678833, "learning_rate": 1.146869217111506e-06, "loss": 2.967, "step": 67080 }, { "epoch": 0.6824239095052084, "grad_norm": 11.414715766906738, "learning_rate": 1.146532990438939e-06, "loss": 3.2199, "step": 67085 }, { "epoch": 0.6824747721354166, "grad_norm": 10.97852611541748, "learning_rate": 1.1461967983945039e-06, "loss": 3.3135, "step": 67090 }, { "epoch": 0.682525634765625, "grad_norm": 10.633146286010742, "learning_rate": 1.1458606409868026e-06, "loss": 3.244, "step": 67095 }, { "epoch": 0.6825764973958334, "grad_norm": 15.645685195922852, "learning_rate": 1.1455245182244371e-06, "loss": 3.2323, "step": 67100 }, { "epoch": 0.6826273600260416, "grad_norm": 11.883807182312012, "learning_rate": 1.1451884301160054e-06, "loss": 3.4085, "step": 67105 }, { "epoch": 0.68267822265625, "grad_norm": 12.355230331420898, "learning_rate": 1.1448523766701067e-06, "loss": 3.1958, "step": 67110 }, { "epoch": 0.6827290852864584, "grad_norm": 7.595726013183594, "learning_rate": 1.1445163578953381e-06, "loss": 3.1504, "step": 67115 }, { "epoch": 0.6827799479166666, "grad_norm": 8.888771057128906, "learning_rate": 1.1441803738002969e-06, "loss": 3.1938, "step": 67120 }, { "epoch": 0.682830810546875, "grad_norm": 10.272032737731934, "learning_rate": 1.1438444243935804e-06, "loss": 2.8385, "step": 67125 }, { "epoch": 0.6828816731770834, "grad_norm": 9.087508201599121, "learning_rate": 1.1435085096837828e-06, "loss": 3.0904, "step": 67130 }, { "epoch": 0.6829325358072916, "grad_norm": 7.05070686340332, "learning_rate": 1.1431726296794975e-06, "loss": 2.9817, "step": 67135 }, { "epoch": 0.6829833984375, "grad_norm": 14.386911392211914, "learning_rate": 1.1428367843893202e-06, "loss": 3.3527, "step": 67140 }, { "epoch": 0.6830342610677084, "grad_norm": 13.281949996948242, "learning_rate": 1.142500973821841e-06, "loss": 3.1643, "step": 67145 }, { "epoch": 0.6830851236979166, "grad_norm": 11.068631172180176, "learning_rate": 1.1421651979856523e-06, "loss": 3.206, "step": 67150 }, { "epoch": 0.683135986328125, "grad_norm": 12.887954711914062, "learning_rate": 1.1418294568893464e-06, "loss": 3.3041, "step": 67155 }, { "epoch": 0.6831868489583334, "grad_norm": 8.447509765625, "learning_rate": 1.141493750541512e-06, "loss": 3.5031, "step": 67160 }, { "epoch": 0.6832377115885416, "grad_norm": 9.116706848144531, "learning_rate": 1.1411580789507377e-06, "loss": 2.9863, "step": 67165 }, { "epoch": 0.68328857421875, "grad_norm": 12.063055038452148, "learning_rate": 1.1408224421256102e-06, "loss": 3.2959, "step": 67170 }, { "epoch": 0.6833394368489584, "grad_norm": 11.058523178100586, "learning_rate": 1.1404868400747202e-06, "loss": 3.2343, "step": 67175 }, { "epoch": 0.6833902994791666, "grad_norm": 12.993006706237793, "learning_rate": 1.1401512728066519e-06, "loss": 2.8858, "step": 67180 }, { "epoch": 0.683441162109375, "grad_norm": 10.988066673278809, "learning_rate": 1.1398157403299911e-06, "loss": 3.4298, "step": 67185 }, { "epoch": 0.6834920247395834, "grad_norm": 11.185561180114746, "learning_rate": 1.1394802426533216e-06, "loss": 3.2862, "step": 67190 }, { "epoch": 0.6835428873697916, "grad_norm": 15.70999813079834, "learning_rate": 1.1391447797852272e-06, "loss": 3.257, "step": 67195 }, { "epoch": 0.68359375, "grad_norm": 9.060686111450195, "learning_rate": 1.1388093517342922e-06, "loss": 3.3663, "step": 67200 }, { "epoch": 0.6836446126302084, "grad_norm": 16.518936157226562, "learning_rate": 1.1384739585090973e-06, "loss": 3.9777, "step": 67205 }, { "epoch": 0.6836954752604166, "grad_norm": 14.970965385437012, "learning_rate": 1.1381386001182223e-06, "loss": 3.4142, "step": 67210 }, { "epoch": 0.683746337890625, "grad_norm": 12.52270793914795, "learning_rate": 1.1378032765702496e-06, "loss": 3.4041, "step": 67215 }, { "epoch": 0.6837972005208334, "grad_norm": 9.38988971710205, "learning_rate": 1.137467987873756e-06, "loss": 3.3901, "step": 67220 }, { "epoch": 0.6838480631510416, "grad_norm": 15.922767639160156, "learning_rate": 1.1371327340373222e-06, "loss": 3.5588, "step": 67225 }, { "epoch": 0.68389892578125, "grad_norm": 10.0673189163208, "learning_rate": 1.1367975150695232e-06, "loss": 3.4771, "step": 67230 }, { "epoch": 0.6839497884114584, "grad_norm": 14.637181282043457, "learning_rate": 1.1364623309789378e-06, "loss": 3.7495, "step": 67235 }, { "epoch": 0.6840006510416666, "grad_norm": 9.442575454711914, "learning_rate": 1.1361271817741403e-06, "loss": 3.1345, "step": 67240 }, { "epoch": 0.684051513671875, "grad_norm": 10.584227561950684, "learning_rate": 1.135792067463705e-06, "loss": 3.081, "step": 67245 }, { "epoch": 0.6841023763020834, "grad_norm": 8.191669464111328, "learning_rate": 1.1354569880562057e-06, "loss": 3.3178, "step": 67250 }, { "epoch": 0.6841532389322916, "grad_norm": 14.132744789123535, "learning_rate": 1.1351219435602172e-06, "loss": 3.2586, "step": 67255 }, { "epoch": 0.6842041015625, "grad_norm": 17.84623908996582, "learning_rate": 1.1347869339843104e-06, "loss": 3.186, "step": 67260 }, { "epoch": 0.6842549641927084, "grad_norm": 11.495306968688965, "learning_rate": 1.1344519593370552e-06, "loss": 3.2165, "step": 67265 }, { "epoch": 0.6843058268229166, "grad_norm": 11.648442268371582, "learning_rate": 1.1341170196270238e-06, "loss": 3.3547, "step": 67270 }, { "epoch": 0.684356689453125, "grad_norm": 17.59107780456543, "learning_rate": 1.133782114862784e-06, "loss": 3.57, "step": 67275 }, { "epoch": 0.6844075520833334, "grad_norm": 10.777287483215332, "learning_rate": 1.1334472450529057e-06, "loss": 2.8832, "step": 67280 }, { "epoch": 0.6844584147135416, "grad_norm": 9.979857444763184, "learning_rate": 1.133112410205955e-06, "loss": 3.4884, "step": 67285 }, { "epoch": 0.68450927734375, "grad_norm": 11.093391418457031, "learning_rate": 1.1327776103304998e-06, "loss": 3.3343, "step": 67290 }, { "epoch": 0.6845601399739584, "grad_norm": 11.67663860321045, "learning_rate": 1.1324428454351058e-06, "loss": 3.2795, "step": 67295 }, { "epoch": 0.6846110026041666, "grad_norm": 13.728826522827148, "learning_rate": 1.1321081155283364e-06, "loss": 3.9356, "step": 67300 }, { "epoch": 0.684661865234375, "grad_norm": 17.770524978637695, "learning_rate": 1.1317734206187563e-06, "loss": 3.3391, "step": 67305 }, { "epoch": 0.6847127278645834, "grad_norm": 12.959101676940918, "learning_rate": 1.1314387607149304e-06, "loss": 3.6963, "step": 67310 }, { "epoch": 0.6847635904947916, "grad_norm": 15.099030494689941, "learning_rate": 1.1311041358254191e-06, "loss": 3.5781, "step": 67315 }, { "epoch": 0.684814453125, "grad_norm": 14.204859733581543, "learning_rate": 1.1307695459587841e-06, "loss": 3.0682, "step": 67320 }, { "epoch": 0.6848653157552084, "grad_norm": 11.338966369628906, "learning_rate": 1.130434991123584e-06, "loss": 3.0671, "step": 67325 }, { "epoch": 0.6849161783854166, "grad_norm": 10.393125534057617, "learning_rate": 1.1301004713283817e-06, "loss": 3.4269, "step": 67330 }, { "epoch": 0.684967041015625, "grad_norm": 12.963751792907715, "learning_rate": 1.1297659865817342e-06, "loss": 3.4185, "step": 67335 }, { "epoch": 0.6850179036458334, "grad_norm": 14.359334945678711, "learning_rate": 1.1294315368921993e-06, "loss": 3.2264, "step": 67340 }, { "epoch": 0.6850687662760416, "grad_norm": 11.044459342956543, "learning_rate": 1.1290971222683325e-06, "loss": 2.9845, "step": 67345 }, { "epoch": 0.68511962890625, "grad_norm": 10.04295539855957, "learning_rate": 1.128762742718691e-06, "loss": 3.3286, "step": 67350 }, { "epoch": 0.6851704915364584, "grad_norm": 11.318278312683105, "learning_rate": 1.1284283982518307e-06, "loss": 3.357, "step": 67355 }, { "epoch": 0.6852213541666666, "grad_norm": 13.759521484375, "learning_rate": 1.1280940888763036e-06, "loss": 3.1177, "step": 67360 }, { "epoch": 0.685272216796875, "grad_norm": 8.070262908935547, "learning_rate": 1.127759814600665e-06, "loss": 3.4054, "step": 67365 }, { "epoch": 0.6853230794270834, "grad_norm": 15.429027557373047, "learning_rate": 1.1274255754334665e-06, "loss": 3.0435, "step": 67370 }, { "epoch": 0.6853739420572916, "grad_norm": 13.732335090637207, "learning_rate": 1.1270913713832584e-06, "loss": 3.3527, "step": 67375 }, { "epoch": 0.6854248046875, "grad_norm": 12.337641716003418, "learning_rate": 1.126757202458592e-06, "loss": 3.2012, "step": 67380 }, { "epoch": 0.6854756673177084, "grad_norm": 8.89428997039795, "learning_rate": 1.126423068668018e-06, "loss": 2.9396, "step": 67385 }, { "epoch": 0.6855265299479166, "grad_norm": 8.475600242614746, "learning_rate": 1.1260889700200843e-06, "loss": 3.3154, "step": 67390 }, { "epoch": 0.685577392578125, "grad_norm": 14.109234809875488, "learning_rate": 1.1257549065233387e-06, "loss": 3.4301, "step": 67395 }, { "epoch": 0.6856282552083334, "grad_norm": 13.120919227600098, "learning_rate": 1.1254208781863271e-06, "loss": 3.1728, "step": 67400 }, { "epoch": 0.6856791178385416, "grad_norm": 12.971762657165527, "learning_rate": 1.1250868850175965e-06, "loss": 3.0323, "step": 67405 }, { "epoch": 0.68572998046875, "grad_norm": 10.945058822631836, "learning_rate": 1.1247529270256929e-06, "loss": 3.1961, "step": 67410 }, { "epoch": 0.6857808430989584, "grad_norm": 10.669898986816406, "learning_rate": 1.1244190042191597e-06, "loss": 3.4704, "step": 67415 }, { "epoch": 0.6858317057291666, "grad_norm": 9.132181167602539, "learning_rate": 1.1240851166065395e-06, "loss": 3.5586, "step": 67420 }, { "epoch": 0.685882568359375, "grad_norm": 18.08685874938965, "learning_rate": 1.1237512641963763e-06, "loss": 3.5384, "step": 67425 }, { "epoch": 0.6859334309895834, "grad_norm": 15.136528015136719, "learning_rate": 1.1234174469972099e-06, "loss": 3.0564, "step": 67430 }, { "epoch": 0.6859842936197916, "grad_norm": 11.03237247467041, "learning_rate": 1.1230836650175825e-06, "loss": 2.9931, "step": 67435 }, { "epoch": 0.68603515625, "grad_norm": 7.184826374053955, "learning_rate": 1.1227499182660326e-06, "loss": 3.3431, "step": 67440 }, { "epoch": 0.6860860188802084, "grad_norm": 17.306102752685547, "learning_rate": 1.1224162067511003e-06, "loss": 3.5036, "step": 67445 }, { "epoch": 0.6861368815104166, "grad_norm": 8.99876880645752, "learning_rate": 1.1220825304813228e-06, "loss": 3.4019, "step": 67450 }, { "epoch": 0.686187744140625, "grad_norm": 8.364696502685547, "learning_rate": 1.1217488894652362e-06, "loss": 3.2598, "step": 67455 }, { "epoch": 0.6862386067708334, "grad_norm": 12.502495765686035, "learning_rate": 1.121415283711378e-06, "loss": 3.6549, "step": 67460 }, { "epoch": 0.6862894694010416, "grad_norm": 11.477373123168945, "learning_rate": 1.1210817132282834e-06, "loss": 3.2676, "step": 67465 }, { "epoch": 0.68634033203125, "grad_norm": 7.801740646362305, "learning_rate": 1.1207481780244866e-06, "loss": 3.5728, "step": 67470 }, { "epoch": 0.6863911946614584, "grad_norm": 12.995709419250488, "learning_rate": 1.12041467810852e-06, "loss": 3.4484, "step": 67475 }, { "epoch": 0.6864420572916666, "grad_norm": 26.030847549438477, "learning_rate": 1.1200812134889163e-06, "loss": 3.6054, "step": 67480 }, { "epoch": 0.686492919921875, "grad_norm": 10.021547317504883, "learning_rate": 1.119747784174209e-06, "loss": 3.3571, "step": 67485 }, { "epoch": 0.6865437825520834, "grad_norm": 22.86213493347168, "learning_rate": 1.1194143901729274e-06, "loss": 3.0059, "step": 67490 }, { "epoch": 0.6865946451822916, "grad_norm": 11.521246910095215, "learning_rate": 1.1190810314936007e-06, "loss": 3.2185, "step": 67495 }, { "epoch": 0.6866455078125, "grad_norm": 10.095511436462402, "learning_rate": 1.118747708144759e-06, "loss": 3.5701, "step": 67500 }, { "epoch": 0.6866963704427084, "grad_norm": 14.873655319213867, "learning_rate": 1.1184144201349291e-06, "loss": 3.2541, "step": 67505 }, { "epoch": 0.6867472330729166, "grad_norm": 12.867834091186523, "learning_rate": 1.1180811674726396e-06, "loss": 3.4219, "step": 67510 }, { "epoch": 0.686798095703125, "grad_norm": 6.689752101898193, "learning_rate": 1.117747950166415e-06, "loss": 3.397, "step": 67515 }, { "epoch": 0.6868489583333334, "grad_norm": 21.152128219604492, "learning_rate": 1.1174147682247827e-06, "loss": 3.078, "step": 67520 }, { "epoch": 0.6868998209635416, "grad_norm": 10.34592342376709, "learning_rate": 1.1170816216562655e-06, "loss": 3.0048, "step": 67525 }, { "epoch": 0.68695068359375, "grad_norm": 15.649857521057129, "learning_rate": 1.1167485104693865e-06, "loss": 3.2808, "step": 67530 }, { "epoch": 0.6870015462239584, "grad_norm": 13.62820053100586, "learning_rate": 1.1164154346726688e-06, "loss": 3.4623, "step": 67535 }, { "epoch": 0.6870524088541666, "grad_norm": 11.384383201599121, "learning_rate": 1.1160823942746353e-06, "loss": 3.2355, "step": 67540 }, { "epoch": 0.687103271484375, "grad_norm": 15.764509201049805, "learning_rate": 1.1157493892838057e-06, "loss": 3.4105, "step": 67545 }, { "epoch": 0.6871541341145834, "grad_norm": 14.197961807250977, "learning_rate": 1.1154164197086988e-06, "loss": 3.0341, "step": 67550 }, { "epoch": 0.6872049967447916, "grad_norm": 9.72084903717041, "learning_rate": 1.1150834855578358e-06, "loss": 3.1286, "step": 67555 }, { "epoch": 0.687255859375, "grad_norm": 9.875367164611816, "learning_rate": 1.1147505868397324e-06, "loss": 3.2019, "step": 67560 }, { "epoch": 0.6873067220052084, "grad_norm": 13.429791450500488, "learning_rate": 1.1144177235629081e-06, "loss": 3.5066, "step": 67565 }, { "epoch": 0.6873575846354166, "grad_norm": 12.365313529968262, "learning_rate": 1.1140848957358768e-06, "loss": 3.6088, "step": 67570 }, { "epoch": 0.687408447265625, "grad_norm": 23.173267364501953, "learning_rate": 1.1137521033671558e-06, "loss": 3.3458, "step": 67575 }, { "epoch": 0.6874593098958334, "grad_norm": 10.073822975158691, "learning_rate": 1.1134193464652588e-06, "loss": 3.4049, "step": 67580 }, { "epoch": 0.6875101725260416, "grad_norm": 9.316292762756348, "learning_rate": 1.1130866250386982e-06, "loss": 3.2323, "step": 67585 }, { "epoch": 0.68756103515625, "grad_norm": 11.111005783081055, "learning_rate": 1.1127539390959877e-06, "loss": 3.4849, "step": 67590 }, { "epoch": 0.6876118977864584, "grad_norm": 12.63139533996582, "learning_rate": 1.1124212886456395e-06, "loss": 3.5015, "step": 67595 }, { "epoch": 0.6876627604166666, "grad_norm": 15.805822372436523, "learning_rate": 1.1120886736961639e-06, "loss": 3.8825, "step": 67600 }, { "epoch": 0.687713623046875, "grad_norm": 14.851016998291016, "learning_rate": 1.1117560942560706e-06, "loss": 3.474, "step": 67605 }, { "epoch": 0.6877644856770834, "grad_norm": 11.629377365112305, "learning_rate": 1.1114235503338677e-06, "loss": 3.4285, "step": 67610 }, { "epoch": 0.6878153483072916, "grad_norm": 8.709927558898926, "learning_rate": 1.111091041938064e-06, "loss": 3.2639, "step": 67615 }, { "epoch": 0.6878662109375, "grad_norm": 14.179695129394531, "learning_rate": 1.1107585690771676e-06, "loss": 3.3194, "step": 67620 }, { "epoch": 0.6879170735677084, "grad_norm": 13.241247177124023, "learning_rate": 1.1104261317596842e-06, "loss": 3.141, "step": 67625 }, { "epoch": 0.6879679361979166, "grad_norm": 12.496594429016113, "learning_rate": 1.1100937299941179e-06, "loss": 3.4404, "step": 67630 }, { "epoch": 0.688018798828125, "grad_norm": 18.70077896118164, "learning_rate": 1.109761363788974e-06, "loss": 3.5438, "step": 67635 }, { "epoch": 0.6880696614583334, "grad_norm": 15.268617630004883, "learning_rate": 1.1094290331527572e-06, "loss": 3.7203, "step": 67640 }, { "epoch": 0.6881205240885416, "grad_norm": 10.46624755859375, "learning_rate": 1.1090967380939676e-06, "loss": 3.9093, "step": 67645 }, { "epoch": 0.68817138671875, "grad_norm": 11.635927200317383, "learning_rate": 1.1087644786211095e-06, "loss": 3.1409, "step": 67650 }, { "epoch": 0.6882222493489584, "grad_norm": 9.18773365020752, "learning_rate": 1.1084322547426827e-06, "loss": 3.7527, "step": 67655 }, { "epoch": 0.6882731119791666, "grad_norm": 9.737253189086914, "learning_rate": 1.1081000664671857e-06, "loss": 2.983, "step": 67660 }, { "epoch": 0.688323974609375, "grad_norm": 15.006853103637695, "learning_rate": 1.1077679138031183e-06, "loss": 2.9761, "step": 67665 }, { "epoch": 0.6883748372395834, "grad_norm": 8.402088165283203, "learning_rate": 1.1074357967589799e-06, "loss": 3.4267, "step": 67670 }, { "epoch": 0.6884256998697916, "grad_norm": 17.684112548828125, "learning_rate": 1.1071037153432669e-06, "loss": 3.2649, "step": 67675 }, { "epoch": 0.6884765625, "grad_norm": 12.325630187988281, "learning_rate": 1.1067716695644748e-06, "loss": 3.302, "step": 67680 }, { "epoch": 0.6885274251302084, "grad_norm": 12.473956108093262, "learning_rate": 1.1064396594310986e-06, "loss": 3.3447, "step": 67685 }, { "epoch": 0.6885782877604166, "grad_norm": 12.692052841186523, "learning_rate": 1.1061076849516333e-06, "loss": 3.272, "step": 67690 }, { "epoch": 0.688629150390625, "grad_norm": 9.642125129699707, "learning_rate": 1.1057757461345734e-06, "loss": 3.0806, "step": 67695 }, { "epoch": 0.6886800130208334, "grad_norm": 8.635783195495605, "learning_rate": 1.1054438429884106e-06, "loss": 3.6573, "step": 67700 }, { "epoch": 0.6887308756510416, "grad_norm": 10.294721603393555, "learning_rate": 1.1051119755216354e-06, "loss": 3.2534, "step": 67705 }, { "epoch": 0.68878173828125, "grad_norm": 10.347164154052734, "learning_rate": 1.104780143742741e-06, "loss": 3.7229, "step": 67710 }, { "epoch": 0.6888326009114584, "grad_norm": 15.795777320861816, "learning_rate": 1.104448347660215e-06, "loss": 3.2436, "step": 67715 }, { "epoch": 0.6888834635416666, "grad_norm": 10.483985900878906, "learning_rate": 1.104116587282548e-06, "loss": 3.6812, "step": 67720 }, { "epoch": 0.688934326171875, "grad_norm": 11.902549743652344, "learning_rate": 1.1037848626182263e-06, "loss": 3.4797, "step": 67725 }, { "epoch": 0.6889851888020834, "grad_norm": 12.284526824951172, "learning_rate": 1.1034531736757387e-06, "loss": 3.4157, "step": 67730 }, { "epoch": 0.6890360514322916, "grad_norm": 15.990017890930176, "learning_rate": 1.103121520463571e-06, "loss": 2.9745, "step": 67735 }, { "epoch": 0.6890869140625, "grad_norm": 9.489580154418945, "learning_rate": 1.1027899029902068e-06, "loss": 3.2829, "step": 67740 }, { "epoch": 0.6891377766927084, "grad_norm": 7.084834575653076, "learning_rate": 1.1024583212641321e-06, "loss": 3.1939, "step": 67745 }, { "epoch": 0.6891886393229166, "grad_norm": 9.82347297668457, "learning_rate": 1.1021267752938305e-06, "loss": 3.1355, "step": 67750 }, { "epoch": 0.689239501953125, "grad_norm": 7.685935974121094, "learning_rate": 1.1017952650877844e-06, "loss": 3.4428, "step": 67755 }, { "epoch": 0.6892903645833334, "grad_norm": 16.429901123046875, "learning_rate": 1.1014637906544739e-06, "loss": 3.5422, "step": 67760 }, { "epoch": 0.6893412272135416, "grad_norm": 7.084587574005127, "learning_rate": 1.101132352002382e-06, "loss": 3.3334, "step": 67765 }, { "epoch": 0.68939208984375, "grad_norm": 14.835680961608887, "learning_rate": 1.1008009491399862e-06, "loss": 3.6113, "step": 67770 }, { "epoch": 0.6894429524739584, "grad_norm": 12.166670799255371, "learning_rate": 1.1004695820757674e-06, "loss": 3.2544, "step": 67775 }, { "epoch": 0.6894938151041666, "grad_norm": 10.962254524230957, "learning_rate": 1.1001382508182018e-06, "loss": 3.7049, "step": 67780 }, { "epoch": 0.689544677734375, "grad_norm": 15.7714204788208, "learning_rate": 1.0998069553757681e-06, "loss": 3.4172, "step": 67785 }, { "epoch": 0.6895955403645834, "grad_norm": 12.194293022155762, "learning_rate": 1.0994756957569405e-06, "loss": 3.6877, "step": 67790 }, { "epoch": 0.6896464029947916, "grad_norm": 7.582254409790039, "learning_rate": 1.0991444719701967e-06, "loss": 3.1827, "step": 67795 }, { "epoch": 0.689697265625, "grad_norm": 8.773179054260254, "learning_rate": 1.098813284024008e-06, "loss": 3.3778, "step": 67800 }, { "epoch": 0.6897481282552084, "grad_norm": 11.618633270263672, "learning_rate": 1.0984821319268504e-06, "loss": 3.5551, "step": 67805 }, { "epoch": 0.6897989908854166, "grad_norm": 16.125980377197266, "learning_rate": 1.0981510156871955e-06, "loss": 3.01, "step": 67810 }, { "epoch": 0.689849853515625, "grad_norm": 9.524605751037598, "learning_rate": 1.0978199353135135e-06, "loss": 3.9478, "step": 67815 }, { "epoch": 0.6899007161458334, "grad_norm": 15.00719165802002, "learning_rate": 1.097488890814276e-06, "loss": 3.5804, "step": 67820 }, { "epoch": 0.6899515787760416, "grad_norm": 10.659303665161133, "learning_rate": 1.0971578821979539e-06, "loss": 3.23, "step": 67825 }, { "epoch": 0.69000244140625, "grad_norm": 14.772455215454102, "learning_rate": 1.0968269094730147e-06, "loss": 3.5165, "step": 67830 }, { "epoch": 0.6900533040364584, "grad_norm": 10.8951416015625, "learning_rate": 1.0964959726479266e-06, "loss": 3.4207, "step": 67835 }, { "epoch": 0.6901041666666666, "grad_norm": 12.686936378479004, "learning_rate": 1.0961650717311553e-06, "loss": 3.3497, "step": 67840 }, { "epoch": 0.690155029296875, "grad_norm": 14.472989082336426, "learning_rate": 1.095834206731168e-06, "loss": 3.2775, "step": 67845 }, { "epoch": 0.6902058919270834, "grad_norm": 8.697223663330078, "learning_rate": 1.0955033776564306e-06, "loss": 3.8941, "step": 67850 }, { "epoch": 0.6902567545572916, "grad_norm": 11.928094863891602, "learning_rate": 1.0951725845154053e-06, "loss": 3.0851, "step": 67855 }, { "epoch": 0.6903076171875, "grad_norm": 8.547250747680664, "learning_rate": 1.094841827316558e-06, "loss": 3.2359, "step": 67860 }, { "epoch": 0.6903584798177084, "grad_norm": 8.260056495666504, "learning_rate": 1.094511106068349e-06, "loss": 3.1335, "step": 67865 }, { "epoch": 0.6904093424479166, "grad_norm": 14.270990371704102, "learning_rate": 1.0941804207792391e-06, "loss": 3.4316, "step": 67870 }, { "epoch": 0.690460205078125, "grad_norm": 14.395874977111816, "learning_rate": 1.09384977145769e-06, "loss": 3.2012, "step": 67875 }, { "epoch": 0.6905110677083334, "grad_norm": 12.590879440307617, "learning_rate": 1.0935191581121624e-06, "loss": 3.4119, "step": 67880 }, { "epoch": 0.6905619303385416, "grad_norm": 17.32014274597168, "learning_rate": 1.0931885807511137e-06, "loss": 3.5376, "step": 67885 }, { "epoch": 0.69061279296875, "grad_norm": 14.185200691223145, "learning_rate": 1.0928580393830015e-06, "loss": 3.2657, "step": 67890 }, { "epoch": 0.6906636555989584, "grad_norm": 10.420732498168945, "learning_rate": 1.092527534016282e-06, "loss": 3.3757, "step": 67895 }, { "epoch": 0.6907145182291666, "grad_norm": 11.136979103088379, "learning_rate": 1.092197064659412e-06, "loss": 3.5773, "step": 67900 }, { "epoch": 0.690765380859375, "grad_norm": 12.3397216796875, "learning_rate": 1.0918666313208472e-06, "loss": 3.8071, "step": 67905 }, { "epoch": 0.6908162434895834, "grad_norm": 10.15494441986084, "learning_rate": 1.091536234009041e-06, "loss": 3.1771, "step": 67910 }, { "epoch": 0.6908671061197916, "grad_norm": 16.179298400878906, "learning_rate": 1.0912058727324456e-06, "loss": 3.3966, "step": 67915 }, { "epoch": 0.69091796875, "grad_norm": 12.150471687316895, "learning_rate": 1.0908755474995147e-06, "loss": 3.1623, "step": 67920 }, { "epoch": 0.6909688313802084, "grad_norm": 12.015385627746582, "learning_rate": 1.090545258318698e-06, "loss": 3.1398, "step": 67925 }, { "epoch": 0.6910196940104166, "grad_norm": 10.088885307312012, "learning_rate": 1.090215005198448e-06, "loss": 3.345, "step": 67930 }, { "epoch": 0.691070556640625, "grad_norm": 14.526505470275879, "learning_rate": 1.0898847881472118e-06, "loss": 3.4345, "step": 67935 }, { "epoch": 0.6911214192708334, "grad_norm": 13.822447776794434, "learning_rate": 1.08955460717344e-06, "loss": 3.8992, "step": 67940 }, { "epoch": 0.6911722819010416, "grad_norm": 16.220075607299805, "learning_rate": 1.0892244622855785e-06, "loss": 3.3151, "step": 67945 }, { "epoch": 0.69122314453125, "grad_norm": 8.144336700439453, "learning_rate": 1.0888943534920745e-06, "loss": 3.2789, "step": 67950 }, { "epoch": 0.6912740071614584, "grad_norm": 8.377969741821289, "learning_rate": 1.0885642808013752e-06, "loss": 3.1394, "step": 67955 }, { "epoch": 0.6913248697916666, "grad_norm": 16.388376235961914, "learning_rate": 1.088234244221924e-06, "loss": 3.2968, "step": 67960 }, { "epoch": 0.691375732421875, "grad_norm": 13.155487060546875, "learning_rate": 1.0879042437621651e-06, "loss": 3.5906, "step": 67965 }, { "epoch": 0.6914265950520834, "grad_norm": 8.747903823852539, "learning_rate": 1.0875742794305404e-06, "loss": 3.177, "step": 67970 }, { "epoch": 0.6914774576822916, "grad_norm": 14.255596160888672, "learning_rate": 1.087244351235493e-06, "loss": 3.1131, "step": 67975 }, { "epoch": 0.6915283203125, "grad_norm": 7.685469150543213, "learning_rate": 1.086914459185465e-06, "loss": 3.2055, "step": 67980 }, { "epoch": 0.6915791829427084, "grad_norm": 12.629805564880371, "learning_rate": 1.0865846032888957e-06, "loss": 3.4163, "step": 67985 }, { "epoch": 0.6916300455729166, "grad_norm": 14.303412437438965, "learning_rate": 1.0862547835542235e-06, "loss": 2.9933, "step": 67990 }, { "epoch": 0.691680908203125, "grad_norm": 8.761443138122559, "learning_rate": 1.0859249999898882e-06, "loss": 3.3332, "step": 67995 }, { "epoch": 0.6917317708333334, "grad_norm": 9.855063438415527, "learning_rate": 1.0855952526043256e-06, "loss": 3.4041, "step": 68000 }, { "epoch": 0.6917826334635416, "grad_norm": 14.298595428466797, "learning_rate": 1.0852655414059745e-06, "loss": 3.5639, "step": 68005 }, { "epoch": 0.69183349609375, "grad_norm": 13.105066299438477, "learning_rate": 1.084935866403268e-06, "loss": 3.2815, "step": 68010 }, { "epoch": 0.6918843587239584, "grad_norm": 14.746922492980957, "learning_rate": 1.0846062276046428e-06, "loss": 3.5784, "step": 68015 }, { "epoch": 0.6919352213541666, "grad_norm": 21.065969467163086, "learning_rate": 1.0842766250185322e-06, "loss": 3.4011, "step": 68020 }, { "epoch": 0.691986083984375, "grad_norm": 10.775930404663086, "learning_rate": 1.0839470586533673e-06, "loss": 3.44, "step": 68025 }, { "epoch": 0.6920369466145834, "grad_norm": 13.672934532165527, "learning_rate": 1.0836175285175815e-06, "loss": 3.3936, "step": 68030 }, { "epoch": 0.6920878092447916, "grad_norm": 14.887701988220215, "learning_rate": 1.0832880346196062e-06, "loss": 3.3906, "step": 68035 }, { "epoch": 0.692138671875, "grad_norm": 10.036215782165527, "learning_rate": 1.0829585769678708e-06, "loss": 3.3281, "step": 68040 }, { "epoch": 0.6921895345052084, "grad_norm": 13.498652458190918, "learning_rate": 1.0826291555708034e-06, "loss": 3.4331, "step": 68045 }, { "epoch": 0.6922403971354166, "grad_norm": 10.369086265563965, "learning_rate": 1.0822997704368344e-06, "loss": 3.4975, "step": 68050 }, { "epoch": 0.692291259765625, "grad_norm": 11.20858097076416, "learning_rate": 1.0819704215743884e-06, "loss": 3.0873, "step": 68055 }, { "epoch": 0.6923421223958334, "grad_norm": 14.309041976928711, "learning_rate": 1.081641108991894e-06, "loss": 3.2642, "step": 68060 }, { "epoch": 0.6923929850260416, "grad_norm": 14.366168022155762, "learning_rate": 1.0813118326977748e-06, "loss": 3.1822, "step": 68065 }, { "epoch": 0.69244384765625, "grad_norm": 11.820574760437012, "learning_rate": 1.080982592700457e-06, "loss": 3.0526, "step": 68070 }, { "epoch": 0.6924947102864584, "grad_norm": 9.015646934509277, "learning_rate": 1.080653389008363e-06, "loss": 3.0813, "step": 68075 }, { "epoch": 0.6925455729166666, "grad_norm": 13.694621086120605, "learning_rate": 1.080324221629915e-06, "loss": 3.3853, "step": 68080 }, { "epoch": 0.692596435546875, "grad_norm": 8.71569538116455, "learning_rate": 1.0799950905735353e-06, "loss": 3.5342, "step": 68085 }, { "epoch": 0.6926472981770834, "grad_norm": 7.991396903991699, "learning_rate": 1.0796659958476456e-06, "loss": 3.111, "step": 68090 }, { "epoch": 0.6926981608072916, "grad_norm": 13.465011596679688, "learning_rate": 1.0793369374606646e-06, "loss": 3.4025, "step": 68095 }, { "epoch": 0.6927490234375, "grad_norm": 9.13414192199707, "learning_rate": 1.0790079154210111e-06, "loss": 3.3709, "step": 68100 }, { "epoch": 0.6927998860677084, "grad_norm": 13.184441566467285, "learning_rate": 1.0786789297371019e-06, "loss": 3.3656, "step": 68105 }, { "epoch": 0.6928507486979166, "grad_norm": 11.598027229309082, "learning_rate": 1.078349980417357e-06, "loss": 3.2299, "step": 68110 }, { "epoch": 0.692901611328125, "grad_norm": 10.605900764465332, "learning_rate": 1.0780210674701907e-06, "loss": 3.7082, "step": 68115 }, { "epoch": 0.6929524739583334, "grad_norm": 15.235445976257324, "learning_rate": 1.0776921909040187e-06, "loss": 3.0915, "step": 68120 }, { "epoch": 0.6930033365885416, "grad_norm": 9.415688514709473, "learning_rate": 1.0773633507272537e-06, "loss": 3.338, "step": 68125 }, { "epoch": 0.69305419921875, "grad_norm": 15.426976203918457, "learning_rate": 1.0770345469483103e-06, "loss": 3.2185, "step": 68130 }, { "epoch": 0.6931050618489584, "grad_norm": 13.035916328430176, "learning_rate": 1.0767057795756018e-06, "loss": 3.4561, "step": 68135 }, { "epoch": 0.6931559244791666, "grad_norm": 10.376809120178223, "learning_rate": 1.0763770486175374e-06, "loss": 3.3602, "step": 68140 }, { "epoch": 0.693206787109375, "grad_norm": 16.455488204956055, "learning_rate": 1.0760483540825294e-06, "loss": 3.6002, "step": 68145 }, { "epoch": 0.6932576497395834, "grad_norm": 15.267796516418457, "learning_rate": 1.0757196959789872e-06, "loss": 3.2368, "step": 68150 }, { "epoch": 0.6933085123697916, "grad_norm": 14.585723876953125, "learning_rate": 1.075391074315318e-06, "loss": 3.7951, "step": 68155 }, { "epoch": 0.693359375, "grad_norm": 11.693926811218262, "learning_rate": 1.0750624890999307e-06, "loss": 3.6183, "step": 68160 }, { "epoch": 0.6934102376302084, "grad_norm": 8.625460624694824, "learning_rate": 1.0747339403412325e-06, "loss": 3.4812, "step": 68165 }, { "epoch": 0.6934611002604166, "grad_norm": 12.401426315307617, "learning_rate": 1.0744054280476285e-06, "loss": 3.2696, "step": 68170 }, { "epoch": 0.693511962890625, "grad_norm": 10.516034126281738, "learning_rate": 1.0740769522275237e-06, "loss": 3.0283, "step": 68175 }, { "epoch": 0.6935628255208334, "grad_norm": 12.46440315246582, "learning_rate": 1.0737485128893215e-06, "loss": 3.2971, "step": 68180 }, { "epoch": 0.6936136881510416, "grad_norm": 9.502420425415039, "learning_rate": 1.0734201100414255e-06, "loss": 3.39, "step": 68185 }, { "epoch": 0.69366455078125, "grad_norm": 13.025237083435059, "learning_rate": 1.0730917436922387e-06, "loss": 3.1769, "step": 68190 }, { "epoch": 0.6937154134114584, "grad_norm": 12.869500160217285, "learning_rate": 1.0727634138501613e-06, "loss": 3.6214, "step": 68195 }, { "epoch": 0.6937662760416666, "grad_norm": 14.211162567138672, "learning_rate": 1.0724351205235928e-06, "loss": 3.3619, "step": 68200 }, { "epoch": 0.693817138671875, "grad_norm": 7.965907096862793, "learning_rate": 1.0721068637209342e-06, "loss": 3.0924, "step": 68205 }, { "epoch": 0.6938680013020834, "grad_norm": 14.146011352539062, "learning_rate": 1.0717786434505823e-06, "loss": 3.3728, "step": 68210 }, { "epoch": 0.6939188639322916, "grad_norm": 9.873130798339844, "learning_rate": 1.0714504597209362e-06, "loss": 3.3597, "step": 68215 }, { "epoch": 0.6939697265625, "grad_norm": 9.39063835144043, "learning_rate": 1.0711223125403906e-06, "loss": 3.2263, "step": 68220 }, { "epoch": 0.6940205891927084, "grad_norm": 7.686598777770996, "learning_rate": 1.0707942019173426e-06, "loss": 3.4356, "step": 68225 }, { "epoch": 0.6940714518229166, "grad_norm": 13.04306411743164, "learning_rate": 1.0704661278601863e-06, "loss": 3.3608, "step": 68230 }, { "epoch": 0.694122314453125, "grad_norm": 8.329973220825195, "learning_rate": 1.0701380903773145e-06, "loss": 3.3499, "step": 68235 }, { "epoch": 0.6941731770833334, "grad_norm": 11.92003059387207, "learning_rate": 1.0698100894771207e-06, "loss": 3.3101, "step": 68240 }, { "epoch": 0.6942240397135416, "grad_norm": 12.852375030517578, "learning_rate": 1.0694821251679977e-06, "loss": 3.0116, "step": 68245 }, { "epoch": 0.69427490234375, "grad_norm": 11.638413429260254, "learning_rate": 1.0691541974583355e-06, "loss": 3.2034, "step": 68250 }, { "epoch": 0.6943257649739584, "grad_norm": 10.772770881652832, "learning_rate": 1.068826306356523e-06, "loss": 3.1456, "step": 68255 }, { "epoch": 0.6943766276041666, "grad_norm": 15.013161659240723, "learning_rate": 1.0684984518709502e-06, "loss": 3.8208, "step": 68260 }, { "epoch": 0.694427490234375, "grad_norm": 10.826665878295898, "learning_rate": 1.0681706340100062e-06, "loss": 3.2775, "step": 68265 }, { "epoch": 0.6944783528645834, "grad_norm": 12.998494148254395, "learning_rate": 1.067842852782077e-06, "loss": 3.4062, "step": 68270 }, { "epoch": 0.6945292154947916, "grad_norm": 12.212747573852539, "learning_rate": 1.0675151081955483e-06, "loss": 3.2866, "step": 68275 }, { "epoch": 0.694580078125, "grad_norm": 13.134560585021973, "learning_rate": 1.0671874002588066e-06, "loss": 3.5512, "step": 68280 }, { "epoch": 0.6946309407552084, "grad_norm": 7.740173816680908, "learning_rate": 1.066859728980235e-06, "loss": 3.2582, "step": 68285 }, { "epoch": 0.6946818033854166, "grad_norm": 12.876514434814453, "learning_rate": 1.0665320943682184e-06, "loss": 3.2665, "step": 68290 }, { "epoch": 0.694732666015625, "grad_norm": 14.70106315612793, "learning_rate": 1.0662044964311375e-06, "loss": 3.4344, "step": 68295 }, { "epoch": 0.6947835286458334, "grad_norm": 8.250565528869629, "learning_rate": 1.0658769351773754e-06, "loss": 3.3056, "step": 68300 }, { "epoch": 0.6948343912760416, "grad_norm": 9.84837818145752, "learning_rate": 1.0655494106153121e-06, "loss": 3.7646, "step": 68305 }, { "epoch": 0.69488525390625, "grad_norm": 10.122499465942383, "learning_rate": 1.0652219227533264e-06, "loss": 3.2689, "step": 68310 }, { "epoch": 0.6949361165364584, "grad_norm": 15.315901756286621, "learning_rate": 1.0648944715997977e-06, "loss": 2.8652, "step": 68315 }, { "epoch": 0.6949869791666666, "grad_norm": 7.8511061668396, "learning_rate": 1.0645670571631044e-06, "loss": 3.0047, "step": 68320 }, { "epoch": 0.695037841796875, "grad_norm": 14.779093742370605, "learning_rate": 1.0642396794516233e-06, "loss": 3.5727, "step": 68325 }, { "epoch": 0.6950887044270834, "grad_norm": 9.470014572143555, "learning_rate": 1.063912338473729e-06, "loss": 3.0945, "step": 68330 }, { "epoch": 0.6951395670572916, "grad_norm": 17.567630767822266, "learning_rate": 1.0635850342377969e-06, "loss": 3.3204, "step": 68335 }, { "epoch": 0.6951904296875, "grad_norm": 12.101985931396484, "learning_rate": 1.0632577667522006e-06, "loss": 3.0812, "step": 68340 }, { "epoch": 0.6952412923177084, "grad_norm": 18.240774154663086, "learning_rate": 1.062930536025315e-06, "loss": 3.9153, "step": 68345 }, { "epoch": 0.6952921549479166, "grad_norm": 7.594537258148193, "learning_rate": 1.0626033420655102e-06, "loss": 3.3416, "step": 68350 }, { "epoch": 0.695343017578125, "grad_norm": 14.703880310058594, "learning_rate": 1.0622761848811588e-06, "loss": 3.5369, "step": 68355 }, { "epoch": 0.6953938802083334, "grad_norm": 10.368515968322754, "learning_rate": 1.0619490644806307e-06, "loss": 3.2543, "step": 68360 }, { "epoch": 0.6954447428385416, "grad_norm": 14.770052909851074, "learning_rate": 1.0616219808722938e-06, "loss": 3.1537, "step": 68365 }, { "epoch": 0.69549560546875, "grad_norm": 13.274304389953613, "learning_rate": 1.0612949340645178e-06, "loss": 3.3308, "step": 68370 }, { "epoch": 0.6955464680989584, "grad_norm": 12.72620677947998, "learning_rate": 1.0609679240656707e-06, "loss": 3.2832, "step": 68375 }, { "epoch": 0.6955973307291666, "grad_norm": 9.127725601196289, "learning_rate": 1.0606409508841181e-06, "loss": 3.7104, "step": 68380 }, { "epoch": 0.695648193359375, "grad_norm": 9.640445709228516, "learning_rate": 1.0603140145282259e-06, "loss": 3.2348, "step": 68385 }, { "epoch": 0.6956990559895834, "grad_norm": 10.805159568786621, "learning_rate": 1.059987115006357e-06, "loss": 3.5362, "step": 68390 }, { "epoch": 0.6957499186197916, "grad_norm": 11.10505485534668, "learning_rate": 1.059660252326877e-06, "loss": 3.2466, "step": 68395 }, { "epoch": 0.69580078125, "grad_norm": 12.379951477050781, "learning_rate": 1.0593334264981486e-06, "loss": 3.3931, "step": 68400 }, { "epoch": 0.6958516438802084, "grad_norm": 7.506567001342773, "learning_rate": 1.0590066375285329e-06, "loss": 3.5404, "step": 68405 }, { "epoch": 0.6959025065104166, "grad_norm": 11.790244102478027, "learning_rate": 1.0586798854263901e-06, "loss": 3.3525, "step": 68410 }, { "epoch": 0.695953369140625, "grad_norm": 16.516735076904297, "learning_rate": 1.0583531702000809e-06, "loss": 3.5066, "step": 68415 }, { "epoch": 0.6960042317708334, "grad_norm": 10.455042839050293, "learning_rate": 1.058026491857965e-06, "loss": 3.1206, "step": 68420 }, { "epoch": 0.6960550944010416, "grad_norm": 12.830158233642578, "learning_rate": 1.0576998504083996e-06, "loss": 4.1053, "step": 68425 }, { "epoch": 0.69610595703125, "grad_norm": 13.322319030761719, "learning_rate": 1.0573732458597404e-06, "loss": 3.073, "step": 68430 }, { "epoch": 0.6961568196614584, "grad_norm": 13.685236930847168, "learning_rate": 1.0570466782203462e-06, "loss": 3.3646, "step": 68435 }, { "epoch": 0.6962076822916666, "grad_norm": 16.166658401489258, "learning_rate": 1.0567201474985694e-06, "loss": 3.5602, "step": 68440 }, { "epoch": 0.696258544921875, "grad_norm": 11.911019325256348, "learning_rate": 1.0563936537027656e-06, "loss": 3.1587, "step": 68445 }, { "epoch": 0.6963094075520834, "grad_norm": 10.115235328674316, "learning_rate": 1.056067196841289e-06, "loss": 3.7451, "step": 68450 }, { "epoch": 0.6963602701822916, "grad_norm": 11.368029594421387, "learning_rate": 1.0557407769224909e-06, "loss": 3.4379, "step": 68455 }, { "epoch": 0.6964111328125, "grad_norm": 13.152690887451172, "learning_rate": 1.0554143939547227e-06, "loss": 3.2729, "step": 68460 }, { "epoch": 0.6964619954427084, "grad_norm": 10.852267265319824, "learning_rate": 1.0550880479463338e-06, "loss": 2.9517, "step": 68465 }, { "epoch": 0.6965128580729166, "grad_norm": 12.223347663879395, "learning_rate": 1.0547617389056747e-06, "loss": 3.5249, "step": 68470 }, { "epoch": 0.696563720703125, "grad_norm": 11.630764961242676, "learning_rate": 1.054435466841095e-06, "loss": 3.2771, "step": 68475 }, { "epoch": 0.6966145833333334, "grad_norm": 16.688596725463867, "learning_rate": 1.054109231760941e-06, "loss": 3.6573, "step": 68480 }, { "epoch": 0.6966654459635416, "grad_norm": 12.443121910095215, "learning_rate": 1.053783033673559e-06, "loss": 3.397, "step": 68485 }, { "epoch": 0.69671630859375, "grad_norm": 10.7487154006958, "learning_rate": 1.0534568725872962e-06, "loss": 3.4047, "step": 68490 }, { "epoch": 0.6967671712239584, "grad_norm": 11.965703010559082, "learning_rate": 1.0531307485104956e-06, "loss": 2.9612, "step": 68495 }, { "epoch": 0.6968180338541666, "grad_norm": 10.180152893066406, "learning_rate": 1.0528046614515024e-06, "loss": 3.8558, "step": 68500 }, { "epoch": 0.696868896484375, "grad_norm": 9.827269554138184, "learning_rate": 1.0524786114186582e-06, "loss": 3.1974, "step": 68505 }, { "epoch": 0.6969197591145834, "grad_norm": 14.167684555053711, "learning_rate": 1.0521525984203065e-06, "loss": 3.4702, "step": 68510 }, { "epoch": 0.6969706217447916, "grad_norm": 8.219744682312012, "learning_rate": 1.0518266224647874e-06, "loss": 3.3173, "step": 68515 }, { "epoch": 0.697021484375, "grad_norm": 82.138916015625, "learning_rate": 1.05150068356044e-06, "loss": 3.7151, "step": 68520 }, { "epoch": 0.6970723470052084, "grad_norm": 9.34282398223877, "learning_rate": 1.0511747817156042e-06, "loss": 3.5471, "step": 68525 }, { "epoch": 0.6971232096354166, "grad_norm": 9.755684852600098, "learning_rate": 1.0508489169386194e-06, "loss": 3.4135, "step": 68530 }, { "epoch": 0.697174072265625, "grad_norm": 12.70797061920166, "learning_rate": 1.0505230892378212e-06, "loss": 3.2697, "step": 68535 }, { "epoch": 0.6972249348958334, "grad_norm": 14.959371566772461, "learning_rate": 1.0501972986215456e-06, "loss": 3.1872, "step": 68540 }, { "epoch": 0.6972757975260416, "grad_norm": 15.707871437072754, "learning_rate": 1.0498715450981293e-06, "loss": 3.5537, "step": 68545 }, { "epoch": 0.69732666015625, "grad_norm": 13.715645790100098, "learning_rate": 1.0495458286759046e-06, "loss": 3.2138, "step": 68550 }, { "epoch": 0.6973775227864584, "grad_norm": 16.517990112304688, "learning_rate": 1.0492201493632073e-06, "loss": 3.4732, "step": 68555 }, { "epoch": 0.6974283854166666, "grad_norm": 7.940740585327148, "learning_rate": 1.0488945071683678e-06, "loss": 3.1687, "step": 68560 }, { "epoch": 0.697479248046875, "grad_norm": 9.776067733764648, "learning_rate": 1.0485689020997191e-06, "loss": 3.1857, "step": 68565 }, { "epoch": 0.6975301106770834, "grad_norm": 11.337080001831055, "learning_rate": 1.0482433341655901e-06, "loss": 3.1281, "step": 68570 }, { "epoch": 0.6975809733072916, "grad_norm": 13.244839668273926, "learning_rate": 1.0479178033743124e-06, "loss": 3.1413, "step": 68575 }, { "epoch": 0.6976318359375, "grad_norm": 9.634005546569824, "learning_rate": 1.0475923097342123e-06, "loss": 3.4767, "step": 68580 }, { "epoch": 0.6976826985677084, "grad_norm": 14.75212287902832, "learning_rate": 1.0472668532536197e-06, "loss": 3.2871, "step": 68585 }, { "epoch": 0.6977335611979166, "grad_norm": 7.295383453369141, "learning_rate": 1.0469414339408604e-06, "loss": 3.4713, "step": 68590 }, { "epoch": 0.697784423828125, "grad_norm": 13.375802993774414, "learning_rate": 1.0466160518042592e-06, "loss": 3.0931, "step": 68595 }, { "epoch": 0.6978352864583334, "grad_norm": 8.959383010864258, "learning_rate": 1.0462907068521416e-06, "loss": 3.2586, "step": 68600 }, { "epoch": 0.6978861490885416, "grad_norm": 13.755732536315918, "learning_rate": 1.0459653990928329e-06, "loss": 3.4389, "step": 68605 }, { "epoch": 0.69793701171875, "grad_norm": 15.684408187866211, "learning_rate": 1.045640128534655e-06, "loss": 3.4938, "step": 68610 }, { "epoch": 0.6979878743489584, "grad_norm": 14.712481498718262, "learning_rate": 1.0453148951859291e-06, "loss": 3.1968, "step": 68615 }, { "epoch": 0.6980387369791666, "grad_norm": 13.377435684204102, "learning_rate": 1.0449896990549762e-06, "loss": 3.0805, "step": 68620 }, { "epoch": 0.698089599609375, "grad_norm": 10.302496910095215, "learning_rate": 1.0446645401501172e-06, "loss": 3.2776, "step": 68625 }, { "epoch": 0.6981404622395834, "grad_norm": 13.840866088867188, "learning_rate": 1.0443394184796715e-06, "loss": 3.2508, "step": 68630 }, { "epoch": 0.6981913248697916, "grad_norm": 12.550299644470215, "learning_rate": 1.044014334051956e-06, "loss": 3.4385, "step": 68635 }, { "epoch": 0.6982421875, "grad_norm": 15.650467872619629, "learning_rate": 1.0436892868752895e-06, "loss": 3.4972, "step": 68640 }, { "epoch": 0.6982930501302084, "grad_norm": 14.249421119689941, "learning_rate": 1.0433642769579873e-06, "loss": 3.4271, "step": 68645 }, { "epoch": 0.6983439127604166, "grad_norm": 15.138565063476562, "learning_rate": 1.0430393043083645e-06, "loss": 3.2808, "step": 68650 }, { "epoch": 0.698394775390625, "grad_norm": 15.36414909362793, "learning_rate": 1.0427143689347352e-06, "loss": 3.1296, "step": 68655 }, { "epoch": 0.6984456380208334, "grad_norm": 10.230435371398926, "learning_rate": 1.0423894708454144e-06, "loss": 3.1101, "step": 68660 }, { "epoch": 0.6984965006510416, "grad_norm": 18.308584213256836, "learning_rate": 1.0420646100487136e-06, "loss": 3.034, "step": 68665 }, { "epoch": 0.69854736328125, "grad_norm": 15.090258598327637, "learning_rate": 1.0417397865529442e-06, "loss": 3.7638, "step": 68670 }, { "epoch": 0.6985982259114584, "grad_norm": 9.507784843444824, "learning_rate": 1.0414150003664155e-06, "loss": 3.5592, "step": 68675 }, { "epoch": 0.6986490885416666, "grad_norm": 9.35131549835205, "learning_rate": 1.0410902514974385e-06, "loss": 3.5117, "step": 68680 }, { "epoch": 0.698699951171875, "grad_norm": 14.29448413848877, "learning_rate": 1.0407655399543227e-06, "loss": 3.3596, "step": 68685 }, { "epoch": 0.6987508138020834, "grad_norm": 14.195074081420898, "learning_rate": 1.0404408657453743e-06, "loss": 3.1969, "step": 68690 }, { "epoch": 0.6988016764322916, "grad_norm": 12.963168144226074, "learning_rate": 1.0401162288788998e-06, "loss": 3.2805, "step": 68695 }, { "epoch": 0.6988525390625, "grad_norm": 13.396121978759766, "learning_rate": 1.0397916293632062e-06, "loss": 2.917, "step": 68700 }, { "epoch": 0.6989034016927084, "grad_norm": 10.836840629577637, "learning_rate": 1.039467067206597e-06, "loss": 3.477, "step": 68705 }, { "epoch": 0.6989542643229166, "grad_norm": 15.258602142333984, "learning_rate": 1.0391425424173777e-06, "loss": 3.7361, "step": 68710 }, { "epoch": 0.699005126953125, "grad_norm": 12.586681365966797, "learning_rate": 1.038818055003849e-06, "loss": 3.3715, "step": 68715 }, { "epoch": 0.6990559895833334, "grad_norm": 13.802001953125, "learning_rate": 1.0384936049743149e-06, "loss": 3.7519, "step": 68720 }, { "epoch": 0.6991068522135416, "grad_norm": 14.002902030944824, "learning_rate": 1.0381691923370748e-06, "loss": 3.2382, "step": 68725 }, { "epoch": 0.69915771484375, "grad_norm": 13.671459197998047, "learning_rate": 1.0378448171004294e-06, "loss": 3.2718, "step": 68730 }, { "epoch": 0.6992085774739584, "grad_norm": 12.762028694152832, "learning_rate": 1.0375204792726786e-06, "loss": 3.0765, "step": 68735 }, { "epoch": 0.6992594401041666, "grad_norm": 10.430758476257324, "learning_rate": 1.0371961788621198e-06, "loss": 3.5619, "step": 68740 }, { "epoch": 0.699310302734375, "grad_norm": 8.087532997131348, "learning_rate": 1.03687191587705e-06, "loss": 3.4343, "step": 68745 }, { "epoch": 0.6993611653645834, "grad_norm": 11.718721389770508, "learning_rate": 1.0365476903257644e-06, "loss": 3.2718, "step": 68750 }, { "epoch": 0.6994120279947916, "grad_norm": 14.21630859375, "learning_rate": 1.0362235022165595e-06, "loss": 2.986, "step": 68755 }, { "epoch": 0.699462890625, "grad_norm": 12.95578670501709, "learning_rate": 1.03589935155773e-06, "loss": 3.206, "step": 68760 }, { "epoch": 0.6995137532552084, "grad_norm": 15.721181869506836, "learning_rate": 1.0355752383575687e-06, "loss": 3.1838, "step": 68765 }, { "epoch": 0.6995646158854166, "grad_norm": 13.355118751525879, "learning_rate": 1.035251162624367e-06, "loss": 3.5071, "step": 68770 }, { "epoch": 0.699615478515625, "grad_norm": 15.441234588623047, "learning_rate": 1.0349271243664177e-06, "loss": 3.1547, "step": 68775 }, { "epoch": 0.6996663411458334, "grad_norm": 10.825979232788086, "learning_rate": 1.0346031235920102e-06, "loss": 3.4281, "step": 68780 }, { "epoch": 0.6997172037760416, "grad_norm": 6.40767240524292, "learning_rate": 1.034279160309435e-06, "loss": 3.1522, "step": 68785 }, { "epoch": 0.69976806640625, "grad_norm": 10.204715728759766, "learning_rate": 1.0339552345269794e-06, "loss": 3.6849, "step": 68790 }, { "epoch": 0.6998189290364584, "grad_norm": 12.19060230255127, "learning_rate": 1.0336313462529324e-06, "loss": 3.5668, "step": 68795 }, { "epoch": 0.6998697916666666, "grad_norm": 10.142702102661133, "learning_rate": 1.0333074954955798e-06, "loss": 2.9964, "step": 68800 }, { "epoch": 0.699920654296875, "grad_norm": 16.380889892578125, "learning_rate": 1.0329836822632064e-06, "loss": 2.9444, "step": 68805 }, { "epoch": 0.6999715169270834, "grad_norm": 9.951184272766113, "learning_rate": 1.032659906564098e-06, "loss": 3.5085, "step": 68810 }, { "epoch": 0.7000223795572916, "grad_norm": 8.64416217803955, "learning_rate": 1.0323361684065385e-06, "loss": 3.1798, "step": 68815 }, { "epoch": 0.7000732421875, "grad_norm": 10.92931842803955, "learning_rate": 1.0320124677988108e-06, "loss": 3.269, "step": 68820 }, { "epoch": 0.7001241048177084, "grad_norm": 14.081944465637207, "learning_rate": 1.031688804749195e-06, "loss": 3.5434, "step": 68825 }, { "epoch": 0.7001749674479166, "grad_norm": 14.113259315490723, "learning_rate": 1.031365179265974e-06, "loss": 3.5396, "step": 68830 }, { "epoch": 0.700225830078125, "grad_norm": 14.627796173095703, "learning_rate": 1.0310415913574258e-06, "loss": 3.2589, "step": 68835 }, { "epoch": 0.7002766927083334, "grad_norm": 14.891929626464844, "learning_rate": 1.0307180410318315e-06, "loss": 3.3151, "step": 68840 }, { "epoch": 0.7003275553385416, "grad_norm": 11.644354820251465, "learning_rate": 1.0303945282974667e-06, "loss": 3.0651, "step": 68845 }, { "epoch": 0.70037841796875, "grad_norm": 11.94490909576416, "learning_rate": 1.0300710531626107e-06, "loss": 3.3381, "step": 68850 }, { "epoch": 0.7004292805989584, "grad_norm": 12.369614601135254, "learning_rate": 1.0297476156355382e-06, "loss": 3.4216, "step": 68855 }, { "epoch": 0.7004801432291666, "grad_norm": 11.587510108947754, "learning_rate": 1.0294242157245235e-06, "loss": 3.3817, "step": 68860 }, { "epoch": 0.700531005859375, "grad_norm": 8.7716703414917, "learning_rate": 1.029100853437842e-06, "loss": 3.3383, "step": 68865 }, { "epoch": 0.7005818684895834, "grad_norm": 11.64136791229248, "learning_rate": 1.028777528783767e-06, "loss": 3.2306, "step": 68870 }, { "epoch": 0.7006327311197916, "grad_norm": 14.60140323638916, "learning_rate": 1.0284542417705703e-06, "loss": 3.3414, "step": 68875 }, { "epoch": 0.70068359375, "grad_norm": 13.89550495147705, "learning_rate": 1.0281309924065224e-06, "loss": 2.9955, "step": 68880 }, { "epoch": 0.7007344563802084, "grad_norm": 8.002796173095703, "learning_rate": 1.027807780699894e-06, "loss": 3.3122, "step": 68885 }, { "epoch": 0.7007853190104166, "grad_norm": 13.310931205749512, "learning_rate": 1.0274846066589556e-06, "loss": 3.1744, "step": 68890 }, { "epoch": 0.700836181640625, "grad_norm": 15.553311347961426, "learning_rate": 1.0271614702919745e-06, "loss": 3.3743, "step": 68895 }, { "epoch": 0.7008870442708334, "grad_norm": 9.992782592773438, "learning_rate": 1.0268383716072178e-06, "loss": 3.4188, "step": 68900 }, { "epoch": 0.7009379069010416, "grad_norm": 9.381199836730957, "learning_rate": 1.0265153106129517e-06, "loss": 3.4018, "step": 68905 }, { "epoch": 0.70098876953125, "grad_norm": 12.410082817077637, "learning_rate": 1.0261922873174418e-06, "loss": 3.4402, "step": 68910 }, { "epoch": 0.7010396321614584, "grad_norm": 13.554468154907227, "learning_rate": 1.025869301728954e-06, "loss": 3.4538, "step": 68915 }, { "epoch": 0.7010904947916666, "grad_norm": 14.527251243591309, "learning_rate": 1.0255463538557505e-06, "loss": 3.553, "step": 68920 }, { "epoch": 0.701141357421875, "grad_norm": 12.845770835876465, "learning_rate": 1.0252234437060932e-06, "loss": 3.4095, "step": 68925 }, { "epoch": 0.7011922200520834, "grad_norm": 9.762109756469727, "learning_rate": 1.0249005712882457e-06, "loss": 3.2408, "step": 68930 }, { "epoch": 0.7012430826822916, "grad_norm": 10.933417320251465, "learning_rate": 1.0245777366104662e-06, "loss": 3.2591, "step": 68935 }, { "epoch": 0.7012939453125, "grad_norm": 14.173288345336914, "learning_rate": 1.0242549396810158e-06, "loss": 3.1852, "step": 68940 }, { "epoch": 0.7013448079427084, "grad_norm": 15.338982582092285, "learning_rate": 1.0239321805081536e-06, "loss": 3.0217, "step": 68945 }, { "epoch": 0.7013956705729166, "grad_norm": 9.645343780517578, "learning_rate": 1.0236094591001372e-06, "loss": 3.2361, "step": 68950 }, { "epoch": 0.701446533203125, "grad_norm": 9.594852447509766, "learning_rate": 1.0232867754652227e-06, "loss": 3.2452, "step": 68955 }, { "epoch": 0.7014973958333334, "grad_norm": 16.04738998413086, "learning_rate": 1.0229641296116647e-06, "loss": 3.3969, "step": 68960 }, { "epoch": 0.7015482584635416, "grad_norm": 10.130403518676758, "learning_rate": 1.0226415215477198e-06, "loss": 3.3939, "step": 68965 }, { "epoch": 0.70159912109375, "grad_norm": 10.138354301452637, "learning_rate": 1.022318951281642e-06, "loss": 3.1279, "step": 68970 }, { "epoch": 0.7016499837239584, "grad_norm": 12.420388221740723, "learning_rate": 1.0219964188216838e-06, "loss": 3.7869, "step": 68975 }, { "epoch": 0.7017008463541666, "grad_norm": 16.27092742919922, "learning_rate": 1.021673924176096e-06, "loss": 3.1377, "step": 68980 }, { "epoch": 0.701751708984375, "grad_norm": 10.564739227294922, "learning_rate": 1.021351467353131e-06, "loss": 3.4434, "step": 68985 }, { "epoch": 0.7018025716145834, "grad_norm": 13.05958366394043, "learning_rate": 1.0210290483610378e-06, "loss": 3.3063, "step": 68990 }, { "epoch": 0.7018534342447916, "grad_norm": 15.520635604858398, "learning_rate": 1.0207066672080664e-06, "loss": 2.9001, "step": 68995 }, { "epoch": 0.701904296875, "grad_norm": 8.159523963928223, "learning_rate": 1.0203843239024636e-06, "loss": 3.065, "step": 69000 }, { "epoch": 0.7019551595052084, "grad_norm": 14.460269927978516, "learning_rate": 1.020062018452478e-06, "loss": 3.3251, "step": 69005 }, { "epoch": 0.7020060221354166, "grad_norm": 7.9953837394714355, "learning_rate": 1.019739750866355e-06, "loss": 2.926, "step": 69010 }, { "epoch": 0.702056884765625, "grad_norm": 8.935430526733398, "learning_rate": 1.0194175211523385e-06, "loss": 3.1843, "step": 69015 }, { "epoch": 0.7021077473958334, "grad_norm": 13.662001609802246, "learning_rate": 1.019095329318674e-06, "loss": 3.2766, "step": 69020 }, { "epoch": 0.7021586100260416, "grad_norm": 12.309272766113281, "learning_rate": 1.0187731753736051e-06, "loss": 3.2095, "step": 69025 }, { "epoch": 0.70220947265625, "grad_norm": 9.266861915588379, "learning_rate": 1.0184510593253738e-06, "loss": 3.5939, "step": 69030 }, { "epoch": 0.7022603352864584, "grad_norm": 13.035009384155273, "learning_rate": 1.01812898118222e-06, "loss": 3.036, "step": 69035 }, { "epoch": 0.7023111979166666, "grad_norm": 12.960883140563965, "learning_rate": 1.0178069409523848e-06, "loss": 3.0416, "step": 69040 }, { "epoch": 0.702362060546875, "grad_norm": 15.219749450683594, "learning_rate": 1.0174849386441086e-06, "loss": 3.0765, "step": 69045 }, { "epoch": 0.7024129231770834, "grad_norm": 8.982955932617188, "learning_rate": 1.0171629742656286e-06, "loss": 3.3042, "step": 69050 }, { "epoch": 0.7024637858072916, "grad_norm": 13.161221504211426, "learning_rate": 1.016841047825182e-06, "loss": 3.5382, "step": 69055 }, { "epoch": 0.7025146484375, "grad_norm": 9.551596641540527, "learning_rate": 1.0165191593310061e-06, "loss": 3.3944, "step": 69060 }, { "epoch": 0.7025655110677084, "grad_norm": 11.355164527893066, "learning_rate": 1.0161973087913355e-06, "loss": 3.2315, "step": 69065 }, { "epoch": 0.7026163736979166, "grad_norm": 12.059857368469238, "learning_rate": 1.0158754962144058e-06, "loss": 3.3775, "step": 69070 }, { "epoch": 0.702667236328125, "grad_norm": 15.176041603088379, "learning_rate": 1.0155537216084486e-06, "loss": 3.1982, "step": 69075 }, { "epoch": 0.7027180989583334, "grad_norm": 12.852165222167969, "learning_rate": 1.0152319849816985e-06, "loss": 2.9204, "step": 69080 }, { "epoch": 0.7027689615885416, "grad_norm": 11.793715476989746, "learning_rate": 1.0149102863423862e-06, "loss": 3.3584, "step": 69085 }, { "epoch": 0.70281982421875, "grad_norm": 12.076817512512207, "learning_rate": 1.0145886256987417e-06, "loss": 3.6016, "step": 69090 }, { "epoch": 0.7028706868489584, "grad_norm": 7.835428237915039, "learning_rate": 1.0142670030589946e-06, "loss": 3.2869, "step": 69095 }, { "epoch": 0.7029215494791666, "grad_norm": 12.05679988861084, "learning_rate": 1.0139454184313752e-06, "loss": 3.2197, "step": 69100 }, { "epoch": 0.702972412109375, "grad_norm": 11.922722816467285, "learning_rate": 1.0136238718241101e-06, "loss": 3.2324, "step": 69105 }, { "epoch": 0.7030232747395834, "grad_norm": 10.238421440124512, "learning_rate": 1.0133023632454258e-06, "loss": 3.2494, "step": 69110 }, { "epoch": 0.7030741373697916, "grad_norm": 13.973362922668457, "learning_rate": 1.0129808927035473e-06, "loss": 3.4301, "step": 69115 }, { "epoch": 0.703125, "grad_norm": 8.110556602478027, "learning_rate": 1.0126594602067004e-06, "loss": 3.516, "step": 69120 }, { "epoch": 0.7031758626302084, "grad_norm": 13.717449188232422, "learning_rate": 1.0123380657631093e-06, "loss": 3.4103, "step": 69125 }, { "epoch": 0.7032267252604166, "grad_norm": 13.071977615356445, "learning_rate": 1.0120167093809954e-06, "loss": 3.3773, "step": 69130 }, { "epoch": 0.703277587890625, "grad_norm": 10.020981788635254, "learning_rate": 1.011695391068582e-06, "loss": 3.3164, "step": 69135 }, { "epoch": 0.7033284505208334, "grad_norm": 13.737312316894531, "learning_rate": 1.0113741108340893e-06, "loss": 3.2001, "step": 69140 }, { "epoch": 0.7033793131510416, "grad_norm": 13.198023796081543, "learning_rate": 1.0110528686857366e-06, "loss": 3.4193, "step": 69145 }, { "epoch": 0.70343017578125, "grad_norm": 12.423659324645996, "learning_rate": 1.010731664631743e-06, "loss": 3.2851, "step": 69150 }, { "epoch": 0.7034810384114584, "grad_norm": 14.201518058776855, "learning_rate": 1.0104104986803276e-06, "loss": 3.6451, "step": 69155 }, { "epoch": 0.7035319010416666, "grad_norm": 16.92333221435547, "learning_rate": 1.0100893708397066e-06, "loss": 3.459, "step": 69160 }, { "epoch": 0.703582763671875, "grad_norm": 9.523395538330078, "learning_rate": 1.0097682811180962e-06, "loss": 3.3106, "step": 69165 }, { "epoch": 0.7036336263020834, "grad_norm": 9.247840881347656, "learning_rate": 1.009447229523709e-06, "loss": 3.4684, "step": 69170 }, { "epoch": 0.7036844889322916, "grad_norm": 13.878296852111816, "learning_rate": 1.009126216064763e-06, "loss": 3.444, "step": 69175 }, { "epoch": 0.7037353515625, "grad_norm": 12.755209922790527, "learning_rate": 1.0088052407494695e-06, "loss": 3.4961, "step": 69180 }, { "epoch": 0.7037862141927084, "grad_norm": 13.315839767456055, "learning_rate": 1.0084843035860404e-06, "loss": 3.4761, "step": 69185 }, { "epoch": 0.7038370768229166, "grad_norm": 8.987351417541504, "learning_rate": 1.0081634045826858e-06, "loss": 3.6049, "step": 69190 }, { "epoch": 0.703887939453125, "grad_norm": 18.58706283569336, "learning_rate": 1.0078425437476169e-06, "loss": 3.4532, "step": 69195 }, { "epoch": 0.7039388020833334, "grad_norm": 12.25035285949707, "learning_rate": 1.007521721089044e-06, "loss": 3.6837, "step": 69200 }, { "epoch": 0.7039896647135416, "grad_norm": 18.257944107055664, "learning_rate": 1.0072009366151739e-06, "loss": 3.4599, "step": 69205 }, { "epoch": 0.70404052734375, "grad_norm": 7.997075080871582, "learning_rate": 1.006880190334213e-06, "loss": 3.3349, "step": 69210 }, { "epoch": 0.7040913899739584, "grad_norm": 8.410901069641113, "learning_rate": 1.0065594822543695e-06, "loss": 3.317, "step": 69215 }, { "epoch": 0.7041422526041666, "grad_norm": 13.914037704467773, "learning_rate": 1.0062388123838467e-06, "loss": 2.7173, "step": 69220 }, { "epoch": 0.704193115234375, "grad_norm": 13.227640151977539, "learning_rate": 1.0059181807308502e-06, "loss": 3.4732, "step": 69225 }, { "epoch": 0.7042439778645834, "grad_norm": 10.687392234802246, "learning_rate": 1.0055975873035836e-06, "loss": 3.3346, "step": 69230 }, { "epoch": 0.7042948404947916, "grad_norm": 11.866856575012207, "learning_rate": 1.0052770321102484e-06, "loss": 3.2271, "step": 69235 }, { "epoch": 0.704345703125, "grad_norm": 12.58163833618164, "learning_rate": 1.0049565151590461e-06, "loss": 3.2716, "step": 69240 }, { "epoch": 0.7043965657552084, "grad_norm": 10.621543884277344, "learning_rate": 1.0046360364581762e-06, "loss": 2.9137, "step": 69245 }, { "epoch": 0.7044474283854166, "grad_norm": 14.219128608703613, "learning_rate": 1.0043155960158388e-06, "loss": 3.88, "step": 69250 }, { "epoch": 0.704498291015625, "grad_norm": 10.410592079162598, "learning_rate": 1.003995193840233e-06, "loss": 3.3739, "step": 69255 }, { "epoch": 0.7045491536458334, "grad_norm": 8.266712188720703, "learning_rate": 1.003674829939556e-06, "loss": 3.6565, "step": 69260 }, { "epoch": 0.7046000162760416, "grad_norm": 10.390896797180176, "learning_rate": 1.0033545043220027e-06, "loss": 3.2506, "step": 69265 }, { "epoch": 0.70465087890625, "grad_norm": 8.9793119430542, "learning_rate": 1.0030342169957707e-06, "loss": 3.063, "step": 69270 }, { "epoch": 0.7047017415364584, "grad_norm": 14.336295127868652, "learning_rate": 1.0027139679690526e-06, "loss": 3.5864, "step": 69275 }, { "epoch": 0.7047526041666666, "grad_norm": 8.655488967895508, "learning_rate": 1.0023937572500433e-06, "loss": 3.4331, "step": 69280 }, { "epoch": 0.704803466796875, "grad_norm": 10.18228530883789, "learning_rate": 1.002073584846934e-06, "loss": 3.2225, "step": 69285 }, { "epoch": 0.7048543294270834, "grad_norm": 14.573174476623535, "learning_rate": 1.001753450767918e-06, "loss": 3.39, "step": 69290 }, { "epoch": 0.7049051920572916, "grad_norm": 9.717402458190918, "learning_rate": 1.0014333550211848e-06, "loss": 3.3884, "step": 69295 }, { "epoch": 0.7049560546875, "grad_norm": 7.359352111816406, "learning_rate": 1.001113297614923e-06, "loss": 3.6591, "step": 69300 }, { "epoch": 0.7050069173177084, "grad_norm": 7.139311790466309, "learning_rate": 1.000793278557322e-06, "loss": 3.2576, "step": 69305 }, { "epoch": 0.7050577799479166, "grad_norm": 11.167351722717285, "learning_rate": 1.000473297856571e-06, "loss": 3.2675, "step": 69310 }, { "epoch": 0.705108642578125, "grad_norm": 10.71337890625, "learning_rate": 1.000153355520855e-06, "loss": 3.4261, "step": 69315 }, { "epoch": 0.7051595052083334, "grad_norm": 14.224665641784668, "learning_rate": 9.99833451558359e-07, "loss": 3.3663, "step": 69320 }, { "epoch": 0.7052103678385416, "grad_norm": 10.977479934692383, "learning_rate": 9.995135859772687e-07, "loss": 3.832, "step": 69325 }, { "epoch": 0.70526123046875, "grad_norm": 10.125252723693848, "learning_rate": 9.991937587857684e-07, "loss": 3.582, "step": 69330 }, { "epoch": 0.7053120930989584, "grad_norm": 11.237396240234375, "learning_rate": 9.9887396999204e-07, "loss": 3.3685, "step": 69335 }, { "epoch": 0.7053629557291666, "grad_norm": 7.409149169921875, "learning_rate": 9.985542196042643e-07, "loss": 3.3208, "step": 69340 }, { "epoch": 0.705413818359375, "grad_norm": 11.912979125976562, "learning_rate": 9.982345076306244e-07, "loss": 3.4061, "step": 69345 }, { "epoch": 0.7054646809895834, "grad_norm": 16.61011505126953, "learning_rate": 9.979148340792974e-07, "loss": 3.2365, "step": 69350 }, { "epoch": 0.7055155436197916, "grad_norm": 12.74564266204834, "learning_rate": 9.975951989584643e-07, "loss": 3.5396, "step": 69355 }, { "epoch": 0.70556640625, "grad_norm": 11.513494491577148, "learning_rate": 9.972756022763009e-07, "loss": 3.2285, "step": 69360 }, { "epoch": 0.7056172688802084, "grad_norm": 12.513010025024414, "learning_rate": 9.96956044040986e-07, "loss": 3.0373, "step": 69365 }, { "epoch": 0.7056681315104166, "grad_norm": 14.658462524414062, "learning_rate": 9.966365242606946e-07, "loss": 3.196, "step": 69370 }, { "epoch": 0.705718994140625, "grad_norm": 21.01740264892578, "learning_rate": 9.963170429436003e-07, "loss": 3.4951, "step": 69375 }, { "epoch": 0.7057698567708334, "grad_norm": 12.703271865844727, "learning_rate": 9.959976000978779e-07, "loss": 3.1626, "step": 69380 }, { "epoch": 0.7058207194010416, "grad_norm": 13.965160369873047, "learning_rate": 9.956781957317016e-07, "loss": 3.6084, "step": 69385 }, { "epoch": 0.70587158203125, "grad_norm": 16.33390998840332, "learning_rate": 9.953588298532422e-07, "loss": 3.2092, "step": 69390 }, { "epoch": 0.7059224446614584, "grad_norm": 12.628070831298828, "learning_rate": 9.950395024706703e-07, "loss": 3.4365, "step": 69395 }, { "epoch": 0.7059733072916666, "grad_norm": 9.108073234558105, "learning_rate": 9.94720213592155e-07, "loss": 3.4463, "step": 69400 }, { "epoch": 0.706024169921875, "grad_norm": 9.910865783691406, "learning_rate": 9.944009632258664e-07, "loss": 3.2692, "step": 69405 }, { "epoch": 0.7060750325520834, "grad_norm": 7.650475978851318, "learning_rate": 9.940817513799734e-07, "loss": 3.1788, "step": 69410 }, { "epoch": 0.7061258951822916, "grad_norm": 14.100626945495605, "learning_rate": 9.937625780626405e-07, "loss": 3.3328, "step": 69415 }, { "epoch": 0.7061767578125, "grad_norm": 11.57579231262207, "learning_rate": 9.934434432820362e-07, "loss": 3.272, "step": 69420 }, { "epoch": 0.7062276204427084, "grad_norm": 14.377008438110352, "learning_rate": 9.931243470463242e-07, "loss": 3.5567, "step": 69425 }, { "epoch": 0.7062784830729166, "grad_norm": 13.055729866027832, "learning_rate": 9.928052893636677e-07, "loss": 3.2766, "step": 69430 }, { "epoch": 0.706329345703125, "grad_norm": 10.804378509521484, "learning_rate": 9.924862702422309e-07, "loss": 3.175, "step": 69435 }, { "epoch": 0.7063802083333334, "grad_norm": 11.880969047546387, "learning_rate": 9.921672896901762e-07, "loss": 3.4717, "step": 69440 }, { "epoch": 0.7064310709635416, "grad_norm": 38.35500717163086, "learning_rate": 9.91848347715664e-07, "loss": 3.4817, "step": 69445 }, { "epoch": 0.70648193359375, "grad_norm": 6.873586177825928, "learning_rate": 9.915294443268544e-07, "loss": 3.4381, "step": 69450 }, { "epoch": 0.7065327962239584, "grad_norm": 11.292330741882324, "learning_rate": 9.912105795319054e-07, "loss": 3.6071, "step": 69455 }, { "epoch": 0.7065836588541666, "grad_norm": 15.964972496032715, "learning_rate": 9.908917533389764e-07, "loss": 3.2474, "step": 69460 }, { "epoch": 0.706634521484375, "grad_norm": 13.744715690612793, "learning_rate": 9.905729657562248e-07, "loss": 3.0413, "step": 69465 }, { "epoch": 0.7066853841145834, "grad_norm": 15.283783912658691, "learning_rate": 9.902542167918064e-07, "loss": 3.271, "step": 69470 }, { "epoch": 0.7067362467447916, "grad_norm": 10.285155296325684, "learning_rate": 9.899355064538749e-07, "loss": 3.3835, "step": 69475 }, { "epoch": 0.706787109375, "grad_norm": 10.142790794372559, "learning_rate": 9.896168347505854e-07, "loss": 3.5299, "step": 69480 }, { "epoch": 0.7068379720052084, "grad_norm": 14.190443992614746, "learning_rate": 9.892982016900923e-07, "loss": 2.9823, "step": 69485 }, { "epoch": 0.7068888346354166, "grad_norm": 13.098793029785156, "learning_rate": 9.889796072805466e-07, "loss": 3.1256, "step": 69490 }, { "epoch": 0.706939697265625, "grad_norm": 12.860541343688965, "learning_rate": 9.886610515300986e-07, "loss": 3.9647, "step": 69495 }, { "epoch": 0.7069905598958334, "grad_norm": 10.856886863708496, "learning_rate": 9.883425344469004e-07, "loss": 2.7901, "step": 69500 }, { "epoch": 0.7070414225260416, "grad_norm": 17.13384437561035, "learning_rate": 9.880240560390992e-07, "loss": 3.3969, "step": 69505 }, { "epoch": 0.70709228515625, "grad_norm": 11.889996528625488, "learning_rate": 9.877056163148448e-07, "loss": 3.2367, "step": 69510 }, { "epoch": 0.7071431477864584, "grad_norm": 10.63394832611084, "learning_rate": 9.87387215282283e-07, "loss": 3.5426, "step": 69515 }, { "epoch": 0.7071940104166666, "grad_norm": 9.318733215332031, "learning_rate": 9.870688529495618e-07, "loss": 3.6947, "step": 69520 }, { "epoch": 0.707244873046875, "grad_norm": 13.429479598999023, "learning_rate": 9.867505293248253e-07, "loss": 3.1797, "step": 69525 }, { "epoch": 0.7072957356770834, "grad_norm": 10.64814281463623, "learning_rate": 9.864322444162166e-07, "loss": 2.9945, "step": 69530 }, { "epoch": 0.7073465983072916, "grad_norm": 11.657544136047363, "learning_rate": 9.861139982318807e-07, "loss": 3.2833, "step": 69535 }, { "epoch": 0.7073974609375, "grad_norm": 13.675775527954102, "learning_rate": 9.857957907799601e-07, "loss": 3.5087, "step": 69540 }, { "epoch": 0.7074483235677084, "grad_norm": 15.808775901794434, "learning_rate": 9.85477622068595e-07, "loss": 3.444, "step": 69545 }, { "epoch": 0.7074991861979166, "grad_norm": 10.257986068725586, "learning_rate": 9.851594921059252e-07, "loss": 3.2853, "step": 69550 }, { "epoch": 0.707550048828125, "grad_norm": 12.067086219787598, "learning_rate": 9.848414009000916e-07, "loss": 3.6043, "step": 69555 }, { "epoch": 0.7076009114583334, "grad_norm": 13.973238945007324, "learning_rate": 9.845233484592308e-07, "loss": 3.3014, "step": 69560 }, { "epoch": 0.7076517740885416, "grad_norm": 11.880187034606934, "learning_rate": 9.84205334791482e-07, "loss": 3.3167, "step": 69565 }, { "epoch": 0.70770263671875, "grad_norm": 12.605144500732422, "learning_rate": 9.838873599049795e-07, "loss": 3.3419, "step": 69570 }, { "epoch": 0.7077534993489584, "grad_norm": 14.770031929016113, "learning_rate": 9.835694238078601e-07, "loss": 3.1096, "step": 69575 }, { "epoch": 0.7078043619791666, "grad_norm": 13.251535415649414, "learning_rate": 9.83251526508258e-07, "loss": 3.1371, "step": 69580 }, { "epoch": 0.707855224609375, "grad_norm": 11.815899848937988, "learning_rate": 9.829336680143048e-07, "loss": 3.3847, "step": 69585 }, { "epoch": 0.7079060872395834, "grad_norm": 9.919445037841797, "learning_rate": 9.826158483341342e-07, "loss": 3.4533, "step": 69590 }, { "epoch": 0.7079569498697916, "grad_norm": 12.395959854125977, "learning_rate": 9.822980674758783e-07, "loss": 3.1149, "step": 69595 }, { "epoch": 0.7080078125, "grad_norm": 14.6797456741333, "learning_rate": 9.819803254476668e-07, "loss": 3.2177, "step": 69600 }, { "epoch": 0.7080586751302084, "grad_norm": 12.67190170288086, "learning_rate": 9.816626222576289e-07, "loss": 3.352, "step": 69605 }, { "epoch": 0.7081095377604166, "grad_norm": 9.85501480102539, "learning_rate": 9.81344957913892e-07, "loss": 3.3222, "step": 69610 }, { "epoch": 0.708160400390625, "grad_norm": 16.397167205810547, "learning_rate": 9.810273324245842e-07, "loss": 3.7158, "step": 69615 }, { "epoch": 0.7082112630208334, "grad_norm": 11.681159973144531, "learning_rate": 9.807097457978331e-07, "loss": 3.6375, "step": 69620 }, { "epoch": 0.7082621256510416, "grad_norm": 9.919473648071289, "learning_rate": 9.803921980417622e-07, "loss": 3.0976, "step": 69625 }, { "epoch": 0.70831298828125, "grad_norm": 11.035103797912598, "learning_rate": 9.800746891644975e-07, "loss": 3.1882, "step": 69630 }, { "epoch": 0.7083638509114584, "grad_norm": 12.774720191955566, "learning_rate": 9.797572191741608e-07, "loss": 3.1872, "step": 69635 }, { "epoch": 0.7084147135416666, "grad_norm": 10.840656280517578, "learning_rate": 9.794397880788761e-07, "loss": 3.3356, "step": 69640 }, { "epoch": 0.708465576171875, "grad_norm": 13.177209854125977, "learning_rate": 9.791223958867631e-07, "loss": 3.4211, "step": 69645 }, { "epoch": 0.7085164388020834, "grad_norm": 9.744634628295898, "learning_rate": 9.788050426059442e-07, "loss": 3.279, "step": 69650 }, { "epoch": 0.7085673014322916, "grad_norm": 7.26648473739624, "learning_rate": 9.784877282445376e-07, "loss": 3.1761, "step": 69655 }, { "epoch": 0.7086181640625, "grad_norm": 10.062418937683105, "learning_rate": 9.78170452810661e-07, "loss": 3.3257, "step": 69660 }, { "epoch": 0.7086690266927084, "grad_norm": 11.900903701782227, "learning_rate": 9.778532163124324e-07, "loss": 3.3456, "step": 69665 }, { "epoch": 0.7087198893229166, "grad_norm": 10.382468223571777, "learning_rate": 9.775360187579697e-07, "loss": 3.0791, "step": 69670 }, { "epoch": 0.708770751953125, "grad_norm": 7.0931396484375, "learning_rate": 9.772188601553869e-07, "loss": 3.2358, "step": 69675 }, { "epoch": 0.7088216145833334, "grad_norm": 12.681374549865723, "learning_rate": 9.769017405127987e-07, "loss": 3.0573, "step": 69680 }, { "epoch": 0.7088724772135416, "grad_norm": 16.667367935180664, "learning_rate": 9.765846598383177e-07, "loss": 3.2417, "step": 69685 }, { "epoch": 0.70892333984375, "grad_norm": 11.11923599243164, "learning_rate": 9.762676181400568e-07, "loss": 3.3146, "step": 69690 }, { "epoch": 0.7089742024739584, "grad_norm": 8.557740211486816, "learning_rate": 9.759506154261289e-07, "loss": 3.7042, "step": 69695 }, { "epoch": 0.7090250651041666, "grad_norm": 10.00163745880127, "learning_rate": 9.756336517046431e-07, "loss": 2.9392, "step": 69700 }, { "epoch": 0.709075927734375, "grad_norm": 15.909320831298828, "learning_rate": 9.75316726983708e-07, "loss": 3.6704, "step": 69705 }, { "epoch": 0.7091267903645834, "grad_norm": 12.77523422241211, "learning_rate": 9.749998412714342e-07, "loss": 3.9356, "step": 69710 }, { "epoch": 0.7091776529947916, "grad_norm": 12.011052131652832, "learning_rate": 9.74682994575927e-07, "loss": 3.4001, "step": 69715 }, { "epoch": 0.709228515625, "grad_norm": 15.927411079406738, "learning_rate": 9.743661869052937e-07, "loss": 3.2104, "step": 69720 }, { "epoch": 0.7092793782552084, "grad_norm": 11.16408634185791, "learning_rate": 9.74049418267641e-07, "loss": 3.0609, "step": 69725 }, { "epoch": 0.7093302408854166, "grad_norm": 12.752314567565918, "learning_rate": 9.73732688671072e-07, "loss": 3.6774, "step": 69730 }, { "epoch": 0.709381103515625, "grad_norm": 11.089025497436523, "learning_rate": 9.734159981236906e-07, "loss": 3.4763, "step": 69735 }, { "epoch": 0.7094319661458334, "grad_norm": 10.589859008789062, "learning_rate": 9.730993466335984e-07, "loss": 3.0983, "step": 69740 }, { "epoch": 0.7094828287760416, "grad_norm": 9.92780876159668, "learning_rate": 9.727827342088972e-07, "loss": 3.2294, "step": 69745 }, { "epoch": 0.70953369140625, "grad_norm": 11.25098705291748, "learning_rate": 9.724661608576885e-07, "loss": 3.7487, "step": 69750 }, { "epoch": 0.7095845540364584, "grad_norm": 13.248087882995605, "learning_rate": 9.721496265880713e-07, "loss": 3.1595, "step": 69755 }, { "epoch": 0.7096354166666666, "grad_norm": 10.853324890136719, "learning_rate": 9.718331314081426e-07, "loss": 3.504, "step": 69760 }, { "epoch": 0.709686279296875, "grad_norm": 9.86822509765625, "learning_rate": 9.715166753260022e-07, "loss": 3.2758, "step": 69765 }, { "epoch": 0.7097371419270834, "grad_norm": 13.17049503326416, "learning_rate": 9.712002583497443e-07, "loss": 3.2826, "step": 69770 }, { "epoch": 0.7097880045572916, "grad_norm": 9.153229713439941, "learning_rate": 9.708838804874663e-07, "loss": 3.3263, "step": 69775 }, { "epoch": 0.7098388671875, "grad_norm": 9.449225425720215, "learning_rate": 9.70567541747261e-07, "loss": 3.1877, "step": 69780 }, { "epoch": 0.7098897298177084, "grad_norm": 10.048635482788086, "learning_rate": 9.702512421372234e-07, "loss": 3.1857, "step": 69785 }, { "epoch": 0.7099405924479166, "grad_norm": 13.345157623291016, "learning_rate": 9.699349816654443e-07, "loss": 3.3308, "step": 69790 }, { "epoch": 0.709991455078125, "grad_norm": 12.826769828796387, "learning_rate": 9.69618760340017e-07, "loss": 3.4765, "step": 69795 }, { "epoch": 0.7100423177083334, "grad_norm": 8.920169830322266, "learning_rate": 9.693025781690299e-07, "loss": 3.4218, "step": 69800 }, { "epoch": 0.7100931803385416, "grad_norm": 10.686864852905273, "learning_rate": 9.689864351605746e-07, "loss": 3.2166, "step": 69805 }, { "epoch": 0.71014404296875, "grad_norm": 8.101327896118164, "learning_rate": 9.686703313227386e-07, "loss": 3.4575, "step": 69810 }, { "epoch": 0.7101949055989584, "grad_norm": 9.720623970031738, "learning_rate": 9.683542666636082e-07, "loss": 3.548, "step": 69815 }, { "epoch": 0.7102457682291666, "grad_norm": 12.997328758239746, "learning_rate": 9.680382411912706e-07, "loss": 3.2979, "step": 69820 }, { "epoch": 0.710296630859375, "grad_norm": 9.228484153747559, "learning_rate": 9.677222549138127e-07, "loss": 3.4373, "step": 69825 }, { "epoch": 0.7103474934895834, "grad_norm": 11.505775451660156, "learning_rate": 9.674063078393175e-07, "loss": 3.5473, "step": 69830 }, { "epoch": 0.7103983561197916, "grad_norm": 15.89929485321045, "learning_rate": 9.67090399975868e-07, "loss": 3.222, "step": 69835 }, { "epoch": 0.71044921875, "grad_norm": 7.241600036621094, "learning_rate": 9.66774531331548e-07, "loss": 3.1403, "step": 69840 }, { "epoch": 0.7105000813802084, "grad_norm": 10.587393760681152, "learning_rate": 9.664587019144373e-07, "loss": 3.4444, "step": 69845 }, { "epoch": 0.7105509440104166, "grad_norm": 14.452857971191406, "learning_rate": 9.66142911732618e-07, "loss": 2.9991, "step": 69850 }, { "epoch": 0.710601806640625, "grad_norm": 13.695948600769043, "learning_rate": 9.65827160794168e-07, "loss": 2.9567, "step": 69855 }, { "epoch": 0.7106526692708334, "grad_norm": 11.315937042236328, "learning_rate": 9.655114491071673e-07, "loss": 3.2219, "step": 69860 }, { "epoch": 0.7107035319010416, "grad_norm": 11.866053581237793, "learning_rate": 9.651957766796927e-07, "loss": 3.5155, "step": 69865 }, { "epoch": 0.71075439453125, "grad_norm": 12.22433853149414, "learning_rate": 9.64880143519819e-07, "loss": 3.4572, "step": 69870 }, { "epoch": 0.7108052571614584, "grad_norm": 11.939126014709473, "learning_rate": 9.645645496356234e-07, "loss": 3.1203, "step": 69875 }, { "epoch": 0.7108561197916666, "grad_norm": 12.543699264526367, "learning_rate": 9.642489950351804e-07, "loss": 3.3598, "step": 69880 }, { "epoch": 0.710906982421875, "grad_norm": 8.45138168334961, "learning_rate": 9.63933479726563e-07, "loss": 3.305, "step": 69885 }, { "epoch": 0.7109578450520834, "grad_norm": 10.792311668395996, "learning_rate": 9.636180037178436e-07, "loss": 3.1086, "step": 69890 }, { "epoch": 0.7110087076822916, "grad_norm": 12.166705131530762, "learning_rate": 9.633025670170923e-07, "loss": 3.1846, "step": 69895 }, { "epoch": 0.7110595703125, "grad_norm": 10.266594886779785, "learning_rate": 9.62987169632381e-07, "loss": 3.5435, "step": 69900 }, { "epoch": 0.7111104329427084, "grad_norm": 15.00068187713623, "learning_rate": 9.62671811571779e-07, "loss": 3.2666, "step": 69905 }, { "epoch": 0.7111612955729166, "grad_norm": 17.427892684936523, "learning_rate": 9.623564928433537e-07, "loss": 3.2168, "step": 69910 }, { "epoch": 0.711212158203125, "grad_norm": 11.1777925491333, "learning_rate": 9.620412134551739e-07, "loss": 3.665, "step": 69915 }, { "epoch": 0.7112630208333334, "grad_norm": 13.45982551574707, "learning_rate": 9.61725973415305e-07, "loss": 3.2419, "step": 69920 }, { "epoch": 0.7113138834635416, "grad_norm": 12.354000091552734, "learning_rate": 9.614107727318116e-07, "loss": 3.1457, "step": 69925 }, { "epoch": 0.71136474609375, "grad_norm": 14.382411003112793, "learning_rate": 9.610956114127592e-07, "loss": 3.7577, "step": 69930 }, { "epoch": 0.7114156087239584, "grad_norm": 13.039135932922363, "learning_rate": 9.607804894662114e-07, "loss": 3.369, "step": 69935 }, { "epoch": 0.7114664713541666, "grad_norm": 7.945051193237305, "learning_rate": 9.6046540690023e-07, "loss": 3.5365, "step": 69940 }, { "epoch": 0.711517333984375, "grad_norm": 11.75335693359375, "learning_rate": 9.601503637228763e-07, "loss": 3.4087, "step": 69945 }, { "epoch": 0.7115681966145834, "grad_norm": 6.930155277252197, "learning_rate": 9.59835359942209e-07, "loss": 3.2005, "step": 69950 }, { "epoch": 0.7116190592447916, "grad_norm": 14.273049354553223, "learning_rate": 9.595203955662905e-07, "loss": 3.0292, "step": 69955 }, { "epoch": 0.711669921875, "grad_norm": 10.134856224060059, "learning_rate": 9.592054706031772e-07, "loss": 3.3678, "step": 69960 }, { "epoch": 0.7117207845052084, "grad_norm": 13.185558319091797, "learning_rate": 9.588905850609274e-07, "loss": 3.3033, "step": 69965 }, { "epoch": 0.7117716471354166, "grad_norm": 18.926721572875977, "learning_rate": 9.585757389475953e-07, "loss": 3.2266, "step": 69970 }, { "epoch": 0.711822509765625, "grad_norm": 14.387677192687988, "learning_rate": 9.582609322712377e-07, "loss": 3.305, "step": 69975 }, { "epoch": 0.7118733723958334, "grad_norm": 14.691122055053711, "learning_rate": 9.579461650399097e-07, "loss": 3.3729, "step": 69980 }, { "epoch": 0.7119242350260416, "grad_norm": 15.056170463562012, "learning_rate": 9.576314372616635e-07, "loss": 3.0395, "step": 69985 }, { "epoch": 0.71197509765625, "grad_norm": 17.34141731262207, "learning_rate": 9.573167489445507e-07, "loss": 3.7642, "step": 69990 }, { "epoch": 0.7120259602864584, "grad_norm": 10.424257278442383, "learning_rate": 9.570021000966237e-07, "loss": 3.2243, "step": 69995 }, { "epoch": 0.7120768229166666, "grad_norm": 9.840085983276367, "learning_rate": 9.566874907259316e-07, "loss": 2.7915, "step": 70000 }, { "epoch": 0.712127685546875, "grad_norm": 9.245528221130371, "learning_rate": 9.563729208405253e-07, "loss": 3.2404, "step": 70005 }, { "epoch": 0.7121785481770834, "grad_norm": 14.306405067443848, "learning_rate": 9.560583904484509e-07, "loss": 3.3555, "step": 70010 }, { "epoch": 0.7122294108072916, "grad_norm": 10.101147651672363, "learning_rate": 9.557438995577573e-07, "loss": 3.3163, "step": 70015 }, { "epoch": 0.7122802734375, "grad_norm": 16.2298526763916, "learning_rate": 9.554294481764902e-07, "loss": 3.2587, "step": 70020 }, { "epoch": 0.7123311360677084, "grad_norm": 16.712909698486328, "learning_rate": 9.551150363126937e-07, "loss": 3.5174, "step": 70025 }, { "epoch": 0.7123819986979166, "grad_norm": 12.494762420654297, "learning_rate": 9.548006639744129e-07, "loss": 3.494, "step": 70030 }, { "epoch": 0.712432861328125, "grad_norm": 8.947538375854492, "learning_rate": 9.544863311696914e-07, "loss": 3.1756, "step": 70035 }, { "epoch": 0.7124837239583334, "grad_norm": 12.291443824768066, "learning_rate": 9.541720379065711e-07, "loss": 3.183, "step": 70040 }, { "epoch": 0.7125345865885416, "grad_norm": 15.154228210449219, "learning_rate": 9.538577841930918e-07, "loss": 3.4816, "step": 70045 }, { "epoch": 0.71258544921875, "grad_norm": 12.65209674835205, "learning_rate": 9.535435700372955e-07, "loss": 3.6757, "step": 70050 }, { "epoch": 0.7126363118489584, "grad_norm": 12.21906566619873, "learning_rate": 9.532293954472197e-07, "loss": 3.2487, "step": 70055 }, { "epoch": 0.7126871744791666, "grad_norm": 8.432168960571289, "learning_rate": 9.529152604309039e-07, "loss": 2.9157, "step": 70060 }, { "epoch": 0.712738037109375, "grad_norm": 10.848390579223633, "learning_rate": 9.526011649963837e-07, "loss": 3.3407, "step": 70065 }, { "epoch": 0.7127888997395834, "grad_norm": 18.4654541015625, "learning_rate": 9.522871091516966e-07, "loss": 3.6414, "step": 70070 }, { "epoch": 0.7128397623697916, "grad_norm": 12.46338176727295, "learning_rate": 9.51973092904877e-07, "loss": 3.0989, "step": 70075 }, { "epoch": 0.712890625, "grad_norm": 12.28748893737793, "learning_rate": 9.516591162639582e-07, "loss": 3.4607, "step": 70080 }, { "epoch": 0.7129414876302084, "grad_norm": 8.627691268920898, "learning_rate": 9.513451792369738e-07, "loss": 3.8247, "step": 70085 }, { "epoch": 0.7129923502604166, "grad_norm": 9.558513641357422, "learning_rate": 9.51031281831957e-07, "loss": 3.6145, "step": 70090 }, { "epoch": 0.713043212890625, "grad_norm": 9.67013931274414, "learning_rate": 9.507174240569375e-07, "loss": 3.7408, "step": 70095 }, { "epoch": 0.7130940755208334, "grad_norm": 14.165182113647461, "learning_rate": 9.504036059199453e-07, "loss": 3.1084, "step": 70100 }, { "epoch": 0.7131449381510416, "grad_norm": 14.601203918457031, "learning_rate": 9.500898274290083e-07, "loss": 3.3601, "step": 70105 }, { "epoch": 0.71319580078125, "grad_norm": 8.760289192199707, "learning_rate": 9.49776088592157e-07, "loss": 3.2361, "step": 70110 }, { "epoch": 0.7132466634114584, "grad_norm": 12.00662899017334, "learning_rate": 9.494623894174171e-07, "loss": 3.3172, "step": 70115 }, { "epoch": 0.7132975260416666, "grad_norm": 15.779747009277344, "learning_rate": 9.491487299128138e-07, "loss": 3.1276, "step": 70120 }, { "epoch": 0.713348388671875, "grad_norm": 9.786253929138184, "learning_rate": 9.488351100863732e-07, "loss": 3.1352, "step": 70125 }, { "epoch": 0.7133992513020834, "grad_norm": 10.486870765686035, "learning_rate": 9.485215299461181e-07, "loss": 3.3165, "step": 70130 }, { "epoch": 0.7134501139322916, "grad_norm": 10.869139671325684, "learning_rate": 9.482079895000726e-07, "loss": 3.1894, "step": 70135 }, { "epoch": 0.7135009765625, "grad_norm": 13.020895957946777, "learning_rate": 9.478944887562569e-07, "loss": 3.2775, "step": 70140 }, { "epoch": 0.7135518391927084, "grad_norm": 10.164044380187988, "learning_rate": 9.475810277226941e-07, "loss": 3.8045, "step": 70145 }, { "epoch": 0.7136027018229166, "grad_norm": 13.305707931518555, "learning_rate": 9.472676064074024e-07, "loss": 3.0316, "step": 70150 }, { "epoch": 0.713653564453125, "grad_norm": 15.2430419921875, "learning_rate": 9.469542248183999e-07, "loss": 3.5371, "step": 70155 }, { "epoch": 0.7137044270833334, "grad_norm": 7.959345817565918, "learning_rate": 9.466408829637058e-07, "loss": 3.4444, "step": 70160 }, { "epoch": 0.7137552897135416, "grad_norm": 9.482425689697266, "learning_rate": 9.463275808513373e-07, "loss": 3.3864, "step": 70165 }, { "epoch": 0.71380615234375, "grad_norm": 10.571792602539062, "learning_rate": 9.460143184893094e-07, "loss": 2.9948, "step": 70170 }, { "epoch": 0.7138570149739584, "grad_norm": 11.90029525756836, "learning_rate": 9.45701095885637e-07, "loss": 3.1864, "step": 70175 }, { "epoch": 0.7139078776041666, "grad_norm": 10.637242317199707, "learning_rate": 9.453879130483323e-07, "loss": 3.3903, "step": 70180 }, { "epoch": 0.713958740234375, "grad_norm": 15.166751861572266, "learning_rate": 9.450747699854099e-07, "loss": 3.9749, "step": 70185 }, { "epoch": 0.7140096028645834, "grad_norm": 8.722638130187988, "learning_rate": 9.447616667048814e-07, "loss": 3.7228, "step": 70190 }, { "epoch": 0.7140604654947916, "grad_norm": 7.907155990600586, "learning_rate": 9.444486032147573e-07, "loss": 3.1887, "step": 70195 }, { "epoch": 0.714111328125, "grad_norm": 12.05744457244873, "learning_rate": 9.441355795230459e-07, "loss": 3.4451, "step": 70200 }, { "epoch": 0.7141621907552084, "grad_norm": 11.853819847106934, "learning_rate": 9.438225956377578e-07, "loss": 3.0237, "step": 70205 }, { "epoch": 0.7142130533854166, "grad_norm": 11.00097370147705, "learning_rate": 9.43509651566899e-07, "loss": 3.4609, "step": 70210 }, { "epoch": 0.714263916015625, "grad_norm": 17.25116539001465, "learning_rate": 9.43196747318477e-07, "loss": 3.8878, "step": 70215 }, { "epoch": 0.7143147786458334, "grad_norm": 11.077966690063477, "learning_rate": 9.428838829004978e-07, "loss": 3.3917, "step": 70220 }, { "epoch": 0.7143656412760416, "grad_norm": 11.697901725769043, "learning_rate": 9.425710583209655e-07, "loss": 3.4042, "step": 70225 }, { "epoch": 0.71441650390625, "grad_norm": 10.441009521484375, "learning_rate": 9.422582735878835e-07, "loss": 3.0199, "step": 70230 }, { "epoch": 0.7144673665364584, "grad_norm": 9.411457061767578, "learning_rate": 9.419455287092533e-07, "loss": 3.3712, "step": 70235 }, { "epoch": 0.7145182291666666, "grad_norm": 13.511369705200195, "learning_rate": 9.416328236930777e-07, "loss": 3.3586, "step": 70240 }, { "epoch": 0.714569091796875, "grad_norm": 13.114068984985352, "learning_rate": 9.413201585473577e-07, "loss": 3.1095, "step": 70245 }, { "epoch": 0.7146199544270834, "grad_norm": 9.315244674682617, "learning_rate": 9.41007533280092e-07, "loss": 3.2895, "step": 70250 }, { "epoch": 0.7146708170572916, "grad_norm": 11.047579765319824, "learning_rate": 9.40694947899278e-07, "loss": 3.4116, "step": 70255 }, { "epoch": 0.7147216796875, "grad_norm": 14.086073875427246, "learning_rate": 9.403824024129143e-07, "loss": 3.2052, "step": 70260 }, { "epoch": 0.7147725423177084, "grad_norm": 15.68326187133789, "learning_rate": 9.400698968289981e-07, "loss": 3.2755, "step": 70265 }, { "epoch": 0.7148234049479166, "grad_norm": 14.477516174316406, "learning_rate": 9.397574311555238e-07, "loss": 3.0787, "step": 70270 }, { "epoch": 0.714874267578125, "grad_norm": 11.304795265197754, "learning_rate": 9.39445005400485e-07, "loss": 3.401, "step": 70275 }, { "epoch": 0.7149251302083334, "grad_norm": 14.684220314025879, "learning_rate": 9.391326195718767e-07, "loss": 3.3938, "step": 70280 }, { "epoch": 0.7149759928385416, "grad_norm": 8.723044395446777, "learning_rate": 9.388202736776894e-07, "loss": 3.2873, "step": 70285 }, { "epoch": 0.71502685546875, "grad_norm": 10.842280387878418, "learning_rate": 9.385079677259165e-07, "loss": 3.4228, "step": 70290 }, { "epoch": 0.7150777180989584, "grad_norm": 14.38766860961914, "learning_rate": 9.381957017245461e-07, "loss": 3.2794, "step": 70295 }, { "epoch": 0.7151285807291666, "grad_norm": 14.24864387512207, "learning_rate": 9.378834756815691e-07, "loss": 3.4253, "step": 70300 }, { "epoch": 0.715179443359375, "grad_norm": 11.902130126953125, "learning_rate": 9.375712896049736e-07, "loss": 3.4232, "step": 70305 }, { "epoch": 0.7152303059895834, "grad_norm": 15.008284568786621, "learning_rate": 9.37259143502745e-07, "loss": 3.3055, "step": 70310 }, { "epoch": 0.7152811686197916, "grad_norm": 15.369800567626953, "learning_rate": 9.36947037382871e-07, "loss": 3.1573, "step": 70315 }, { "epoch": 0.71533203125, "grad_norm": 15.493510246276855, "learning_rate": 9.366349712533374e-07, "loss": 3.076, "step": 70320 }, { "epoch": 0.7153828938802084, "grad_norm": 9.27544116973877, "learning_rate": 9.363229451221273e-07, "loss": 3.9509, "step": 70325 }, { "epoch": 0.7154337565104166, "grad_norm": 14.310425758361816, "learning_rate": 9.360109589972236e-07, "loss": 3.301, "step": 70330 }, { "epoch": 0.715484619140625, "grad_norm": 14.848944664001465, "learning_rate": 9.356990128866092e-07, "loss": 3.0776, "step": 70335 }, { "epoch": 0.7155354817708334, "grad_norm": 11.941277503967285, "learning_rate": 9.353871067982642e-07, "loss": 3.4649, "step": 70340 }, { "epoch": 0.7155863444010416, "grad_norm": 14.305868148803711, "learning_rate": 9.3507524074017e-07, "loss": 3.9877, "step": 70345 }, { "epoch": 0.71563720703125, "grad_norm": 15.948720932006836, "learning_rate": 9.347634147203041e-07, "loss": 2.9864, "step": 70350 }, { "epoch": 0.7156880696614584, "grad_norm": 11.72199821472168, "learning_rate": 9.34451628746646e-07, "loss": 3.4454, "step": 70355 }, { "epoch": 0.7157389322916666, "grad_norm": 13.969541549682617, "learning_rate": 9.34139882827172e-07, "loss": 3.5413, "step": 70360 }, { "epoch": 0.715789794921875, "grad_norm": 8.047698974609375, "learning_rate": 9.338281769698567e-07, "loss": 3.3582, "step": 70365 }, { "epoch": 0.7158406575520834, "grad_norm": 12.490907669067383, "learning_rate": 9.335165111826766e-07, "loss": 3.4477, "step": 70370 }, { "epoch": 0.7158915201822916, "grad_norm": 11.708551406860352, "learning_rate": 9.332048854736061e-07, "loss": 3.2479, "step": 70375 }, { "epoch": 0.7159423828125, "grad_norm": 14.371440887451172, "learning_rate": 9.328932998506171e-07, "loss": 3.163, "step": 70380 }, { "epoch": 0.7159932454427084, "grad_norm": 11.833093643188477, "learning_rate": 9.325817543216817e-07, "loss": 3.6719, "step": 70385 }, { "epoch": 0.7160441080729166, "grad_norm": 12.058566093444824, "learning_rate": 9.322702488947699e-07, "loss": 3.4186, "step": 70390 }, { "epoch": 0.716094970703125, "grad_norm": 11.052045822143555, "learning_rate": 9.31958783577852e-07, "loss": 3.2209, "step": 70395 }, { "epoch": 0.7161458333333334, "grad_norm": 14.906253814697266, "learning_rate": 9.316473583788976e-07, "loss": 3.222, "step": 70400 }, { "epoch": 0.7161966959635416, "grad_norm": 8.230878829956055, "learning_rate": 9.313359733058733e-07, "loss": 3.0487, "step": 70405 }, { "epoch": 0.71624755859375, "grad_norm": 12.070849418640137, "learning_rate": 9.310246283667471e-07, "loss": 3.447, "step": 70410 }, { "epoch": 0.7162984212239584, "grad_norm": 10.283710479736328, "learning_rate": 9.307133235694829e-07, "loss": 3.5644, "step": 70415 }, { "epoch": 0.7163492838541666, "grad_norm": 10.542003631591797, "learning_rate": 9.304020589220472e-07, "loss": 3.3768, "step": 70420 }, { "epoch": 0.716400146484375, "grad_norm": 8.863672256469727, "learning_rate": 9.300908344324023e-07, "loss": 3.4833, "step": 70425 }, { "epoch": 0.7164510091145834, "grad_norm": 8.094013214111328, "learning_rate": 9.297796501085115e-07, "loss": 3.1542, "step": 70430 }, { "epoch": 0.7165018717447916, "grad_norm": 14.92911148071289, "learning_rate": 9.294685059583366e-07, "loss": 3.3276, "step": 70435 }, { "epoch": 0.716552734375, "grad_norm": 11.061050415039062, "learning_rate": 9.29157401989837e-07, "loss": 3.2169, "step": 70440 }, { "epoch": 0.7166035970052084, "grad_norm": 7.26305627822876, "learning_rate": 9.288463382109728e-07, "loss": 3.5735, "step": 70445 }, { "epoch": 0.7166544596354166, "grad_norm": 12.462404251098633, "learning_rate": 9.285353146297032e-07, "loss": 3.4802, "step": 70450 }, { "epoch": 0.716705322265625, "grad_norm": 10.22716236114502, "learning_rate": 9.282243312539854e-07, "loss": 3.0933, "step": 70455 }, { "epoch": 0.7167561848958334, "grad_norm": 12.187172889709473, "learning_rate": 9.279133880917754e-07, "loss": 2.952, "step": 70460 }, { "epoch": 0.7168070475260416, "grad_norm": 10.628868103027344, "learning_rate": 9.276024851510279e-07, "loss": 3.1059, "step": 70465 }, { "epoch": 0.71685791015625, "grad_norm": 17.583450317382812, "learning_rate": 9.272916224396983e-07, "loss": 3.4054, "step": 70470 }, { "epoch": 0.7169087727864584, "grad_norm": 9.600113868713379, "learning_rate": 9.269807999657404e-07, "loss": 3.4388, "step": 70475 }, { "epoch": 0.7169596354166666, "grad_norm": 12.207353591918945, "learning_rate": 9.26670017737106e-07, "loss": 3.2619, "step": 70480 }, { "epoch": 0.717010498046875, "grad_norm": 9.641359329223633, "learning_rate": 9.263592757617454e-07, "loss": 3.4046, "step": 70485 }, { "epoch": 0.7170613606770834, "grad_norm": 8.686779975891113, "learning_rate": 9.260485740476105e-07, "loss": 3.4551, "step": 70490 }, { "epoch": 0.7171122233072916, "grad_norm": 12.74880599975586, "learning_rate": 9.257379126026489e-07, "loss": 3.298, "step": 70495 }, { "epoch": 0.7171630859375, "grad_norm": 10.921481132507324, "learning_rate": 9.254272914348097e-07, "loss": 3.3536, "step": 70500 }, { "epoch": 0.7172139485677084, "grad_norm": 15.362860679626465, "learning_rate": 9.251167105520411e-07, "loss": 3.4412, "step": 70505 }, { "epoch": 0.7172648111979166, "grad_norm": 8.349514961242676, "learning_rate": 9.248061699622879e-07, "loss": 3.3569, "step": 70510 }, { "epoch": 0.717315673828125, "grad_norm": 10.782577514648438, "learning_rate": 9.244956696734958e-07, "loss": 3.4511, "step": 70515 }, { "epoch": 0.7173665364583334, "grad_norm": 8.114956855773926, "learning_rate": 9.241852096936074e-07, "loss": 3.1312, "step": 70520 }, { "epoch": 0.7174173990885416, "grad_norm": 8.276371002197266, "learning_rate": 9.23874790030567e-07, "loss": 3.3773, "step": 70525 }, { "epoch": 0.71746826171875, "grad_norm": 12.949410438537598, "learning_rate": 9.235644106923172e-07, "loss": 3.3625, "step": 70530 }, { "epoch": 0.7175191243489584, "grad_norm": 7.578267574310303, "learning_rate": 9.232540716867985e-07, "loss": 3.3437, "step": 70535 }, { "epoch": 0.7175699869791666, "grad_norm": 15.299179077148438, "learning_rate": 9.229437730219499e-07, "loss": 2.824, "step": 70540 }, { "epoch": 0.717620849609375, "grad_norm": 10.61105728149414, "learning_rate": 9.226335147057116e-07, "loss": 3.0508, "step": 70545 }, { "epoch": 0.7176717122395834, "grad_norm": 10.850451469421387, "learning_rate": 9.223232967460202e-07, "loss": 3.3547, "step": 70550 }, { "epoch": 0.7177225748697916, "grad_norm": 11.77407169342041, "learning_rate": 9.220131191508144e-07, "loss": 3.3201, "step": 70555 }, { "epoch": 0.7177734375, "grad_norm": 13.269412994384766, "learning_rate": 9.217029819280279e-07, "loss": 3.5001, "step": 70560 }, { "epoch": 0.7178243001302084, "grad_norm": 8.466712951660156, "learning_rate": 9.213928850855974e-07, "loss": 3.3259, "step": 70565 }, { "epoch": 0.7178751627604166, "grad_norm": 11.160103797912598, "learning_rate": 9.210828286314546e-07, "loss": 3.5799, "step": 70570 }, { "epoch": 0.717926025390625, "grad_norm": 8.970287322998047, "learning_rate": 9.207728125735346e-07, "loss": 3.4847, "step": 70575 }, { "epoch": 0.7179768880208334, "grad_norm": 13.498138427734375, "learning_rate": 9.204628369197671e-07, "loss": 3.1112, "step": 70580 }, { "epoch": 0.7180277506510416, "grad_norm": 9.72287368774414, "learning_rate": 9.201529016780841e-07, "loss": 3.7581, "step": 70585 }, { "epoch": 0.71807861328125, "grad_norm": 10.230852127075195, "learning_rate": 9.198430068564149e-07, "loss": 3.4474, "step": 70590 }, { "epoch": 0.7181294759114584, "grad_norm": 12.656306266784668, "learning_rate": 9.195331524626866e-07, "loss": 3.4458, "step": 70595 }, { "epoch": 0.7181803385416666, "grad_norm": 16.775386810302734, "learning_rate": 9.192233385048282e-07, "loss": 3.7704, "step": 70600 }, { "epoch": 0.718231201171875, "grad_norm": 11.617079734802246, "learning_rate": 9.189135649907668e-07, "loss": 3.426, "step": 70605 }, { "epoch": 0.7182820638020834, "grad_norm": 6.835216522216797, "learning_rate": 9.186038319284274e-07, "loss": 3.1763, "step": 70610 }, { "epoch": 0.7183329264322916, "grad_norm": 10.226903915405273, "learning_rate": 9.182941393257331e-07, "loss": 3.8316, "step": 70615 }, { "epoch": 0.7183837890625, "grad_norm": 10.749029159545898, "learning_rate": 9.179844871906088e-07, "loss": 2.974, "step": 70620 }, { "epoch": 0.7184346516927084, "grad_norm": 7.657063961029053, "learning_rate": 9.176748755309761e-07, "loss": 3.255, "step": 70625 }, { "epoch": 0.7184855143229166, "grad_norm": 15.790289878845215, "learning_rate": 9.173653043547573e-07, "loss": 3.3691, "step": 70630 }, { "epoch": 0.718536376953125, "grad_norm": 8.744561195373535, "learning_rate": 9.17055773669871e-07, "loss": 3.3656, "step": 70635 }, { "epoch": 0.7185872395833334, "grad_norm": 9.560819625854492, "learning_rate": 9.167462834842386e-07, "loss": 3.0058, "step": 70640 }, { "epoch": 0.7186381022135416, "grad_norm": 16.007478713989258, "learning_rate": 9.164368338057772e-07, "loss": 3.3507, "step": 70645 }, { "epoch": 0.71868896484375, "grad_norm": 10.624584197998047, "learning_rate": 9.161274246424034e-07, "loss": 3.6953, "step": 70650 }, { "epoch": 0.7187398274739584, "grad_norm": 15.273704528808594, "learning_rate": 9.158180560020339e-07, "loss": 3.5073, "step": 70655 }, { "epoch": 0.7187906901041666, "grad_norm": 13.692089080810547, "learning_rate": 9.155087278925847e-07, "loss": 3.5895, "step": 70660 }, { "epoch": 0.718841552734375, "grad_norm": 11.459607124328613, "learning_rate": 9.151994403219691e-07, "loss": 3.5057, "step": 70665 }, { "epoch": 0.7188924153645834, "grad_norm": 14.022200584411621, "learning_rate": 9.148901932981002e-07, "loss": 4.0547, "step": 70670 }, { "epoch": 0.7189432779947916, "grad_norm": 10.06373405456543, "learning_rate": 9.145809868288893e-07, "loss": 3.3864, "step": 70675 }, { "epoch": 0.718994140625, "grad_norm": 13.52616024017334, "learning_rate": 9.142718209222481e-07, "loss": 3.4282, "step": 70680 }, { "epoch": 0.7190450032552084, "grad_norm": 13.549678802490234, "learning_rate": 9.139626955860872e-07, "loss": 3.1799, "step": 70685 }, { "epoch": 0.7190958658854166, "grad_norm": 11.853752136230469, "learning_rate": 9.136536108283148e-07, "loss": 3.3303, "step": 70690 }, { "epoch": 0.719146728515625, "grad_norm": 10.186666488647461, "learning_rate": 9.13344566656838e-07, "loss": 3.5477, "step": 70695 }, { "epoch": 0.7191975911458334, "grad_norm": 12.17104434967041, "learning_rate": 9.130355630795651e-07, "loss": 3.476, "step": 70700 }, { "epoch": 0.7192484537760416, "grad_norm": 9.603554725646973, "learning_rate": 9.127266001044005e-07, "loss": 3.5696, "step": 70705 }, { "epoch": 0.71929931640625, "grad_norm": 11.729199409484863, "learning_rate": 9.124176777392496e-07, "loss": 2.8525, "step": 70710 }, { "epoch": 0.7193501790364584, "grad_norm": 12.96235179901123, "learning_rate": 9.121087959920169e-07, "loss": 3.3426, "step": 70715 }, { "epoch": 0.7194010416666666, "grad_norm": 11.475220680236816, "learning_rate": 9.117999548706041e-07, "loss": 3.2288, "step": 70720 }, { "epoch": 0.719451904296875, "grad_norm": 8.891611099243164, "learning_rate": 9.114911543829124e-07, "loss": 3.2626, "step": 70725 }, { "epoch": 0.7195027669270834, "grad_norm": 15.098640441894531, "learning_rate": 9.111823945368429e-07, "loss": 3.3544, "step": 70730 }, { "epoch": 0.7195536295572916, "grad_norm": 10.538352966308594, "learning_rate": 9.108736753402961e-07, "loss": 3.3497, "step": 70735 }, { "epoch": 0.7196044921875, "grad_norm": 10.955540657043457, "learning_rate": 9.105649968011698e-07, "loss": 3.3903, "step": 70740 }, { "epoch": 0.7196553548177084, "grad_norm": 14.484964370727539, "learning_rate": 9.102563589273611e-07, "loss": 3.3029, "step": 70745 }, { "epoch": 0.7197062174479166, "grad_norm": 9.111550331115723, "learning_rate": 9.09947761726766e-07, "loss": 3.1651, "step": 70750 }, { "epoch": 0.719757080078125, "grad_norm": 16.72936248779297, "learning_rate": 9.096392052072803e-07, "loss": 3.2327, "step": 70755 }, { "epoch": 0.7198079427083334, "grad_norm": 9.58584976196289, "learning_rate": 9.093306893767995e-07, "loss": 3.1563, "step": 70760 }, { "epoch": 0.7198588053385416, "grad_norm": 8.176050186157227, "learning_rate": 9.090222142432162e-07, "loss": 3.5646, "step": 70765 }, { "epoch": 0.71990966796875, "grad_norm": 14.241776466369629, "learning_rate": 9.087137798144213e-07, "loss": 3.4337, "step": 70770 }, { "epoch": 0.7199605305989584, "grad_norm": 12.04594612121582, "learning_rate": 9.084053860983083e-07, "loss": 3.3614, "step": 70775 }, { "epoch": 0.7200113932291666, "grad_norm": 8.9103364944458, "learning_rate": 9.080970331027653e-07, "loss": 3.1215, "step": 70780 }, { "epoch": 0.720062255859375, "grad_norm": 13.713004112243652, "learning_rate": 9.077887208356831e-07, "loss": 3.6188, "step": 70785 }, { "epoch": 0.7201131184895834, "grad_norm": 13.118133544921875, "learning_rate": 9.074804493049483e-07, "loss": 3.3569, "step": 70790 }, { "epoch": 0.7201639811197916, "grad_norm": 9.286763191223145, "learning_rate": 9.071722185184495e-07, "loss": 3.3846, "step": 70795 }, { "epoch": 0.72021484375, "grad_norm": 10.336640357971191, "learning_rate": 9.068640284840721e-07, "loss": 3.2238, "step": 70800 }, { "epoch": 0.7202657063802084, "grad_norm": 13.235795021057129, "learning_rate": 9.065558792096998e-07, "loss": 3.329, "step": 70805 }, { "epoch": 0.7203165690104166, "grad_norm": 17.055892944335938, "learning_rate": 9.06247770703218e-07, "loss": 3.4045, "step": 70810 }, { "epoch": 0.720367431640625, "grad_norm": 13.522333145141602, "learning_rate": 9.059397029725101e-07, "loss": 3.5872, "step": 70815 }, { "epoch": 0.7204182942708334, "grad_norm": 12.300740242004395, "learning_rate": 9.056316760254569e-07, "loss": 3.1478, "step": 70820 }, { "epoch": 0.7204691569010416, "grad_norm": 11.573324203491211, "learning_rate": 9.053236898699383e-07, "loss": 3.2798, "step": 70825 }, { "epoch": 0.72052001953125, "grad_norm": 11.538972854614258, "learning_rate": 9.050157445138363e-07, "loss": 3.284, "step": 70830 }, { "epoch": 0.7205708821614584, "grad_norm": 11.63953971862793, "learning_rate": 9.047078399650274e-07, "loss": 3.1113, "step": 70835 }, { "epoch": 0.7206217447916666, "grad_norm": 13.707157135009766, "learning_rate": 9.043999762313913e-07, "loss": 3.1204, "step": 70840 }, { "epoch": 0.720672607421875, "grad_norm": 13.982678413391113, "learning_rate": 9.040921533208025e-07, "loss": 3.3069, "step": 70845 }, { "epoch": 0.7207234700520834, "grad_norm": 11.639885902404785, "learning_rate": 9.037843712411387e-07, "loss": 3.2747, "step": 70850 }, { "epoch": 0.7207743326822916, "grad_norm": 12.32170295715332, "learning_rate": 9.034766300002734e-07, "loss": 3.5874, "step": 70855 }, { "epoch": 0.7208251953125, "grad_norm": 12.784284591674805, "learning_rate": 9.031689296060791e-07, "loss": 3.4228, "step": 70860 }, { "epoch": 0.7208760579427084, "grad_norm": 14.382755279541016, "learning_rate": 9.028612700664294e-07, "loss": 2.9878, "step": 70865 }, { "epoch": 0.7209269205729166, "grad_norm": 16.06764793395996, "learning_rate": 9.025536513891963e-07, "loss": 3.5907, "step": 70870 }, { "epoch": 0.720977783203125, "grad_norm": 13.783523559570312, "learning_rate": 9.022460735822497e-07, "loss": 3.1087, "step": 70875 }, { "epoch": 0.7210286458333334, "grad_norm": 7.396638870239258, "learning_rate": 9.019385366534575e-07, "loss": 3.3846, "step": 70880 }, { "epoch": 0.7210795084635416, "grad_norm": 9.226949691772461, "learning_rate": 9.01631040610689e-07, "loss": 3.1187, "step": 70885 }, { "epoch": 0.72113037109375, "grad_norm": 9.044224739074707, "learning_rate": 9.013235854618124e-07, "loss": 3.422, "step": 70890 }, { "epoch": 0.7211812337239584, "grad_norm": 10.633951187133789, "learning_rate": 9.010161712146931e-07, "loss": 3.2628, "step": 70895 }, { "epoch": 0.7212320963541666, "grad_norm": 9.115630149841309, "learning_rate": 9.00708797877195e-07, "loss": 3.3975, "step": 70900 }, { "epoch": 0.721282958984375, "grad_norm": 15.742044448852539, "learning_rate": 9.004014654571841e-07, "loss": 3.5155, "step": 70905 }, { "epoch": 0.7213338216145834, "grad_norm": 11.0806245803833, "learning_rate": 9.000941739625219e-07, "loss": 3.4679, "step": 70910 }, { "epoch": 0.7213846842447916, "grad_norm": 12.643223762512207, "learning_rate": 8.997869234010717e-07, "loss": 3.729, "step": 70915 }, { "epoch": 0.721435546875, "grad_norm": 11.004573822021484, "learning_rate": 8.994797137806929e-07, "loss": 3.3299, "step": 70920 }, { "epoch": 0.7214864095052084, "grad_norm": 16.230148315429688, "learning_rate": 8.991725451092473e-07, "loss": 3.6769, "step": 70925 }, { "epoch": 0.7215372721354166, "grad_norm": 11.277342796325684, "learning_rate": 8.988654173945924e-07, "loss": 3.6651, "step": 70930 }, { "epoch": 0.721588134765625, "grad_norm": 10.552614212036133, "learning_rate": 8.985583306445855e-07, "loss": 3.4175, "step": 70935 }, { "epoch": 0.7216389973958334, "grad_norm": 9.672837257385254, "learning_rate": 8.982512848670844e-07, "loss": 3.3217, "step": 70940 }, { "epoch": 0.7216898600260416, "grad_norm": 16.051069259643555, "learning_rate": 8.97944280069945e-07, "loss": 3.6637, "step": 70945 }, { "epoch": 0.72174072265625, "grad_norm": 13.610672950744629, "learning_rate": 8.976373162610219e-07, "loss": 3.4613, "step": 70950 }, { "epoch": 0.7217915852864584, "grad_norm": 11.690978050231934, "learning_rate": 8.973303934481681e-07, "loss": 3.5566, "step": 70955 }, { "epoch": 0.7218424479166666, "grad_norm": 15.141119003295898, "learning_rate": 8.970235116392353e-07, "loss": 3.5221, "step": 70960 }, { "epoch": 0.721893310546875, "grad_norm": 13.764825820922852, "learning_rate": 8.96716670842076e-07, "loss": 3.721, "step": 70965 }, { "epoch": 0.7219441731770834, "grad_norm": 11.916561126708984, "learning_rate": 8.964098710645416e-07, "loss": 3.7082, "step": 70970 }, { "epoch": 0.7219950358072916, "grad_norm": 11.584885597229004, "learning_rate": 8.961031123144806e-07, "loss": 3.3698, "step": 70975 }, { "epoch": 0.7220458984375, "grad_norm": 11.479866981506348, "learning_rate": 8.957963945997403e-07, "loss": 3.2962, "step": 70980 }, { "epoch": 0.7220967610677084, "grad_norm": 13.004188537597656, "learning_rate": 8.954897179281701e-07, "loss": 3.2781, "step": 70985 }, { "epoch": 0.7221476236979166, "grad_norm": 11.68873405456543, "learning_rate": 8.951830823076141e-07, "loss": 3.2864, "step": 70990 }, { "epoch": 0.722198486328125, "grad_norm": 13.482848167419434, "learning_rate": 8.948764877459188e-07, "loss": 3.3523, "step": 70995 }, { "epoch": 0.7222493489583334, "grad_norm": 13.140082359313965, "learning_rate": 8.945699342509287e-07, "loss": 3.0924, "step": 71000 }, { "epoch": 0.7223002115885416, "grad_norm": 11.259969711303711, "learning_rate": 8.942634218304863e-07, "loss": 3.3819, "step": 71005 }, { "epoch": 0.72235107421875, "grad_norm": 11.4698486328125, "learning_rate": 8.939569504924339e-07, "loss": 3.2888, "step": 71010 }, { "epoch": 0.7224019368489584, "grad_norm": 11.579285621643066, "learning_rate": 8.936505202446102e-07, "loss": 3.438, "step": 71015 }, { "epoch": 0.7224527994791666, "grad_norm": 15.198299407958984, "learning_rate": 8.933441310948593e-07, "loss": 3.554, "step": 71020 }, { "epoch": 0.722503662109375, "grad_norm": 15.81182861328125, "learning_rate": 8.930377830510175e-07, "loss": 3.6142, "step": 71025 }, { "epoch": 0.7225545247395834, "grad_norm": 9.641409873962402, "learning_rate": 8.927314761209236e-07, "loss": 3.4682, "step": 71030 }, { "epoch": 0.7226053873697916, "grad_norm": 15.797674179077148, "learning_rate": 8.924252103124128e-07, "loss": 3.1733, "step": 71035 }, { "epoch": 0.72265625, "grad_norm": 11.661063194274902, "learning_rate": 8.92118985633322e-07, "loss": 3.2641, "step": 71040 }, { "epoch": 0.7227071126302084, "grad_norm": 8.455240249633789, "learning_rate": 8.918128020914868e-07, "loss": 3.2229, "step": 71045 }, { "epoch": 0.7227579752604166, "grad_norm": 11.377432823181152, "learning_rate": 8.915066596947397e-07, "loss": 3.899, "step": 71050 }, { "epoch": 0.722808837890625, "grad_norm": 9.175016403198242, "learning_rate": 8.912005584509129e-07, "loss": 3.3356, "step": 71055 }, { "epoch": 0.7228597005208334, "grad_norm": 10.886006355285645, "learning_rate": 8.908944983678388e-07, "loss": 3.4519, "step": 71060 }, { "epoch": 0.7229105631510416, "grad_norm": 13.152046203613281, "learning_rate": 8.905884794533473e-07, "loss": 3.2553, "step": 71065 }, { "epoch": 0.72296142578125, "grad_norm": 7.559975624084473, "learning_rate": 8.902825017152686e-07, "loss": 3.901, "step": 71070 }, { "epoch": 0.7230122884114584, "grad_norm": 18.686050415039062, "learning_rate": 8.899765651614297e-07, "loss": 3.6899, "step": 71075 }, { "epoch": 0.7230631510416666, "grad_norm": 15.401456832885742, "learning_rate": 8.896706697996599e-07, "loss": 3.7532, "step": 71080 }, { "epoch": 0.723114013671875, "grad_norm": 15.929218292236328, "learning_rate": 8.89364815637784e-07, "loss": 3.376, "step": 71085 }, { "epoch": 0.7231648763020834, "grad_norm": 8.313657760620117, "learning_rate": 8.890590026836269e-07, "loss": 3.1655, "step": 71090 }, { "epoch": 0.7232157389322916, "grad_norm": 8.160225868225098, "learning_rate": 8.887532309450131e-07, "loss": 3.1708, "step": 71095 }, { "epoch": 0.7232666015625, "grad_norm": 13.99388599395752, "learning_rate": 8.884475004297671e-07, "loss": 3.6068, "step": 71100 }, { "epoch": 0.7233174641927084, "grad_norm": 10.984503746032715, "learning_rate": 8.881418111457096e-07, "loss": 3.312, "step": 71105 }, { "epoch": 0.7233683268229166, "grad_norm": 14.216525077819824, "learning_rate": 8.878361631006613e-07, "loss": 3.6005, "step": 71110 }, { "epoch": 0.723419189453125, "grad_norm": 9.797622680664062, "learning_rate": 8.87530556302443e-07, "loss": 3.1769, "step": 71115 }, { "epoch": 0.7234700520833334, "grad_norm": 14.703811645507812, "learning_rate": 8.872249907588729e-07, "loss": 3.3396, "step": 71120 }, { "epoch": 0.7235209147135416, "grad_norm": 8.875903129577637, "learning_rate": 8.869194664777697e-07, "loss": 3.1241, "step": 71125 }, { "epoch": 0.72357177734375, "grad_norm": 10.04088306427002, "learning_rate": 8.866139834669487e-07, "loss": 3.2426, "step": 71130 }, { "epoch": 0.7236226399739584, "grad_norm": 15.39554214477539, "learning_rate": 8.863085417342274e-07, "loss": 3.4198, "step": 71135 }, { "epoch": 0.7236735026041666, "grad_norm": 15.000868797302246, "learning_rate": 8.860031412874198e-07, "loss": 3.1849, "step": 71140 }, { "epoch": 0.723724365234375, "grad_norm": 9.475614547729492, "learning_rate": 8.856977821343382e-07, "loss": 3.2855, "step": 71145 }, { "epoch": 0.7237752278645834, "grad_norm": 13.789505958557129, "learning_rate": 8.853924642827963e-07, "loss": 3.2375, "step": 71150 }, { "epoch": 0.7238260904947916, "grad_norm": 16.8479061126709, "learning_rate": 8.850871877406061e-07, "loss": 3.2356, "step": 71155 }, { "epoch": 0.723876953125, "grad_norm": 13.068145751953125, "learning_rate": 8.847819525155779e-07, "loss": 3.2793, "step": 71160 }, { "epoch": 0.7239278157552084, "grad_norm": 9.874674797058105, "learning_rate": 8.844767586155203e-07, "loss": 3.0541, "step": 71165 }, { "epoch": 0.7239786783854166, "grad_norm": 12.130133628845215, "learning_rate": 8.841716060482403e-07, "loss": 3.6288, "step": 71170 }, { "epoch": 0.724029541015625, "grad_norm": 9.09591293334961, "learning_rate": 8.838664948215484e-07, "loss": 3.3846, "step": 71175 }, { "epoch": 0.7240804036458334, "grad_norm": 11.738776206970215, "learning_rate": 8.835614249432491e-07, "loss": 3.2677, "step": 71180 }, { "epoch": 0.7241312662760416, "grad_norm": 13.365427017211914, "learning_rate": 8.832563964211477e-07, "loss": 3.0518, "step": 71185 }, { "epoch": 0.72418212890625, "grad_norm": 12.114652633666992, "learning_rate": 8.829514092630473e-07, "loss": 3.125, "step": 71190 }, { "epoch": 0.7242329915364584, "grad_norm": 11.502225875854492, "learning_rate": 8.826464634767518e-07, "loss": 3.5321, "step": 71195 }, { "epoch": 0.7242838541666666, "grad_norm": 12.675405502319336, "learning_rate": 8.82341559070064e-07, "loss": 3.5959, "step": 71200 }, { "epoch": 0.724334716796875, "grad_norm": 9.8812837600708, "learning_rate": 8.820366960507834e-07, "loss": 3.4001, "step": 71205 }, { "epoch": 0.7243855794270834, "grad_norm": 7.821342945098877, "learning_rate": 8.817318744267109e-07, "loss": 3.0264, "step": 71210 }, { "epoch": 0.7244364420572916, "grad_norm": 14.114405632019043, "learning_rate": 8.81427094205645e-07, "loss": 3.5391, "step": 71215 }, { "epoch": 0.7244873046875, "grad_norm": 10.161025047302246, "learning_rate": 8.811223553953824e-07, "loss": 3.1369, "step": 71220 }, { "epoch": 0.7245381673177084, "grad_norm": 11.592391014099121, "learning_rate": 8.808176580037206e-07, "loss": 3.4189, "step": 71225 }, { "epoch": 0.7245890299479166, "grad_norm": 10.845025062561035, "learning_rate": 8.805130020384562e-07, "loss": 3.2114, "step": 71230 }, { "epoch": 0.724639892578125, "grad_norm": 12.971354484558105, "learning_rate": 8.802083875073828e-07, "loss": 3.2945, "step": 71235 }, { "epoch": 0.7246907552083334, "grad_norm": 15.216636657714844, "learning_rate": 8.799038144182937e-07, "loss": 3.3843, "step": 71240 }, { "epoch": 0.7247416178385416, "grad_norm": 9.582465171813965, "learning_rate": 8.795992827789807e-07, "loss": 3.0785, "step": 71245 }, { "epoch": 0.72479248046875, "grad_norm": 12.177659034729004, "learning_rate": 8.792947925972359e-07, "loss": 3.331, "step": 71250 }, { "epoch": 0.7248433430989584, "grad_norm": 7.878180503845215, "learning_rate": 8.789903438808506e-07, "loss": 3.693, "step": 71255 }, { "epoch": 0.7248942057291666, "grad_norm": 14.96727180480957, "learning_rate": 8.786859366376132e-07, "loss": 3.4858, "step": 71260 }, { "epoch": 0.724945068359375, "grad_norm": 14.713326454162598, "learning_rate": 8.78381570875311e-07, "loss": 3.2536, "step": 71265 }, { "epoch": 0.7249959309895834, "grad_norm": 16.95229148864746, "learning_rate": 8.780772466017326e-07, "loss": 3.234, "step": 71270 }, { "epoch": 0.7250467936197916, "grad_norm": 14.073349952697754, "learning_rate": 8.777729638246627e-07, "loss": 3.2735, "step": 71275 }, { "epoch": 0.72509765625, "grad_norm": 14.336599349975586, "learning_rate": 8.774687225518877e-07, "loss": 3.3153, "step": 71280 }, { "epoch": 0.7251485188802084, "grad_norm": 10.15825080871582, "learning_rate": 8.771645227911902e-07, "loss": 3.2011, "step": 71285 }, { "epoch": 0.7251993815104166, "grad_norm": 14.921940803527832, "learning_rate": 8.768603645503546e-07, "loss": 3.5299, "step": 71290 }, { "epoch": 0.725250244140625, "grad_norm": 9.78284740447998, "learning_rate": 8.765562478371617e-07, "loss": 3.2637, "step": 71295 }, { "epoch": 0.7253011067708334, "grad_norm": 14.756291389465332, "learning_rate": 8.762521726593918e-07, "loss": 3.0072, "step": 71300 }, { "epoch": 0.7253519694010416, "grad_norm": 15.028759956359863, "learning_rate": 8.759481390248248e-07, "loss": 3.5329, "step": 71305 }, { "epoch": 0.72540283203125, "grad_norm": 9.425098419189453, "learning_rate": 8.756441469412408e-07, "loss": 3.5755, "step": 71310 }, { "epoch": 0.7254536946614584, "grad_norm": 16.844207763671875, "learning_rate": 8.753401964164162e-07, "loss": 3.503, "step": 71315 }, { "epoch": 0.7255045572916666, "grad_norm": 15.11011791229248, "learning_rate": 8.75036287458127e-07, "loss": 3.2334, "step": 71320 }, { "epoch": 0.725555419921875, "grad_norm": 13.918288230895996, "learning_rate": 8.747324200741489e-07, "loss": 3.2216, "step": 71325 }, { "epoch": 0.7256062825520834, "grad_norm": 7.063535690307617, "learning_rate": 8.744285942722575e-07, "loss": 3.2819, "step": 71330 }, { "epoch": 0.7256571451822916, "grad_norm": 7.297150135040283, "learning_rate": 8.741248100602253e-07, "loss": 3.2245, "step": 71335 }, { "epoch": 0.7257080078125, "grad_norm": 12.161453247070312, "learning_rate": 8.738210674458236e-07, "loss": 3.5233, "step": 71340 }, { "epoch": 0.7257588704427084, "grad_norm": 17.07378578186035, "learning_rate": 8.735173664368251e-07, "loss": 3.1754, "step": 71345 }, { "epoch": 0.7258097330729166, "grad_norm": 12.775858879089355, "learning_rate": 8.732137070409987e-07, "loss": 3.5189, "step": 71350 }, { "epoch": 0.725860595703125, "grad_norm": 14.163113594055176, "learning_rate": 8.729100892661149e-07, "loss": 3.1414, "step": 71355 }, { "epoch": 0.7259114583333334, "grad_norm": 13.765534400939941, "learning_rate": 8.726065131199399e-07, "loss": 3.4463, "step": 71360 }, { "epoch": 0.7259623209635416, "grad_norm": 13.300477027893066, "learning_rate": 8.723029786102421e-07, "loss": 3.039, "step": 71365 }, { "epoch": 0.72601318359375, "grad_norm": 12.640270233154297, "learning_rate": 8.71999485744787e-07, "loss": 3.3477, "step": 71370 }, { "epoch": 0.7260640462239584, "grad_norm": 7.233295917510986, "learning_rate": 8.716960345313382e-07, "loss": 2.9813, "step": 71375 }, { "epoch": 0.7261149088541666, "grad_norm": 14.029919624328613, "learning_rate": 8.713926249776608e-07, "loss": 3.4008, "step": 71380 }, { "epoch": 0.726165771484375, "grad_norm": 12.687051773071289, "learning_rate": 8.710892570915175e-07, "loss": 3.2873, "step": 71385 }, { "epoch": 0.7262166341145834, "grad_norm": 9.008588790893555, "learning_rate": 8.707859308806696e-07, "loss": 3.3346, "step": 71390 }, { "epoch": 0.7262674967447916, "grad_norm": 12.562435150146484, "learning_rate": 8.704826463528768e-07, "loss": 3.3961, "step": 71395 }, { "epoch": 0.726318359375, "grad_norm": 10.56306266784668, "learning_rate": 8.701794035158998e-07, "loss": 3.9296, "step": 71400 }, { "epoch": 0.7263692220052084, "grad_norm": 11.310155868530273, "learning_rate": 8.698762023774957e-07, "loss": 3.3878, "step": 71405 }, { "epoch": 0.7264200846354166, "grad_norm": 9.817281723022461, "learning_rate": 8.695730429454236e-07, "loss": 3.1968, "step": 71410 }, { "epoch": 0.726470947265625, "grad_norm": 15.564590454101562, "learning_rate": 8.692699252274378e-07, "loss": 3.1042, "step": 71415 }, { "epoch": 0.7265218098958334, "grad_norm": 10.80466365814209, "learning_rate": 8.689668492312952e-07, "loss": 3.2071, "step": 71420 }, { "epoch": 0.7265726725260416, "grad_norm": 9.413741111755371, "learning_rate": 8.686638149647492e-07, "loss": 3.1955, "step": 71425 }, { "epoch": 0.72662353515625, "grad_norm": 11.282853126525879, "learning_rate": 8.68360822435552e-07, "loss": 3.3632, "step": 71430 }, { "epoch": 0.7266743977864584, "grad_norm": 11.697951316833496, "learning_rate": 8.680578716514565e-07, "loss": 3.3989, "step": 71435 }, { "epoch": 0.7267252604166666, "grad_norm": 7.446695804595947, "learning_rate": 8.677549626202142e-07, "loss": 3.1363, "step": 71440 }, { "epoch": 0.726776123046875, "grad_norm": 15.161650657653809, "learning_rate": 8.674520953495743e-07, "loss": 3.4819, "step": 71445 }, { "epoch": 0.7268269856770834, "grad_norm": 13.455677032470703, "learning_rate": 8.671492698472858e-07, "loss": 3.4561, "step": 71450 }, { "epoch": 0.7268778483072916, "grad_norm": 19.11655616760254, "learning_rate": 8.668464861210951e-07, "loss": 3.5794, "step": 71455 }, { "epoch": 0.7269287109375, "grad_norm": 9.71003246307373, "learning_rate": 8.665437441787503e-07, "loss": 3.253, "step": 71460 }, { "epoch": 0.7269795735677084, "grad_norm": 9.952095031738281, "learning_rate": 8.662410440279973e-07, "loss": 3.2738, "step": 71465 }, { "epoch": 0.7270304361979166, "grad_norm": 13.357952117919922, "learning_rate": 8.659383856765799e-07, "loss": 3.4893, "step": 71470 }, { "epoch": 0.727081298828125, "grad_norm": 12.198160171508789, "learning_rate": 8.656357691322407e-07, "loss": 3.3286, "step": 71475 }, { "epoch": 0.7271321614583334, "grad_norm": 9.439297676086426, "learning_rate": 8.65333194402723e-07, "loss": 3.3716, "step": 71480 }, { "epoch": 0.7271830240885416, "grad_norm": 10.160725593566895, "learning_rate": 8.65030661495769e-07, "loss": 3.2346, "step": 71485 }, { "epoch": 0.72723388671875, "grad_norm": 11.871495246887207, "learning_rate": 8.647281704191171e-07, "loss": 3.5573, "step": 71490 }, { "epoch": 0.7272847493489584, "grad_norm": 14.323995590209961, "learning_rate": 8.644257211805082e-07, "loss": 3.2742, "step": 71495 }, { "epoch": 0.7273356119791666, "grad_norm": 8.815825462341309, "learning_rate": 8.641233137876795e-07, "loss": 3.4674, "step": 71500 }, { "epoch": 0.727386474609375, "grad_norm": 14.749484062194824, "learning_rate": 8.638209482483673e-07, "loss": 3.6575, "step": 71505 }, { "epoch": 0.7274373372395834, "grad_norm": 13.403284072875977, "learning_rate": 8.635186245703081e-07, "loss": 3.3399, "step": 71510 }, { "epoch": 0.7274881998697916, "grad_norm": 14.631975173950195, "learning_rate": 8.632163427612381e-07, "loss": 3.4657, "step": 71515 }, { "epoch": 0.7275390625, "grad_norm": 9.881767272949219, "learning_rate": 8.629141028288899e-07, "loss": 3.2145, "step": 71520 }, { "epoch": 0.7275899251302084, "grad_norm": 9.652484893798828, "learning_rate": 8.626119047809963e-07, "loss": 3.3607, "step": 71525 }, { "epoch": 0.7276407877604166, "grad_norm": 10.012579917907715, "learning_rate": 8.623097486252885e-07, "loss": 3.7364, "step": 71530 }, { "epoch": 0.727691650390625, "grad_norm": 12.135066986083984, "learning_rate": 8.620076343694972e-07, "loss": 3.7268, "step": 71535 }, { "epoch": 0.7277425130208334, "grad_norm": 13.072932243347168, "learning_rate": 8.617055620213533e-07, "loss": 3.3365, "step": 71540 }, { "epoch": 0.7277933756510416, "grad_norm": 9.491469383239746, "learning_rate": 8.614035315885844e-07, "loss": 3.3821, "step": 71545 }, { "epoch": 0.72784423828125, "grad_norm": 11.99251937866211, "learning_rate": 8.611015430789169e-07, "loss": 3.1887, "step": 71550 }, { "epoch": 0.7278951009114584, "grad_norm": 11.372830390930176, "learning_rate": 8.607995965000787e-07, "loss": 3.2789, "step": 71555 }, { "epoch": 0.7279459635416666, "grad_norm": 13.800250053405762, "learning_rate": 8.604976918597934e-07, "loss": 3.6385, "step": 71560 }, { "epoch": 0.727996826171875, "grad_norm": 13.713117599487305, "learning_rate": 8.60195829165787e-07, "loss": 3.3507, "step": 71565 }, { "epoch": 0.7280476888020834, "grad_norm": 9.791834831237793, "learning_rate": 8.598940084257807e-07, "loss": 3.1859, "step": 71570 }, { "epoch": 0.7280985514322916, "grad_norm": 7.772392272949219, "learning_rate": 8.595922296474984e-07, "loss": 3.2658, "step": 71575 }, { "epoch": 0.7281494140625, "grad_norm": 8.682499885559082, "learning_rate": 8.5929049283866e-07, "loss": 3.3533, "step": 71580 }, { "epoch": 0.7282002766927084, "grad_norm": 16.108346939086914, "learning_rate": 8.589887980069847e-07, "loss": 3.4971, "step": 71585 }, { "epoch": 0.7282511393229166, "grad_norm": 7.182333469390869, "learning_rate": 8.586871451601919e-07, "loss": 3.5022, "step": 71590 }, { "epoch": 0.728302001953125, "grad_norm": 13.270331382751465, "learning_rate": 8.583855343060004e-07, "loss": 3.4255, "step": 71595 }, { "epoch": 0.7283528645833334, "grad_norm": 7.81695556640625, "learning_rate": 8.580839654521258e-07, "loss": 3.1044, "step": 71600 }, { "epoch": 0.7284037272135416, "grad_norm": 10.429226875305176, "learning_rate": 8.57782438606283e-07, "loss": 3.5674, "step": 71605 }, { "epoch": 0.72845458984375, "grad_norm": 10.270829200744629, "learning_rate": 8.574809537761883e-07, "loss": 3.2177, "step": 71610 }, { "epoch": 0.7285054524739584, "grad_norm": 11.659355163574219, "learning_rate": 8.57179510969553e-07, "loss": 3.4913, "step": 71615 }, { "epoch": 0.7285563151041666, "grad_norm": 8.569073677062988, "learning_rate": 8.568781101940913e-07, "loss": 3.8532, "step": 71620 }, { "epoch": 0.728607177734375, "grad_norm": 9.819869995117188, "learning_rate": 8.565767514575129e-07, "loss": 3.2251, "step": 71625 }, { "epoch": 0.7286580403645834, "grad_norm": 7.252403736114502, "learning_rate": 8.562754347675298e-07, "loss": 3.6167, "step": 71630 }, { "epoch": 0.7287089029947916, "grad_norm": 15.263439178466797, "learning_rate": 8.559741601318492e-07, "loss": 3.5082, "step": 71635 }, { "epoch": 0.728759765625, "grad_norm": 16.039209365844727, "learning_rate": 8.556729275581807e-07, "loss": 3.1962, "step": 71640 }, { "epoch": 0.7288106282552084, "grad_norm": 10.13549518585205, "learning_rate": 8.5537173705423e-07, "loss": 3.6743, "step": 71645 }, { "epoch": 0.7288614908854166, "grad_norm": 7.909780979156494, "learning_rate": 8.550705886277042e-07, "loss": 3.1544, "step": 71650 }, { "epoch": 0.728912353515625, "grad_norm": 12.192684173583984, "learning_rate": 8.547694822863076e-07, "loss": 3.4837, "step": 71655 }, { "epoch": 0.7289632161458334, "grad_norm": 13.933347702026367, "learning_rate": 8.54468418037743e-07, "loss": 3.395, "step": 71660 }, { "epoch": 0.7290140787760416, "grad_norm": 10.228572845458984, "learning_rate": 8.541673958897137e-07, "loss": 3.3333, "step": 71665 }, { "epoch": 0.72906494140625, "grad_norm": 12.458081245422363, "learning_rate": 8.538664158499227e-07, "loss": 3.3682, "step": 71670 }, { "epoch": 0.7291158040364584, "grad_norm": 14.956607818603516, "learning_rate": 8.535654779260688e-07, "loss": 3.5155, "step": 71675 }, { "epoch": 0.7291666666666666, "grad_norm": 7.825222492218018, "learning_rate": 8.532645821258523e-07, "loss": 3.1764, "step": 71680 }, { "epoch": 0.729217529296875, "grad_norm": 12.992206573486328, "learning_rate": 8.529637284569703e-07, "loss": 3.2733, "step": 71685 }, { "epoch": 0.7292683919270834, "grad_norm": 12.961661338806152, "learning_rate": 8.526629169271206e-07, "loss": 3.5984, "step": 71690 }, { "epoch": 0.7293192545572916, "grad_norm": 15.560072898864746, "learning_rate": 8.523621475440008e-07, "loss": 3.3413, "step": 71695 }, { "epoch": 0.7293701171875, "grad_norm": 17.057737350463867, "learning_rate": 8.520614203153041e-07, "loss": 3.41, "step": 71700 }, { "epoch": 0.7294209798177084, "grad_norm": 13.707793235778809, "learning_rate": 8.517607352487258e-07, "loss": 3.1453, "step": 71705 }, { "epoch": 0.7294718424479166, "grad_norm": 11.256245613098145, "learning_rate": 8.514600923519589e-07, "loss": 2.9445, "step": 71710 }, { "epoch": 0.729522705078125, "grad_norm": 11.20262622833252, "learning_rate": 8.511594916326937e-07, "loss": 3.1541, "step": 71715 }, { "epoch": 0.7295735677083334, "grad_norm": 9.666428565979004, "learning_rate": 8.508589330986222e-07, "loss": 3.1237, "step": 71720 }, { "epoch": 0.7296244303385416, "grad_norm": 11.764225006103516, "learning_rate": 8.505584167574348e-07, "loss": 3.1072, "step": 71725 }, { "epoch": 0.72967529296875, "grad_norm": 9.681159019470215, "learning_rate": 8.502579426168195e-07, "loss": 3.0048, "step": 71730 }, { "epoch": 0.7297261555989584, "grad_norm": 10.529214859008789, "learning_rate": 8.499575106844634e-07, "loss": 3.3713, "step": 71735 }, { "epoch": 0.7297770182291666, "grad_norm": 13.212870597839355, "learning_rate": 8.496571209680527e-07, "loss": 3.0802, "step": 71740 }, { "epoch": 0.729827880859375, "grad_norm": 11.128890037536621, "learning_rate": 8.493567734752733e-07, "loss": 3.7974, "step": 71745 }, { "epoch": 0.7298787434895834, "grad_norm": 13.671854972839355, "learning_rate": 8.490564682138105e-07, "loss": 3.146, "step": 71750 }, { "epoch": 0.7299296061197916, "grad_norm": 13.513593673706055, "learning_rate": 8.487562051913464e-07, "loss": 2.9435, "step": 71755 }, { "epoch": 0.72998046875, "grad_norm": 12.44311809539795, "learning_rate": 8.484559844155629e-07, "loss": 3.6105, "step": 71760 }, { "epoch": 0.7300313313802084, "grad_norm": 13.10235595703125, "learning_rate": 8.481558058941422e-07, "loss": 3.5066, "step": 71765 }, { "epoch": 0.7300821940104166, "grad_norm": 13.457281112670898, "learning_rate": 8.478556696347629e-07, "loss": 3.5466, "step": 71770 }, { "epoch": 0.730133056640625, "grad_norm": 11.804722785949707, "learning_rate": 8.475555756451056e-07, "loss": 3.2333, "step": 71775 }, { "epoch": 0.7301839192708334, "grad_norm": 12.426148414611816, "learning_rate": 8.472555239328464e-07, "loss": 3.5555, "step": 71780 }, { "epoch": 0.7302347819010416, "grad_norm": 13.05529499053955, "learning_rate": 8.469555145056635e-07, "loss": 3.9392, "step": 71785 }, { "epoch": 0.73028564453125, "grad_norm": 15.0496826171875, "learning_rate": 8.466555473712312e-07, "loss": 3.7523, "step": 71790 }, { "epoch": 0.7303365071614584, "grad_norm": 9.555891036987305, "learning_rate": 8.463556225372249e-07, "loss": 3.2072, "step": 71795 }, { "epoch": 0.7303873697916666, "grad_norm": 12.876121520996094, "learning_rate": 8.46055740011319e-07, "loss": 3.5651, "step": 71800 }, { "epoch": 0.730438232421875, "grad_norm": 11.001485824584961, "learning_rate": 8.457558998011848e-07, "loss": 3.5513, "step": 71805 }, { "epoch": 0.7304890950520834, "grad_norm": 12.766059875488281, "learning_rate": 8.454561019144938e-07, "loss": 3.26, "step": 71810 }, { "epoch": 0.7305399576822916, "grad_norm": 11.093799591064453, "learning_rate": 8.451563463589155e-07, "loss": 3.4896, "step": 71815 }, { "epoch": 0.7305908203125, "grad_norm": 12.005446434020996, "learning_rate": 8.448566331421198e-07, "loss": 3.3718, "step": 71820 }, { "epoch": 0.7306416829427084, "grad_norm": 11.861042022705078, "learning_rate": 8.445569622717756e-07, "loss": 3.2817, "step": 71825 }, { "epoch": 0.7306925455729166, "grad_norm": 9.181353569030762, "learning_rate": 8.442573337555493e-07, "loss": 3.6946, "step": 71830 }, { "epoch": 0.730743408203125, "grad_norm": 12.56906795501709, "learning_rate": 8.439577476011057e-07, "loss": 3.1878, "step": 71835 }, { "epoch": 0.7307942708333334, "grad_norm": 11.338751792907715, "learning_rate": 8.436582038161114e-07, "loss": 3.6575, "step": 71840 }, { "epoch": 0.7308451334635416, "grad_norm": 13.186248779296875, "learning_rate": 8.433587024082284e-07, "loss": 3.2892, "step": 71845 }, { "epoch": 0.73089599609375, "grad_norm": 10.832112312316895, "learning_rate": 8.430592433851214e-07, "loss": 3.4635, "step": 71850 }, { "epoch": 0.7309468587239584, "grad_norm": 6.655515193939209, "learning_rate": 8.427598267544501e-07, "loss": 3.3429, "step": 71855 }, { "epoch": 0.7309977213541666, "grad_norm": 7.147447109222412, "learning_rate": 8.424604525238766e-07, "loss": 3.354, "step": 71860 }, { "epoch": 0.731048583984375, "grad_norm": 9.368706703186035, "learning_rate": 8.421611207010597e-07, "loss": 3.0716, "step": 71865 }, { "epoch": 0.7310994466145834, "grad_norm": 9.368498802185059, "learning_rate": 8.418618312936567e-07, "loss": 3.0367, "step": 71870 }, { "epoch": 0.7311503092447916, "grad_norm": 9.8778076171875, "learning_rate": 8.415625843093253e-07, "loss": 3.3414, "step": 71875 }, { "epoch": 0.731201171875, "grad_norm": 12.284628868103027, "learning_rate": 8.412633797557231e-07, "loss": 3.1554, "step": 71880 }, { "epoch": 0.7312520345052084, "grad_norm": 10.680723190307617, "learning_rate": 8.409642176405045e-07, "loss": 3.9743, "step": 71885 }, { "epoch": 0.7313028971354166, "grad_norm": 10.758345603942871, "learning_rate": 8.406650979713221e-07, "loss": 3.3799, "step": 71890 }, { "epoch": 0.731353759765625, "grad_norm": 13.093633651733398, "learning_rate": 8.403660207558306e-07, "loss": 3.1075, "step": 71895 }, { "epoch": 0.7314046223958334, "grad_norm": 7.281319618225098, "learning_rate": 8.400669860016805e-07, "loss": 3.5233, "step": 71900 }, { "epoch": 0.7314554850260416, "grad_norm": 12.522171974182129, "learning_rate": 8.397679937165237e-07, "loss": 3.2413, "step": 71905 }, { "epoch": 0.73150634765625, "grad_norm": 12.937942504882812, "learning_rate": 8.394690439080086e-07, "loss": 3.2693, "step": 71910 }, { "epoch": 0.7315572102864584, "grad_norm": 12.733464241027832, "learning_rate": 8.391701365837851e-07, "loss": 3.3613, "step": 71915 }, { "epoch": 0.7316080729166666, "grad_norm": 10.784283638000488, "learning_rate": 8.388712717515002e-07, "loss": 3.2883, "step": 71920 }, { "epoch": 0.731658935546875, "grad_norm": 9.458428382873535, "learning_rate": 8.385724494187991e-07, "loss": 3.6336, "step": 71925 }, { "epoch": 0.7317097981770834, "grad_norm": 14.847657203674316, "learning_rate": 8.382736695933283e-07, "loss": 3.5436, "step": 71930 }, { "epoch": 0.7317606608072916, "grad_norm": 15.701521873474121, "learning_rate": 8.379749322827324e-07, "loss": 3.4739, "step": 71935 }, { "epoch": 0.7318115234375, "grad_norm": 13.327818870544434, "learning_rate": 8.37676237494654e-07, "loss": 3.1551, "step": 71940 }, { "epoch": 0.7318623860677084, "grad_norm": 11.132681846618652, "learning_rate": 8.373775852367349e-07, "loss": 3.1077, "step": 71945 }, { "epoch": 0.7319132486979166, "grad_norm": 13.124969482421875, "learning_rate": 8.370789755166148e-07, "loss": 3.0136, "step": 71950 }, { "epoch": 0.731964111328125, "grad_norm": 11.906679153442383, "learning_rate": 8.367804083419361e-07, "loss": 3.4839, "step": 71955 }, { "epoch": 0.7320149739583334, "grad_norm": 8.344532012939453, "learning_rate": 8.36481883720337e-07, "loss": 3.3098, "step": 71960 }, { "epoch": 0.7320658365885416, "grad_norm": 11.388338088989258, "learning_rate": 8.361834016594544e-07, "loss": 3.3811, "step": 71965 }, { "epoch": 0.73211669921875, "grad_norm": 15.55689811706543, "learning_rate": 8.358849621669244e-07, "loss": 3.1579, "step": 71970 }, { "epoch": 0.7321675618489584, "grad_norm": 8.578241348266602, "learning_rate": 8.355865652503828e-07, "loss": 3.6504, "step": 71975 }, { "epoch": 0.7322184244791666, "grad_norm": 15.57926082611084, "learning_rate": 8.352882109174656e-07, "loss": 3.5903, "step": 71980 }, { "epoch": 0.732269287109375, "grad_norm": 13.08041763305664, "learning_rate": 8.349898991758041e-07, "loss": 3.5386, "step": 71985 }, { "epoch": 0.7323201497395834, "grad_norm": 12.157262802124023, "learning_rate": 8.346916300330321e-07, "loss": 3.1902, "step": 71990 }, { "epoch": 0.7323710123697916, "grad_norm": 10.394250869750977, "learning_rate": 8.343934034967802e-07, "loss": 3.6059, "step": 71995 }, { "epoch": 0.732421875, "grad_norm": 7.417933940887451, "learning_rate": 8.340952195746771e-07, "loss": 3.1564, "step": 72000 }, { "epoch": 0.7324727376302084, "grad_norm": 10.798310279846191, "learning_rate": 8.337970782743532e-07, "loss": 3.1306, "step": 72005 }, { "epoch": 0.7325236002604166, "grad_norm": 15.50532054901123, "learning_rate": 8.33498979603437e-07, "loss": 3.2922, "step": 72010 }, { "epoch": 0.732574462890625, "grad_norm": 14.92574405670166, "learning_rate": 8.332009235695543e-07, "loss": 3.1616, "step": 72015 }, { "epoch": 0.7326253255208334, "grad_norm": 11.129034042358398, "learning_rate": 8.32902910180331e-07, "loss": 3.29, "step": 72020 }, { "epoch": 0.7326761881510416, "grad_norm": 11.893033981323242, "learning_rate": 8.326049394433905e-07, "loss": 3.1642, "step": 72025 }, { "epoch": 0.73272705078125, "grad_norm": 18.53277587890625, "learning_rate": 8.323070113663575e-07, "loss": 3.7038, "step": 72030 }, { "epoch": 0.7327779134114584, "grad_norm": 14.153130531311035, "learning_rate": 8.320091259568552e-07, "loss": 3.4444, "step": 72035 }, { "epoch": 0.7328287760416666, "grad_norm": 13.978302001953125, "learning_rate": 8.317112832225044e-07, "loss": 3.357, "step": 72040 }, { "epoch": 0.732879638671875, "grad_norm": 12.95738410949707, "learning_rate": 8.314134831709236e-07, "loss": 3.1264, "step": 72045 }, { "epoch": 0.7329305013020834, "grad_norm": 14.97579574584961, "learning_rate": 8.311157258097346e-07, "loss": 3.1641, "step": 72050 }, { "epoch": 0.7329813639322916, "grad_norm": 16.014860153198242, "learning_rate": 8.308180111465531e-07, "loss": 3.3314, "step": 72055 }, { "epoch": 0.7330322265625, "grad_norm": 16.28436279296875, "learning_rate": 8.305203391889979e-07, "loss": 3.7881, "step": 72060 }, { "epoch": 0.7330830891927084, "grad_norm": 16.736268997192383, "learning_rate": 8.302227099446836e-07, "loss": 3.3879, "step": 72065 }, { "epoch": 0.7331339518229166, "grad_norm": 17.092361450195312, "learning_rate": 8.299251234212261e-07, "loss": 3.4993, "step": 72070 }, { "epoch": 0.733184814453125, "grad_norm": 12.747779846191406, "learning_rate": 8.296275796262385e-07, "loss": 2.9937, "step": 72075 }, { "epoch": 0.7332356770833334, "grad_norm": 13.478667259216309, "learning_rate": 8.293300785673325e-07, "loss": 3.4399, "step": 72080 }, { "epoch": 0.7332865397135416, "grad_norm": 23.306598663330078, "learning_rate": 8.290326202521207e-07, "loss": 3.3257, "step": 72085 }, { "epoch": 0.73333740234375, "grad_norm": 8.487061500549316, "learning_rate": 8.287352046882139e-07, "loss": 3.4002, "step": 72090 }, { "epoch": 0.7333882649739584, "grad_norm": 15.717615127563477, "learning_rate": 8.284378318832206e-07, "loss": 3.2684, "step": 72095 }, { "epoch": 0.7334391276041666, "grad_norm": 14.708165168762207, "learning_rate": 8.281405018447486e-07, "loss": 3.3015, "step": 72100 }, { "epoch": 0.733489990234375, "grad_norm": 8.821043968200684, "learning_rate": 8.278432145804052e-07, "loss": 3.8564, "step": 72105 }, { "epoch": 0.7335408528645834, "grad_norm": 10.754634857177734, "learning_rate": 8.275459700977981e-07, "loss": 3.4867, "step": 72110 }, { "epoch": 0.7335917154947916, "grad_norm": 12.012271881103516, "learning_rate": 8.272487684045305e-07, "loss": 3.1794, "step": 72115 }, { "epoch": 0.733642578125, "grad_norm": 12.912185668945312, "learning_rate": 8.269516095082061e-07, "loss": 3.1557, "step": 72120 }, { "epoch": 0.7336934407552084, "grad_norm": 8.939395904541016, "learning_rate": 8.266544934164292e-07, "loss": 3.2299, "step": 72125 }, { "epoch": 0.7337443033854166, "grad_norm": 11.69654655456543, "learning_rate": 8.263574201367994e-07, "loss": 3.4519, "step": 72130 }, { "epoch": 0.733795166015625, "grad_norm": 13.756976127624512, "learning_rate": 8.260603896769193e-07, "loss": 3.3767, "step": 72135 }, { "epoch": 0.7338460286458334, "grad_norm": 16.499744415283203, "learning_rate": 8.257634020443863e-07, "loss": 3.3217, "step": 72140 }, { "epoch": 0.7338968912760416, "grad_norm": 12.474640846252441, "learning_rate": 8.254664572468008e-07, "loss": 3.1985, "step": 72145 }, { "epoch": 0.73394775390625, "grad_norm": 8.013901710510254, "learning_rate": 8.251695552917591e-07, "loss": 3.7307, "step": 72150 }, { "epoch": 0.7339986165364584, "grad_norm": 13.143088340759277, "learning_rate": 8.248726961868564e-07, "loss": 3.4681, "step": 72155 }, { "epoch": 0.7340494791666666, "grad_norm": 8.778640747070312, "learning_rate": 8.245758799396889e-07, "loss": 3.1804, "step": 72160 }, { "epoch": 0.734100341796875, "grad_norm": 15.068650245666504, "learning_rate": 8.242791065578509e-07, "loss": 3.3836, "step": 72165 }, { "epoch": 0.7341512044270834, "grad_norm": 11.814011573791504, "learning_rate": 8.239823760489352e-07, "loss": 3.3396, "step": 72170 }, { "epoch": 0.7342020670572916, "grad_norm": 14.065567970275879, "learning_rate": 8.236856884205327e-07, "loss": 3.2675, "step": 72175 }, { "epoch": 0.7342529296875, "grad_norm": 12.572928428649902, "learning_rate": 8.233890436802338e-07, "loss": 3.2514, "step": 72180 }, { "epoch": 0.7343037923177084, "grad_norm": 10.292560577392578, "learning_rate": 8.230924418356292e-07, "loss": 3.0932, "step": 72185 }, { "epoch": 0.7343546549479166, "grad_norm": 9.869354248046875, "learning_rate": 8.227958828943075e-07, "loss": 3.3961, "step": 72190 }, { "epoch": 0.734405517578125, "grad_norm": 10.191967964172363, "learning_rate": 8.224993668638548e-07, "loss": 3.1959, "step": 72195 }, { "epoch": 0.7344563802083334, "grad_norm": 14.69416618347168, "learning_rate": 8.222028937518592e-07, "loss": 3.1104, "step": 72200 }, { "epoch": 0.7345072428385416, "grad_norm": 8.91283893585205, "learning_rate": 8.219064635659047e-07, "loss": 4.0492, "step": 72205 }, { "epoch": 0.73455810546875, "grad_norm": 10.318886756896973, "learning_rate": 8.216100763135748e-07, "loss": 3.5003, "step": 72210 }, { "epoch": 0.7346089680989584, "grad_norm": 11.990934371948242, "learning_rate": 8.213137320024534e-07, "loss": 3.1464, "step": 72215 }, { "epoch": 0.7346598307291666, "grad_norm": 8.987807273864746, "learning_rate": 8.210174306401228e-07, "loss": 3.09, "step": 72220 }, { "epoch": 0.734710693359375, "grad_norm": 10.0052490234375, "learning_rate": 8.207211722341635e-07, "loss": 3.7299, "step": 72225 }, { "epoch": 0.7347615559895834, "grad_norm": 14.059890747070312, "learning_rate": 8.204249567921549e-07, "loss": 3.0111, "step": 72230 }, { "epoch": 0.7348124186197916, "grad_norm": 8.948488235473633, "learning_rate": 8.201287843216749e-07, "loss": 3.5322, "step": 72235 }, { "epoch": 0.73486328125, "grad_norm": 10.490068435668945, "learning_rate": 8.198326548303018e-07, "loss": 3.2814, "step": 72240 }, { "epoch": 0.7349141438802084, "grad_norm": 12.267974853515625, "learning_rate": 8.19536568325613e-07, "loss": 3.1633, "step": 72245 }, { "epoch": 0.7349650065104166, "grad_norm": 7.18000602722168, "learning_rate": 8.192405248151825e-07, "loss": 3.5377, "step": 72250 }, { "epoch": 0.735015869140625, "grad_norm": 8.979900360107422, "learning_rate": 8.189445243065838e-07, "loss": 3.1741, "step": 72255 }, { "epoch": 0.7350667317708334, "grad_norm": 10.791680335998535, "learning_rate": 8.186485668073913e-07, "loss": 3.4301, "step": 72260 }, { "epoch": 0.7351175944010416, "grad_norm": 8.889440536499023, "learning_rate": 8.183526523251775e-07, "loss": 3.4613, "step": 72265 }, { "epoch": 0.73516845703125, "grad_norm": 9.243463516235352, "learning_rate": 8.180567808675125e-07, "loss": 3.2286, "step": 72270 }, { "epoch": 0.7352193196614584, "grad_norm": 12.702774047851562, "learning_rate": 8.17760952441965e-07, "loss": 3.4112, "step": 72275 }, { "epoch": 0.7352701822916666, "grad_norm": 13.84238052368164, "learning_rate": 8.174651670561059e-07, "loss": 3.1961, "step": 72280 }, { "epoch": 0.735321044921875, "grad_norm": 15.002708435058594, "learning_rate": 8.171694247175008e-07, "loss": 3.5209, "step": 72285 }, { "epoch": 0.7353719075520834, "grad_norm": 12.038010597229004, "learning_rate": 8.168737254337169e-07, "loss": 3.341, "step": 72290 }, { "epoch": 0.7354227701822916, "grad_norm": 9.858930587768555, "learning_rate": 8.16578069212321e-07, "loss": 3.2706, "step": 72295 }, { "epoch": 0.7354736328125, "grad_norm": 9.680066108703613, "learning_rate": 8.16282456060876e-07, "loss": 3.295, "step": 72300 }, { "epoch": 0.7355244954427084, "grad_norm": 9.628165245056152, "learning_rate": 8.159868859869452e-07, "loss": 3.629, "step": 72305 }, { "epoch": 0.7355753580729166, "grad_norm": 13.200835227966309, "learning_rate": 8.1569135899809e-07, "loss": 3.5584, "step": 72310 }, { "epoch": 0.735626220703125, "grad_norm": 13.335905075073242, "learning_rate": 8.15395875101872e-07, "loss": 3.2494, "step": 72315 }, { "epoch": 0.7356770833333334, "grad_norm": 16.441917419433594, "learning_rate": 8.151004343058524e-07, "loss": 3.6302, "step": 72320 }, { "epoch": 0.7357279459635416, "grad_norm": 10.481017112731934, "learning_rate": 8.148050366175886e-07, "loss": 3.4223, "step": 72325 }, { "epoch": 0.73577880859375, "grad_norm": 10.352675437927246, "learning_rate": 8.145096820446378e-07, "loss": 3.4068, "step": 72330 }, { "epoch": 0.7358296712239584, "grad_norm": 7.219904899597168, "learning_rate": 8.14214370594558e-07, "loss": 2.8696, "step": 72335 }, { "epoch": 0.7358805338541666, "grad_norm": 9.463799476623535, "learning_rate": 8.139191022749033e-07, "loss": 3.4689, "step": 72340 }, { "epoch": 0.735931396484375, "grad_norm": 10.011601448059082, "learning_rate": 8.136238770932292e-07, "loss": 3.0355, "step": 72345 }, { "epoch": 0.7359822591145834, "grad_norm": 7.603102684020996, "learning_rate": 8.133286950570879e-07, "loss": 2.8421, "step": 72350 }, { "epoch": 0.7360331217447916, "grad_norm": 9.22468376159668, "learning_rate": 8.130335561740329e-07, "loss": 3.179, "step": 72355 }, { "epoch": 0.736083984375, "grad_norm": 15.12529182434082, "learning_rate": 8.127384604516147e-07, "loss": 3.5254, "step": 72360 }, { "epoch": 0.7361348470052084, "grad_norm": 11.003714561462402, "learning_rate": 8.124434078973819e-07, "loss": 3.2567, "step": 72365 }, { "epoch": 0.7361857096354166, "grad_norm": 8.871549606323242, "learning_rate": 8.121483985188849e-07, "loss": 3.9288, "step": 72370 }, { "epoch": 0.736236572265625, "grad_norm": 11.090631484985352, "learning_rate": 8.118534323236716e-07, "loss": 3.4408, "step": 72375 }, { "epoch": 0.7362874348958334, "grad_norm": 8.611220359802246, "learning_rate": 8.115585093192882e-07, "loss": 3.488, "step": 72380 }, { "epoch": 0.7363382975260416, "grad_norm": 9.688155174255371, "learning_rate": 8.112636295132795e-07, "loss": 3.7627, "step": 72385 }, { "epoch": 0.73638916015625, "grad_norm": 10.717297554016113, "learning_rate": 8.10968792913191e-07, "loss": 3.582, "step": 72390 }, { "epoch": 0.7364400227864584, "grad_norm": 12.765704154968262, "learning_rate": 8.10673999526565e-07, "loss": 3.3121, "step": 72395 }, { "epoch": 0.7364908854166666, "grad_norm": 8.391258239746094, "learning_rate": 8.10379249360945e-07, "loss": 3.4052, "step": 72400 }, { "epoch": 0.736541748046875, "grad_norm": 14.208325386047363, "learning_rate": 8.100845424238707e-07, "loss": 3.1379, "step": 72405 }, { "epoch": 0.7365926106770834, "grad_norm": 12.104424476623535, "learning_rate": 8.097898787228836e-07, "loss": 3.3069, "step": 72410 }, { "epoch": 0.7366434733072916, "grad_norm": 8.169574737548828, "learning_rate": 8.094952582655213e-07, "loss": 3.6452, "step": 72415 }, { "epoch": 0.7366943359375, "grad_norm": 11.388225555419922, "learning_rate": 8.092006810593225e-07, "loss": 3.5901, "step": 72420 }, { "epoch": 0.7367451985677084, "grad_norm": 7.172380447387695, "learning_rate": 8.089061471118228e-07, "loss": 3.3948, "step": 72425 }, { "epoch": 0.7367960611979166, "grad_norm": 11.014195442199707, "learning_rate": 8.086116564305596e-07, "loss": 3.3273, "step": 72430 }, { "epoch": 0.736846923828125, "grad_norm": 7.348326683044434, "learning_rate": 8.083172090230659e-07, "loss": 3.1885, "step": 72435 }, { "epoch": 0.7368977864583334, "grad_norm": 8.079544067382812, "learning_rate": 8.08022804896875e-07, "loss": 3.4375, "step": 72440 }, { "epoch": 0.7369486490885416, "grad_norm": 13.889323234558105, "learning_rate": 8.077284440595193e-07, "loss": 3.825, "step": 72445 }, { "epoch": 0.73699951171875, "grad_norm": 11.928169250488281, "learning_rate": 8.074341265185312e-07, "loss": 3.3244, "step": 72450 }, { "epoch": 0.7370503743489584, "grad_norm": 7.652375221252441, "learning_rate": 8.071398522814397e-07, "loss": 3.1061, "step": 72455 }, { "epoch": 0.7371012369791666, "grad_norm": 11.007584571838379, "learning_rate": 8.068456213557738e-07, "loss": 2.9223, "step": 72460 }, { "epoch": 0.737152099609375, "grad_norm": 14.736150741577148, "learning_rate": 8.065514337490607e-07, "loss": 3.195, "step": 72465 }, { "epoch": 0.7372029622395834, "grad_norm": 14.029659271240234, "learning_rate": 8.062572894688275e-07, "loss": 3.6088, "step": 72470 }, { "epoch": 0.7372538248697916, "grad_norm": 17.324636459350586, "learning_rate": 8.059631885226013e-07, "loss": 3.2988, "step": 72475 }, { "epoch": 0.7373046875, "grad_norm": 14.843786239624023, "learning_rate": 8.056691309179043e-07, "loss": 3.1665, "step": 72480 }, { "epoch": 0.7373555501302084, "grad_norm": 13.8319673538208, "learning_rate": 8.053751166622617e-07, "loss": 3.1889, "step": 72485 }, { "epoch": 0.7374064127604166, "grad_norm": 15.034490585327148, "learning_rate": 8.050811457631955e-07, "loss": 3.6928, "step": 72490 }, { "epoch": 0.737457275390625, "grad_norm": 15.734111785888672, "learning_rate": 8.047872182282251e-07, "loss": 3.6073, "step": 72495 }, { "epoch": 0.7375081380208334, "grad_norm": 12.015183448791504, "learning_rate": 8.044933340648722e-07, "loss": 3.451, "step": 72500 }, { "epoch": 0.7375590006510416, "grad_norm": 8.523397445678711, "learning_rate": 8.041994932806562e-07, "loss": 3.6919, "step": 72505 }, { "epoch": 0.73760986328125, "grad_norm": 9.58163070678711, "learning_rate": 8.039056958830943e-07, "loss": 3.1693, "step": 72510 }, { "epoch": 0.7376607259114584, "grad_norm": 9.87668228149414, "learning_rate": 8.03611941879703e-07, "loss": 3.3357, "step": 72515 }, { "epoch": 0.7377115885416666, "grad_norm": 14.215899467468262, "learning_rate": 8.033182312779978e-07, "loss": 3.0186, "step": 72520 }, { "epoch": 0.737762451171875, "grad_norm": 11.756176948547363, "learning_rate": 8.030245640854928e-07, "loss": 3.0071, "step": 72525 }, { "epoch": 0.7378133138020834, "grad_norm": 10.167065620422363, "learning_rate": 8.027309403097034e-07, "loss": 2.918, "step": 72530 }, { "epoch": 0.7378641764322916, "grad_norm": 9.327035903930664, "learning_rate": 8.024373599581403e-07, "loss": 3.4352, "step": 72535 }, { "epoch": 0.7379150390625, "grad_norm": 13.697539329528809, "learning_rate": 8.021438230383141e-07, "loss": 3.3313, "step": 72540 }, { "epoch": 0.7379659016927084, "grad_norm": 14.816646575927734, "learning_rate": 8.018503295577368e-07, "loss": 3.4092, "step": 72545 }, { "epoch": 0.7380167643229166, "grad_norm": 10.502012252807617, "learning_rate": 8.015568795239153e-07, "loss": 3.0441, "step": 72550 }, { "epoch": 0.738067626953125, "grad_norm": 13.62344741821289, "learning_rate": 8.012634729443597e-07, "loss": 3.1531, "step": 72555 }, { "epoch": 0.7381184895833334, "grad_norm": 8.448206901550293, "learning_rate": 8.009701098265743e-07, "loss": 3.3725, "step": 72560 }, { "epoch": 0.7381693522135416, "grad_norm": 17.432044982910156, "learning_rate": 8.006767901780669e-07, "loss": 3.2959, "step": 72565 }, { "epoch": 0.73822021484375, "grad_norm": 16.10090446472168, "learning_rate": 8.003835140063402e-07, "loss": 3.6261, "step": 72570 }, { "epoch": 0.7382710774739584, "grad_norm": 8.387552261352539, "learning_rate": 8.000902813188982e-07, "loss": 3.3503, "step": 72575 }, { "epoch": 0.7383219401041666, "grad_norm": 15.424768447875977, "learning_rate": 7.997970921232446e-07, "loss": 4.2835, "step": 72580 }, { "epoch": 0.738372802734375, "grad_norm": 10.04584789276123, "learning_rate": 7.995039464268791e-07, "loss": 3.8604, "step": 72585 }, { "epoch": 0.7384236653645834, "grad_norm": 9.393284797668457, "learning_rate": 7.992108442373023e-07, "loss": 3.1383, "step": 72590 }, { "epoch": 0.7384745279947916, "grad_norm": 9.053309440612793, "learning_rate": 7.98917785562012e-07, "loss": 3.0691, "step": 72595 }, { "epoch": 0.738525390625, "grad_norm": 15.204339981079102, "learning_rate": 7.986247704085068e-07, "loss": 3.2515, "step": 72600 }, { "epoch": 0.7385762532552084, "grad_norm": 11.147443771362305, "learning_rate": 7.983317987842848e-07, "loss": 3.0584, "step": 72605 }, { "epoch": 0.7386271158854166, "grad_norm": 10.606264114379883, "learning_rate": 7.9803887069684e-07, "loss": 3.3795, "step": 72610 }, { "epoch": 0.738677978515625, "grad_norm": 7.328312397003174, "learning_rate": 7.977459861536666e-07, "loss": 3.285, "step": 72615 }, { "epoch": 0.7387288411458334, "grad_norm": 10.47547435760498, "learning_rate": 7.974531451622594e-07, "loss": 3.553, "step": 72620 }, { "epoch": 0.7387797037760416, "grad_norm": 16.50904083251953, "learning_rate": 7.971603477301093e-07, "loss": 3.1711, "step": 72625 }, { "epoch": 0.73883056640625, "grad_norm": 13.123628616333008, "learning_rate": 7.968675938647088e-07, "loss": 3.1417, "step": 72630 }, { "epoch": 0.7388814290364584, "grad_norm": 9.2986478805542, "learning_rate": 7.965748835735465e-07, "loss": 3.3235, "step": 72635 }, { "epoch": 0.7389322916666666, "grad_norm": 15.497564315795898, "learning_rate": 7.962822168641127e-07, "loss": 3.937, "step": 72640 }, { "epoch": 0.738983154296875, "grad_norm": 12.958601951599121, "learning_rate": 7.959895937438946e-07, "loss": 3.7955, "step": 72645 }, { "epoch": 0.7390340169270834, "grad_norm": 13.965435028076172, "learning_rate": 7.956970142203782e-07, "loss": 3.2878, "step": 72650 }, { "epoch": 0.7390848795572916, "grad_norm": 12.274550437927246, "learning_rate": 7.954044783010498e-07, "loss": 3.1043, "step": 72655 }, { "epoch": 0.7391357421875, "grad_norm": 8.859460830688477, "learning_rate": 7.951119859933942e-07, "loss": 3.5147, "step": 72660 }, { "epoch": 0.7391866048177084, "grad_norm": 10.588842391967773, "learning_rate": 7.948195373048947e-07, "loss": 3.6557, "step": 72665 }, { "epoch": 0.7392374674479166, "grad_norm": 11.107576370239258, "learning_rate": 7.94527132243032e-07, "loss": 3.2825, "step": 72670 }, { "epoch": 0.739288330078125, "grad_norm": 11.815327644348145, "learning_rate": 7.942347708152895e-07, "loss": 2.9843, "step": 72675 }, { "epoch": 0.7393391927083334, "grad_norm": 8.988689422607422, "learning_rate": 7.939424530291451e-07, "loss": 3.3577, "step": 72680 }, { "epoch": 0.7393900553385416, "grad_norm": 12.207701683044434, "learning_rate": 7.936501788920798e-07, "loss": 3.3142, "step": 72685 }, { "epoch": 0.73944091796875, "grad_norm": 11.993644714355469, "learning_rate": 7.933579484115689e-07, "loss": 3.1865, "step": 72690 }, { "epoch": 0.7394917805989584, "grad_norm": 10.642743110656738, "learning_rate": 7.930657615950915e-07, "loss": 3.1436, "step": 72695 }, { "epoch": 0.7395426432291666, "grad_norm": 9.111330032348633, "learning_rate": 7.92773618450122e-07, "loss": 3.0421, "step": 72700 }, { "epoch": 0.739593505859375, "grad_norm": 7.885223388671875, "learning_rate": 7.924815189841339e-07, "loss": 3.1854, "step": 72705 }, { "epoch": 0.7396443684895834, "grad_norm": 17.917495727539062, "learning_rate": 7.921894632046015e-07, "loss": 3.8181, "step": 72710 }, { "epoch": 0.7396952311197916, "grad_norm": 11.11347770690918, "learning_rate": 7.918974511189975e-07, "loss": 2.9804, "step": 72715 }, { "epoch": 0.73974609375, "grad_norm": 7.590899467468262, "learning_rate": 7.916054827347925e-07, "loss": 3.3131, "step": 72720 }, { "epoch": 0.7397969563802084, "grad_norm": 16.299772262573242, "learning_rate": 7.913135580594556e-07, "loss": 3.2908, "step": 72725 }, { "epoch": 0.7398478190104166, "grad_norm": 11.786675453186035, "learning_rate": 7.910216771004561e-07, "loss": 3.1259, "step": 72730 }, { "epoch": 0.739898681640625, "grad_norm": 10.466172218322754, "learning_rate": 7.907298398652627e-07, "loss": 3.3856, "step": 72735 }, { "epoch": 0.7399495442708334, "grad_norm": 12.573579788208008, "learning_rate": 7.904380463613414e-07, "loss": 3.5018, "step": 72740 }, { "epoch": 0.7400004069010416, "grad_norm": 12.838423728942871, "learning_rate": 7.901462965961573e-07, "loss": 3.0551, "step": 72745 }, { "epoch": 0.74005126953125, "grad_norm": 8.679061889648438, "learning_rate": 7.898545905771743e-07, "loss": 3.61, "step": 72750 }, { "epoch": 0.7401021321614584, "grad_norm": 7.51031494140625, "learning_rate": 7.895629283118561e-07, "loss": 3.1428, "step": 72755 }, { "epoch": 0.7401529947916666, "grad_norm": 12.5309419631958, "learning_rate": 7.892713098076659e-07, "loss": 3.3681, "step": 72760 }, { "epoch": 0.740203857421875, "grad_norm": 13.049712181091309, "learning_rate": 7.889797350720637e-07, "loss": 2.9711, "step": 72765 }, { "epoch": 0.7402547200520834, "grad_norm": 11.52535343170166, "learning_rate": 7.886882041125085e-07, "loss": 3.2727, "step": 72770 }, { "epoch": 0.7403055826822916, "grad_norm": 11.455509185791016, "learning_rate": 7.883967169364609e-07, "loss": 3.365, "step": 72775 }, { "epoch": 0.7403564453125, "grad_norm": 10.567713737487793, "learning_rate": 7.881052735513769e-07, "loss": 3.1708, "step": 72780 }, { "epoch": 0.7404073079427084, "grad_norm": 8.276542663574219, "learning_rate": 7.878138739647134e-07, "loss": 3.346, "step": 72785 }, { "epoch": 0.7404581705729166, "grad_norm": 8.877224922180176, "learning_rate": 7.875225181839272e-07, "loss": 3.1973, "step": 72790 }, { "epoch": 0.740509033203125, "grad_norm": 12.39207649230957, "learning_rate": 7.872312062164714e-07, "loss": 3.3151, "step": 72795 }, { "epoch": 0.7405598958333334, "grad_norm": 10.976158142089844, "learning_rate": 7.869399380697992e-07, "loss": 2.8954, "step": 72800 }, { "epoch": 0.7406107584635416, "grad_norm": 17.814804077148438, "learning_rate": 7.866487137513618e-07, "loss": 3.5182, "step": 72805 }, { "epoch": 0.74066162109375, "grad_norm": 10.832623481750488, "learning_rate": 7.86357533268611e-07, "loss": 3.3605, "step": 72810 }, { "epoch": 0.7407124837239584, "grad_norm": 15.117820739746094, "learning_rate": 7.860663966289972e-07, "loss": 3.0494, "step": 72815 }, { "epoch": 0.7407633463541666, "grad_norm": 10.470831871032715, "learning_rate": 7.857753038399684e-07, "loss": 3.5723, "step": 72820 }, { "epoch": 0.740814208984375, "grad_norm": 12.327982902526855, "learning_rate": 7.854842549089717e-07, "loss": 3.2256, "step": 72825 }, { "epoch": 0.7408650716145834, "grad_norm": 13.123862266540527, "learning_rate": 7.851932498434544e-07, "loss": 3.1989, "step": 72830 }, { "epoch": 0.7409159342447916, "grad_norm": 12.228321075439453, "learning_rate": 7.849022886508606e-07, "loss": 3.4185, "step": 72835 }, { "epoch": 0.740966796875, "grad_norm": 11.303017616271973, "learning_rate": 7.846113713386361e-07, "loss": 3.5949, "step": 72840 }, { "epoch": 0.7410176595052084, "grad_norm": 6.818903923034668, "learning_rate": 7.843204979142221e-07, "loss": 3.4595, "step": 72845 }, { "epoch": 0.7410685221354166, "grad_norm": 12.604376792907715, "learning_rate": 7.840296683850626e-07, "loss": 3.0496, "step": 72850 }, { "epoch": 0.741119384765625, "grad_norm": 16.697912216186523, "learning_rate": 7.83738882758597e-07, "loss": 3.5606, "step": 72855 }, { "epoch": 0.7411702473958334, "grad_norm": 12.759716033935547, "learning_rate": 7.834481410422649e-07, "loss": 3.3353, "step": 72860 }, { "epoch": 0.7412211100260416, "grad_norm": 13.470650672912598, "learning_rate": 7.831574432435049e-07, "loss": 3.6976, "step": 72865 }, { "epoch": 0.74127197265625, "grad_norm": 7.027815341949463, "learning_rate": 7.828667893697556e-07, "loss": 3.1388, "step": 72870 }, { "epoch": 0.7413228352864584, "grad_norm": 13.945786476135254, "learning_rate": 7.825761794284526e-07, "loss": 3.4302, "step": 72875 }, { "epoch": 0.7413736979166666, "grad_norm": 9.513721466064453, "learning_rate": 7.822856134270301e-07, "loss": 3.1297, "step": 72880 }, { "epoch": 0.741424560546875, "grad_norm": 12.166504859924316, "learning_rate": 7.81995091372923e-07, "loss": 3.8204, "step": 72885 }, { "epoch": 0.7414754231770834, "grad_norm": 14.738011360168457, "learning_rate": 7.81704613273565e-07, "loss": 3.5365, "step": 72890 }, { "epoch": 0.7415262858072916, "grad_norm": 9.779434204101562, "learning_rate": 7.814141791363875e-07, "loss": 3.3975, "step": 72895 }, { "epoch": 0.7415771484375, "grad_norm": 13.886914253234863, "learning_rate": 7.811237889688198e-07, "loss": 3.2775, "step": 72900 }, { "epoch": 0.7416280110677084, "grad_norm": 10.23849868774414, "learning_rate": 7.808334427782934e-07, "loss": 3.5117, "step": 72905 }, { "epoch": 0.7416788736979166, "grad_norm": 16.51127815246582, "learning_rate": 7.805431405722352e-07, "loss": 3.3629, "step": 72910 }, { "epoch": 0.741729736328125, "grad_norm": 12.006612777709961, "learning_rate": 7.802528823580738e-07, "loss": 3.2699, "step": 72915 }, { "epoch": 0.7417805989583334, "grad_norm": 14.054769515991211, "learning_rate": 7.799626681432341e-07, "loss": 3.0308, "step": 72920 }, { "epoch": 0.7418314615885416, "grad_norm": 9.321966171264648, "learning_rate": 7.796724979351428e-07, "loss": 3.0909, "step": 72925 }, { "epoch": 0.74188232421875, "grad_norm": 9.320379257202148, "learning_rate": 7.793823717412224e-07, "loss": 3.6585, "step": 72930 }, { "epoch": 0.7419331868489584, "grad_norm": 7.190203666687012, "learning_rate": 7.79092289568896e-07, "loss": 3.2446, "step": 72935 }, { "epoch": 0.7419840494791666, "grad_norm": 9.347601890563965, "learning_rate": 7.78802251425585e-07, "loss": 3.3853, "step": 72940 }, { "epoch": 0.742034912109375, "grad_norm": 11.347188949584961, "learning_rate": 7.785122573187115e-07, "loss": 3.6131, "step": 72945 }, { "epoch": 0.7420857747395834, "grad_norm": 14.256086349487305, "learning_rate": 7.782223072556938e-07, "loss": 3.1202, "step": 72950 }, { "epoch": 0.7421366373697916, "grad_norm": 14.243017196655273, "learning_rate": 7.779324012439502e-07, "loss": 3.2519, "step": 72955 }, { "epoch": 0.7421875, "grad_norm": 7.689399242401123, "learning_rate": 7.776425392908971e-07, "loss": 3.5394, "step": 72960 }, { "epoch": 0.7422383626302084, "grad_norm": 13.22787094116211, "learning_rate": 7.773527214039514e-07, "loss": 3.5155, "step": 72965 }, { "epoch": 0.7422892252604166, "grad_norm": 9.220317840576172, "learning_rate": 7.770629475905289e-07, "loss": 3.3707, "step": 72970 }, { "epoch": 0.742340087890625, "grad_norm": 9.710553169250488, "learning_rate": 7.767732178580417e-07, "loss": 3.2424, "step": 72975 }, { "epoch": 0.7423909505208334, "grad_norm": 14.730201721191406, "learning_rate": 7.764835322139039e-07, "loss": 3.2154, "step": 72980 }, { "epoch": 0.7424418131510416, "grad_norm": 11.835850715637207, "learning_rate": 7.761938906655266e-07, "loss": 3.3134, "step": 72985 }, { "epoch": 0.74249267578125, "grad_norm": 9.529492378234863, "learning_rate": 7.759042932203193e-07, "loss": 3.0571, "step": 72990 }, { "epoch": 0.7425435384114584, "grad_norm": 6.461589813232422, "learning_rate": 7.756147398856917e-07, "loss": 3.9506, "step": 72995 }, { "epoch": 0.7425944010416666, "grad_norm": 16.31830406188965, "learning_rate": 7.753252306690532e-07, "loss": 3.6828, "step": 73000 }, { "epoch": 0.742645263671875, "grad_norm": 8.438053131103516, "learning_rate": 7.750357655778098e-07, "loss": 3.401, "step": 73005 }, { "epoch": 0.7426961263020834, "grad_norm": 9.632655143737793, "learning_rate": 7.747463446193676e-07, "loss": 3.2164, "step": 73010 }, { "epoch": 0.7427469889322916, "grad_norm": 12.210907936096191, "learning_rate": 7.744569678011296e-07, "loss": 3.3304, "step": 73015 }, { "epoch": 0.7427978515625, "grad_norm": 14.016220092773438, "learning_rate": 7.741676351305027e-07, "loss": 3.6702, "step": 73020 }, { "epoch": 0.7428487141927084, "grad_norm": 13.280328750610352, "learning_rate": 7.73878346614888e-07, "loss": 3.2677, "step": 73025 }, { "epoch": 0.7428995768229166, "grad_norm": 8.581869125366211, "learning_rate": 7.735891022616864e-07, "loss": 3.1247, "step": 73030 }, { "epoch": 0.742950439453125, "grad_norm": 12.272854804992676, "learning_rate": 7.732999020782978e-07, "loss": 3.4134, "step": 73035 }, { "epoch": 0.7430013020833334, "grad_norm": 11.38424015045166, "learning_rate": 7.730107460721217e-07, "loss": 3.4051, "step": 73040 }, { "epoch": 0.7430521647135416, "grad_norm": 9.790849685668945, "learning_rate": 7.727216342505573e-07, "loss": 3.2793, "step": 73045 }, { "epoch": 0.74310302734375, "grad_norm": 10.613922119140625, "learning_rate": 7.724325666210003e-07, "loss": 3.0087, "step": 73050 }, { "epoch": 0.7431538899739584, "grad_norm": 7.071836948394775, "learning_rate": 7.721435431908458e-07, "loss": 3.3846, "step": 73055 }, { "epoch": 0.7432047526041666, "grad_norm": 16.532873153686523, "learning_rate": 7.718545639674901e-07, "loss": 3.2195, "step": 73060 }, { "epoch": 0.743255615234375, "grad_norm": 12.714094161987305, "learning_rate": 7.715656289583251e-07, "loss": 3.2959, "step": 73065 }, { "epoch": 0.7433064778645834, "grad_norm": 11.682530403137207, "learning_rate": 7.712767381707437e-07, "loss": 3.2982, "step": 73070 }, { "epoch": 0.7433573404947916, "grad_norm": 9.41639232635498, "learning_rate": 7.70987891612138e-07, "loss": 3.4517, "step": 73075 }, { "epoch": 0.743408203125, "grad_norm": 7.8489227294921875, "learning_rate": 7.706990892898972e-07, "loss": 3.0582, "step": 73080 }, { "epoch": 0.7434590657552084, "grad_norm": 13.319563865661621, "learning_rate": 7.704103312114103e-07, "loss": 3.3114, "step": 73085 }, { "epoch": 0.7435099283854166, "grad_norm": 11.45560359954834, "learning_rate": 7.701216173840644e-07, "loss": 3.0357, "step": 73090 }, { "epoch": 0.743560791015625, "grad_norm": 9.883466720581055, "learning_rate": 7.698329478152469e-07, "loss": 3.46, "step": 73095 }, { "epoch": 0.7436116536458334, "grad_norm": 7.949326515197754, "learning_rate": 7.695443225123441e-07, "loss": 3.1761, "step": 73100 }, { "epoch": 0.7436625162760416, "grad_norm": 8.533098220825195, "learning_rate": 7.692557414827395e-07, "loss": 3.2972, "step": 73105 }, { "epoch": 0.74371337890625, "grad_norm": 17.932315826416016, "learning_rate": 7.689672047338159e-07, "loss": 3.9118, "step": 73110 }, { "epoch": 0.7437642415364584, "grad_norm": 10.43192195892334, "learning_rate": 7.686787122729569e-07, "loss": 3.4507, "step": 73115 }, { "epoch": 0.7438151041666666, "grad_norm": 15.594086647033691, "learning_rate": 7.683902641075419e-07, "loss": 3.2069, "step": 73120 }, { "epoch": 0.743865966796875, "grad_norm": 13.488203048706055, "learning_rate": 7.681018602449522e-07, "loss": 2.9345, "step": 73125 }, { "epoch": 0.7439168294270834, "grad_norm": 15.50025463104248, "learning_rate": 7.678135006925652e-07, "loss": 3.2471, "step": 73130 }, { "epoch": 0.7439676920572916, "grad_norm": 12.578755378723145, "learning_rate": 7.6752518545776e-07, "loss": 3.4652, "step": 73135 }, { "epoch": 0.7440185546875, "grad_norm": 16.930402755737305, "learning_rate": 7.672369145479122e-07, "loss": 3.2195, "step": 73140 }, { "epoch": 0.7440694173177084, "grad_norm": 8.504794120788574, "learning_rate": 7.669486879703966e-07, "loss": 3.0849, "step": 73145 }, { "epoch": 0.7441202799479166, "grad_norm": 12.299077987670898, "learning_rate": 7.66660505732588e-07, "loss": 3.3146, "step": 73150 }, { "epoch": 0.744171142578125, "grad_norm": 11.325913429260254, "learning_rate": 7.663723678418602e-07, "loss": 3.1139, "step": 73155 }, { "epoch": 0.7442220052083334, "grad_norm": 16.42136573791504, "learning_rate": 7.660842743055846e-07, "loss": 3.4013, "step": 73160 }, { "epoch": 0.7442728678385416, "grad_norm": 9.951093673706055, "learning_rate": 7.65796225131131e-07, "loss": 3.8557, "step": 73165 }, { "epoch": 0.74432373046875, "grad_norm": 6.883090019226074, "learning_rate": 7.655082203258699e-07, "loss": 3.0357, "step": 73170 }, { "epoch": 0.7443745930989584, "grad_norm": 13.703225135803223, "learning_rate": 7.652202598971706e-07, "loss": 3.4455, "step": 73175 }, { "epoch": 0.7444254557291666, "grad_norm": 11.733901977539062, "learning_rate": 7.649323438523997e-07, "loss": 3.3681, "step": 73180 }, { "epoch": 0.744476318359375, "grad_norm": 14.60044002532959, "learning_rate": 7.64644472198923e-07, "loss": 3.135, "step": 73185 }, { "epoch": 0.7445271809895834, "grad_norm": 15.660284996032715, "learning_rate": 7.643566449441067e-07, "loss": 3.5143, "step": 73190 }, { "epoch": 0.7445780436197916, "grad_norm": 12.892966270446777, "learning_rate": 7.640688620953135e-07, "loss": 3.4056, "step": 73195 }, { "epoch": 0.74462890625, "grad_norm": 9.747441291809082, "learning_rate": 7.637811236599079e-07, "loss": 2.9433, "step": 73200 }, { "epoch": 0.7446797688802084, "grad_norm": 9.57300090789795, "learning_rate": 7.634934296452501e-07, "loss": 3.1353, "step": 73205 }, { "epoch": 0.7447306315104166, "grad_norm": 13.052162170410156, "learning_rate": 7.632057800587017e-07, "loss": 3.2355, "step": 73210 }, { "epoch": 0.744781494140625, "grad_norm": 7.5890889167785645, "learning_rate": 7.62918174907622e-07, "loss": 3.483, "step": 73215 }, { "epoch": 0.7448323567708334, "grad_norm": 13.81509780883789, "learning_rate": 7.626306141993684e-07, "loss": 3.4252, "step": 73220 }, { "epoch": 0.7448832194010416, "grad_norm": 12.88431167602539, "learning_rate": 7.623430979412982e-07, "loss": 3.5395, "step": 73225 }, { "epoch": 0.74493408203125, "grad_norm": 15.229802131652832, "learning_rate": 7.620556261407694e-07, "loss": 3.2204, "step": 73230 }, { "epoch": 0.7449849446614584, "grad_norm": 10.262614250183105, "learning_rate": 7.617681988051351e-07, "loss": 3.4247, "step": 73235 }, { "epoch": 0.7450358072916666, "grad_norm": 8.711079597473145, "learning_rate": 7.614808159417494e-07, "loss": 3.5177, "step": 73240 }, { "epoch": 0.745086669921875, "grad_norm": 11.199665069580078, "learning_rate": 7.611934775579644e-07, "loss": 3.3516, "step": 73245 }, { "epoch": 0.7451375325520834, "grad_norm": 8.549784660339355, "learning_rate": 7.609061836611317e-07, "loss": 3.5392, "step": 73250 }, { "epoch": 0.7451883951822916, "grad_norm": 15.495569229125977, "learning_rate": 7.606189342586034e-07, "loss": 3.5441, "step": 73255 }, { "epoch": 0.7452392578125, "grad_norm": 12.841856956481934, "learning_rate": 7.60331729357727e-07, "loss": 3.2102, "step": 73260 }, { "epoch": 0.7452901204427084, "grad_norm": 17.090049743652344, "learning_rate": 7.600445689658503e-07, "loss": 3.2777, "step": 73265 }, { "epoch": 0.7453409830729166, "grad_norm": 14.31063461303711, "learning_rate": 7.597574530903218e-07, "loss": 3.5972, "step": 73270 }, { "epoch": 0.745391845703125, "grad_norm": 8.161468505859375, "learning_rate": 7.594703817384855e-07, "loss": 3.2841, "step": 73275 }, { "epoch": 0.7454427083333334, "grad_norm": 8.875266075134277, "learning_rate": 7.591833549176869e-07, "loss": 3.4786, "step": 73280 }, { "epoch": 0.7454935709635416, "grad_norm": 15.609048843383789, "learning_rate": 7.588963726352702e-07, "loss": 3.1179, "step": 73285 }, { "epoch": 0.74554443359375, "grad_norm": 12.657333374023438, "learning_rate": 7.586094348985773e-07, "loss": 3.2278, "step": 73290 }, { "epoch": 0.7455952962239584, "grad_norm": 10.095488548278809, "learning_rate": 7.583225417149492e-07, "loss": 3.1679, "step": 73295 }, { "epoch": 0.7456461588541666, "grad_norm": 12.093870162963867, "learning_rate": 7.580356930917254e-07, "loss": 3.4705, "step": 73300 }, { "epoch": 0.745697021484375, "grad_norm": 11.52402114868164, "learning_rate": 7.577488890362456e-07, "loss": 3.2479, "step": 73305 }, { "epoch": 0.7457478841145834, "grad_norm": 12.448471069335938, "learning_rate": 7.574621295558479e-07, "loss": 3.5321, "step": 73310 }, { "epoch": 0.7457987467447916, "grad_norm": 13.753480911254883, "learning_rate": 7.571754146578689e-07, "loss": 3.161, "step": 73315 }, { "epoch": 0.745849609375, "grad_norm": 9.864426612854004, "learning_rate": 7.568887443496429e-07, "loss": 3.6129, "step": 73320 }, { "epoch": 0.7459004720052084, "grad_norm": 11.091754913330078, "learning_rate": 7.56602118638505e-07, "loss": 3.4282, "step": 73325 }, { "epoch": 0.7459513346354166, "grad_norm": 9.379804611206055, "learning_rate": 7.563155375317898e-07, "loss": 3.1408, "step": 73330 }, { "epoch": 0.746002197265625, "grad_norm": 7.995944023132324, "learning_rate": 7.560290010368281e-07, "loss": 3.2759, "step": 73335 }, { "epoch": 0.7460530598958334, "grad_norm": 11.444838523864746, "learning_rate": 7.557425091609505e-07, "loss": 3.4338, "step": 73340 }, { "epoch": 0.7461039225260416, "grad_norm": 16.494482040405273, "learning_rate": 7.554560619114879e-07, "loss": 3.5272, "step": 73345 }, { "epoch": 0.74615478515625, "grad_norm": 9.846036911010742, "learning_rate": 7.551696592957677e-07, "loss": 3.4849, "step": 73350 }, { "epoch": 0.7462056477864584, "grad_norm": 9.586414337158203, "learning_rate": 7.54883301321119e-07, "loss": 3.5259, "step": 73355 }, { "epoch": 0.7462565104166666, "grad_norm": 9.466014862060547, "learning_rate": 7.545969879948667e-07, "loss": 3.3932, "step": 73360 }, { "epoch": 0.746307373046875, "grad_norm": 11.308871269226074, "learning_rate": 7.543107193243373e-07, "loss": 3.8975, "step": 73365 }, { "epoch": 0.7463582356770834, "grad_norm": 15.013957023620605, "learning_rate": 7.540244953168546e-07, "loss": 3.5655, "step": 73370 }, { "epoch": 0.7464090983072916, "grad_norm": 8.602811813354492, "learning_rate": 7.537383159797404e-07, "loss": 3.1833, "step": 73375 }, { "epoch": 0.7464599609375, "grad_norm": 10.58324909210205, "learning_rate": 7.534521813203177e-07, "loss": 3.2111, "step": 73380 }, { "epoch": 0.7465108235677084, "grad_norm": 9.13379192352295, "learning_rate": 7.531660913459077e-07, "loss": 3.2012, "step": 73385 }, { "epoch": 0.7465616861979166, "grad_norm": 14.996956825256348, "learning_rate": 7.528800460638292e-07, "loss": 2.994, "step": 73390 }, { "epoch": 0.746612548828125, "grad_norm": 13.060670852661133, "learning_rate": 7.525940454813996e-07, "loss": 3.3944, "step": 73395 }, { "epoch": 0.7466634114583334, "grad_norm": 13.515923500061035, "learning_rate": 7.523080896059381e-07, "loss": 3.6887, "step": 73400 }, { "epoch": 0.7467142740885416, "grad_norm": 12.800006866455078, "learning_rate": 7.52022178444759e-07, "loss": 3.1361, "step": 73405 }, { "epoch": 0.74676513671875, "grad_norm": 11.275392532348633, "learning_rate": 7.51736312005179e-07, "loss": 3.5173, "step": 73410 }, { "epoch": 0.7468159993489584, "grad_norm": 13.225747108459473, "learning_rate": 7.514504902945105e-07, "loss": 3.4082, "step": 73415 }, { "epoch": 0.7468668619791666, "grad_norm": 13.548460960388184, "learning_rate": 7.511647133200673e-07, "loss": 3.2836, "step": 73420 }, { "epoch": 0.746917724609375, "grad_norm": 14.141571998596191, "learning_rate": 7.508789810891603e-07, "loss": 3.8894, "step": 73425 }, { "epoch": 0.7469685872395834, "grad_norm": 11.883014678955078, "learning_rate": 7.505932936090996e-07, "loss": 3.2863, "step": 73430 }, { "epoch": 0.7470194498697916, "grad_norm": 13.795418739318848, "learning_rate": 7.503076508871945e-07, "loss": 3.3979, "step": 73435 }, { "epoch": 0.7470703125, "grad_norm": 15.13593864440918, "learning_rate": 7.500220529307542e-07, "loss": 3.3507, "step": 73440 }, { "epoch": 0.7471211751302084, "grad_norm": 7.632009983062744, "learning_rate": 7.497364997470852e-07, "loss": 3.2965, "step": 73445 }, { "epoch": 0.7471720377604166, "grad_norm": 10.563254356384277, "learning_rate": 7.494509913434928e-07, "loss": 3.2197, "step": 73450 }, { "epoch": 0.747222900390625, "grad_norm": 11.43966293334961, "learning_rate": 7.491655277272808e-07, "loss": 3.508, "step": 73455 }, { "epoch": 0.7472737630208334, "grad_norm": 9.749238967895508, "learning_rate": 7.48880108905754e-07, "loss": 3.1205, "step": 73460 }, { "epoch": 0.7473246256510416, "grad_norm": 14.747815132141113, "learning_rate": 7.485947348862155e-07, "loss": 3.606, "step": 73465 }, { "epoch": 0.74737548828125, "grad_norm": 9.424822807312012, "learning_rate": 7.483094056759646e-07, "loss": 3.0415, "step": 73470 }, { "epoch": 0.7474263509114584, "grad_norm": 10.191957473754883, "learning_rate": 7.480241212823033e-07, "loss": 3.3108, "step": 73475 }, { "epoch": 0.7474772135416666, "grad_norm": 12.010482788085938, "learning_rate": 7.477388817125286e-07, "loss": 3.7169, "step": 73480 }, { "epoch": 0.747528076171875, "grad_norm": 11.904157638549805, "learning_rate": 7.474536869739402e-07, "loss": 3.1726, "step": 73485 }, { "epoch": 0.7475789388020834, "grad_norm": 12.716765403747559, "learning_rate": 7.471685370738329e-07, "loss": 3.7189, "step": 73490 }, { "epoch": 0.7476298014322916, "grad_norm": 7.9622578620910645, "learning_rate": 7.468834320195042e-07, "loss": 3.3852, "step": 73495 }, { "epoch": 0.7476806640625, "grad_norm": 9.606403350830078, "learning_rate": 7.465983718182471e-07, "loss": 3.2574, "step": 73500 }, { "epoch": 0.7477315266927084, "grad_norm": 13.480175971984863, "learning_rate": 7.463133564773544e-07, "loss": 3.423, "step": 73505 }, { "epoch": 0.7477823893229166, "grad_norm": 11.57226276397705, "learning_rate": 7.460283860041187e-07, "loss": 3.2411, "step": 73510 }, { "epoch": 0.747833251953125, "grad_norm": 13.545355796813965, "learning_rate": 7.457434604058319e-07, "loss": 3.6054, "step": 73515 }, { "epoch": 0.7478841145833334, "grad_norm": 12.120404243469238, "learning_rate": 7.454585796897829e-07, "loss": 3.1793, "step": 73520 }, { "epoch": 0.7479349772135416, "grad_norm": 9.005751609802246, "learning_rate": 7.4517374386326e-07, "loss": 3.4021, "step": 73525 }, { "epoch": 0.74798583984375, "grad_norm": 18.179729461669922, "learning_rate": 7.448889529335504e-07, "loss": 3.595, "step": 73530 }, { "epoch": 0.7480367024739584, "grad_norm": 12.268835067749023, "learning_rate": 7.446042069079407e-07, "loss": 3.1909, "step": 73535 }, { "epoch": 0.7480875651041666, "grad_norm": 9.217876434326172, "learning_rate": 7.443195057937172e-07, "loss": 3.4967, "step": 73540 }, { "epoch": 0.748138427734375, "grad_norm": 13.504807472229004, "learning_rate": 7.440348495981631e-07, "loss": 3.445, "step": 73545 }, { "epoch": 0.7481892903645834, "grad_norm": 10.301555633544922, "learning_rate": 7.437502383285603e-07, "loss": 3.5344, "step": 73550 }, { "epoch": 0.7482401529947916, "grad_norm": 15.819905281066895, "learning_rate": 7.434656719921921e-07, "loss": 3.5132, "step": 73555 }, { "epoch": 0.748291015625, "grad_norm": 13.971081733703613, "learning_rate": 7.431811505963376e-07, "loss": 3.3255, "step": 73560 }, { "epoch": 0.7483418782552084, "grad_norm": 9.28151798248291, "learning_rate": 7.428966741482771e-07, "loss": 3.2326, "step": 73565 }, { "epoch": 0.7483927408854166, "grad_norm": 14.84122371673584, "learning_rate": 7.426122426552893e-07, "loss": 3.566, "step": 73570 }, { "epoch": 0.748443603515625, "grad_norm": 13.488147735595703, "learning_rate": 7.423278561246508e-07, "loss": 3.5507, "step": 73575 }, { "epoch": 0.7484944661458334, "grad_norm": 17.054447174072266, "learning_rate": 7.420435145636376e-07, "loss": 3.4997, "step": 73580 }, { "epoch": 0.7485453287760416, "grad_norm": 14.33040714263916, "learning_rate": 7.417592179795233e-07, "loss": 3.1852, "step": 73585 }, { "epoch": 0.74859619140625, "grad_norm": 13.27672004699707, "learning_rate": 7.414749663795828e-07, "loss": 3.2606, "step": 73590 }, { "epoch": 0.7486470540364584, "grad_norm": 14.843589782714844, "learning_rate": 7.411907597710894e-07, "loss": 3.3071, "step": 73595 }, { "epoch": 0.7486979166666666, "grad_norm": 11.863862037658691, "learning_rate": 7.409065981613134e-07, "loss": 2.9452, "step": 73600 }, { "epoch": 0.748748779296875, "grad_norm": 12.077499389648438, "learning_rate": 7.406224815575245e-07, "loss": 3.3808, "step": 73605 }, { "epoch": 0.7487996419270834, "grad_norm": 15.173077583312988, "learning_rate": 7.403384099669931e-07, "loss": 3.3957, "step": 73610 }, { "epoch": 0.7488505045572916, "grad_norm": 14.847683906555176, "learning_rate": 7.400543833969853e-07, "loss": 3.4315, "step": 73615 }, { "epoch": 0.7489013671875, "grad_norm": 8.362418174743652, "learning_rate": 7.397704018547703e-07, "loss": 3.2964, "step": 73620 }, { "epoch": 0.7489522298177084, "grad_norm": 13.943264961242676, "learning_rate": 7.394864653476111e-07, "loss": 3.3526, "step": 73625 }, { "epoch": 0.7490030924479166, "grad_norm": 7.470756530761719, "learning_rate": 7.392025738827743e-07, "loss": 3.4958, "step": 73630 }, { "epoch": 0.749053955078125, "grad_norm": 9.26888656616211, "learning_rate": 7.389187274675216e-07, "loss": 3.5424, "step": 73635 }, { "epoch": 0.7491048177083334, "grad_norm": 12.975512504577637, "learning_rate": 7.386349261091167e-07, "loss": 3.2929, "step": 73640 }, { "epoch": 0.7491556803385416, "grad_norm": 14.786529541015625, "learning_rate": 7.383511698148185e-07, "loss": 3.2, "step": 73645 }, { "epoch": 0.74920654296875, "grad_norm": 12.205375671386719, "learning_rate": 7.380674585918892e-07, "loss": 3.3362, "step": 73650 }, { "epoch": 0.7492574055989584, "grad_norm": 13.627872467041016, "learning_rate": 7.377837924475864e-07, "loss": 3.5033, "step": 73655 }, { "epoch": 0.7493082682291666, "grad_norm": 13.841710090637207, "learning_rate": 7.375001713891667e-07, "loss": 3.6456, "step": 73660 }, { "epoch": 0.749359130859375, "grad_norm": 8.06103801727295, "learning_rate": 7.372165954238874e-07, "loss": 3.5724, "step": 73665 }, { "epoch": 0.7494099934895834, "grad_norm": 12.296360969543457, "learning_rate": 7.369330645590044e-07, "loss": 3.2789, "step": 73670 }, { "epoch": 0.7494608561197916, "grad_norm": 13.1085786819458, "learning_rate": 7.36649578801771e-07, "loss": 3.1036, "step": 73675 }, { "epoch": 0.74951171875, "grad_norm": 16.37004280090332, "learning_rate": 7.363661381594395e-07, "loss": 3.3069, "step": 73680 }, { "epoch": 0.7495625813802084, "grad_norm": 12.110971450805664, "learning_rate": 7.360827426392631e-07, "loss": 3.4275, "step": 73685 }, { "epoch": 0.7496134440104166, "grad_norm": 16.157642364501953, "learning_rate": 7.357993922484907e-07, "loss": 3.5252, "step": 73690 }, { "epoch": 0.749664306640625, "grad_norm": 13.888794898986816, "learning_rate": 7.355160869943737e-07, "loss": 3.1457, "step": 73695 }, { "epoch": 0.7497151692708334, "grad_norm": 15.48414421081543, "learning_rate": 7.352328268841585e-07, "loss": 3.3848, "step": 73700 }, { "epoch": 0.7497660319010416, "grad_norm": 7.177224636077881, "learning_rate": 7.34949611925094e-07, "loss": 3.3622, "step": 73705 }, { "epoch": 0.74981689453125, "grad_norm": 11.376160621643066, "learning_rate": 7.346664421244251e-07, "loss": 3.2707, "step": 73710 }, { "epoch": 0.7498677571614584, "grad_norm": 14.925772666931152, "learning_rate": 7.343833174893961e-07, "loss": 3.0642, "step": 73715 }, { "epoch": 0.7499186197916666, "grad_norm": 10.566452026367188, "learning_rate": 7.341002380272516e-07, "loss": 3.0275, "step": 73720 }, { "epoch": 0.749969482421875, "grad_norm": 11.44359302520752, "learning_rate": 7.338172037452345e-07, "loss": 3.669, "step": 73725 }, { "epoch": 0.75, "eval_loss": 3.3423798084259033, "eval_runtime": 159.4795, "eval_samples_per_second": 12.585, "eval_steps_per_second": 12.585, "step": 73728 }, { "epoch": 0.7500203450520834, "grad_norm": 14.166296005249023, "learning_rate": 7.335342146505859e-07, "loss": 3.3664, "step": 73730 }, { "epoch": 0.7500712076822916, "grad_norm": 10.570517539978027, "learning_rate": 7.332512707505457e-07, "loss": 3.4589, "step": 73735 }, { "epoch": 0.7501220703125, "grad_norm": 14.14139175415039, "learning_rate": 7.32968372052352e-07, "loss": 3.563, "step": 73740 }, { "epoch": 0.7501729329427084, "grad_norm": 15.443755149841309, "learning_rate": 7.326855185632436e-07, "loss": 3.2833, "step": 73745 }, { "epoch": 0.7502237955729166, "grad_norm": 8.380440711975098, "learning_rate": 7.324027102904582e-07, "loss": 3.0448, "step": 73750 }, { "epoch": 0.750274658203125, "grad_norm": 12.28552532196045, "learning_rate": 7.321199472412299e-07, "loss": 3.5558, "step": 73755 }, { "epoch": 0.7503255208333334, "grad_norm": 10.05579662322998, "learning_rate": 7.318372294227944e-07, "loss": 3.5021, "step": 73760 }, { "epoch": 0.7503763834635416, "grad_norm": 10.5604248046875, "learning_rate": 7.31554556842384e-07, "loss": 3.4582, "step": 73765 }, { "epoch": 0.75042724609375, "grad_norm": 13.349933624267578, "learning_rate": 7.312719295072307e-07, "loss": 3.1165, "step": 73770 }, { "epoch": 0.7504781087239584, "grad_norm": 17.509366989135742, "learning_rate": 7.309893474245655e-07, "loss": 3.3821, "step": 73775 }, { "epoch": 0.7505289713541666, "grad_norm": 8.016525268554688, "learning_rate": 7.307068106016196e-07, "loss": 3.3826, "step": 73780 }, { "epoch": 0.750579833984375, "grad_norm": 10.776978492736816, "learning_rate": 7.304243190456203e-07, "loss": 2.9895, "step": 73785 }, { "epoch": 0.7506306966145834, "grad_norm": 10.558320045471191, "learning_rate": 7.301418727637957e-07, "loss": 3.0355, "step": 73790 }, { "epoch": 0.7506815592447916, "grad_norm": 10.417439460754395, "learning_rate": 7.298594717633701e-07, "loss": 3.3279, "step": 73795 }, { "epoch": 0.750732421875, "grad_norm": 15.79559326171875, "learning_rate": 7.295771160515719e-07, "loss": 3.3566, "step": 73800 }, { "epoch": 0.7507832845052084, "grad_norm": 10.064557075500488, "learning_rate": 7.292948056356236e-07, "loss": 3.5683, "step": 73805 }, { "epoch": 0.7508341471354166, "grad_norm": 13.68875789642334, "learning_rate": 7.290125405227478e-07, "loss": 3.172, "step": 73810 }, { "epoch": 0.750885009765625, "grad_norm": 10.799705505371094, "learning_rate": 7.287303207201657e-07, "loss": 3.433, "step": 73815 }, { "epoch": 0.7509358723958334, "grad_norm": 16.007572174072266, "learning_rate": 7.284481462350984e-07, "loss": 3.3508, "step": 73820 }, { "epoch": 0.7509867350260416, "grad_norm": 12.162935256958008, "learning_rate": 7.281660170747665e-07, "loss": 3.274, "step": 73825 }, { "epoch": 0.75103759765625, "grad_norm": 12.926222801208496, "learning_rate": 7.278839332463869e-07, "loss": 3.2844, "step": 73830 }, { "epoch": 0.7510884602864584, "grad_norm": 9.41971492767334, "learning_rate": 7.276018947571759e-07, "loss": 2.9304, "step": 73835 }, { "epoch": 0.7511393229166666, "grad_norm": 15.09054946899414, "learning_rate": 7.273199016143512e-07, "loss": 3.4695, "step": 73840 }, { "epoch": 0.751190185546875, "grad_norm": 10.432167053222656, "learning_rate": 7.270379538251259e-07, "loss": 3.6547, "step": 73845 }, { "epoch": 0.7512410481770834, "grad_norm": 10.546064376831055, "learning_rate": 7.267560513967151e-07, "loss": 3.3185, "step": 73850 }, { "epoch": 0.7512919108072916, "grad_norm": 10.503418922424316, "learning_rate": 7.264741943363296e-07, "loss": 3.227, "step": 73855 }, { "epoch": 0.7513427734375, "grad_norm": 16.155641555786133, "learning_rate": 7.261923826511821e-07, "loss": 3.07, "step": 73860 }, { "epoch": 0.7513936360677084, "grad_norm": 10.841660499572754, "learning_rate": 7.259106163484825e-07, "loss": 3.5704, "step": 73865 }, { "epoch": 0.7514444986979166, "grad_norm": 15.498570442199707, "learning_rate": 7.256288954354382e-07, "loss": 3.3501, "step": 73870 }, { "epoch": 0.751495361328125, "grad_norm": 10.249595642089844, "learning_rate": 7.253472199192579e-07, "loss": 3.7279, "step": 73875 }, { "epoch": 0.7515462239583334, "grad_norm": 11.629800796508789, "learning_rate": 7.250655898071493e-07, "loss": 3.2096, "step": 73880 }, { "epoch": 0.7515970865885416, "grad_norm": 11.37413501739502, "learning_rate": 7.247840051063169e-07, "loss": 3.292, "step": 73885 }, { "epoch": 0.75164794921875, "grad_norm": 8.198013305664062, "learning_rate": 7.245024658239641e-07, "loss": 3.1457, "step": 73890 }, { "epoch": 0.7516988118489584, "grad_norm": 11.581289291381836, "learning_rate": 7.242209719672958e-07, "loss": 3.0306, "step": 73895 }, { "epoch": 0.7517496744791666, "grad_norm": 14.45716381072998, "learning_rate": 7.23939523543512e-07, "loss": 3.0793, "step": 73900 }, { "epoch": 0.751800537109375, "grad_norm": 13.524023056030273, "learning_rate": 7.236581205598154e-07, "loss": 3.2285, "step": 73905 }, { "epoch": 0.7518513997395834, "grad_norm": 10.072444915771484, "learning_rate": 7.23376763023404e-07, "loss": 3.1458, "step": 73910 }, { "epoch": 0.7519022623697916, "grad_norm": 10.554051399230957, "learning_rate": 7.230954509414778e-07, "loss": 3.4933, "step": 73915 }, { "epoch": 0.751953125, "grad_norm": 10.31640911102295, "learning_rate": 7.228141843212333e-07, "loss": 3.2726, "step": 73920 }, { "epoch": 0.7520039876302084, "grad_norm": 14.159457206726074, "learning_rate": 7.225329631698658e-07, "loss": 2.9675, "step": 73925 }, { "epoch": 0.7520548502604166, "grad_norm": 14.815949440002441, "learning_rate": 7.222517874945712e-07, "loss": 3.5891, "step": 73930 }, { "epoch": 0.752105712890625, "grad_norm": 12.3479585647583, "learning_rate": 7.21970657302544e-07, "loss": 3.1654, "step": 73935 }, { "epoch": 0.7521565755208334, "grad_norm": 10.186405181884766, "learning_rate": 7.216895726009762e-07, "loss": 3.558, "step": 73940 }, { "epoch": 0.7522074381510416, "grad_norm": 12.260489463806152, "learning_rate": 7.214085333970591e-07, "loss": 3.6643, "step": 73945 }, { "epoch": 0.75225830078125, "grad_norm": 12.001791954040527, "learning_rate": 7.211275396979814e-07, "loss": 3.4012, "step": 73950 }, { "epoch": 0.7523091634114584, "grad_norm": 13.754630088806152, "learning_rate": 7.208465915109359e-07, "loss": 3.5265, "step": 73955 }, { "epoch": 0.7523600260416666, "grad_norm": 9.70177936553955, "learning_rate": 7.205656888431081e-07, "loss": 3.2286, "step": 73960 }, { "epoch": 0.752410888671875, "grad_norm": 9.411014556884766, "learning_rate": 7.20284831701685e-07, "loss": 3.4721, "step": 73965 }, { "epoch": 0.7524617513020834, "grad_norm": 11.240584373474121, "learning_rate": 7.200040200938532e-07, "loss": 3.3887, "step": 73970 }, { "epoch": 0.7525126139322916, "grad_norm": 10.180984497070312, "learning_rate": 7.197232540267962e-07, "loss": 3.3888, "step": 73975 }, { "epoch": 0.7525634765625, "grad_norm": 10.874776840209961, "learning_rate": 7.194425335076982e-07, "loss": 3.2853, "step": 73980 }, { "epoch": 0.7526143391927084, "grad_norm": 7.493716716766357, "learning_rate": 7.191618585437405e-07, "loss": 3.2489, "step": 73985 }, { "epoch": 0.7526652018229166, "grad_norm": 11.833789825439453, "learning_rate": 7.188812291421049e-07, "loss": 3.3848, "step": 73990 }, { "epoch": 0.752716064453125, "grad_norm": 12.204221725463867, "learning_rate": 7.186006453099712e-07, "loss": 3.3684, "step": 73995 }, { "epoch": 0.7527669270833334, "grad_norm": 10.87446117401123, "learning_rate": 7.18320107054517e-07, "loss": 3.0759, "step": 74000 }, { "epoch": 0.7528177897135416, "grad_norm": 9.297377586364746, "learning_rate": 7.180396143829205e-07, "loss": 3.5556, "step": 74005 }, { "epoch": 0.75286865234375, "grad_norm": 13.512225151062012, "learning_rate": 7.177591673023587e-07, "loss": 3.3424, "step": 74010 }, { "epoch": 0.7529195149739584, "grad_norm": 14.0761079788208, "learning_rate": 7.174787658200064e-07, "loss": 3.2936, "step": 74015 }, { "epoch": 0.7529703776041666, "grad_norm": 12.869799613952637, "learning_rate": 7.171984099430371e-07, "loss": 3.2506, "step": 74020 }, { "epoch": 0.753021240234375, "grad_norm": 9.71020793914795, "learning_rate": 7.169180996786232e-07, "loss": 3.2979, "step": 74025 }, { "epoch": 0.7530721028645834, "grad_norm": 7.054031848907471, "learning_rate": 7.166378350339373e-07, "loss": 2.9761, "step": 74030 }, { "epoch": 0.7531229654947916, "grad_norm": 13.224422454833984, "learning_rate": 7.163576160161503e-07, "loss": 3.2398, "step": 74035 }, { "epoch": 0.753173828125, "grad_norm": 10.546746253967285, "learning_rate": 7.16077442632431e-07, "loss": 3.0981, "step": 74040 }, { "epoch": 0.7532246907552084, "grad_norm": 12.613469123840332, "learning_rate": 7.157973148899466e-07, "loss": 3.3591, "step": 74045 }, { "epoch": 0.7532755533854166, "grad_norm": 13.701570510864258, "learning_rate": 7.155172327958659e-07, "loss": 3.3919, "step": 74050 }, { "epoch": 0.753326416015625, "grad_norm": 7.327104091644287, "learning_rate": 7.152371963573529e-07, "loss": 3.4793, "step": 74055 }, { "epoch": 0.7533772786458334, "grad_norm": 10.980306625366211, "learning_rate": 7.149572055815735e-07, "loss": 3.3033, "step": 74060 }, { "epoch": 0.7534281412760416, "grad_norm": 13.810663223266602, "learning_rate": 7.146772604756913e-07, "loss": 3.6748, "step": 74065 }, { "epoch": 0.75347900390625, "grad_norm": 11.452163696289062, "learning_rate": 7.143973610468685e-07, "loss": 3.6406, "step": 74070 }, { "epoch": 0.7535298665364584, "grad_norm": 13.183918952941895, "learning_rate": 7.141175073022658e-07, "loss": 3.7848, "step": 74075 }, { "epoch": 0.7535807291666666, "grad_norm": 7.297231197357178, "learning_rate": 7.138376992490425e-07, "loss": 3.8056, "step": 74080 }, { "epoch": 0.753631591796875, "grad_norm": 12.576817512512207, "learning_rate": 7.135579368943587e-07, "loss": 3.5714, "step": 74085 }, { "epoch": 0.7536824544270834, "grad_norm": 11.886563301086426, "learning_rate": 7.132782202453722e-07, "loss": 3.1065, "step": 74090 }, { "epoch": 0.7537333170572916, "grad_norm": 10.314664840698242, "learning_rate": 7.129985493092389e-07, "loss": 3.461, "step": 74095 }, { "epoch": 0.7537841796875, "grad_norm": 7.800176620483398, "learning_rate": 7.127189240931135e-07, "loss": 3.4499, "step": 74100 }, { "epoch": 0.7538350423177084, "grad_norm": 11.081335067749023, "learning_rate": 7.124393446041505e-07, "loss": 3.1305, "step": 74105 }, { "epoch": 0.7538859049479166, "grad_norm": 14.861065864562988, "learning_rate": 7.121598108495042e-07, "loss": 3.4695, "step": 74110 }, { "epoch": 0.753936767578125, "grad_norm": 8.500097274780273, "learning_rate": 7.118803228363255e-07, "loss": 3.2133, "step": 74115 }, { "epoch": 0.7539876302083334, "grad_norm": 10.504951477050781, "learning_rate": 7.116008805717639e-07, "loss": 3.1917, "step": 74120 }, { "epoch": 0.7540384928385416, "grad_norm": 9.952469825744629, "learning_rate": 7.113214840629706e-07, "loss": 3.6916, "step": 74125 }, { "epoch": 0.75408935546875, "grad_norm": 16.40432357788086, "learning_rate": 7.110421333170925e-07, "loss": 3.5647, "step": 74130 }, { "epoch": 0.7541402180989584, "grad_norm": 17.41621208190918, "learning_rate": 7.107628283412782e-07, "loss": 3.1238, "step": 74135 }, { "epoch": 0.7541910807291666, "grad_norm": 9.299099922180176, "learning_rate": 7.10483569142672e-07, "loss": 3.1899, "step": 74140 }, { "epoch": 0.754241943359375, "grad_norm": 9.889762878417969, "learning_rate": 7.102043557284205e-07, "loss": 3.2099, "step": 74145 }, { "epoch": 0.7542928059895834, "grad_norm": 8.289566040039062, "learning_rate": 7.09925188105666e-07, "loss": 3.2947, "step": 74150 }, { "epoch": 0.7543436686197916, "grad_norm": 15.631021499633789, "learning_rate": 7.096460662815508e-07, "loss": 3.3074, "step": 74155 }, { "epoch": 0.75439453125, "grad_norm": 8.1798095703125, "learning_rate": 7.093669902632164e-07, "loss": 3.825, "step": 74160 }, { "epoch": 0.7544453938802084, "grad_norm": 14.07030200958252, "learning_rate": 7.09087960057804e-07, "loss": 3.3914, "step": 74165 }, { "epoch": 0.7544962565104166, "grad_norm": 14.111145973205566, "learning_rate": 7.088089756724517e-07, "loss": 3.5105, "step": 74170 }, { "epoch": 0.754547119140625, "grad_norm": 14.813468933105469, "learning_rate": 7.085300371142962e-07, "loss": 3.2479, "step": 74175 }, { "epoch": 0.7545979817708334, "grad_norm": 13.250635147094727, "learning_rate": 7.08251144390476e-07, "loss": 2.969, "step": 74180 }, { "epoch": 0.7546488444010416, "grad_norm": 12.24089241027832, "learning_rate": 7.079722975081249e-07, "loss": 3.4867, "step": 74185 }, { "epoch": 0.75469970703125, "grad_norm": 8.176153182983398, "learning_rate": 7.076934964743784e-07, "loss": 2.9599, "step": 74190 }, { "epoch": 0.7547505696614584, "grad_norm": 8.18547534942627, "learning_rate": 7.074147412963684e-07, "loss": 3.4086, "step": 74195 }, { "epoch": 0.7548014322916666, "grad_norm": 10.30460262298584, "learning_rate": 7.071360319812282e-07, "loss": 3.382, "step": 74200 }, { "epoch": 0.754852294921875, "grad_norm": 10.885030746459961, "learning_rate": 7.068573685360875e-07, "loss": 3.1091, "step": 74205 }, { "epoch": 0.7549031575520834, "grad_norm": 17.012645721435547, "learning_rate": 7.065787509680749e-07, "loss": 2.9846, "step": 74210 }, { "epoch": 0.7549540201822916, "grad_norm": 10.468649864196777, "learning_rate": 7.063001792843202e-07, "loss": 3.1097, "step": 74215 }, { "epoch": 0.7550048828125, "grad_norm": 7.468170166015625, "learning_rate": 7.060216534919512e-07, "loss": 3.4211, "step": 74220 }, { "epoch": 0.7550557454427084, "grad_norm": 14.28119945526123, "learning_rate": 7.057431735980927e-07, "loss": 3.601, "step": 74225 }, { "epoch": 0.7551066080729166, "grad_norm": 7.192823886871338, "learning_rate": 7.054647396098699e-07, "loss": 3.1028, "step": 74230 }, { "epoch": 0.755157470703125, "grad_norm": 15.882479667663574, "learning_rate": 7.051863515344057e-07, "loss": 3.4561, "step": 74235 }, { "epoch": 0.7552083333333334, "grad_norm": 10.127427101135254, "learning_rate": 7.049080093788233e-07, "loss": 3.4995, "step": 74240 }, { "epoch": 0.7552591959635416, "grad_norm": 12.84577751159668, "learning_rate": 7.046297131502447e-07, "loss": 3.7194, "step": 74245 }, { "epoch": 0.75531005859375, "grad_norm": 15.184442520141602, "learning_rate": 7.043514628557885e-07, "loss": 3.2178, "step": 74250 }, { "epoch": 0.7553609212239584, "grad_norm": 7.603909492492676, "learning_rate": 7.040732585025753e-07, "loss": 3.4013, "step": 74255 }, { "epoch": 0.7554117838541666, "grad_norm": 10.720600128173828, "learning_rate": 7.037951000977214e-07, "loss": 3.4708, "step": 74260 }, { "epoch": 0.755462646484375, "grad_norm": 8.314825057983398, "learning_rate": 7.035169876483447e-07, "loss": 3.5037, "step": 74265 }, { "epoch": 0.7555135091145834, "grad_norm": 10.812227249145508, "learning_rate": 7.032389211615595e-07, "loss": 3.3262, "step": 74270 }, { "epoch": 0.7555643717447916, "grad_norm": 9.320564270019531, "learning_rate": 7.029609006444812e-07, "loss": 3.554, "step": 74275 }, { "epoch": 0.755615234375, "grad_norm": 15.716340065002441, "learning_rate": 7.026829261042226e-07, "loss": 3.1336, "step": 74280 }, { "epoch": 0.7556660970052084, "grad_norm": 9.545758247375488, "learning_rate": 7.024049975478944e-07, "loss": 3.3385, "step": 74285 }, { "epoch": 0.7557169596354166, "grad_norm": 17.533021926879883, "learning_rate": 7.021271149826081e-07, "loss": 3.3169, "step": 74290 }, { "epoch": 0.755767822265625, "grad_norm": 13.390016555786133, "learning_rate": 7.018492784154746e-07, "loss": 3.7757, "step": 74295 }, { "epoch": 0.7558186848958334, "grad_norm": 8.253308296203613, "learning_rate": 7.015714878536006e-07, "loss": 3.3973, "step": 74300 }, { "epoch": 0.7558695475260416, "grad_norm": 15.03423023223877, "learning_rate": 7.012937433040942e-07, "loss": 3.3413, "step": 74305 }, { "epoch": 0.75592041015625, "grad_norm": 12.773712158203125, "learning_rate": 7.010160447740602e-07, "loss": 3.1559, "step": 74310 }, { "epoch": 0.7559712727864584, "grad_norm": 9.488603591918945, "learning_rate": 7.00738392270604e-07, "loss": 3.4735, "step": 74315 }, { "epoch": 0.7560221354166666, "grad_norm": 8.902047157287598, "learning_rate": 7.004607858008305e-07, "loss": 3.3458, "step": 74320 }, { "epoch": 0.756072998046875, "grad_norm": 12.052464485168457, "learning_rate": 7.001832253718415e-07, "loss": 3.789, "step": 74325 }, { "epoch": 0.7561238606770834, "grad_norm": 11.685837745666504, "learning_rate": 6.99905710990737e-07, "loss": 3.4724, "step": 74330 }, { "epoch": 0.7561747233072916, "grad_norm": 8.351921081542969, "learning_rate": 6.996282426646192e-07, "loss": 3.1278, "step": 74335 }, { "epoch": 0.7562255859375, "grad_norm": 12.978894233703613, "learning_rate": 6.993508204005853e-07, "loss": 3.1652, "step": 74340 }, { "epoch": 0.7562764485677084, "grad_norm": 11.092260360717773, "learning_rate": 6.990734442057337e-07, "loss": 3.0752, "step": 74345 }, { "epoch": 0.7563273111979166, "grad_norm": 9.3390531539917, "learning_rate": 6.987961140871619e-07, "loss": 3.3607, "step": 74350 }, { "epoch": 0.756378173828125, "grad_norm": 10.838726043701172, "learning_rate": 6.985188300519646e-07, "loss": 3.5749, "step": 74355 }, { "epoch": 0.7564290364583334, "grad_norm": 12.331197738647461, "learning_rate": 6.982415921072361e-07, "loss": 3.3488, "step": 74360 }, { "epoch": 0.7564798990885416, "grad_norm": 9.617509841918945, "learning_rate": 6.979644002600688e-07, "loss": 3.4219, "step": 74365 }, { "epoch": 0.75653076171875, "grad_norm": 14.07290267944336, "learning_rate": 6.976872545175551e-07, "loss": 3.3708, "step": 74370 }, { "epoch": 0.7565816243489584, "grad_norm": 11.84440803527832, "learning_rate": 6.974101548867865e-07, "loss": 3.142, "step": 74375 }, { "epoch": 0.7566324869791666, "grad_norm": 10.46172046661377, "learning_rate": 6.971331013748516e-07, "loss": 3.2734, "step": 74380 }, { "epoch": 0.756683349609375, "grad_norm": 9.306661605834961, "learning_rate": 6.968560939888386e-07, "loss": 3.2955, "step": 74385 }, { "epoch": 0.7567342122395834, "grad_norm": 9.832356452941895, "learning_rate": 6.965791327358357e-07, "loss": 3.3325, "step": 74390 }, { "epoch": 0.7567850748697916, "grad_norm": 8.555065155029297, "learning_rate": 6.963022176229273e-07, "loss": 3.4083, "step": 74395 }, { "epoch": 0.7568359375, "grad_norm": 13.239713668823242, "learning_rate": 6.960253486572e-07, "loss": 3.0292, "step": 74400 }, { "epoch": 0.7568868001302084, "grad_norm": 9.089715003967285, "learning_rate": 6.957485258457358e-07, "loss": 3.4664, "step": 74405 }, { "epoch": 0.7569376627604166, "grad_norm": 14.721821784973145, "learning_rate": 6.954717491956189e-07, "loss": 3.4906, "step": 74410 }, { "epoch": 0.756988525390625, "grad_norm": 12.5114107131958, "learning_rate": 6.951950187139287e-07, "loss": 3.1409, "step": 74415 }, { "epoch": 0.7570393880208334, "grad_norm": 11.109739303588867, "learning_rate": 6.949183344077468e-07, "loss": 3.4038, "step": 74420 }, { "epoch": 0.7570902506510416, "grad_norm": 8.996869087219238, "learning_rate": 6.946416962841507e-07, "loss": 3.247, "step": 74425 }, { "epoch": 0.75714111328125, "grad_norm": 14.235173225402832, "learning_rate": 6.943651043502194e-07, "loss": 3.4906, "step": 74430 }, { "epoch": 0.7571919759114584, "grad_norm": 7.575634002685547, "learning_rate": 6.940885586130295e-07, "loss": 3.1856, "step": 74435 }, { "epoch": 0.7572428385416666, "grad_norm": 16.175704956054688, "learning_rate": 6.938120590796546e-07, "loss": 3.3288, "step": 74440 }, { "epoch": 0.757293701171875, "grad_norm": 14.900594711303711, "learning_rate": 6.935356057571702e-07, "loss": 3.4744, "step": 74445 }, { "epoch": 0.7573445638020834, "grad_norm": 14.816061973571777, "learning_rate": 6.932591986526499e-07, "loss": 3.6081, "step": 74450 }, { "epoch": 0.7573954264322916, "grad_norm": 11.891322135925293, "learning_rate": 6.929828377731649e-07, "loss": 3.2964, "step": 74455 }, { "epoch": 0.7574462890625, "grad_norm": 14.090703964233398, "learning_rate": 6.927065231257846e-07, "loss": 3.371, "step": 74460 }, { "epoch": 0.7574971516927084, "grad_norm": 12.144478797912598, "learning_rate": 6.924302547175805e-07, "loss": 3.3383, "step": 74465 }, { "epoch": 0.7575480143229166, "grad_norm": 12.723793983459473, "learning_rate": 6.921540325556189e-07, "loss": 3.2933, "step": 74470 }, { "epoch": 0.757598876953125, "grad_norm": 15.725260734558105, "learning_rate": 6.918778566469689e-07, "loss": 3.362, "step": 74475 }, { "epoch": 0.7576497395833334, "grad_norm": 11.462191581726074, "learning_rate": 6.916017269986946e-07, "loss": 3.4661, "step": 74480 }, { "epoch": 0.7577006022135416, "grad_norm": 11.932019233703613, "learning_rate": 6.913256436178623e-07, "loss": 3.2483, "step": 74485 }, { "epoch": 0.75775146484375, "grad_norm": 11.867484092712402, "learning_rate": 6.910496065115344e-07, "loss": 4.0898, "step": 74490 }, { "epoch": 0.7578023274739584, "grad_norm": 12.38962459564209, "learning_rate": 6.907736156867731e-07, "loss": 2.8809, "step": 74495 }, { "epoch": 0.7578531901041666, "grad_norm": 10.69157600402832, "learning_rate": 6.904976711506397e-07, "loss": 3.4891, "step": 74500 }, { "epoch": 0.757904052734375, "grad_norm": 10.238336563110352, "learning_rate": 6.902217729101954e-07, "loss": 3.4282, "step": 74505 }, { "epoch": 0.7579549153645834, "grad_norm": 16.624544143676758, "learning_rate": 6.899459209724982e-07, "loss": 3.5674, "step": 74510 }, { "epoch": 0.7580057779947916, "grad_norm": 15.39177417755127, "learning_rate": 6.896701153446056e-07, "loss": 3.376, "step": 74515 }, { "epoch": 0.758056640625, "grad_norm": 13.53531551361084, "learning_rate": 6.893943560335731e-07, "loss": 3.1336, "step": 74520 }, { "epoch": 0.7581075032552084, "grad_norm": 15.663147926330566, "learning_rate": 6.891186430464567e-07, "loss": 3.2657, "step": 74525 }, { "epoch": 0.7581583658854166, "grad_norm": 16.453989028930664, "learning_rate": 6.888429763903115e-07, "loss": 3.0325, "step": 74530 }, { "epoch": 0.758209228515625, "grad_norm": 7.302393913269043, "learning_rate": 6.885673560721895e-07, "loss": 3.2141, "step": 74535 }, { "epoch": 0.7582600911458334, "grad_norm": 16.061988830566406, "learning_rate": 6.882917820991417e-07, "loss": 3.0839, "step": 74540 }, { "epoch": 0.7583109537760416, "grad_norm": 10.075870513916016, "learning_rate": 6.880162544782196e-07, "loss": 3.4304, "step": 74545 }, { "epoch": 0.75836181640625, "grad_norm": 13.291833877563477, "learning_rate": 6.877407732164718e-07, "loss": 3.4443, "step": 74550 }, { "epoch": 0.7584126790364584, "grad_norm": 11.27462387084961, "learning_rate": 6.874653383209464e-07, "loss": 3.2295, "step": 74555 }, { "epoch": 0.7584635416666666, "grad_norm": 14.254870414733887, "learning_rate": 6.871899497986916e-07, "loss": 3.1262, "step": 74560 }, { "epoch": 0.758514404296875, "grad_norm": 7.6913251876831055, "learning_rate": 6.869146076567524e-07, "loss": 3.2504, "step": 74565 }, { "epoch": 0.7585652669270834, "grad_norm": 10.593586921691895, "learning_rate": 6.866393119021725e-07, "loss": 3.1265, "step": 74570 }, { "epoch": 0.7586161295572916, "grad_norm": 14.967764854431152, "learning_rate": 6.863640625419957e-07, "loss": 3.665, "step": 74575 }, { "epoch": 0.7586669921875, "grad_norm": 15.087532997131348, "learning_rate": 6.860888595832654e-07, "loss": 3.13, "step": 74580 }, { "epoch": 0.7587178548177084, "grad_norm": 13.718179702758789, "learning_rate": 6.858137030330217e-07, "loss": 3.2086, "step": 74585 }, { "epoch": 0.7587687174479166, "grad_norm": 11.125592231750488, "learning_rate": 6.855385928983043e-07, "loss": 3.0559, "step": 74590 }, { "epoch": 0.758819580078125, "grad_norm": 7.489245414733887, "learning_rate": 6.852635291861512e-07, "loss": 3.4474, "step": 74595 }, { "epoch": 0.7588704427083334, "grad_norm": 12.32341480255127, "learning_rate": 6.849885119036007e-07, "loss": 3.1301, "step": 74600 }, { "epoch": 0.7589213053385416, "grad_norm": 12.1867036819458, "learning_rate": 6.847135410576897e-07, "loss": 3.5196, "step": 74605 }, { "epoch": 0.75897216796875, "grad_norm": 11.529767990112305, "learning_rate": 6.844386166554523e-07, "loss": 3.4061, "step": 74610 }, { "epoch": 0.7590230305989584, "grad_norm": 11.689623832702637, "learning_rate": 6.841637387039219e-07, "loss": 3.3633, "step": 74615 }, { "epoch": 0.7590738932291666, "grad_norm": 13.131250381469727, "learning_rate": 6.838889072101324e-07, "loss": 3.399, "step": 74620 }, { "epoch": 0.759124755859375, "grad_norm": 7.971841812133789, "learning_rate": 6.836141221811144e-07, "loss": 3.0893, "step": 74625 }, { "epoch": 0.7591756184895834, "grad_norm": 11.301935195922852, "learning_rate": 6.833393836238989e-07, "loss": 3.3561, "step": 74630 }, { "epoch": 0.7592264811197916, "grad_norm": 15.302087783813477, "learning_rate": 6.830646915455141e-07, "loss": 3.3727, "step": 74635 }, { "epoch": 0.75927734375, "grad_norm": 9.979668617248535, "learning_rate": 6.827900459529893e-07, "loss": 3.4487, "step": 74640 }, { "epoch": 0.7593282063802084, "grad_norm": 7.658801555633545, "learning_rate": 6.825154468533504e-07, "loss": 3.0497, "step": 74645 }, { "epoch": 0.7593790690104166, "grad_norm": 15.597314834594727, "learning_rate": 6.822408942536221e-07, "loss": 3.431, "step": 74650 }, { "epoch": 0.759429931640625, "grad_norm": 12.66695499420166, "learning_rate": 6.819663881608299e-07, "loss": 3.4224, "step": 74655 }, { "epoch": 0.7594807942708334, "grad_norm": 19.707422256469727, "learning_rate": 6.816919285819973e-07, "loss": 3.1408, "step": 74660 }, { "epoch": 0.7595316569010416, "grad_norm": 12.803831100463867, "learning_rate": 6.81417515524146e-07, "loss": 3.2538, "step": 74665 }, { "epoch": 0.75958251953125, "grad_norm": 12.708759307861328, "learning_rate": 6.811431489942954e-07, "loss": 3.741, "step": 74670 }, { "epoch": 0.7596333821614584, "grad_norm": 14.123689651489258, "learning_rate": 6.808688289994672e-07, "loss": 2.9288, "step": 74675 }, { "epoch": 0.7596842447916666, "grad_norm": 9.11545181274414, "learning_rate": 6.805945555466781e-07, "loss": 3.0954, "step": 74680 }, { "epoch": 0.759735107421875, "grad_norm": 17.013774871826172, "learning_rate": 6.803203286429466e-07, "loss": 4.0014, "step": 74685 }, { "epoch": 0.7597859700520834, "grad_norm": 14.704337120056152, "learning_rate": 6.800461482952875e-07, "loss": 3.7858, "step": 74690 }, { "epoch": 0.7598368326822916, "grad_norm": 9.8838472366333, "learning_rate": 6.79772014510717e-07, "loss": 3.0953, "step": 74695 }, { "epoch": 0.7598876953125, "grad_norm": 8.987663269042969, "learning_rate": 6.794979272962482e-07, "loss": 3.3941, "step": 74700 }, { "epoch": 0.7599385579427084, "grad_norm": 11.245233535766602, "learning_rate": 6.792238866588927e-07, "loss": 3.1513, "step": 74705 }, { "epoch": 0.7599894205729166, "grad_norm": 7.750741481781006, "learning_rate": 6.789498926056626e-07, "loss": 3.2866, "step": 74710 }, { "epoch": 0.760040283203125, "grad_norm": 10.813319206237793, "learning_rate": 6.786759451435684e-07, "loss": 3.3133, "step": 74715 }, { "epoch": 0.7600911458333334, "grad_norm": 14.955268859863281, "learning_rate": 6.784020442796185e-07, "loss": 3.7137, "step": 74720 }, { "epoch": 0.7601420084635416, "grad_norm": 11.77611255645752, "learning_rate": 6.781281900208197e-07, "loss": 3.3437, "step": 74725 }, { "epoch": 0.76019287109375, "grad_norm": 14.14409065246582, "learning_rate": 6.778543823741796e-07, "loss": 3.2431, "step": 74730 }, { "epoch": 0.7602437337239584, "grad_norm": 13.484630584716797, "learning_rate": 6.775806213467037e-07, "loss": 3.4794, "step": 74735 }, { "epoch": 0.7602945963541666, "grad_norm": 7.718141555786133, "learning_rate": 6.773069069453958e-07, "loss": 3.475, "step": 74740 }, { "epoch": 0.760345458984375, "grad_norm": 13.957528114318848, "learning_rate": 6.770332391772578e-07, "loss": 3.2199, "step": 74745 }, { "epoch": 0.7603963216145834, "grad_norm": 10.419391632080078, "learning_rate": 6.767596180492931e-07, "loss": 3.2068, "step": 74750 }, { "epoch": 0.7604471842447916, "grad_norm": 11.013030052185059, "learning_rate": 6.764860435685005e-07, "loss": 3.5959, "step": 74755 }, { "epoch": 0.760498046875, "grad_norm": 9.451438903808594, "learning_rate": 6.762125157418812e-07, "loss": 3.5107, "step": 74760 }, { "epoch": 0.7605489095052084, "grad_norm": 10.277724266052246, "learning_rate": 6.759390345764316e-07, "loss": 3.1288, "step": 74765 }, { "epoch": 0.7605997721354166, "grad_norm": 14.881711959838867, "learning_rate": 6.756656000791503e-07, "loss": 3.2657, "step": 74770 }, { "epoch": 0.760650634765625, "grad_norm": 7.927708625793457, "learning_rate": 6.75392212257032e-07, "loss": 3.5849, "step": 74775 }, { "epoch": 0.7607014973958334, "grad_norm": 8.746414184570312, "learning_rate": 6.751188711170708e-07, "loss": 3.3846, "step": 74780 }, { "epoch": 0.7607523600260416, "grad_norm": 10.257974624633789, "learning_rate": 6.748455766662609e-07, "loss": 2.8426, "step": 74785 }, { "epoch": 0.76080322265625, "grad_norm": 14.12350082397461, "learning_rate": 6.745723289115949e-07, "loss": 3.0598, "step": 74790 }, { "epoch": 0.7608540852864584, "grad_norm": 12.958858489990234, "learning_rate": 6.742991278600633e-07, "loss": 3.4875, "step": 74795 }, { "epoch": 0.7609049479166666, "grad_norm": 9.312466621398926, "learning_rate": 6.740259735186555e-07, "loss": 3.0938, "step": 74800 }, { "epoch": 0.760955810546875, "grad_norm": 13.80396842956543, "learning_rate": 6.737528658943599e-07, "loss": 3.4107, "step": 74805 }, { "epoch": 0.7610066731770834, "grad_norm": 11.66608715057373, "learning_rate": 6.734798049941644e-07, "loss": 3.3786, "step": 74810 }, { "epoch": 0.7610575358072916, "grad_norm": 14.635140419006348, "learning_rate": 6.732067908250555e-07, "loss": 3.4482, "step": 74815 }, { "epoch": 0.7611083984375, "grad_norm": 8.224568367004395, "learning_rate": 6.729338233940183e-07, "loss": 3.3317, "step": 74820 }, { "epoch": 0.7611592610677084, "grad_norm": 8.623149871826172, "learning_rate": 6.726609027080352e-07, "loss": 3.3037, "step": 74825 }, { "epoch": 0.7612101236979166, "grad_norm": 10.660038948059082, "learning_rate": 6.723880287740905e-07, "loss": 3.0179, "step": 74830 }, { "epoch": 0.761260986328125, "grad_norm": 9.211432456970215, "learning_rate": 6.721152015991644e-07, "loss": 3.4817, "step": 74835 }, { "epoch": 0.7613118489583334, "grad_norm": 11.59577751159668, "learning_rate": 6.718424211902375e-07, "loss": 3.3023, "step": 74840 }, { "epoch": 0.7613627115885416, "grad_norm": 15.24312686920166, "learning_rate": 6.715696875542896e-07, "loss": 3.388, "step": 74845 }, { "epoch": 0.76141357421875, "grad_norm": 12.55749225616455, "learning_rate": 6.712970006982977e-07, "loss": 3.2893, "step": 74850 }, { "epoch": 0.7614644368489584, "grad_norm": 7.277038097381592, "learning_rate": 6.710243606292388e-07, "loss": 3.3295, "step": 74855 }, { "epoch": 0.7615152994791666, "grad_norm": 7.668971538543701, "learning_rate": 6.707517673540867e-07, "loss": 3.1016, "step": 74860 }, { "epoch": 0.761566162109375, "grad_norm": 8.580836296081543, "learning_rate": 6.704792208798183e-07, "loss": 3.5713, "step": 74865 }, { "epoch": 0.7616170247395834, "grad_norm": 7.763233661651611, "learning_rate": 6.702067212134056e-07, "loss": 3.6885, "step": 74870 }, { "epoch": 0.7616678873697916, "grad_norm": 10.616766929626465, "learning_rate": 6.699342683618201e-07, "loss": 3.5457, "step": 74875 }, { "epoch": 0.76171875, "grad_norm": 14.713553428649902, "learning_rate": 6.69661862332032e-07, "loss": 3.1444, "step": 74880 }, { "epoch": 0.7617696126302084, "grad_norm": 12.911894798278809, "learning_rate": 6.69389503131011e-07, "loss": 3.1633, "step": 74885 }, { "epoch": 0.7618204752604166, "grad_norm": 14.415287017822266, "learning_rate": 6.691171907657268e-07, "loss": 3.0614, "step": 74890 }, { "epoch": 0.761871337890625, "grad_norm": 12.509305953979492, "learning_rate": 6.68844925243145e-07, "loss": 3.2278, "step": 74895 }, { "epoch": 0.7619222005208334, "grad_norm": 10.296442985534668, "learning_rate": 6.685727065702311e-07, "loss": 3.3172, "step": 74900 }, { "epoch": 0.7619730631510416, "grad_norm": 9.007887840270996, "learning_rate": 6.683005347539512e-07, "loss": 3.371, "step": 74905 }, { "epoch": 0.76202392578125, "grad_norm": 12.702661514282227, "learning_rate": 6.68028409801267e-07, "loss": 3.3113, "step": 74910 }, { "epoch": 0.7620747884114584, "grad_norm": 13.30461311340332, "learning_rate": 6.677563317191429e-07, "loss": 3.2866, "step": 74915 }, { "epoch": 0.7621256510416666, "grad_norm": 14.909944534301758, "learning_rate": 6.674843005145377e-07, "loss": 3.3629, "step": 74920 }, { "epoch": 0.762176513671875, "grad_norm": 13.380775451660156, "learning_rate": 6.672123161944131e-07, "loss": 3.2381, "step": 74925 }, { "epoch": 0.7622273763020834, "grad_norm": 8.74083423614502, "learning_rate": 6.66940378765727e-07, "loss": 3.2115, "step": 74930 }, { "epoch": 0.7622782389322916, "grad_norm": 8.978840827941895, "learning_rate": 6.66668488235436e-07, "loss": 3.4606, "step": 74935 }, { "epoch": 0.7623291015625, "grad_norm": 11.639257431030273, "learning_rate": 6.663966446104972e-07, "loss": 3.3473, "step": 74940 }, { "epoch": 0.7623799641927084, "grad_norm": 15.828412055969238, "learning_rate": 6.661248478978666e-07, "loss": 3.4771, "step": 74945 }, { "epoch": 0.7624308268229166, "grad_norm": 11.355568885803223, "learning_rate": 6.658530981044969e-07, "loss": 3.4224, "step": 74950 }, { "epoch": 0.762481689453125, "grad_norm": 13.490345001220703, "learning_rate": 6.655813952373402e-07, "loss": 3.1535, "step": 74955 }, { "epoch": 0.7625325520833334, "grad_norm": 8.671171188354492, "learning_rate": 6.653097393033495e-07, "loss": 3.1378, "step": 74960 }, { "epoch": 0.7625834147135416, "grad_norm": 10.636831283569336, "learning_rate": 6.650381303094733e-07, "loss": 3.2926, "step": 74965 }, { "epoch": 0.76263427734375, "grad_norm": 12.167415618896484, "learning_rate": 6.647665682626625e-07, "loss": 3.218, "step": 74970 }, { "epoch": 0.7626851399739584, "grad_norm": 7.13845157623291, "learning_rate": 6.644950531698635e-07, "loss": 3.1291, "step": 74975 }, { "epoch": 0.7627360026041666, "grad_norm": 11.226144790649414, "learning_rate": 6.642235850380241e-07, "loss": 3.3579, "step": 74980 }, { "epoch": 0.762786865234375, "grad_norm": 10.787151336669922, "learning_rate": 6.63952163874089e-07, "loss": 3.4915, "step": 74985 }, { "epoch": 0.7628377278645834, "grad_norm": 12.106456756591797, "learning_rate": 6.63680789685002e-07, "loss": 3.3662, "step": 74990 }, { "epoch": 0.7628885904947916, "grad_norm": 15.901619911193848, "learning_rate": 6.634094624777068e-07, "loss": 3.5041, "step": 74995 }, { "epoch": 0.762939453125, "grad_norm": 12.083741188049316, "learning_rate": 6.631381822591457e-07, "loss": 3.4699, "step": 75000 }, { "epoch": 0.7629903157552084, "grad_norm": 10.314224243164062, "learning_rate": 6.62866949036259e-07, "loss": 2.9315, "step": 75005 }, { "epoch": 0.7630411783854166, "grad_norm": 8.981534957885742, "learning_rate": 6.62595762815986e-07, "loss": 3.1706, "step": 75010 }, { "epoch": 0.763092041015625, "grad_norm": 8.06904125213623, "learning_rate": 6.623246236052633e-07, "loss": 3.1097, "step": 75015 }, { "epoch": 0.7631429036458334, "grad_norm": 13.305237770080566, "learning_rate": 6.62053531411031e-07, "loss": 3.1732, "step": 75020 }, { "epoch": 0.7631937662760416, "grad_norm": 11.285202026367188, "learning_rate": 6.617824862402236e-07, "loss": 3.4915, "step": 75025 }, { "epoch": 0.76324462890625, "grad_norm": 16.577655792236328, "learning_rate": 6.615114880997756e-07, "loss": 3.1498, "step": 75030 }, { "epoch": 0.7632954915364584, "grad_norm": 8.940192222595215, "learning_rate": 6.612405369966193e-07, "loss": 3.4379, "step": 75035 }, { "epoch": 0.7633463541666666, "grad_norm": 13.985937118530273, "learning_rate": 6.609696329376883e-07, "loss": 3.3368, "step": 75040 }, { "epoch": 0.763397216796875, "grad_norm": 13.95054817199707, "learning_rate": 6.606987759299142e-07, "loss": 3.176, "step": 75045 }, { "epoch": 0.7634480794270834, "grad_norm": 10.873615264892578, "learning_rate": 6.604279659802251e-07, "loss": 3.1594, "step": 75050 }, { "epoch": 0.7634989420572916, "grad_norm": 11.51984691619873, "learning_rate": 6.601572030955511e-07, "loss": 3.1424, "step": 75055 }, { "epoch": 0.7635498046875, "grad_norm": 8.51795482635498, "learning_rate": 6.598864872828192e-07, "loss": 3.2808, "step": 75060 }, { "epoch": 0.7636006673177084, "grad_norm": 8.907057762145996, "learning_rate": 6.596158185489543e-07, "loss": 3.1381, "step": 75065 }, { "epoch": 0.7636515299479166, "grad_norm": 9.872109413146973, "learning_rate": 6.593451969008827e-07, "loss": 3.3564, "step": 75070 }, { "epoch": 0.763702392578125, "grad_norm": 9.058270454406738, "learning_rate": 6.590746223455288e-07, "loss": 3.3251, "step": 75075 }, { "epoch": 0.7637532552083334, "grad_norm": 9.94036865234375, "learning_rate": 6.588040948898142e-07, "loss": 3.3208, "step": 75080 }, { "epoch": 0.7638041178385416, "grad_norm": 15.931431770324707, "learning_rate": 6.585336145406604e-07, "loss": 3.4207, "step": 75085 }, { "epoch": 0.76385498046875, "grad_norm": 16.417871475219727, "learning_rate": 6.582631813049867e-07, "loss": 3.483, "step": 75090 }, { "epoch": 0.7639058430989584, "grad_norm": 8.511138916015625, "learning_rate": 6.579927951897134e-07, "loss": 3.4558, "step": 75095 }, { "epoch": 0.7639567057291666, "grad_norm": 9.964131355285645, "learning_rate": 6.577224562017582e-07, "loss": 3.2744, "step": 75100 }, { "epoch": 0.764007568359375, "grad_norm": 9.064841270446777, "learning_rate": 6.574521643480372e-07, "loss": 3.5231, "step": 75105 }, { "epoch": 0.7640584309895834, "grad_norm": 15.779176712036133, "learning_rate": 6.571819196354654e-07, "loss": 3.0945, "step": 75110 }, { "epoch": 0.7641092936197916, "grad_norm": 13.981021881103516, "learning_rate": 6.569117220709581e-07, "loss": 3.1934, "step": 75115 }, { "epoch": 0.76416015625, "grad_norm": 8.831730842590332, "learning_rate": 6.566415716614267e-07, "loss": 3.2304, "step": 75120 }, { "epoch": 0.7642110188802084, "grad_norm": 19.293806076049805, "learning_rate": 6.563714684137845e-07, "loss": 3.0182, "step": 75125 }, { "epoch": 0.7642618815104166, "grad_norm": 14.205883979797363, "learning_rate": 6.561014123349404e-07, "loss": 2.9684, "step": 75130 }, { "epoch": 0.764312744140625, "grad_norm": 6.975743770599365, "learning_rate": 6.558314034318056e-07, "loss": 3.0413, "step": 75135 }, { "epoch": 0.7643636067708334, "grad_norm": 14.651495933532715, "learning_rate": 6.555614417112871e-07, "loss": 3.4957, "step": 75140 }, { "epoch": 0.7644144694010416, "grad_norm": 11.398849487304688, "learning_rate": 6.552915271802915e-07, "loss": 3.2468, "step": 75145 }, { "epoch": 0.76446533203125, "grad_norm": 13.041194915771484, "learning_rate": 6.550216598457246e-07, "loss": 3.2752, "step": 75150 }, { "epoch": 0.7645161946614584, "grad_norm": 9.893031120300293, "learning_rate": 6.547518397144919e-07, "loss": 3.0951, "step": 75155 }, { "epoch": 0.7645670572916666, "grad_norm": 8.85152816772461, "learning_rate": 6.544820667934962e-07, "loss": 3.0603, "step": 75160 }, { "epoch": 0.764617919921875, "grad_norm": 13.413690567016602, "learning_rate": 6.542123410896386e-07, "loss": 3.5851, "step": 75165 }, { "epoch": 0.7646687825520834, "grad_norm": 10.097662925720215, "learning_rate": 6.539426626098208e-07, "loss": 3.5162, "step": 75170 }, { "epoch": 0.7647196451822916, "grad_norm": 10.521745681762695, "learning_rate": 6.536730313609433e-07, "loss": 3.6233, "step": 75175 }, { "epoch": 0.7647705078125, "grad_norm": 9.089445114135742, "learning_rate": 6.534034473499031e-07, "loss": 3.0321, "step": 75180 }, { "epoch": 0.7648213704427084, "grad_norm": 8.658125877380371, "learning_rate": 6.531339105835979e-07, "loss": 3.5217, "step": 75185 }, { "epoch": 0.7648722330729166, "grad_norm": 11.721631050109863, "learning_rate": 6.528644210689242e-07, "loss": 3.087, "step": 75190 }, { "epoch": 0.764923095703125, "grad_norm": 8.90911865234375, "learning_rate": 6.525949788127759e-07, "loss": 3.2395, "step": 75195 }, { "epoch": 0.7649739583333334, "grad_norm": 10.19819450378418, "learning_rate": 6.52325583822048e-07, "loss": 3.8694, "step": 75200 }, { "epoch": 0.7650248209635416, "grad_norm": 9.232595443725586, "learning_rate": 6.520562361036311e-07, "loss": 3.3014, "step": 75205 }, { "epoch": 0.76507568359375, "grad_norm": 11.723776817321777, "learning_rate": 6.517869356644182e-07, "loss": 3.49, "step": 75210 }, { "epoch": 0.7651265462239584, "grad_norm": 11.485301971435547, "learning_rate": 6.515176825112987e-07, "loss": 3.7529, "step": 75215 }, { "epoch": 0.7651774088541666, "grad_norm": 12.483345031738281, "learning_rate": 6.512484766511601e-07, "loss": 3.5844, "step": 75220 }, { "epoch": 0.765228271484375, "grad_norm": 14.69423770904541, "learning_rate": 6.509793180908913e-07, "loss": 3.4327, "step": 75225 }, { "epoch": 0.7652791341145834, "grad_norm": 15.218012809753418, "learning_rate": 6.507102068373788e-07, "loss": 3.1909, "step": 75230 }, { "epoch": 0.7653299967447916, "grad_norm": 13.670184135437012, "learning_rate": 6.504411428975078e-07, "loss": 3.6414, "step": 75235 }, { "epoch": 0.765380859375, "grad_norm": 14.741324424743652, "learning_rate": 6.501721262781607e-07, "loss": 3.3462, "step": 75240 }, { "epoch": 0.7654317220052084, "grad_norm": 8.339617729187012, "learning_rate": 6.499031569862221e-07, "loss": 3.1363, "step": 75245 }, { "epoch": 0.7654825846354166, "grad_norm": 9.616954803466797, "learning_rate": 6.49634235028572e-07, "loss": 3.2762, "step": 75250 }, { "epoch": 0.765533447265625, "grad_norm": 12.235074043273926, "learning_rate": 6.49365360412092e-07, "loss": 3.6203, "step": 75255 }, { "epoch": 0.7655843098958334, "grad_norm": 15.989127159118652, "learning_rate": 6.490965331436599e-07, "loss": 3.6928, "step": 75260 }, { "epoch": 0.7656351725260416, "grad_norm": 8.670943260192871, "learning_rate": 6.488277532301551e-07, "loss": 3.0947, "step": 75265 }, { "epoch": 0.76568603515625, "grad_norm": 12.120124816894531, "learning_rate": 6.485590206784534e-07, "loss": 3.474, "step": 75270 }, { "epoch": 0.7657368977864584, "grad_norm": 16.00341033935547, "learning_rate": 6.482903354954298e-07, "loss": 3.313, "step": 75275 }, { "epoch": 0.7657877604166666, "grad_norm": 15.028971672058105, "learning_rate": 6.480216976879589e-07, "loss": 3.2195, "step": 75280 }, { "epoch": 0.765838623046875, "grad_norm": 15.711023330688477, "learning_rate": 6.477531072629145e-07, "loss": 3.1773, "step": 75285 }, { "epoch": 0.7658894856770834, "grad_norm": 7.689911365509033, "learning_rate": 6.47484564227168e-07, "loss": 3.4065, "step": 75290 }, { "epoch": 0.7659403483072916, "grad_norm": 12.732672691345215, "learning_rate": 6.472160685875897e-07, "loss": 3.3059, "step": 75295 }, { "epoch": 0.7659912109375, "grad_norm": 11.199082374572754, "learning_rate": 6.469476203510486e-07, "loss": 3.0007, "step": 75300 }, { "epoch": 0.7660420735677084, "grad_norm": 10.820151329040527, "learning_rate": 6.466792195244131e-07, "loss": 3.4755, "step": 75305 }, { "epoch": 0.7660929361979166, "grad_norm": 11.690393447875977, "learning_rate": 6.464108661145516e-07, "loss": 3.3837, "step": 75310 }, { "epoch": 0.766143798828125, "grad_norm": 8.426977157592773, "learning_rate": 6.461425601283283e-07, "loss": 3.1037, "step": 75315 }, { "epoch": 0.7661946614583334, "grad_norm": 7.525014400482178, "learning_rate": 6.458743015726079e-07, "loss": 3.142, "step": 75320 }, { "epoch": 0.7662455240885416, "grad_norm": 14.435430526733398, "learning_rate": 6.456060904542538e-07, "loss": 3.1921, "step": 75325 }, { "epoch": 0.76629638671875, "grad_norm": 12.716520309448242, "learning_rate": 6.453379267801291e-07, "loss": 3.3251, "step": 75330 }, { "epoch": 0.7663472493489584, "grad_norm": 12.661298751831055, "learning_rate": 6.45069810557093e-07, "loss": 3.1351, "step": 75335 }, { "epoch": 0.7663981119791666, "grad_norm": 16.14558219909668, "learning_rate": 6.448017417920072e-07, "loss": 3.1468, "step": 75340 }, { "epoch": 0.766448974609375, "grad_norm": 8.328774452209473, "learning_rate": 6.44533720491729e-07, "loss": 3.0908, "step": 75345 }, { "epoch": 0.7664998372395834, "grad_norm": 13.309703826904297, "learning_rate": 6.44265746663115e-07, "loss": 3.279, "step": 75350 }, { "epoch": 0.7665506998697916, "grad_norm": 11.739712715148926, "learning_rate": 6.439978203130218e-07, "loss": 3.774, "step": 75355 }, { "epoch": 0.7666015625, "grad_norm": 11.170668601989746, "learning_rate": 6.437299414483056e-07, "loss": 3.0828, "step": 75360 }, { "epoch": 0.7666524251302084, "grad_norm": 8.549630165100098, "learning_rate": 6.434621100758184e-07, "loss": 3.4049, "step": 75365 }, { "epoch": 0.7667032877604166, "grad_norm": 16.483396530151367, "learning_rate": 6.431943262024135e-07, "loss": 3.0375, "step": 75370 }, { "epoch": 0.766754150390625, "grad_norm": 18.708301544189453, "learning_rate": 6.429265898349407e-07, "loss": 3.7378, "step": 75375 }, { "epoch": 0.7668050130208334, "grad_norm": 8.167903900146484, "learning_rate": 6.426589009802509e-07, "loss": 3.1887, "step": 75380 }, { "epoch": 0.7668558756510416, "grad_norm": 11.62201976776123, "learning_rate": 6.423912596451937e-07, "loss": 3.66, "step": 75385 }, { "epoch": 0.76690673828125, "grad_norm": 13.677452087402344, "learning_rate": 6.421236658366159e-07, "loss": 3.2397, "step": 75390 }, { "epoch": 0.7669576009114584, "grad_norm": 16.496095657348633, "learning_rate": 6.418561195613629e-07, "loss": 3.3751, "step": 75395 }, { "epoch": 0.7670084635416666, "grad_norm": 14.67243480682373, "learning_rate": 6.415886208262814e-07, "loss": 3.3761, "step": 75400 }, { "epoch": 0.767059326171875, "grad_norm": 11.018709182739258, "learning_rate": 6.413211696382137e-07, "loss": 3.3401, "step": 75405 }, { "epoch": 0.7671101888020834, "grad_norm": 15.72050666809082, "learning_rate": 6.410537660040042e-07, "loss": 3.1229, "step": 75410 }, { "epoch": 0.7671610514322916, "grad_norm": 8.102333068847656, "learning_rate": 6.407864099304928e-07, "loss": 3.4936, "step": 75415 }, { "epoch": 0.7672119140625, "grad_norm": 14.650758743286133, "learning_rate": 6.40519101424521e-07, "loss": 3.7129, "step": 75420 }, { "epoch": 0.7672627766927084, "grad_norm": 15.552038192749023, "learning_rate": 6.402518404929273e-07, "loss": 3.7393, "step": 75425 }, { "epoch": 0.7673136393229166, "grad_norm": 14.366085052490234, "learning_rate": 6.399846271425486e-07, "loss": 3.3615, "step": 75430 }, { "epoch": 0.767364501953125, "grad_norm": 7.687265872955322, "learning_rate": 6.397174613802226e-07, "loss": 3.0453, "step": 75435 }, { "epoch": 0.7674153645833334, "grad_norm": 13.427799224853516, "learning_rate": 6.394503432127849e-07, "loss": 3.2276, "step": 75440 }, { "epoch": 0.7674662272135416, "grad_norm": 16.668745040893555, "learning_rate": 6.391832726470693e-07, "loss": 2.894, "step": 75445 }, { "epoch": 0.76751708984375, "grad_norm": 10.8818941116333, "learning_rate": 6.389162496899079e-07, "loss": 3.3841, "step": 75450 }, { "epoch": 0.7675679524739584, "grad_norm": 12.166550636291504, "learning_rate": 6.386492743481337e-07, "loss": 3.178, "step": 75455 }, { "epoch": 0.7676188151041666, "grad_norm": 14.593305587768555, "learning_rate": 6.38382346628576e-07, "loss": 3.5089, "step": 75460 }, { "epoch": 0.767669677734375, "grad_norm": 9.180642127990723, "learning_rate": 6.381154665380654e-07, "loss": 3.5605, "step": 75465 }, { "epoch": 0.7677205403645834, "grad_norm": 11.039069175720215, "learning_rate": 6.378486340834286e-07, "loss": 3.3149, "step": 75470 }, { "epoch": 0.7677714029947916, "grad_norm": 12.083093643188477, "learning_rate": 6.375818492714938e-07, "loss": 3.237, "step": 75475 }, { "epoch": 0.767822265625, "grad_norm": 10.207786560058594, "learning_rate": 6.373151121090853e-07, "loss": 3.4434, "step": 75480 }, { "epoch": 0.7678731282552084, "grad_norm": 13.316230773925781, "learning_rate": 6.370484226030291e-07, "loss": 3.2465, "step": 75485 }, { "epoch": 0.7679239908854166, "grad_norm": 16.586711883544922, "learning_rate": 6.367817807601462e-07, "loss": 3.2579, "step": 75490 }, { "epoch": 0.767974853515625, "grad_norm": 14.756383895874023, "learning_rate": 6.36515186587261e-07, "loss": 3.8092, "step": 75495 }, { "epoch": 0.7680257161458334, "grad_norm": 12.652300834655762, "learning_rate": 6.362486400911927e-07, "loss": 3.3194, "step": 75500 }, { "epoch": 0.7680765787760416, "grad_norm": 12.614615440368652, "learning_rate": 6.359821412787606e-07, "loss": 3.0277, "step": 75505 }, { "epoch": 0.76812744140625, "grad_norm": 9.340450286865234, "learning_rate": 6.357156901567837e-07, "loss": 3.5647, "step": 75510 }, { "epoch": 0.7681783040364584, "grad_norm": 10.129216194152832, "learning_rate": 6.354492867320794e-07, "loss": 3.2587, "step": 75515 }, { "epoch": 0.7682291666666666, "grad_norm": 16.417810440063477, "learning_rate": 6.351829310114635e-07, "loss": 3.3869, "step": 75520 }, { "epoch": 0.768280029296875, "grad_norm": 11.393208503723145, "learning_rate": 6.349166230017501e-07, "loss": 3.363, "step": 75525 }, { "epoch": 0.7683308919270834, "grad_norm": 9.716226577758789, "learning_rate": 6.346503627097522e-07, "loss": 3.0523, "step": 75530 }, { "epoch": 0.7683817545572916, "grad_norm": 13.312603950500488, "learning_rate": 6.343841501422826e-07, "loss": 3.4017, "step": 75535 }, { "epoch": 0.7684326171875, "grad_norm": 9.42683219909668, "learning_rate": 6.34117985306153e-07, "loss": 3.1427, "step": 75540 }, { "epoch": 0.7684834798177084, "grad_norm": 11.141545295715332, "learning_rate": 6.338518682081718e-07, "loss": 3.4514, "step": 75545 }, { "epoch": 0.7685343424479166, "grad_norm": 16.647235870361328, "learning_rate": 6.335857988551489e-07, "loss": 3.3977, "step": 75550 }, { "epoch": 0.768585205078125, "grad_norm": 10.697653770446777, "learning_rate": 6.33319777253891e-07, "loss": 3.4897, "step": 75555 }, { "epoch": 0.7686360677083334, "grad_norm": 10.337068557739258, "learning_rate": 6.330538034112035e-07, "loss": 3.5939, "step": 75560 }, { "epoch": 0.7686869303385416, "grad_norm": 12.20175552368164, "learning_rate": 6.327878773338919e-07, "loss": 3.5823, "step": 75565 }, { "epoch": 0.76873779296875, "grad_norm": 15.400045394897461, "learning_rate": 6.325219990287604e-07, "loss": 3.8783, "step": 75570 }, { "epoch": 0.7687886555989584, "grad_norm": 13.569960594177246, "learning_rate": 6.322561685026112e-07, "loss": 3.0406, "step": 75575 }, { "epoch": 0.7688395182291666, "grad_norm": 14.378377914428711, "learning_rate": 6.319903857622451e-07, "loss": 2.861, "step": 75580 }, { "epoch": 0.768890380859375, "grad_norm": 13.627618789672852, "learning_rate": 6.317246508144614e-07, "loss": 3.1944, "step": 75585 }, { "epoch": 0.7689412434895834, "grad_norm": 8.510849952697754, "learning_rate": 6.314589636660598e-07, "loss": 3.1475, "step": 75590 }, { "epoch": 0.7689921061197916, "grad_norm": 15.343955993652344, "learning_rate": 6.311933243238383e-07, "loss": 3.0304, "step": 75595 }, { "epoch": 0.76904296875, "grad_norm": 15.795459747314453, "learning_rate": 6.309277327945929e-07, "loss": 3.2039, "step": 75600 }, { "epoch": 0.7690938313802084, "grad_norm": 14.032368659973145, "learning_rate": 6.306621890851175e-07, "loss": 3.5281, "step": 75605 }, { "epoch": 0.7691446940104166, "grad_norm": 13.588349342346191, "learning_rate": 6.303966932022076e-07, "loss": 2.9674, "step": 75610 }, { "epoch": 0.769195556640625, "grad_norm": 10.120952606201172, "learning_rate": 6.301312451526545e-07, "loss": 3.4561, "step": 75615 }, { "epoch": 0.7692464192708334, "grad_norm": 10.869763374328613, "learning_rate": 6.298658449432513e-07, "loss": 3.1571, "step": 75620 }, { "epoch": 0.7692972819010416, "grad_norm": 12.92966365814209, "learning_rate": 6.296004925807861e-07, "loss": 3.2906, "step": 75625 }, { "epoch": 0.76934814453125, "grad_norm": 13.265793800354004, "learning_rate": 6.293351880720497e-07, "loss": 3.5597, "step": 75630 }, { "epoch": 0.7693990071614584, "grad_norm": 9.10334587097168, "learning_rate": 6.290699314238294e-07, "loss": 3.1268, "step": 75635 }, { "epoch": 0.7694498697916666, "grad_norm": 8.346096992492676, "learning_rate": 6.288047226429098e-07, "loss": 3.198, "step": 75640 }, { "epoch": 0.769500732421875, "grad_norm": 9.279213905334473, "learning_rate": 6.285395617360793e-07, "loss": 3.5123, "step": 75645 }, { "epoch": 0.7695515950520834, "grad_norm": 9.381355285644531, "learning_rate": 6.282744487101203e-07, "loss": 3.2821, "step": 75650 }, { "epoch": 0.7696024576822916, "grad_norm": 15.720799446105957, "learning_rate": 6.280093835718163e-07, "loss": 3.9323, "step": 75655 }, { "epoch": 0.7696533203125, "grad_norm": 10.18575668334961, "learning_rate": 6.277443663279476e-07, "loss": 3.3029, "step": 75660 }, { "epoch": 0.7697041829427084, "grad_norm": 9.984713554382324, "learning_rate": 6.274793969852954e-07, "loss": 3.5297, "step": 75665 }, { "epoch": 0.7697550455729166, "grad_norm": 8.69368839263916, "learning_rate": 6.272144755506398e-07, "loss": 3.4563, "step": 75670 }, { "epoch": 0.769805908203125, "grad_norm": 14.534234046936035, "learning_rate": 6.26949602030758e-07, "loss": 3.1021, "step": 75675 }, { "epoch": 0.7698567708333334, "grad_norm": 8.975381851196289, "learning_rate": 6.26684776432426e-07, "loss": 3.4122, "step": 75680 }, { "epoch": 0.7699076334635416, "grad_norm": 13.761950492858887, "learning_rate": 6.264199987624206e-07, "loss": 3.6279, "step": 75685 }, { "epoch": 0.76995849609375, "grad_norm": 14.036504745483398, "learning_rate": 6.261552690275147e-07, "loss": 3.4036, "step": 75690 }, { "epoch": 0.7700093587239584, "grad_norm": 10.96495532989502, "learning_rate": 6.258905872344828e-07, "loss": 3.5949, "step": 75695 }, { "epoch": 0.7700602213541666, "grad_norm": 14.842350959777832, "learning_rate": 6.256259533900954e-07, "loss": 3.301, "step": 75700 }, { "epoch": 0.770111083984375, "grad_norm": 7.5997490882873535, "learning_rate": 6.253613675011244e-07, "loss": 3.4585, "step": 75705 }, { "epoch": 0.7701619466145834, "grad_norm": 11.933259963989258, "learning_rate": 6.250968295743384e-07, "loss": 3.3573, "step": 75710 }, { "epoch": 0.7702128092447916, "grad_norm": 15.504426956176758, "learning_rate": 6.248323396165048e-07, "loss": 3.3533, "step": 75715 }, { "epoch": 0.770263671875, "grad_norm": 10.376163482666016, "learning_rate": 6.245678976343916e-07, "loss": 3.15, "step": 75720 }, { "epoch": 0.7703145345052084, "grad_norm": 13.387959480285645, "learning_rate": 6.243035036347647e-07, "loss": 3.9129, "step": 75725 }, { "epoch": 0.7703653971354166, "grad_norm": 8.265640258789062, "learning_rate": 6.240391576243881e-07, "loss": 3.6994, "step": 75730 }, { "epoch": 0.770416259765625, "grad_norm": 15.54826831817627, "learning_rate": 6.237748596100246e-07, "loss": 2.9979, "step": 75735 }, { "epoch": 0.7704671223958334, "grad_norm": 13.252427101135254, "learning_rate": 6.235106095984369e-07, "loss": 3.3592, "step": 75740 }, { "epoch": 0.7705179850260416, "grad_norm": 9.107203483581543, "learning_rate": 6.232464075963851e-07, "loss": 3.2647, "step": 75745 }, { "epoch": 0.77056884765625, "grad_norm": 11.003279685974121, "learning_rate": 6.229822536106298e-07, "loss": 3.1104, "step": 75750 }, { "epoch": 0.7706197102864584, "grad_norm": 11.99866008758545, "learning_rate": 6.227181476479278e-07, "loss": 3.5451, "step": 75755 }, { "epoch": 0.7706705729166666, "grad_norm": 15.338647842407227, "learning_rate": 6.224540897150374e-07, "loss": 3.2154, "step": 75760 }, { "epoch": 0.770721435546875, "grad_norm": 10.090010643005371, "learning_rate": 6.221900798187147e-07, "loss": 3.501, "step": 75765 }, { "epoch": 0.7707722981770834, "grad_norm": 8.421314239501953, "learning_rate": 6.219261179657126e-07, "loss": 2.9642, "step": 75770 }, { "epoch": 0.7708231608072916, "grad_norm": 13.950006484985352, "learning_rate": 6.216622041627857e-07, "loss": 3.2954, "step": 75775 }, { "epoch": 0.7708740234375, "grad_norm": 10.523149490356445, "learning_rate": 6.213983384166868e-07, "loss": 3.0715, "step": 75780 }, { "epoch": 0.7709248860677084, "grad_norm": 11.545313835144043, "learning_rate": 6.211345207341663e-07, "loss": 3.3117, "step": 75785 }, { "epoch": 0.7709757486979166, "grad_norm": 12.871099472045898, "learning_rate": 6.208707511219736e-07, "loss": 3.4587, "step": 75790 }, { "epoch": 0.771026611328125, "grad_norm": 13.466527938842773, "learning_rate": 6.20607029586856e-07, "loss": 3.2627, "step": 75795 }, { "epoch": 0.7710774739583334, "grad_norm": 10.577140808105469, "learning_rate": 6.203433561355634e-07, "loss": 3.4507, "step": 75800 }, { "epoch": 0.7711283365885416, "grad_norm": 12.235979080200195, "learning_rate": 6.200797307748405e-07, "loss": 3.419, "step": 75805 }, { "epoch": 0.77117919921875, "grad_norm": 8.995306015014648, "learning_rate": 6.198161535114322e-07, "loss": 3.3275, "step": 75810 }, { "epoch": 0.7712300618489584, "grad_norm": 17.704317092895508, "learning_rate": 6.195526243520814e-07, "loss": 3.3854, "step": 75815 }, { "epoch": 0.7712809244791666, "grad_norm": 8.587143898010254, "learning_rate": 6.192891433035306e-07, "loss": 3.3279, "step": 75820 }, { "epoch": 0.771331787109375, "grad_norm": 15.102418899536133, "learning_rate": 6.190257103725222e-07, "loss": 3.452, "step": 75825 }, { "epoch": 0.7713826497395834, "grad_norm": 7.975759506225586, "learning_rate": 6.187623255657945e-07, "loss": 3.6413, "step": 75830 }, { "epoch": 0.7714335123697916, "grad_norm": 10.76710033416748, "learning_rate": 6.184989888900874e-07, "loss": 3.334, "step": 75835 }, { "epoch": 0.771484375, "grad_norm": 9.975716590881348, "learning_rate": 6.182357003521377e-07, "loss": 3.4803, "step": 75840 }, { "epoch": 0.7715352376302084, "grad_norm": 13.248037338256836, "learning_rate": 6.179724599586809e-07, "loss": 4.0471, "step": 75845 }, { "epoch": 0.7715861002604166, "grad_norm": 14.565338134765625, "learning_rate": 6.177092677164528e-07, "loss": 3.2401, "step": 75850 }, { "epoch": 0.771636962890625, "grad_norm": 8.690911293029785, "learning_rate": 6.174461236321874e-07, "loss": 3.2437, "step": 75855 }, { "epoch": 0.7716878255208334, "grad_norm": 10.3700532913208, "learning_rate": 6.17183027712617e-07, "loss": 3.4599, "step": 75860 }, { "epoch": 0.7717386881510416, "grad_norm": 8.442960739135742, "learning_rate": 6.169199799644723e-07, "loss": 3.5185, "step": 75865 }, { "epoch": 0.77178955078125, "grad_norm": 13.842940330505371, "learning_rate": 6.166569803944827e-07, "loss": 3.309, "step": 75870 }, { "epoch": 0.7718404134114584, "grad_norm": 12.644615173339844, "learning_rate": 6.163940290093781e-07, "loss": 3.4853, "step": 75875 }, { "epoch": 0.7718912760416666, "grad_norm": 10.915739059448242, "learning_rate": 6.161311258158866e-07, "loss": 3.8723, "step": 75880 }, { "epoch": 0.771942138671875, "grad_norm": 11.840435981750488, "learning_rate": 6.158682708207336e-07, "loss": 3.248, "step": 75885 }, { "epoch": 0.7719930013020834, "grad_norm": 14.093424797058105, "learning_rate": 6.156054640306435e-07, "loss": 3.3579, "step": 75890 }, { "epoch": 0.7720438639322916, "grad_norm": 11.389480590820312, "learning_rate": 6.15342705452342e-07, "loss": 3.1137, "step": 75895 }, { "epoch": 0.7720947265625, "grad_norm": 10.092771530151367, "learning_rate": 6.150799950925496e-07, "loss": 3.3353, "step": 75900 }, { "epoch": 0.7721455891927084, "grad_norm": 11.442878723144531, "learning_rate": 6.148173329579896e-07, "loss": 3.0741, "step": 75905 }, { "epoch": 0.7721964518229166, "grad_norm": 9.948512077331543, "learning_rate": 6.145547190553808e-07, "loss": 3.5393, "step": 75910 }, { "epoch": 0.772247314453125, "grad_norm": 9.788599967956543, "learning_rate": 6.142921533914432e-07, "loss": 3.1825, "step": 75915 }, { "epoch": 0.7722981770833334, "grad_norm": 126.83859252929688, "learning_rate": 6.140296359728937e-07, "loss": 3.2213, "step": 75920 }, { "epoch": 0.7723490397135416, "grad_norm": 12.804304122924805, "learning_rate": 6.137671668064482e-07, "loss": 3.1213, "step": 75925 }, { "epoch": 0.77239990234375, "grad_norm": 13.880815505981445, "learning_rate": 6.135047458988228e-07, "loss": 3.341, "step": 75930 }, { "epoch": 0.7724507649739584, "grad_norm": 9.59664249420166, "learning_rate": 6.13242373256732e-07, "loss": 3.3479, "step": 75935 }, { "epoch": 0.7725016276041666, "grad_norm": 13.731152534484863, "learning_rate": 6.129800488868878e-07, "loss": 3.5091, "step": 75940 }, { "epoch": 0.772552490234375, "grad_norm": 13.55499267578125, "learning_rate": 6.127177727960013e-07, "loss": 3.1433, "step": 75945 }, { "epoch": 0.7726033528645834, "grad_norm": 8.930282592773438, "learning_rate": 6.124555449907832e-07, "loss": 3.3842, "step": 75950 }, { "epoch": 0.7726542154947916, "grad_norm": 10.433351516723633, "learning_rate": 6.12193365477943e-07, "loss": 3.6894, "step": 75955 }, { "epoch": 0.772705078125, "grad_norm": 14.040569305419922, "learning_rate": 6.119312342641883e-07, "loss": 3.3541, "step": 75960 }, { "epoch": 0.7727559407552084, "grad_norm": 11.76894760131836, "learning_rate": 6.116691513562248e-07, "loss": 3.1397, "step": 75965 }, { "epoch": 0.7728068033854166, "grad_norm": 12.630956649780273, "learning_rate": 6.114071167607591e-07, "loss": 3.4149, "step": 75970 }, { "epoch": 0.772857666015625, "grad_norm": 13.352283477783203, "learning_rate": 6.111451304844939e-07, "loss": 3.1505, "step": 75975 }, { "epoch": 0.7729085286458334, "grad_norm": 10.95576286315918, "learning_rate": 6.108831925341339e-07, "loss": 3.2773, "step": 75980 }, { "epoch": 0.7729593912760416, "grad_norm": 12.064718246459961, "learning_rate": 6.106213029163785e-07, "loss": 3.3229, "step": 75985 }, { "epoch": 0.77301025390625, "grad_norm": 15.52635669708252, "learning_rate": 6.103594616379299e-07, "loss": 3.1987, "step": 75990 }, { "epoch": 0.7730611165364584, "grad_norm": 14.002089500427246, "learning_rate": 6.100976687054869e-07, "loss": 3.2175, "step": 75995 }, { "epoch": 0.7731119791666666, "grad_norm": 13.10692024230957, "learning_rate": 6.098359241257462e-07, "loss": 3.4373, "step": 76000 }, { "epoch": 0.773162841796875, "grad_norm": 7.944686412811279, "learning_rate": 6.095742279054053e-07, "loss": 3.4441, "step": 76005 }, { "epoch": 0.7732137044270834, "grad_norm": 18.644197463989258, "learning_rate": 6.093125800511607e-07, "loss": 3.3481, "step": 76010 }, { "epoch": 0.7732645670572916, "grad_norm": 12.620013236999512, "learning_rate": 6.090509805697053e-07, "loss": 3.4923, "step": 76015 }, { "epoch": 0.7733154296875, "grad_norm": 14.259735107421875, "learning_rate": 6.087894294677326e-07, "loss": 3.0934, "step": 76020 }, { "epoch": 0.7733662923177084, "grad_norm": 17.620956420898438, "learning_rate": 6.085279267519334e-07, "loss": 3.453, "step": 76025 }, { "epoch": 0.7734171549479166, "grad_norm": 14.865498542785645, "learning_rate": 6.082664724289986e-07, "loss": 3.2248, "step": 76030 }, { "epoch": 0.773468017578125, "grad_norm": 10.771533966064453, "learning_rate": 6.080050665056186e-07, "loss": 3.1181, "step": 76035 }, { "epoch": 0.7735188802083334, "grad_norm": 9.232945442199707, "learning_rate": 6.077437089884797e-07, "loss": 3.438, "step": 76040 }, { "epoch": 0.7735697428385416, "grad_norm": 11.017297744750977, "learning_rate": 6.074823998842702e-07, "loss": 3.1982, "step": 76045 }, { "epoch": 0.77362060546875, "grad_norm": 15.718427658081055, "learning_rate": 6.072211391996752e-07, "loss": 3.5306, "step": 76050 }, { "epoch": 0.7736714680989584, "grad_norm": 97.13336181640625, "learning_rate": 6.069599269413776e-07, "loss": 3.251, "step": 76055 }, { "epoch": 0.7737223307291666, "grad_norm": 13.607686042785645, "learning_rate": 6.066987631160617e-07, "loss": 3.0443, "step": 76060 }, { "epoch": 0.773773193359375, "grad_norm": 15.883814811706543, "learning_rate": 6.064376477304102e-07, "loss": 3.4098, "step": 76065 }, { "epoch": 0.7738240559895834, "grad_norm": 8.217681884765625, "learning_rate": 6.061765807911022e-07, "loss": 3.145, "step": 76070 }, { "epoch": 0.7738749186197916, "grad_norm": 12.272388458251953, "learning_rate": 6.059155623048177e-07, "loss": 3.1563, "step": 76075 }, { "epoch": 0.77392578125, "grad_norm": 9.546619415283203, "learning_rate": 6.056545922782339e-07, "loss": 2.8569, "step": 76080 }, { "epoch": 0.7739766438802084, "grad_norm": 7.779810428619385, "learning_rate": 6.053936707180283e-07, "loss": 3.3145, "step": 76085 }, { "epoch": 0.7740275065104166, "grad_norm": 18.373014450073242, "learning_rate": 6.051327976308774e-07, "loss": 3.1823, "step": 76090 }, { "epoch": 0.774078369140625, "grad_norm": 18.18805694580078, "learning_rate": 6.048719730234548e-07, "loss": 3.2019, "step": 76095 }, { "epoch": 0.7741292317708334, "grad_norm": 10.839600563049316, "learning_rate": 6.046111969024327e-07, "loss": 3.4768, "step": 76100 }, { "epoch": 0.7741800944010416, "grad_norm": 12.216818809509277, "learning_rate": 6.043504692744837e-07, "loss": 3.3204, "step": 76105 }, { "epoch": 0.77423095703125, "grad_norm": 13.3114595413208, "learning_rate": 6.040897901462797e-07, "loss": 3.1056, "step": 76110 }, { "epoch": 0.7742818196614584, "grad_norm": 12.069828033447266, "learning_rate": 6.038291595244889e-07, "loss": 3.0938, "step": 76115 }, { "epoch": 0.7743326822916666, "grad_norm": 8.250632286071777, "learning_rate": 6.035685774157787e-07, "loss": 3.424, "step": 76120 }, { "epoch": 0.774383544921875, "grad_norm": 10.238628387451172, "learning_rate": 6.033080438268179e-07, "loss": 3.5923, "step": 76125 }, { "epoch": 0.7744344075520834, "grad_norm": 13.416050910949707, "learning_rate": 6.030475587642703e-07, "loss": 3.323, "step": 76130 }, { "epoch": 0.7744852701822916, "grad_norm": 13.140199661254883, "learning_rate": 6.027871222348014e-07, "loss": 3.6046, "step": 76135 }, { "epoch": 0.7745361328125, "grad_norm": 10.973596572875977, "learning_rate": 6.025267342450753e-07, "loss": 3.5131, "step": 76140 }, { "epoch": 0.7745869954427084, "grad_norm": 13.73587703704834, "learning_rate": 6.022663948017524e-07, "loss": 2.9926, "step": 76145 }, { "epoch": 0.7746378580729166, "grad_norm": 10.315624237060547, "learning_rate": 6.020061039114944e-07, "loss": 3.1911, "step": 76150 }, { "epoch": 0.774688720703125, "grad_norm": 14.70566177368164, "learning_rate": 6.017458615809593e-07, "loss": 3.2069, "step": 76155 }, { "epoch": 0.7747395833333334, "grad_norm": 12.014630317687988, "learning_rate": 6.014856678168069e-07, "loss": 3.2442, "step": 76160 }, { "epoch": 0.7747904459635416, "grad_norm": 13.165812492370605, "learning_rate": 6.012255226256938e-07, "loss": 3.3926, "step": 76165 }, { "epoch": 0.77484130859375, "grad_norm": 7.364107131958008, "learning_rate": 6.009654260142761e-07, "loss": 3.0502, "step": 76170 }, { "epoch": 0.7748921712239584, "grad_norm": 11.134225845336914, "learning_rate": 6.007053779892069e-07, "loss": 3.406, "step": 76175 }, { "epoch": 0.7749430338541666, "grad_norm": 8.265276908874512, "learning_rate": 6.004453785571413e-07, "loss": 3.2829, "step": 76180 }, { "epoch": 0.774993896484375, "grad_norm": 11.279505729675293, "learning_rate": 6.001854277247296e-07, "loss": 3.7411, "step": 76185 }, { "epoch": 0.7750447591145834, "grad_norm": 13.352738380432129, "learning_rate": 5.999255254986242e-07, "loss": 3.3503, "step": 76190 }, { "epoch": 0.7750956217447916, "grad_norm": 8.39072322845459, "learning_rate": 5.996656718854733e-07, "loss": 3.1168, "step": 76195 }, { "epoch": 0.775146484375, "grad_norm": 13.08597469329834, "learning_rate": 5.994058668919261e-07, "loss": 3.3923, "step": 76200 }, { "epoch": 0.7751973470052084, "grad_norm": 13.441854476928711, "learning_rate": 5.991461105246296e-07, "loss": 3.1712, "step": 76205 }, { "epoch": 0.7752482096354166, "grad_norm": 10.724113464355469, "learning_rate": 5.988864027902286e-07, "loss": 3.559, "step": 76210 }, { "epoch": 0.775299072265625, "grad_norm": 13.422377586364746, "learning_rate": 5.986267436953683e-07, "loss": 2.9751, "step": 76215 }, { "epoch": 0.7753499348958334, "grad_norm": 10.692633628845215, "learning_rate": 5.983671332466926e-07, "loss": 3.2711, "step": 76220 }, { "epoch": 0.7754007975260416, "grad_norm": 12.448526382446289, "learning_rate": 5.981075714508433e-07, "loss": 3.1235, "step": 76225 }, { "epoch": 0.77545166015625, "grad_norm": 11.251077651977539, "learning_rate": 5.978480583144602e-07, "loss": 4.0709, "step": 76230 }, { "epoch": 0.7755025227864584, "grad_norm": 10.245332717895508, "learning_rate": 5.975885938441844e-07, "loss": 3.1104, "step": 76235 }, { "epoch": 0.7755533854166666, "grad_norm": 8.04434871673584, "learning_rate": 5.973291780466528e-07, "loss": 3.4971, "step": 76240 }, { "epoch": 0.775604248046875, "grad_norm": 14.250527381896973, "learning_rate": 5.970698109285039e-07, "loss": 3.1624, "step": 76245 }, { "epoch": 0.7756551106770834, "grad_norm": 9.206746101379395, "learning_rate": 5.968104924963722e-07, "loss": 3.4041, "step": 76250 }, { "epoch": 0.7757059733072916, "grad_norm": 8.508057594299316, "learning_rate": 5.965512227568937e-07, "loss": 3.6937, "step": 76255 }, { "epoch": 0.7757568359375, "grad_norm": 10.889847755432129, "learning_rate": 5.962920017167001e-07, "loss": 3.5504, "step": 76260 }, { "epoch": 0.7758076985677084, "grad_norm": 12.170618057250977, "learning_rate": 5.960328293824253e-07, "loss": 3.1038, "step": 76265 }, { "epoch": 0.7758585611979166, "grad_norm": 16.20078468322754, "learning_rate": 5.957737057606983e-07, "loss": 3.0198, "step": 76270 }, { "epoch": 0.775909423828125, "grad_norm": 14.83310604095459, "learning_rate": 5.955146308581505e-07, "loss": 3.0933, "step": 76275 }, { "epoch": 0.7759602864583334, "grad_norm": 10.469974517822266, "learning_rate": 5.952556046814096e-07, "loss": 3.3732, "step": 76280 }, { "epoch": 0.7760111490885416, "grad_norm": 9.828927993774414, "learning_rate": 5.949966272371018e-07, "loss": 2.8567, "step": 76285 }, { "epoch": 0.77606201171875, "grad_norm": 7.971617221832275, "learning_rate": 5.947376985318537e-07, "loss": 3.6611, "step": 76290 }, { "epoch": 0.7761128743489584, "grad_norm": 11.64389419555664, "learning_rate": 5.944788185722907e-07, "loss": 3.2097, "step": 76295 }, { "epoch": 0.7761637369791666, "grad_norm": 12.310096740722656, "learning_rate": 5.942199873650356e-07, "loss": 3.4852, "step": 76300 }, { "epoch": 0.776214599609375, "grad_norm": 14.617456436157227, "learning_rate": 5.939612049167104e-07, "loss": 3.422, "step": 76305 }, { "epoch": 0.7762654622395834, "grad_norm": 12.969735145568848, "learning_rate": 5.937024712339351e-07, "loss": 3.469, "step": 76310 }, { "epoch": 0.7763163248697916, "grad_norm": 13.735860824584961, "learning_rate": 5.934437863233303e-07, "loss": 3.2358, "step": 76315 }, { "epoch": 0.7763671875, "grad_norm": 8.250831604003906, "learning_rate": 5.931851501915151e-07, "loss": 2.8565, "step": 76320 }, { "epoch": 0.7764180501302084, "grad_norm": 14.287775993347168, "learning_rate": 5.929265628451051e-07, "loss": 3.3142, "step": 76325 }, { "epoch": 0.7764689127604166, "grad_norm": 13.908318519592285, "learning_rate": 5.926680242907176e-07, "loss": 3.3686, "step": 76330 }, { "epoch": 0.776519775390625, "grad_norm": 18.167110443115234, "learning_rate": 5.924095345349667e-07, "loss": 3.3309, "step": 76335 }, { "epoch": 0.7765706380208334, "grad_norm": 7.273578643798828, "learning_rate": 5.921510935844652e-07, "loss": 4.0699, "step": 76340 }, { "epoch": 0.7766215006510416, "grad_norm": 11.433211326599121, "learning_rate": 5.918927014458253e-07, "loss": 2.9545, "step": 76345 }, { "epoch": 0.77667236328125, "grad_norm": 13.524624824523926, "learning_rate": 5.916343581256596e-07, "loss": 3.3202, "step": 76350 }, { "epoch": 0.7767232259114584, "grad_norm": 13.013619422912598, "learning_rate": 5.91376063630576e-07, "loss": 2.9796, "step": 76355 }, { "epoch": 0.7767740885416666, "grad_norm": 10.851083755493164, "learning_rate": 5.911178179671836e-07, "loss": 3.8777, "step": 76360 }, { "epoch": 0.776824951171875, "grad_norm": 9.591619491577148, "learning_rate": 5.908596211420889e-07, "loss": 3.6392, "step": 76365 }, { "epoch": 0.7768758138020834, "grad_norm": 11.787693977355957, "learning_rate": 5.906014731618981e-07, "loss": 2.8807, "step": 76370 }, { "epoch": 0.7769266764322916, "grad_norm": 16.933032989501953, "learning_rate": 5.903433740332168e-07, "loss": 3.5137, "step": 76375 }, { "epoch": 0.7769775390625, "grad_norm": 12.437687873840332, "learning_rate": 5.900853237626475e-07, "loss": 3.4988, "step": 76380 }, { "epoch": 0.7770284016927084, "grad_norm": 15.326409339904785, "learning_rate": 5.898273223567918e-07, "loss": 3.1273, "step": 76385 }, { "epoch": 0.7770792643229166, "grad_norm": 14.829540252685547, "learning_rate": 5.895693698222521e-07, "loss": 3.4237, "step": 76390 }, { "epoch": 0.777130126953125, "grad_norm": 11.511395454406738, "learning_rate": 5.893114661656266e-07, "loss": 3.0452, "step": 76395 }, { "epoch": 0.7771809895833334, "grad_norm": 12.296662330627441, "learning_rate": 5.89053611393515e-07, "loss": 3.1364, "step": 76400 }, { "epoch": 0.7772318522135416, "grad_norm": 11.894816398620605, "learning_rate": 5.887958055125132e-07, "loss": 3.1094, "step": 76405 }, { "epoch": 0.77728271484375, "grad_norm": 13.634196281433105, "learning_rate": 5.88538048529218e-07, "loss": 3.3801, "step": 76410 }, { "epoch": 0.7773335774739584, "grad_norm": 17.220705032348633, "learning_rate": 5.882803404502235e-07, "loss": 3.2957, "step": 76415 }, { "epoch": 0.7773844401041666, "grad_norm": 8.81678581237793, "learning_rate": 5.880226812821232e-07, "loss": 3.3365, "step": 76420 }, { "epoch": 0.777435302734375, "grad_norm": 9.809067726135254, "learning_rate": 5.877650710315103e-07, "loss": 3.4136, "step": 76425 }, { "epoch": 0.7774861653645834, "grad_norm": 10.364507675170898, "learning_rate": 5.875075097049745e-07, "loss": 3.304, "step": 76430 }, { "epoch": 0.7775370279947916, "grad_norm": 14.615416526794434, "learning_rate": 5.872499973091059e-07, "loss": 3.645, "step": 76435 }, { "epoch": 0.777587890625, "grad_norm": 14.780035018920898, "learning_rate": 5.86992533850492e-07, "loss": 3.2824, "step": 76440 }, { "epoch": 0.7776387532552084, "grad_norm": 14.5195894241333, "learning_rate": 5.867351193357207e-07, "loss": 3.2958, "step": 76445 }, { "epoch": 0.7776896158854166, "grad_norm": 11.425013542175293, "learning_rate": 5.864777537713787e-07, "loss": 3.3065, "step": 76450 }, { "epoch": 0.777740478515625, "grad_norm": 11.649991989135742, "learning_rate": 5.862204371640498e-07, "loss": 3.3888, "step": 76455 }, { "epoch": 0.7777913411458334, "grad_norm": 9.892515182495117, "learning_rate": 5.859631695203167e-07, "loss": 2.9988, "step": 76460 }, { "epoch": 0.7778422037760416, "grad_norm": 7.605737686157227, "learning_rate": 5.857059508467628e-07, "loss": 3.2106, "step": 76465 }, { "epoch": 0.77789306640625, "grad_norm": 14.123431205749512, "learning_rate": 5.854487811499676e-07, "loss": 3.3977, "step": 76470 }, { "epoch": 0.7779439290364584, "grad_norm": 15.220905303955078, "learning_rate": 5.851916604365123e-07, "loss": 2.9927, "step": 76475 }, { "epoch": 0.7779947916666666, "grad_norm": 11.82323169708252, "learning_rate": 5.84934588712974e-07, "loss": 2.9505, "step": 76480 }, { "epoch": 0.778045654296875, "grad_norm": 13.638838768005371, "learning_rate": 5.84677565985931e-07, "loss": 3.25, "step": 76485 }, { "epoch": 0.7780965169270834, "grad_norm": 11.528968811035156, "learning_rate": 5.844205922619584e-07, "loss": 2.7836, "step": 76490 }, { "epoch": 0.7781473795572916, "grad_norm": 8.256325721740723, "learning_rate": 5.841636675476303e-07, "loss": 3.6025, "step": 76495 }, { "epoch": 0.7781982421875, "grad_norm": 16.759963989257812, "learning_rate": 5.839067918495206e-07, "loss": 3.2637, "step": 76500 }, { "epoch": 0.7782491048177084, "grad_norm": 11.771158218383789, "learning_rate": 5.836499651742023e-07, "loss": 3.3356, "step": 76505 }, { "epoch": 0.7782999674479166, "grad_norm": 12.421799659729004, "learning_rate": 5.833931875282451e-07, "loss": 3.6195, "step": 76510 }, { "epoch": 0.778350830078125, "grad_norm": 13.912510871887207, "learning_rate": 5.831364589182184e-07, "loss": 3.1761, "step": 76515 }, { "epoch": 0.7784016927083334, "grad_norm": 14.278749465942383, "learning_rate": 5.828797793506918e-07, "loss": 3.1841, "step": 76520 }, { "epoch": 0.7784525553385416, "grad_norm": 10.667387962341309, "learning_rate": 5.826231488322309e-07, "loss": 3.1582, "step": 76525 }, { "epoch": 0.77850341796875, "grad_norm": 15.149872779846191, "learning_rate": 5.823665673694029e-07, "loss": 3.5367, "step": 76530 }, { "epoch": 0.7785542805989584, "grad_norm": 8.389341354370117, "learning_rate": 5.821100349687711e-07, "loss": 3.115, "step": 76535 }, { "epoch": 0.7786051432291666, "grad_norm": 10.314140319824219, "learning_rate": 5.818535516369003e-07, "loss": 3.3898, "step": 76540 }, { "epoch": 0.778656005859375, "grad_norm": 13.677864074707031, "learning_rate": 5.815971173803515e-07, "loss": 2.963, "step": 76545 }, { "epoch": 0.7787068684895834, "grad_norm": 8.13903522491455, "learning_rate": 5.813407322056852e-07, "loss": 3.3911, "step": 76550 }, { "epoch": 0.7787577311197916, "grad_norm": 10.670424461364746, "learning_rate": 5.810843961194615e-07, "loss": 3.0771, "step": 76555 }, { "epoch": 0.77880859375, "grad_norm": 11.17371654510498, "learning_rate": 5.808281091282392e-07, "loss": 3.5922, "step": 76560 }, { "epoch": 0.7788594563802084, "grad_norm": 14.148518562316895, "learning_rate": 5.805718712385749e-07, "loss": 2.9514, "step": 76565 }, { "epoch": 0.7789103190104166, "grad_norm": 13.83517074584961, "learning_rate": 5.803156824570238e-07, "loss": 3.2211, "step": 76570 }, { "epoch": 0.778961181640625, "grad_norm": 8.224130630493164, "learning_rate": 5.800595427901407e-07, "loss": 3.1588, "step": 76575 }, { "epoch": 0.7790120442708334, "grad_norm": 17.40386962890625, "learning_rate": 5.798034522444801e-07, "loss": 3.3977, "step": 76580 }, { "epoch": 0.7790629069010416, "grad_norm": 11.589737892150879, "learning_rate": 5.795474108265927e-07, "loss": 3.9817, "step": 76585 }, { "epoch": 0.77911376953125, "grad_norm": 11.125046730041504, "learning_rate": 5.792914185430299e-07, "loss": 3.5473, "step": 76590 }, { "epoch": 0.7791646321614584, "grad_norm": 12.34698486328125, "learning_rate": 5.7903547540034e-07, "loss": 3.3408, "step": 76595 }, { "epoch": 0.7792154947916666, "grad_norm": 13.01732063293457, "learning_rate": 5.787795814050722e-07, "loss": 3.4159, "step": 76600 }, { "epoch": 0.779266357421875, "grad_norm": 12.777318954467773, "learning_rate": 5.785237365637742e-07, "loss": 3.358, "step": 76605 }, { "epoch": 0.7793172200520834, "grad_norm": 13.600346565246582, "learning_rate": 5.782679408829911e-07, "loss": 3.0258, "step": 76610 }, { "epoch": 0.7793680826822916, "grad_norm": 12.758298873901367, "learning_rate": 5.780121943692662e-07, "loss": 3.2326, "step": 76615 }, { "epoch": 0.7794189453125, "grad_norm": 11.570501327514648, "learning_rate": 5.777564970291447e-07, "loss": 3.0778, "step": 76620 }, { "epoch": 0.7794698079427084, "grad_norm": 15.946113586425781, "learning_rate": 5.775008488691666e-07, "loss": 3.1848, "step": 76625 }, { "epoch": 0.7795206705729166, "grad_norm": 11.616817474365234, "learning_rate": 5.772452498958739e-07, "loss": 3.3857, "step": 76630 }, { "epoch": 0.779571533203125, "grad_norm": 10.456148147583008, "learning_rate": 5.769897001158065e-07, "loss": 3.1544, "step": 76635 }, { "epoch": 0.7796223958333334, "grad_norm": 9.678732872009277, "learning_rate": 5.767341995355016e-07, "loss": 3.4938, "step": 76640 }, { "epoch": 0.7796732584635416, "grad_norm": 9.791245460510254, "learning_rate": 5.764787481614964e-07, "loss": 3.1334, "step": 76645 }, { "epoch": 0.77972412109375, "grad_norm": 12.438119888305664, "learning_rate": 5.762233460003258e-07, "loss": 3.3304, "step": 76650 }, { "epoch": 0.7797749837239584, "grad_norm": 8.740230560302734, "learning_rate": 5.759679930585249e-07, "loss": 3.2382, "step": 76655 }, { "epoch": 0.7798258463541666, "grad_norm": 10.377050399780273, "learning_rate": 5.757126893426274e-07, "loss": 3.1279, "step": 76660 }, { "epoch": 0.779876708984375, "grad_norm": 10.924396514892578, "learning_rate": 5.754574348591649e-07, "loss": 3.5955, "step": 76665 }, { "epoch": 0.7799275716145834, "grad_norm": 12.242827415466309, "learning_rate": 5.75202229614667e-07, "loss": 3.7679, "step": 76670 }, { "epoch": 0.7799784342447916, "grad_norm": 11.291284561157227, "learning_rate": 5.749470736156646e-07, "loss": 3.1324, "step": 76675 }, { "epoch": 0.780029296875, "grad_norm": 13.19884204864502, "learning_rate": 5.74691966868684e-07, "loss": 3.6457, "step": 76680 }, { "epoch": 0.7800801595052084, "grad_norm": 12.717314720153809, "learning_rate": 5.744369093802541e-07, "loss": 3.0554, "step": 76685 }, { "epoch": 0.7801310221354166, "grad_norm": 11.942824363708496, "learning_rate": 5.741819011568986e-07, "loss": 3.5642, "step": 76690 }, { "epoch": 0.780181884765625, "grad_norm": 9.46130657196045, "learning_rate": 5.739269422051436e-07, "loss": 3.3421, "step": 76695 }, { "epoch": 0.7802327473958334, "grad_norm": 17.4813232421875, "learning_rate": 5.736720325315109e-07, "loss": 3.6149, "step": 76700 }, { "epoch": 0.7802836100260416, "grad_norm": 15.691143035888672, "learning_rate": 5.734171721425222e-07, "loss": 3.1462, "step": 76705 }, { "epoch": 0.78033447265625, "grad_norm": 9.280314445495605, "learning_rate": 5.731623610446985e-07, "loss": 3.3634, "step": 76710 }, { "epoch": 0.7803853352864584, "grad_norm": 13.122340202331543, "learning_rate": 5.729075992445599e-07, "loss": 3.4787, "step": 76715 }, { "epoch": 0.7804361979166666, "grad_norm": 9.506455421447754, "learning_rate": 5.726528867486233e-07, "loss": 3.5734, "step": 76720 }, { "epoch": 0.780487060546875, "grad_norm": 15.977293968200684, "learning_rate": 5.723982235634052e-07, "loss": 3.136, "step": 76725 }, { "epoch": 0.7805379231770834, "grad_norm": 12.063434600830078, "learning_rate": 5.721436096954217e-07, "loss": 3.4242, "step": 76730 }, { "epoch": 0.7805887858072916, "grad_norm": 8.83809757232666, "learning_rate": 5.718890451511877e-07, "loss": 3.318, "step": 76735 }, { "epoch": 0.7806396484375, "grad_norm": 10.073627471923828, "learning_rate": 5.716345299372153e-07, "loss": 2.8855, "step": 76740 }, { "epoch": 0.7806905110677084, "grad_norm": 14.36465072631836, "learning_rate": 5.713800640600159e-07, "loss": 3.5889, "step": 76745 }, { "epoch": 0.7807413736979166, "grad_norm": 8.055367469787598, "learning_rate": 5.711256475261013e-07, "loss": 3.4521, "step": 76750 }, { "epoch": 0.780792236328125, "grad_norm": 9.556485176086426, "learning_rate": 5.708712803419786e-07, "loss": 3.294, "step": 76755 }, { "epoch": 0.7808430989583334, "grad_norm": 11.192523956298828, "learning_rate": 5.70616962514158e-07, "loss": 3.4719, "step": 76760 }, { "epoch": 0.7808939615885416, "grad_norm": 13.200043678283691, "learning_rate": 5.703626940491442e-07, "loss": 3.3841, "step": 76765 }, { "epoch": 0.78094482421875, "grad_norm": 16.064577102661133, "learning_rate": 5.701084749534444e-07, "loss": 3.2305, "step": 76770 }, { "epoch": 0.7809956868489584, "grad_norm": 10.967706680297852, "learning_rate": 5.698543052335617e-07, "loss": 3.1965, "step": 76775 }, { "epoch": 0.7810465494791666, "grad_norm": 15.476178169250488, "learning_rate": 5.696001848959984e-07, "loss": 3.598, "step": 76780 }, { "epoch": 0.781097412109375, "grad_norm": 11.553808212280273, "learning_rate": 5.693461139472569e-07, "loss": 3.3123, "step": 76785 }, { "epoch": 0.7811482747395834, "grad_norm": 11.69206428527832, "learning_rate": 5.690920923938381e-07, "loss": 3.0832, "step": 76790 }, { "epoch": 0.7811991373697916, "grad_norm": 12.78307056427002, "learning_rate": 5.688381202422405e-07, "loss": 3.4091, "step": 76795 }, { "epoch": 0.78125, "grad_norm": 13.889488220214844, "learning_rate": 5.685841974989617e-07, "loss": 2.9902, "step": 76800 }, { "epoch": 0.7813008626302084, "grad_norm": 14.213640213012695, "learning_rate": 5.683303241704979e-07, "loss": 3.1667, "step": 76805 }, { "epoch": 0.7813517252604166, "grad_norm": 15.656432151794434, "learning_rate": 5.680765002633449e-07, "loss": 3.0544, "step": 76810 }, { "epoch": 0.781402587890625, "grad_norm": 14.070866584777832, "learning_rate": 5.678227257839971e-07, "loss": 3.3398, "step": 76815 }, { "epoch": 0.7814534505208334, "grad_norm": 12.139039993286133, "learning_rate": 5.675690007389464e-07, "loss": 3.5697, "step": 76820 }, { "epoch": 0.7815043131510416, "grad_norm": 10.680137634277344, "learning_rate": 5.673153251346858e-07, "loss": 3.1589, "step": 76825 }, { "epoch": 0.78155517578125, "grad_norm": 13.58749771118164, "learning_rate": 5.670616989777042e-07, "loss": 2.7444, "step": 76830 }, { "epoch": 0.7816060384114584, "grad_norm": 13.907711029052734, "learning_rate": 5.668081222744901e-07, "loss": 3.3063, "step": 76835 }, { "epoch": 0.7816569010416666, "grad_norm": 12.027887344360352, "learning_rate": 5.665545950315321e-07, "loss": 3.5625, "step": 76840 }, { "epoch": 0.781707763671875, "grad_norm": 14.913036346435547, "learning_rate": 5.66301117255317e-07, "loss": 2.915, "step": 76845 }, { "epoch": 0.7817586263020834, "grad_norm": 13.611757278442383, "learning_rate": 5.660476889523298e-07, "loss": 3.4866, "step": 76850 }, { "epoch": 0.7818094889322916, "grad_norm": 7.673834323883057, "learning_rate": 5.657943101290539e-07, "loss": 3.2079, "step": 76855 }, { "epoch": 0.7818603515625, "grad_norm": 13.895394325256348, "learning_rate": 5.655409807919706e-07, "loss": 3.1756, "step": 76860 }, { "epoch": 0.7819112141927084, "grad_norm": 14.912735939025879, "learning_rate": 5.652877009475646e-07, "loss": 3.8363, "step": 76865 }, { "epoch": 0.7819620768229166, "grad_norm": 9.267807006835938, "learning_rate": 5.650344706023139e-07, "loss": 3.3846, "step": 76870 }, { "epoch": 0.782012939453125, "grad_norm": 13.284967422485352, "learning_rate": 5.647812897626976e-07, "loss": 3.519, "step": 76875 }, { "epoch": 0.7820638020833334, "grad_norm": 16.61823081970215, "learning_rate": 5.645281584351927e-07, "loss": 3.207, "step": 76880 }, { "epoch": 0.7821146647135416, "grad_norm": 7.993548393249512, "learning_rate": 5.642750766262761e-07, "loss": 3.4191, "step": 76885 }, { "epoch": 0.78216552734375, "grad_norm": 13.926109313964844, "learning_rate": 5.640220443424235e-07, "loss": 3.6736, "step": 76890 }, { "epoch": 0.7822163899739584, "grad_norm": 9.467702865600586, "learning_rate": 5.637690615901082e-07, "loss": 3.6149, "step": 76895 }, { "epoch": 0.7822672526041666, "grad_norm": 8.834341049194336, "learning_rate": 5.635161283758017e-07, "loss": 3.1105, "step": 76900 }, { "epoch": 0.782318115234375, "grad_norm": 9.07247543334961, "learning_rate": 5.632632447059766e-07, "loss": 3.4945, "step": 76905 }, { "epoch": 0.7823689778645834, "grad_norm": 10.527985572814941, "learning_rate": 5.630104105871017e-07, "loss": 3.7146, "step": 76910 }, { "epoch": 0.7824198404947916, "grad_norm": 15.648237228393555, "learning_rate": 5.627576260256465e-07, "loss": 3.4944, "step": 76915 }, { "epoch": 0.782470703125, "grad_norm": 12.348536491394043, "learning_rate": 5.625048910280789e-07, "loss": 3.4807, "step": 76920 }, { "epoch": 0.7825215657552084, "grad_norm": 11.248263359069824, "learning_rate": 5.622522056008642e-07, "loss": 3.3978, "step": 76925 }, { "epoch": 0.7825724283854166, "grad_norm": 7.9914960861206055, "learning_rate": 5.619995697504679e-07, "loss": 3.1907, "step": 76930 }, { "epoch": 0.782623291015625, "grad_norm": 15.79336929321289, "learning_rate": 5.617469834833522e-07, "loss": 3.4765, "step": 76935 }, { "epoch": 0.7826741536458334, "grad_norm": 13.746410369873047, "learning_rate": 5.614944468059808e-07, "loss": 3.3628, "step": 76940 }, { "epoch": 0.7827250162760416, "grad_norm": 12.827302932739258, "learning_rate": 5.612419597248151e-07, "loss": 3.005, "step": 76945 }, { "epoch": 0.78277587890625, "grad_norm": 11.03933048248291, "learning_rate": 5.609895222463141e-07, "loss": 3.5766, "step": 76950 }, { "epoch": 0.7828267415364584, "grad_norm": 11.479743003845215, "learning_rate": 5.60737134376936e-07, "loss": 3.5491, "step": 76955 }, { "epoch": 0.7828776041666666, "grad_norm": 11.110942840576172, "learning_rate": 5.604847961231396e-07, "loss": 3.5511, "step": 76960 }, { "epoch": 0.782928466796875, "grad_norm": 7.810293674468994, "learning_rate": 5.602325074913792e-07, "loss": 2.9781, "step": 76965 }, { "epoch": 0.7829793294270834, "grad_norm": 10.472963333129883, "learning_rate": 5.599802684881109e-07, "loss": 3.6425, "step": 76970 }, { "epoch": 0.7830301920572916, "grad_norm": 10.024048805236816, "learning_rate": 5.597280791197868e-07, "loss": 3.826, "step": 76975 }, { "epoch": 0.7830810546875, "grad_norm": 12.897725105285645, "learning_rate": 5.594759393928608e-07, "loss": 3.1493, "step": 76980 }, { "epoch": 0.7831319173177084, "grad_norm": 14.503350257873535, "learning_rate": 5.592238493137828e-07, "loss": 3.5033, "step": 76985 }, { "epoch": 0.7831827799479166, "grad_norm": 11.452110290527344, "learning_rate": 5.58971808889002e-07, "loss": 3.4164, "step": 76990 }, { "epoch": 0.783233642578125, "grad_norm": 12.399145126342773, "learning_rate": 5.587198181249673e-07, "loss": 3.3973, "step": 76995 }, { "epoch": 0.7832845052083334, "grad_norm": 13.125426292419434, "learning_rate": 5.584678770281269e-07, "loss": 3.4428, "step": 77000 }, { "epoch": 0.7833353678385416, "grad_norm": 11.738371849060059, "learning_rate": 5.582159856049252e-07, "loss": 3.0178, "step": 77005 }, { "epoch": 0.78338623046875, "grad_norm": 13.799607276916504, "learning_rate": 5.579641438618069e-07, "loss": 3.4622, "step": 77010 }, { "epoch": 0.7834370930989584, "grad_norm": 14.546651840209961, "learning_rate": 5.577123518052154e-07, "loss": 3.3319, "step": 77015 }, { "epoch": 0.7834879557291666, "grad_norm": 11.298008918762207, "learning_rate": 5.574606094415941e-07, "loss": 3.4026, "step": 77020 }, { "epoch": 0.783538818359375, "grad_norm": 7.891462326049805, "learning_rate": 5.572089167773823e-07, "loss": 3.1837, "step": 77025 }, { "epoch": 0.7835896809895834, "grad_norm": 18.3826847076416, "learning_rate": 5.569572738190193e-07, "loss": 3.2527, "step": 77030 }, { "epoch": 0.7836405436197916, "grad_norm": 14.749083518981934, "learning_rate": 5.567056805729448e-07, "loss": 3.0726, "step": 77035 }, { "epoch": 0.78369140625, "grad_norm": 12.866004943847656, "learning_rate": 5.564541370455939e-07, "loss": 3.6303, "step": 77040 }, { "epoch": 0.7837422688802084, "grad_norm": 12.914320945739746, "learning_rate": 5.562026432434039e-07, "loss": 3.2979, "step": 77045 }, { "epoch": 0.7837931315104166, "grad_norm": 12.6251802444458, "learning_rate": 5.559511991728081e-07, "loss": 3.2706, "step": 77050 }, { "epoch": 0.783843994140625, "grad_norm": 12.312646865844727, "learning_rate": 5.556998048402407e-07, "loss": 3.1125, "step": 77055 }, { "epoch": 0.7838948567708334, "grad_norm": 9.286629676818848, "learning_rate": 5.554484602521326e-07, "loss": 4.0788, "step": 77060 }, { "epoch": 0.7839457194010416, "grad_norm": 10.3337984085083, "learning_rate": 5.551971654149144e-07, "loss": 3.2471, "step": 77065 }, { "epoch": 0.78399658203125, "grad_norm": 11.835550308227539, "learning_rate": 5.549459203350157e-07, "loss": 3.0746, "step": 77070 }, { "epoch": 0.7840474446614584, "grad_norm": 15.912817001342773, "learning_rate": 5.546947250188653e-07, "loss": 3.9986, "step": 77075 }, { "epoch": 0.7840983072916666, "grad_norm": 13.086148262023926, "learning_rate": 5.544435794728892e-07, "loss": 3.4196, "step": 77080 }, { "epoch": 0.784149169921875, "grad_norm": 11.389708518981934, "learning_rate": 5.541924837035131e-07, "loss": 3.461, "step": 77085 }, { "epoch": 0.7842000325520834, "grad_norm": 15.930747032165527, "learning_rate": 5.539414377171601e-07, "loss": 3.5088, "step": 77090 }, { "epoch": 0.7842508951822916, "grad_norm": 10.358973503112793, "learning_rate": 5.536904415202546e-07, "loss": 3.6609, "step": 77095 }, { "epoch": 0.7843017578125, "grad_norm": 10.959451675415039, "learning_rate": 5.534394951192182e-07, "loss": 3.2099, "step": 77100 }, { "epoch": 0.7843526204427084, "grad_norm": 13.007122993469238, "learning_rate": 5.53188598520471e-07, "loss": 3.0855, "step": 77105 }, { "epoch": 0.7844034830729166, "grad_norm": 13.349512100219727, "learning_rate": 5.529377517304313e-07, "loss": 3.2852, "step": 77110 }, { "epoch": 0.784454345703125, "grad_norm": 77.22537994384766, "learning_rate": 5.526869547555186e-07, "loss": 3.686, "step": 77115 }, { "epoch": 0.7845052083333334, "grad_norm": 9.13428020477295, "learning_rate": 5.524362076021478e-07, "loss": 3.4315, "step": 77120 }, { "epoch": 0.7845560709635416, "grad_norm": 15.25651741027832, "learning_rate": 5.521855102767348e-07, "loss": 3.1172, "step": 77125 }, { "epoch": 0.78460693359375, "grad_norm": 15.090691566467285, "learning_rate": 5.51934862785695e-07, "loss": 3.2714, "step": 77130 }, { "epoch": 0.7846577962239584, "grad_norm": 13.206595420837402, "learning_rate": 5.516842651354395e-07, "loss": 3.1315, "step": 77135 }, { "epoch": 0.7847086588541666, "grad_norm": 12.150652885437012, "learning_rate": 5.514337173323805e-07, "loss": 3.4068, "step": 77140 }, { "epoch": 0.784759521484375, "grad_norm": 12.016641616821289, "learning_rate": 5.511832193829272e-07, "loss": 3.3858, "step": 77145 }, { "epoch": 0.7848103841145834, "grad_norm": 13.401910781860352, "learning_rate": 5.509327712934895e-07, "loss": 2.8194, "step": 77150 }, { "epoch": 0.7848612467447916, "grad_norm": 8.491545677185059, "learning_rate": 5.506823730704755e-07, "loss": 3.0501, "step": 77155 }, { "epoch": 0.784912109375, "grad_norm": 11.794517517089844, "learning_rate": 5.50432024720291e-07, "loss": 3.6858, "step": 77160 }, { "epoch": 0.7849629720052084, "grad_norm": 15.679458618164062, "learning_rate": 5.501817262493403e-07, "loss": 2.8706, "step": 77165 }, { "epoch": 0.7850138346354166, "grad_norm": 10.70201301574707, "learning_rate": 5.499314776640277e-07, "loss": 3.8139, "step": 77170 }, { "epoch": 0.785064697265625, "grad_norm": 14.19828987121582, "learning_rate": 5.49681278970757e-07, "loss": 3.2887, "step": 77175 }, { "epoch": 0.7851155598958334, "grad_norm": 10.544234275817871, "learning_rate": 5.494311301759287e-07, "loss": 3.0972, "step": 77180 }, { "epoch": 0.7851664225260416, "grad_norm": 10.095036506652832, "learning_rate": 5.491810312859417e-07, "loss": 3.2643, "step": 77185 }, { "epoch": 0.78521728515625, "grad_norm": 15.456887245178223, "learning_rate": 5.489309823071962e-07, "loss": 3.5483, "step": 77190 }, { "epoch": 0.7852681477864584, "grad_norm": 11.434060096740723, "learning_rate": 5.486809832460882e-07, "loss": 3.1546, "step": 77195 }, { "epoch": 0.7853190104166666, "grad_norm": 11.786846160888672, "learning_rate": 5.484310341090157e-07, "loss": 3.4915, "step": 77200 }, { "epoch": 0.785369873046875, "grad_norm": 11.120028495788574, "learning_rate": 5.481811349023719e-07, "loss": 3.5105, "step": 77205 }, { "epoch": 0.7854207356770834, "grad_norm": 13.742429733276367, "learning_rate": 5.479312856325519e-07, "loss": 3.2009, "step": 77210 }, { "epoch": 0.7854715983072916, "grad_norm": 12.266082763671875, "learning_rate": 5.476814863059474e-07, "loss": 3.7984, "step": 77215 }, { "epoch": 0.7855224609375, "grad_norm": 12.933247566223145, "learning_rate": 5.474317369289483e-07, "loss": 3.2753, "step": 77220 }, { "epoch": 0.7855733235677084, "grad_norm": 11.963645935058594, "learning_rate": 5.471820375079453e-07, "loss": 3.3267, "step": 77225 }, { "epoch": 0.7856241861979166, "grad_norm": 11.38316822052002, "learning_rate": 5.46932388049328e-07, "loss": 3.4104, "step": 77230 }, { "epoch": 0.785675048828125, "grad_norm": 16.832744598388672, "learning_rate": 5.466827885594828e-07, "loss": 3.7793, "step": 77235 }, { "epoch": 0.7857259114583334, "grad_norm": 9.768202781677246, "learning_rate": 5.464332390447943e-07, "loss": 3.1119, "step": 77240 }, { "epoch": 0.7857767740885416, "grad_norm": 11.314229011535645, "learning_rate": 5.461837395116492e-07, "loss": 3.2299, "step": 77245 }, { "epoch": 0.78582763671875, "grad_norm": 12.870085716247559, "learning_rate": 5.459342899664293e-07, "loss": 3.2766, "step": 77250 }, { "epoch": 0.7858784993489584, "grad_norm": 7.7160515785217285, "learning_rate": 5.456848904155185e-07, "loss": 3.1813, "step": 77255 }, { "epoch": 0.7859293619791666, "grad_norm": 10.903667449951172, "learning_rate": 5.454355408652954e-07, "loss": 3.1384, "step": 77260 }, { "epoch": 0.785980224609375, "grad_norm": 8.76274585723877, "learning_rate": 5.451862413221415e-07, "loss": 3.6507, "step": 77265 }, { "epoch": 0.7860310872395834, "grad_norm": 14.959699630737305, "learning_rate": 5.449369917924344e-07, "loss": 3.6169, "step": 77270 }, { "epoch": 0.7860819498697916, "grad_norm": 8.773665428161621, "learning_rate": 5.446877922825502e-07, "loss": 3.1942, "step": 77275 }, { "epoch": 0.7861328125, "grad_norm": 11.461071014404297, "learning_rate": 5.444386427988655e-07, "loss": 3.3724, "step": 77280 }, { "epoch": 0.7861836751302084, "grad_norm": 11.221567153930664, "learning_rate": 5.441895433477551e-07, "loss": 3.1541, "step": 77285 }, { "epoch": 0.7862345377604166, "grad_norm": 13.72826862335205, "learning_rate": 5.439404939355916e-07, "loss": 3.2305, "step": 77290 }, { "epoch": 0.786285400390625, "grad_norm": 7.363616943359375, "learning_rate": 5.436914945687472e-07, "loss": 3.0984, "step": 77295 }, { "epoch": 0.7863362630208334, "grad_norm": 17.01660919189453, "learning_rate": 5.434425452535913e-07, "loss": 3.1953, "step": 77300 }, { "epoch": 0.7863871256510416, "grad_norm": 12.926891326904297, "learning_rate": 5.431936459964943e-07, "loss": 3.2446, "step": 77305 }, { "epoch": 0.78643798828125, "grad_norm": 11.168578147888184, "learning_rate": 5.429447968038245e-07, "loss": 3.0408, "step": 77310 }, { "epoch": 0.7864888509114584, "grad_norm": 8.66266918182373, "learning_rate": 5.426959976819476e-07, "loss": 3.617, "step": 77315 }, { "epoch": 0.7865397135416666, "grad_norm": 11.905282974243164, "learning_rate": 5.424472486372304e-07, "loss": 3.3476, "step": 77320 }, { "epoch": 0.786590576171875, "grad_norm": 12.501431465148926, "learning_rate": 5.421985496760357e-07, "loss": 3.3132, "step": 77325 }, { "epoch": 0.7866414388020834, "grad_norm": 9.089613914489746, "learning_rate": 5.419499008047277e-07, "loss": 3.2856, "step": 77330 }, { "epoch": 0.7866923014322916, "grad_norm": 10.10815715789795, "learning_rate": 5.417013020296669e-07, "loss": 3.7829, "step": 77335 }, { "epoch": 0.7867431640625, "grad_norm": 14.02984619140625, "learning_rate": 5.414527533572145e-07, "loss": 3.0122, "step": 77340 }, { "epoch": 0.7867940266927084, "grad_norm": 14.067699432373047, "learning_rate": 5.412042547937294e-07, "loss": 3.0384, "step": 77345 }, { "epoch": 0.7868448893229166, "grad_norm": 13.12199592590332, "learning_rate": 5.409558063455683e-07, "loss": 3.3337, "step": 77350 }, { "epoch": 0.786895751953125, "grad_norm": 11.288161277770996, "learning_rate": 5.407074080190886e-07, "loss": 3.4466, "step": 77355 }, { "epoch": 0.7869466145833334, "grad_norm": 13.173538208007812, "learning_rate": 5.404590598206461e-07, "loss": 3.131, "step": 77360 }, { "epoch": 0.7869974772135416, "grad_norm": 8.635846138000488, "learning_rate": 5.402107617565941e-07, "loss": 3.53, "step": 77365 }, { "epoch": 0.78704833984375, "grad_norm": 12.688966751098633, "learning_rate": 5.399625138332854e-07, "loss": 3.4413, "step": 77370 }, { "epoch": 0.7870992024739584, "grad_norm": 11.470646858215332, "learning_rate": 5.397143160570703e-07, "loss": 3.4997, "step": 77375 }, { "epoch": 0.7871500651041666, "grad_norm": 13.815547943115234, "learning_rate": 5.394661684343e-07, "loss": 3.3771, "step": 77380 }, { "epoch": 0.787200927734375, "grad_norm": 16.58146095275879, "learning_rate": 5.392180709713235e-07, "loss": 3.2503, "step": 77385 }, { "epoch": 0.7872517903645834, "grad_norm": 16.185945510864258, "learning_rate": 5.389700236744883e-07, "loss": 3.7747, "step": 77390 }, { "epoch": 0.7873026529947916, "grad_norm": 7.731017589569092, "learning_rate": 5.387220265501392e-07, "loss": 3.1052, "step": 77395 }, { "epoch": 0.787353515625, "grad_norm": 15.839229583740234, "learning_rate": 5.384740796046228e-07, "loss": 3.1966, "step": 77400 }, { "epoch": 0.7874043782552084, "grad_norm": 10.629376411437988, "learning_rate": 5.382261828442817e-07, "loss": 3.1147, "step": 77405 }, { "epoch": 0.7874552408854166, "grad_norm": 17.61418342590332, "learning_rate": 5.379783362754587e-07, "loss": 3.2562, "step": 77410 }, { "epoch": 0.787506103515625, "grad_norm": 10.025396347045898, "learning_rate": 5.377305399044954e-07, "loss": 3.0456, "step": 77415 }, { "epoch": 0.7875569661458334, "grad_norm": 9.159224510192871, "learning_rate": 5.374827937377314e-07, "loss": 3.2772, "step": 77420 }, { "epoch": 0.7876078287760416, "grad_norm": 8.789568901062012, "learning_rate": 5.37235097781505e-07, "loss": 2.9066, "step": 77425 }, { "epoch": 0.78765869140625, "grad_norm": 7.427765369415283, "learning_rate": 5.369874520421525e-07, "loss": 3.4291, "step": 77430 }, { "epoch": 0.7877095540364584, "grad_norm": 9.716547012329102, "learning_rate": 5.367398565260107e-07, "loss": 3.0209, "step": 77435 }, { "epoch": 0.7877604166666666, "grad_norm": 9.71487045288086, "learning_rate": 5.364923112394152e-07, "loss": 3.5569, "step": 77440 }, { "epoch": 0.787811279296875, "grad_norm": 12.40367317199707, "learning_rate": 5.362448161886985e-07, "loss": 3.2814, "step": 77445 }, { "epoch": 0.7878621419270834, "grad_norm": 13.2744140625, "learning_rate": 5.359973713801919e-07, "loss": 3.308, "step": 77450 }, { "epoch": 0.7879130045572916, "grad_norm": 10.100786209106445, "learning_rate": 5.357499768202276e-07, "loss": 3.1497, "step": 77455 }, { "epoch": 0.7879638671875, "grad_norm": 8.46898078918457, "learning_rate": 5.355026325151341e-07, "loss": 3.32, "step": 77460 }, { "epoch": 0.7880147298177084, "grad_norm": 7.277879238128662, "learning_rate": 5.352553384712408e-07, "loss": 3.2946, "step": 77465 }, { "epoch": 0.7880655924479166, "grad_norm": 15.188435554504395, "learning_rate": 5.350080946948732e-07, "loss": 3.06, "step": 77470 }, { "epoch": 0.788116455078125, "grad_norm": 13.343172073364258, "learning_rate": 5.347609011923585e-07, "loss": 2.9495, "step": 77475 }, { "epoch": 0.7881673177083334, "grad_norm": 11.459025382995605, "learning_rate": 5.345137579700202e-07, "loss": 3.5003, "step": 77480 }, { "epoch": 0.7882181803385416, "grad_norm": 9.683830261230469, "learning_rate": 5.34266665034181e-07, "loss": 3.2973, "step": 77485 }, { "epoch": 0.78826904296875, "grad_norm": 8.150592803955078, "learning_rate": 5.340196223911631e-07, "loss": 3.2292, "step": 77490 }, { "epoch": 0.7883199055989584, "grad_norm": 8.487361907958984, "learning_rate": 5.337726300472882e-07, "loss": 3.2224, "step": 77495 }, { "epoch": 0.7883707682291666, "grad_norm": 11.931181907653809, "learning_rate": 5.33525688008874e-07, "loss": 3.3767, "step": 77500 }, { "epoch": 0.788421630859375, "grad_norm": 15.323692321777344, "learning_rate": 5.332787962822386e-07, "loss": 3.1439, "step": 77505 }, { "epoch": 0.7884724934895834, "grad_norm": 13.419575691223145, "learning_rate": 5.330319548736989e-07, "loss": 3.4456, "step": 77510 }, { "epoch": 0.7885233561197916, "grad_norm": 10.741533279418945, "learning_rate": 5.327851637895715e-07, "loss": 2.965, "step": 77515 }, { "epoch": 0.78857421875, "grad_norm": 10.303194999694824, "learning_rate": 5.325384230361689e-07, "loss": 3.5112, "step": 77520 }, { "epoch": 0.7886250813802084, "grad_norm": 12.528802871704102, "learning_rate": 5.322917326198038e-07, "loss": 3.5571, "step": 77525 }, { "epoch": 0.7886759440104166, "grad_norm": 9.583382606506348, "learning_rate": 5.320450925467893e-07, "loss": 3.1225, "step": 77530 }, { "epoch": 0.788726806640625, "grad_norm": 12.141776084899902, "learning_rate": 5.317985028234338e-07, "loss": 2.9998, "step": 77535 }, { "epoch": 0.7887776692708334, "grad_norm": 15.204798698425293, "learning_rate": 5.315519634560476e-07, "loss": 3.3831, "step": 77540 }, { "epoch": 0.7888285319010416, "grad_norm": 8.962828636169434, "learning_rate": 5.313054744509374e-07, "loss": 3.173, "step": 77545 }, { "epoch": 0.78887939453125, "grad_norm": 12.641799926757812, "learning_rate": 5.310590358144105e-07, "loss": 3.7621, "step": 77550 }, { "epoch": 0.7889302571614584, "grad_norm": 15.058570861816406, "learning_rate": 5.308126475527717e-07, "loss": 3.1571, "step": 77555 }, { "epoch": 0.7889811197916666, "grad_norm": 10.706228256225586, "learning_rate": 5.305663096723237e-07, "loss": 3.0751, "step": 77560 }, { "epoch": 0.789031982421875, "grad_norm": 9.943954467773438, "learning_rate": 5.303200221793697e-07, "loss": 3.2576, "step": 77565 }, { "epoch": 0.7890828450520834, "grad_norm": 10.609087944030762, "learning_rate": 5.300737850802121e-07, "loss": 3.4295, "step": 77570 }, { "epoch": 0.7891337076822916, "grad_norm": 12.822108268737793, "learning_rate": 5.298275983811496e-07, "loss": 3.605, "step": 77575 }, { "epoch": 0.7891845703125, "grad_norm": 11.368073463439941, "learning_rate": 5.295814620884812e-07, "loss": 3.2867, "step": 77580 }, { "epoch": 0.7892354329427084, "grad_norm": 13.873319625854492, "learning_rate": 5.293353762085033e-07, "loss": 3.5609, "step": 77585 }, { "epoch": 0.7892862955729166, "grad_norm": 11.019356727600098, "learning_rate": 5.290893407475125e-07, "loss": 3.4088, "step": 77590 }, { "epoch": 0.789337158203125, "grad_norm": 11.461319923400879, "learning_rate": 5.288433557118044e-07, "loss": 3.2105, "step": 77595 }, { "epoch": 0.7893880208333334, "grad_norm": 14.308027267456055, "learning_rate": 5.285974211076714e-07, "loss": 3.3516, "step": 77600 }, { "epoch": 0.7894388834635416, "grad_norm": 14.984569549560547, "learning_rate": 5.283515369414069e-07, "loss": 3.3032, "step": 77605 }, { "epoch": 0.78948974609375, "grad_norm": 9.603241920471191, "learning_rate": 5.281057032193008e-07, "loss": 3.3644, "step": 77610 }, { "epoch": 0.7895406087239584, "grad_norm": 12.334107398986816, "learning_rate": 5.27859919947642e-07, "loss": 3.74, "step": 77615 }, { "epoch": 0.7895914713541666, "grad_norm": 12.757811546325684, "learning_rate": 5.2761418713272e-07, "loss": 3.3776, "step": 77620 }, { "epoch": 0.789642333984375, "grad_norm": 15.814942359924316, "learning_rate": 5.273685047808222e-07, "loss": 3.3104, "step": 77625 }, { "epoch": 0.7896931966145834, "grad_norm": 13.55041790008545, "learning_rate": 5.271228728982333e-07, "loss": 3.6321, "step": 77630 }, { "epoch": 0.7897440592447916, "grad_norm": 12.658421516418457, "learning_rate": 5.268772914912384e-07, "loss": 3.3185, "step": 77635 }, { "epoch": 0.789794921875, "grad_norm": 14.816579818725586, "learning_rate": 5.266317605661189e-07, "loss": 3.0483, "step": 77640 }, { "epoch": 0.7898457845052084, "grad_norm": 9.840069770812988, "learning_rate": 5.263862801291594e-07, "loss": 3.4473, "step": 77645 }, { "epoch": 0.7898966471354166, "grad_norm": 16.5360050201416, "learning_rate": 5.261408501866391e-07, "loss": 3.3267, "step": 77650 }, { "epoch": 0.789947509765625, "grad_norm": 11.0403470993042, "learning_rate": 5.258954707448375e-07, "loss": 3.53, "step": 77655 }, { "epoch": 0.7899983723958334, "grad_norm": 7.623027324676514, "learning_rate": 5.256501418100316e-07, "loss": 3.5695, "step": 77660 }, { "epoch": 0.7900492350260416, "grad_norm": 10.7011137008667, "learning_rate": 5.254048633884987e-07, "loss": 3.437, "step": 77665 }, { "epoch": 0.79010009765625, "grad_norm": 9.61572551727295, "learning_rate": 5.251596354865148e-07, "loss": 3.0816, "step": 77670 }, { "epoch": 0.7901509602864584, "grad_norm": 13.822772979736328, "learning_rate": 5.249144581103541e-07, "loss": 3.3933, "step": 77675 }, { "epoch": 0.7902018229166666, "grad_norm": 9.52763843536377, "learning_rate": 5.246693312662878e-07, "loss": 3.4029, "step": 77680 }, { "epoch": 0.790252685546875, "grad_norm": 8.417424201965332, "learning_rate": 5.24424254960589e-07, "loss": 3.1074, "step": 77685 }, { "epoch": 0.7903035481770834, "grad_norm": 13.455845832824707, "learning_rate": 5.241792291995271e-07, "loss": 3.5337, "step": 77690 }, { "epoch": 0.7903544108072916, "grad_norm": 23.03131103515625, "learning_rate": 5.239342539893719e-07, "loss": 3.5476, "step": 77695 }, { "epoch": 0.7904052734375, "grad_norm": 12.35528564453125, "learning_rate": 5.236893293363895e-07, "loss": 3.417, "step": 77700 }, { "epoch": 0.7904561360677084, "grad_norm": 11.345623970031738, "learning_rate": 5.234444552468482e-07, "loss": 3.1162, "step": 77705 }, { "epoch": 0.7905069986979166, "grad_norm": 16.01922035217285, "learning_rate": 5.231996317270116e-07, "loss": 3.4665, "step": 77710 }, { "epoch": 0.790557861328125, "grad_norm": 7.456033229827881, "learning_rate": 5.229548587831434e-07, "loss": 3.3849, "step": 77715 }, { "epoch": 0.7906087239583334, "grad_norm": 13.59242057800293, "learning_rate": 5.227101364215065e-07, "loss": 3.0704, "step": 77720 }, { "epoch": 0.7906595865885416, "grad_norm": 15.409017562866211, "learning_rate": 5.224654646483629e-07, "loss": 3.2839, "step": 77725 }, { "epoch": 0.79071044921875, "grad_norm": 13.718083381652832, "learning_rate": 5.222208434699716e-07, "loss": 3.6831, "step": 77730 }, { "epoch": 0.7907613118489584, "grad_norm": 14.573699951171875, "learning_rate": 5.219762728925903e-07, "loss": 3.0561, "step": 77735 }, { "epoch": 0.7908121744791666, "grad_norm": 10.57568359375, "learning_rate": 5.217317529224783e-07, "loss": 3.0412, "step": 77740 }, { "epoch": 0.790863037109375, "grad_norm": 13.119362831115723, "learning_rate": 5.214872835658896e-07, "loss": 3.3877, "step": 77745 }, { "epoch": 0.7909138997395834, "grad_norm": 7.986304759979248, "learning_rate": 5.212428648290804e-07, "loss": 2.9841, "step": 77750 }, { "epoch": 0.7909647623697916, "grad_norm": 9.003944396972656, "learning_rate": 5.209984967183029e-07, "loss": 3.1749, "step": 77755 }, { "epoch": 0.791015625, "grad_norm": 14.619218826293945, "learning_rate": 5.207541792398105e-07, "loss": 3.3182, "step": 77760 }, { "epoch": 0.7910664876302084, "grad_norm": 14.97256088256836, "learning_rate": 5.205099123998533e-07, "loss": 3.3165, "step": 77765 }, { "epoch": 0.7911173502604166, "grad_norm": 15.13031005859375, "learning_rate": 5.202656962046801e-07, "loss": 3.5063, "step": 77770 }, { "epoch": 0.791168212890625, "grad_norm": 17.268144607543945, "learning_rate": 5.2002153066054e-07, "loss": 3.4668, "step": 77775 }, { "epoch": 0.7912190755208334, "grad_norm": 10.193543434143066, "learning_rate": 5.197774157736801e-07, "loss": 3.3866, "step": 77780 }, { "epoch": 0.7912699381510416, "grad_norm": 9.963932991027832, "learning_rate": 5.195333515503456e-07, "loss": 2.7359, "step": 77785 }, { "epoch": 0.79132080078125, "grad_norm": 13.159423828125, "learning_rate": 5.192893379967812e-07, "loss": 3.2852, "step": 77790 }, { "epoch": 0.7913716634114584, "grad_norm": 7.917318344116211, "learning_rate": 5.190453751192281e-07, "loss": 3.5465, "step": 77795 }, { "epoch": 0.7914225260416666, "grad_norm": 43.193885803222656, "learning_rate": 5.188014629239308e-07, "loss": 3.7018, "step": 77800 }, { "epoch": 0.791473388671875, "grad_norm": 13.045291900634766, "learning_rate": 5.185576014171287e-07, "loss": 3.1991, "step": 77805 }, { "epoch": 0.7915242513020834, "grad_norm": 10.419424057006836, "learning_rate": 5.183137906050598e-07, "loss": 3.6339, "step": 77810 }, { "epoch": 0.7915751139322916, "grad_norm": 11.010937690734863, "learning_rate": 5.180700304939637e-07, "loss": 3.3555, "step": 77815 }, { "epoch": 0.7916259765625, "grad_norm": 10.980416297912598, "learning_rate": 5.178263210900752e-07, "loss": 3.0911, "step": 77820 }, { "epoch": 0.7916768391927084, "grad_norm": 7.204528331756592, "learning_rate": 5.175826623996313e-07, "loss": 3.272, "step": 77825 }, { "epoch": 0.7917277018229166, "grad_norm": 19.492891311645508, "learning_rate": 5.173390544288645e-07, "loss": 3.3251, "step": 77830 }, { "epoch": 0.791778564453125, "grad_norm": 10.955602645874023, "learning_rate": 5.170954971840087e-07, "loss": 3.5388, "step": 77835 }, { "epoch": 0.7918294270833334, "grad_norm": 12.813833236694336, "learning_rate": 5.168519906712946e-07, "loss": 3.3107, "step": 77840 }, { "epoch": 0.7918802897135416, "grad_norm": 9.102631568908691, "learning_rate": 5.166085348969516e-07, "loss": 3.3827, "step": 77845 }, { "epoch": 0.79193115234375, "grad_norm": 20.36287498474121, "learning_rate": 5.16365129867209e-07, "loss": 3.4709, "step": 77850 }, { "epoch": 0.7919820149739584, "grad_norm": 9.53981876373291, "learning_rate": 5.16121775588295e-07, "loss": 3.2573, "step": 77855 }, { "epoch": 0.7920328776041666, "grad_norm": 10.992317199707031, "learning_rate": 5.158784720664353e-07, "loss": 3.138, "step": 77860 }, { "epoch": 0.792083740234375, "grad_norm": 10.903949737548828, "learning_rate": 5.156352193078545e-07, "loss": 3.539, "step": 77865 }, { "epoch": 0.7921346028645834, "grad_norm": 8.236926078796387, "learning_rate": 5.153920173187757e-07, "loss": 3.426, "step": 77870 }, { "epoch": 0.7921854654947916, "grad_norm": 8.5499267578125, "learning_rate": 5.151488661054216e-07, "loss": 2.9827, "step": 77875 }, { "epoch": 0.792236328125, "grad_norm": 14.410651206970215, "learning_rate": 5.149057656740139e-07, "loss": 4.5449, "step": 77880 }, { "epoch": 0.7922871907552084, "grad_norm": 14.640835762023926, "learning_rate": 5.146627160307718e-07, "loss": 3.1035, "step": 77885 }, { "epoch": 0.7923380533854166, "grad_norm": 9.395439147949219, "learning_rate": 5.144197171819129e-07, "loss": 3.1243, "step": 77890 }, { "epoch": 0.792388916015625, "grad_norm": 9.009712219238281, "learning_rate": 5.141767691336558e-07, "loss": 3.7954, "step": 77895 }, { "epoch": 0.7924397786458334, "grad_norm": 12.002073287963867, "learning_rate": 5.139338718922143e-07, "loss": 3.1543, "step": 77900 }, { "epoch": 0.7924906412760416, "grad_norm": 9.392443656921387, "learning_rate": 5.136910254638042e-07, "loss": 3.1261, "step": 77905 }, { "epoch": 0.79254150390625, "grad_norm": 9.431241035461426, "learning_rate": 5.13448229854639e-07, "loss": 3.3929, "step": 77910 }, { "epoch": 0.7925923665364584, "grad_norm": 9.476242065429688, "learning_rate": 5.132054850709298e-07, "loss": 2.8419, "step": 77915 }, { "epoch": 0.7926432291666666, "grad_norm": 11.033794403076172, "learning_rate": 5.129627911188876e-07, "loss": 3.1382, "step": 77920 }, { "epoch": 0.792694091796875, "grad_norm": 10.795284271240234, "learning_rate": 5.127201480047206e-07, "loss": 3.0693, "step": 77925 }, { "epoch": 0.7927449544270834, "grad_norm": 14.76865291595459, "learning_rate": 5.124775557346376e-07, "loss": 3.5296, "step": 77930 }, { "epoch": 0.7927958170572916, "grad_norm": 8.955913543701172, "learning_rate": 5.122350143148458e-07, "loss": 3.3339, "step": 77935 }, { "epoch": 0.7928466796875, "grad_norm": 13.093903541564941, "learning_rate": 5.119925237515502e-07, "loss": 3.5774, "step": 77940 }, { "epoch": 0.7928975423177084, "grad_norm": 15.690264701843262, "learning_rate": 5.117500840509537e-07, "loss": 3.4012, "step": 77945 }, { "epoch": 0.7929484049479166, "grad_norm": 10.985313415527344, "learning_rate": 5.115076952192605e-07, "loss": 3.2087, "step": 77950 }, { "epoch": 0.792999267578125, "grad_norm": 8.62189769744873, "learning_rate": 5.112653572626719e-07, "loss": 3.5361, "step": 77955 }, { "epoch": 0.7930501302083334, "grad_norm": 14.569122314453125, "learning_rate": 5.110230701873878e-07, "loss": 3.0355, "step": 77960 }, { "epoch": 0.7931009928385416, "grad_norm": 9.006644248962402, "learning_rate": 5.107808339996065e-07, "loss": 3.3467, "step": 77965 }, { "epoch": 0.79315185546875, "grad_norm": 8.4939603805542, "learning_rate": 5.105386487055267e-07, "loss": 3.8445, "step": 77970 }, { "epoch": 0.7932027180989584, "grad_norm": 10.302860260009766, "learning_rate": 5.102965143113431e-07, "loss": 3.6555, "step": 77975 }, { "epoch": 0.7932535807291666, "grad_norm": 8.247651100158691, "learning_rate": 5.100544308232525e-07, "loss": 3.6142, "step": 77980 }, { "epoch": 0.793304443359375, "grad_norm": 10.880356788635254, "learning_rate": 5.098123982474468e-07, "loss": 3.0245, "step": 77985 }, { "epoch": 0.7933553059895834, "grad_norm": 10.192167282104492, "learning_rate": 5.095704165901199e-07, "loss": 2.9427, "step": 77990 }, { "epoch": 0.7934061686197916, "grad_norm": 11.401342391967773, "learning_rate": 5.093284858574621e-07, "loss": 3.198, "step": 77995 }, { "epoch": 0.79345703125, "grad_norm": 55.87403106689453, "learning_rate": 5.090866060556626e-07, "loss": 3.3743, "step": 78000 }, { "epoch": 0.7935078938802084, "grad_norm": 13.474732398986816, "learning_rate": 5.088447771909102e-07, "loss": 3.3572, "step": 78005 }, { "epoch": 0.7935587565104166, "grad_norm": 13.210291862487793, "learning_rate": 5.086029992693931e-07, "loss": 3.3775, "step": 78010 }, { "epoch": 0.793609619140625, "grad_norm": 14.540139198303223, "learning_rate": 5.083612722972961e-07, "loss": 3.5608, "step": 78015 }, { "epoch": 0.7936604817708334, "grad_norm": 9.909323692321777, "learning_rate": 5.081195962808033e-07, "loss": 3.5092, "step": 78020 }, { "epoch": 0.7937113444010416, "grad_norm": 9.023208618164062, "learning_rate": 5.078779712260989e-07, "loss": 3.3079, "step": 78025 }, { "epoch": 0.79376220703125, "grad_norm": 10.259096145629883, "learning_rate": 5.076363971393641e-07, "loss": 3.3976, "step": 78030 }, { "epoch": 0.7938130696614584, "grad_norm": 8.69201374053955, "learning_rate": 5.073948740267804e-07, "loss": 3.624, "step": 78035 }, { "epoch": 0.7938639322916666, "grad_norm": 13.281329154968262, "learning_rate": 5.071534018945259e-07, "loss": 3.7112, "step": 78040 }, { "epoch": 0.793914794921875, "grad_norm": 9.79742431640625, "learning_rate": 5.069119807487799e-07, "loss": 3.6791, "step": 78045 }, { "epoch": 0.7939656575520834, "grad_norm": 12.21177864074707, "learning_rate": 5.066706105957183e-07, "loss": 3.5715, "step": 78050 }, { "epoch": 0.7940165201822916, "grad_norm": 8.74547290802002, "learning_rate": 5.064292914415161e-07, "loss": 3.4031, "step": 78055 }, { "epoch": 0.7940673828125, "grad_norm": 12.38512134552002, "learning_rate": 5.06188023292348e-07, "loss": 3.2038, "step": 78060 }, { "epoch": 0.7941182454427084, "grad_norm": 16.570751190185547, "learning_rate": 5.059468061543874e-07, "loss": 3.3855, "step": 78065 }, { "epoch": 0.7941691080729166, "grad_norm": 10.972708702087402, "learning_rate": 5.057056400338051e-07, "loss": 3.2059, "step": 78070 }, { "epoch": 0.794219970703125, "grad_norm": 12.961288452148438, "learning_rate": 5.054645249367712e-07, "loss": 3.5962, "step": 78075 }, { "epoch": 0.7942708333333334, "grad_norm": 10.799997329711914, "learning_rate": 5.052234608694543e-07, "loss": 3.6069, "step": 78080 }, { "epoch": 0.7943216959635416, "grad_norm": 13.07223129272461, "learning_rate": 5.049824478380219e-07, "loss": 3.2428, "step": 78085 }, { "epoch": 0.79437255859375, "grad_norm": 7.48736047744751, "learning_rate": 5.047414858486416e-07, "loss": 3.0928, "step": 78090 }, { "epoch": 0.7944234212239584, "grad_norm": 14.10659408569336, "learning_rate": 5.045005749074769e-07, "loss": 3.5605, "step": 78095 }, { "epoch": 0.7944742838541666, "grad_norm": 12.803142547607422, "learning_rate": 5.042597150206927e-07, "loss": 3.2086, "step": 78100 }, { "epoch": 0.794525146484375, "grad_norm": 15.745060920715332, "learning_rate": 5.040189061944498e-07, "loss": 3.5553, "step": 78105 }, { "epoch": 0.7945760091145834, "grad_norm": 14.81272029876709, "learning_rate": 5.037781484349108e-07, "loss": 3.2929, "step": 78110 }, { "epoch": 0.7946268717447916, "grad_norm": 15.14911937713623, "learning_rate": 5.035374417482339e-07, "loss": 3.7916, "step": 78115 }, { "epoch": 0.794677734375, "grad_norm": 12.357599258422852, "learning_rate": 5.032967861405794e-07, "loss": 3.2061, "step": 78120 }, { "epoch": 0.7947285970052084, "grad_norm": 9.263826370239258, "learning_rate": 5.030561816181029e-07, "loss": 3.4975, "step": 78125 }, { "epoch": 0.7947794596354166, "grad_norm": 7.138040542602539, "learning_rate": 5.028156281869603e-07, "loss": 3.1083, "step": 78130 }, { "epoch": 0.794830322265625, "grad_norm": 10.713909149169922, "learning_rate": 5.025751258533062e-07, "loss": 3.2477, "step": 78135 }, { "epoch": 0.7948811848958334, "grad_norm": 11.131681442260742, "learning_rate": 5.023346746232952e-07, "loss": 3.186, "step": 78140 }, { "epoch": 0.7949320475260416, "grad_norm": 12.119609832763672, "learning_rate": 5.020942745030777e-07, "loss": 2.9858, "step": 78145 }, { "epoch": 0.79498291015625, "grad_norm": 13.22558879852295, "learning_rate": 5.018539254988047e-07, "loss": 2.9334, "step": 78150 }, { "epoch": 0.7950337727864584, "grad_norm": 10.023087501525879, "learning_rate": 5.016136276166247e-07, "loss": 3.3949, "step": 78155 }, { "epoch": 0.7950846354166666, "grad_norm": 16.232559204101562, "learning_rate": 5.013733808626864e-07, "loss": 3.1979, "step": 78160 }, { "epoch": 0.795135498046875, "grad_norm": 7.1333699226379395, "learning_rate": 5.011331852431369e-07, "loss": 3.2484, "step": 78165 }, { "epoch": 0.7951863606770834, "grad_norm": 12.75872802734375, "learning_rate": 5.008930407641215e-07, "loss": 3.6665, "step": 78170 }, { "epoch": 0.7952372233072916, "grad_norm": 12.93013858795166, "learning_rate": 5.006529474317828e-07, "loss": 3.1155, "step": 78175 }, { "epoch": 0.7952880859375, "grad_norm": 12.088790893554688, "learning_rate": 5.004129052522655e-07, "loss": 3.1999, "step": 78180 }, { "epoch": 0.7953389485677084, "grad_norm": 12.312433242797852, "learning_rate": 5.001729142317091e-07, "loss": 3.3969, "step": 78185 }, { "epoch": 0.7953898111979166, "grad_norm": 13.875852584838867, "learning_rate": 4.999329743762548e-07, "loss": 3.4245, "step": 78190 }, { "epoch": 0.795440673828125, "grad_norm": 12.507823944091797, "learning_rate": 4.996930856920421e-07, "loss": 3.3563, "step": 78195 }, { "epoch": 0.7954915364583334, "grad_norm": 12.309163093566895, "learning_rate": 4.994532481852074e-07, "loss": 3.3727, "step": 78200 }, { "epoch": 0.7955423990885416, "grad_norm": 8.417670249938965, "learning_rate": 4.992134618618874e-07, "loss": 3.0717, "step": 78205 }, { "epoch": 0.79559326171875, "grad_norm": 12.883235931396484, "learning_rate": 4.989737267282157e-07, "loss": 3.1688, "step": 78210 }, { "epoch": 0.7956441243489584, "grad_norm": 17.78324317932129, "learning_rate": 4.987340427903273e-07, "loss": 3.2281, "step": 78215 }, { "epoch": 0.7956949869791666, "grad_norm": 7.117746353149414, "learning_rate": 4.984944100543545e-07, "loss": 3.1447, "step": 78220 }, { "epoch": 0.795745849609375, "grad_norm": 9.414732933044434, "learning_rate": 4.98254828526428e-07, "loss": 3.3095, "step": 78225 }, { "epoch": 0.7957967122395834, "grad_norm": 15.074447631835938, "learning_rate": 4.980152982126765e-07, "loss": 3.9253, "step": 78230 }, { "epoch": 0.7958475748697916, "grad_norm": 8.129987716674805, "learning_rate": 4.977758191192297e-07, "loss": 3.6704, "step": 78235 }, { "epoch": 0.7958984375, "grad_norm": 9.61725902557373, "learning_rate": 4.975363912522136e-07, "loss": 3.3034, "step": 78240 }, { "epoch": 0.7959493001302084, "grad_norm": 15.898126602172852, "learning_rate": 4.972970146177544e-07, "loss": 3.1708, "step": 78245 }, { "epoch": 0.7960001627604166, "grad_norm": 27.77703094482422, "learning_rate": 4.970576892219761e-07, "loss": 3.2187, "step": 78250 }, { "epoch": 0.796051025390625, "grad_norm": 11.904648780822754, "learning_rate": 4.968184150710029e-07, "loss": 3.2447, "step": 78255 }, { "epoch": 0.7961018880208334, "grad_norm": 16.175411224365234, "learning_rate": 4.965791921709548e-07, "loss": 3.4115, "step": 78260 }, { "epoch": 0.7961527506510416, "grad_norm": 13.698991775512695, "learning_rate": 4.96340020527954e-07, "loss": 3.9765, "step": 78265 }, { "epoch": 0.79620361328125, "grad_norm": 10.724797248840332, "learning_rate": 4.961009001481179e-07, "loss": 3.6566, "step": 78270 }, { "epoch": 0.7962544759114584, "grad_norm": 16.67429542541504, "learning_rate": 4.958618310375662e-07, "loss": 3.0561, "step": 78275 }, { "epoch": 0.7963053385416666, "grad_norm": 12.067134857177734, "learning_rate": 4.956228132024146e-07, "loss": 3.2328, "step": 78280 }, { "epoch": 0.796356201171875, "grad_norm": 12.0044527053833, "learning_rate": 4.953838466487773e-07, "loss": 3.1284, "step": 78285 }, { "epoch": 0.7964070638020834, "grad_norm": 13.68885326385498, "learning_rate": 4.951449313827694e-07, "loss": 3.272, "step": 78290 }, { "epoch": 0.7964579264322916, "grad_norm": 10.359649658203125, "learning_rate": 4.949060674105038e-07, "loss": 3.1571, "step": 78295 }, { "epoch": 0.7965087890625, "grad_norm": 7.889382362365723, "learning_rate": 4.946672547380913e-07, "loss": 3.0599, "step": 78300 }, { "epoch": 0.7965596516927084, "grad_norm": 24.841642379760742, "learning_rate": 4.944284933716409e-07, "loss": 3.5161, "step": 78305 }, { "epoch": 0.7966105143229166, "grad_norm": 12.071474075317383, "learning_rate": 4.941897833172629e-07, "loss": 3.3784, "step": 78310 }, { "epoch": 0.796661376953125, "grad_norm": 12.433374404907227, "learning_rate": 4.939511245810632e-07, "loss": 3.0686, "step": 78315 }, { "epoch": 0.7967122395833334, "grad_norm": 13.961923599243164, "learning_rate": 4.93712517169149e-07, "loss": 3.3446, "step": 78320 }, { "epoch": 0.7967631022135416, "grad_norm": 12.11087417602539, "learning_rate": 4.934739610876241e-07, "loss": 3.0639, "step": 78325 }, { "epoch": 0.79681396484375, "grad_norm": 10.080228805541992, "learning_rate": 4.932354563425928e-07, "loss": 3.3742, "step": 78330 }, { "epoch": 0.7968648274739584, "grad_norm": 9.731584548950195, "learning_rate": 4.929970029401568e-07, "loss": 3.0383, "step": 78335 }, { "epoch": 0.7969156901041666, "grad_norm": 15.536563873291016, "learning_rate": 4.92758600886416e-07, "loss": 3.1817, "step": 78340 }, { "epoch": 0.796966552734375, "grad_norm": 10.678982734680176, "learning_rate": 4.925202501874707e-07, "loss": 3.4005, "step": 78345 }, { "epoch": 0.7970174153645834, "grad_norm": 14.26845932006836, "learning_rate": 4.922819508494198e-07, "loss": 3.3219, "step": 78350 }, { "epoch": 0.7970682779947916, "grad_norm": 8.872315406799316, "learning_rate": 4.920437028783589e-07, "loss": 3.5714, "step": 78355 }, { "epoch": 0.797119140625, "grad_norm": 10.538403511047363, "learning_rate": 4.918055062803842e-07, "loss": 3.1552, "step": 78360 }, { "epoch": 0.7971700032552084, "grad_norm": 11.638603210449219, "learning_rate": 4.91567361061589e-07, "loss": 3.2873, "step": 78365 }, { "epoch": 0.7972208658854166, "grad_norm": 9.17963695526123, "learning_rate": 4.913292672280667e-07, "loss": 3.3738, "step": 78370 }, { "epoch": 0.797271728515625, "grad_norm": 14.535566329956055, "learning_rate": 4.910912247859099e-07, "loss": 3.2076, "step": 78375 }, { "epoch": 0.7973225911458334, "grad_norm": 11.816314697265625, "learning_rate": 4.908532337412075e-07, "loss": 3.362, "step": 78380 }, { "epoch": 0.7973734537760416, "grad_norm": 12.55820083618164, "learning_rate": 4.906152941000486e-07, "loss": 3.3663, "step": 78385 }, { "epoch": 0.79742431640625, "grad_norm": 13.613922119140625, "learning_rate": 4.903774058685215e-07, "loss": 3.0585, "step": 78390 }, { "epoch": 0.7974751790364584, "grad_norm": 12.756303787231445, "learning_rate": 4.901395690527116e-07, "loss": 3.701, "step": 78395 }, { "epoch": 0.7975260416666666, "grad_norm": 13.124650955200195, "learning_rate": 4.899017836587047e-07, "loss": 3.4298, "step": 78400 }, { "epoch": 0.797576904296875, "grad_norm": 13.512313842773438, "learning_rate": 4.896640496925845e-07, "loss": 3.626, "step": 78405 }, { "epoch": 0.7976277669270834, "grad_norm": 9.460736274719238, "learning_rate": 4.894263671604332e-07, "loss": 3.1125, "step": 78410 }, { "epoch": 0.7976786295572916, "grad_norm": 9.055636405944824, "learning_rate": 4.891887360683311e-07, "loss": 3.2618, "step": 78415 }, { "epoch": 0.7977294921875, "grad_norm": 9.254910469055176, "learning_rate": 4.889511564223586e-07, "loss": 3.7696, "step": 78420 }, { "epoch": 0.7977803548177084, "grad_norm": 11.433979034423828, "learning_rate": 4.887136282285948e-07, "loss": 3.3128, "step": 78425 }, { "epoch": 0.7978312174479166, "grad_norm": 8.865793228149414, "learning_rate": 4.884761514931161e-07, "loss": 3.1384, "step": 78430 }, { "epoch": 0.797882080078125, "grad_norm": 8.5194730758667, "learning_rate": 4.882387262219981e-07, "loss": 3.3785, "step": 78435 }, { "epoch": 0.7979329427083334, "grad_norm": 13.52230167388916, "learning_rate": 4.880013524213151e-07, "loss": 3.1476, "step": 78440 }, { "epoch": 0.7979838053385416, "grad_norm": 9.095830917358398, "learning_rate": 4.877640300971404e-07, "loss": 3.5677, "step": 78445 }, { "epoch": 0.79803466796875, "grad_norm": 13.468499183654785, "learning_rate": 4.875267592555466e-07, "loss": 3.3346, "step": 78450 }, { "epoch": 0.7980855305989584, "grad_norm": 14.660035133361816, "learning_rate": 4.87289539902604e-07, "loss": 3.6494, "step": 78455 }, { "epoch": 0.7981363932291666, "grad_norm": 10.19507122039795, "learning_rate": 4.870523720443804e-07, "loss": 3.3321, "step": 78460 }, { "epoch": 0.798187255859375, "grad_norm": 8.802572250366211, "learning_rate": 4.868152556869457e-07, "loss": 3.1674, "step": 78465 }, { "epoch": 0.7982381184895834, "grad_norm": 9.512373924255371, "learning_rate": 4.865781908363646e-07, "loss": 3.1545, "step": 78470 }, { "epoch": 0.7982889811197916, "grad_norm": 15.197171211242676, "learning_rate": 4.86341177498704e-07, "loss": 3.3482, "step": 78475 }, { "epoch": 0.79833984375, "grad_norm": 13.10971736907959, "learning_rate": 4.861042156800264e-07, "loss": 3.3086, "step": 78480 }, { "epoch": 0.7983907063802084, "grad_norm": 16.294416427612305, "learning_rate": 4.858673053863957e-07, "loss": 3.6134, "step": 78485 }, { "epoch": 0.7984415690104166, "grad_norm": 11.440515518188477, "learning_rate": 4.856304466238729e-07, "loss": 3.3362, "step": 78490 }, { "epoch": 0.798492431640625, "grad_norm": 13.755946159362793, "learning_rate": 4.853936393985168e-07, "loss": 3.273, "step": 78495 }, { "epoch": 0.7985432942708334, "grad_norm": 15.543915748596191, "learning_rate": 4.85156883716387e-07, "loss": 2.8206, "step": 78500 }, { "epoch": 0.7985941569010416, "grad_norm": 12.707036972045898, "learning_rate": 4.849201795835412e-07, "loss": 3.0078, "step": 78505 }, { "epoch": 0.79864501953125, "grad_norm": 14.186240196228027, "learning_rate": 4.846835270060352e-07, "loss": 3.2222, "step": 78510 }, { "epoch": 0.7986958821614584, "grad_norm": 10.725924491882324, "learning_rate": 4.844469259899226e-07, "loss": 3.4706, "step": 78515 }, { "epoch": 0.7987467447916666, "grad_norm": 17.00894546508789, "learning_rate": 4.842103765412587e-07, "loss": 3.7273, "step": 78520 }, { "epoch": 0.798797607421875, "grad_norm": 13.25726318359375, "learning_rate": 4.839738786660935e-07, "loss": 3.49, "step": 78525 }, { "epoch": 0.7988484700520834, "grad_norm": 8.638480186462402, "learning_rate": 4.837374323704797e-07, "loss": 3.297, "step": 78530 }, { "epoch": 0.7988993326822916, "grad_norm": 11.479582786560059, "learning_rate": 4.835010376604649e-07, "loss": 3.3887, "step": 78535 }, { "epoch": 0.7989501953125, "grad_norm": 11.228867530822754, "learning_rate": 4.832646945420988e-07, "loss": 3.4827, "step": 78540 }, { "epoch": 0.7990010579427084, "grad_norm": 10.890009880065918, "learning_rate": 4.830284030214275e-07, "loss": 3.2497, "step": 78545 }, { "epoch": 0.7990519205729166, "grad_norm": 14.480691909790039, "learning_rate": 4.827921631044957e-07, "loss": 3.2666, "step": 78550 }, { "epoch": 0.799102783203125, "grad_norm": 13.990694046020508, "learning_rate": 4.825559747973482e-07, "loss": 3.3072, "step": 78555 }, { "epoch": 0.7991536458333334, "grad_norm": 12.1658296585083, "learning_rate": 4.823198381060287e-07, "loss": 3.1358, "step": 78560 }, { "epoch": 0.7992045084635416, "grad_norm": 12.146638870239258, "learning_rate": 4.82083753036578e-07, "loss": 3.6579, "step": 78565 }, { "epoch": 0.79925537109375, "grad_norm": 14.979445457458496, "learning_rate": 4.818477195950355e-07, "loss": 3.78, "step": 78570 }, { "epoch": 0.7993062337239584, "grad_norm": 15.680461883544922, "learning_rate": 4.816117377874408e-07, "loss": 3.2552, "step": 78575 }, { "epoch": 0.7993570963541666, "grad_norm": 7.587637424468994, "learning_rate": 4.813758076198316e-07, "loss": 3.3191, "step": 78580 }, { "epoch": 0.799407958984375, "grad_norm": 13.030993461608887, "learning_rate": 4.811399290982443e-07, "loss": 3.2942, "step": 78585 }, { "epoch": 0.7994588216145834, "grad_norm": 15.706942558288574, "learning_rate": 4.809041022287125e-07, "loss": 3.0769, "step": 78590 }, { "epoch": 0.7995096842447916, "grad_norm": 13.303389549255371, "learning_rate": 4.806683270172715e-07, "loss": 3.2164, "step": 78595 }, { "epoch": 0.799560546875, "grad_norm": 11.963903427124023, "learning_rate": 4.804326034699522e-07, "loss": 3.1576, "step": 78600 }, { "epoch": 0.7996114095052084, "grad_norm": 7.553828716278076, "learning_rate": 4.801969315927865e-07, "loss": 3.2115, "step": 78605 }, { "epoch": 0.7996622721354166, "grad_norm": 14.163137435913086, "learning_rate": 4.799613113918028e-07, "loss": 2.9159, "step": 78610 }, { "epoch": 0.799713134765625, "grad_norm": 12.92434310913086, "learning_rate": 4.79725742873031e-07, "loss": 2.9415, "step": 78615 }, { "epoch": 0.7997639973958334, "grad_norm": 13.198997497558594, "learning_rate": 4.794902260424972e-07, "loss": 3.4047, "step": 78620 }, { "epoch": 0.7998148600260416, "grad_norm": 10.787629127502441, "learning_rate": 4.792547609062262e-07, "loss": 3.234, "step": 78625 }, { "epoch": 0.79986572265625, "grad_norm": 15.940741539001465, "learning_rate": 4.790193474702434e-07, "loss": 3.1683, "step": 78630 }, { "epoch": 0.7999165852864584, "grad_norm": 11.246376991271973, "learning_rate": 4.787839857405721e-07, "loss": 3.4445, "step": 78635 }, { "epoch": 0.7999674479166666, "grad_norm": 13.741132736206055, "learning_rate": 4.785486757232336e-07, "loss": 3.4073, "step": 78640 }, { "epoch": 0.800018310546875, "grad_norm": 7.921855449676514, "learning_rate": 4.783134174242479e-07, "loss": 3.7101, "step": 78645 }, { "epoch": 0.8000691731770834, "grad_norm": 16.312631607055664, "learning_rate": 4.780782108496337e-07, "loss": 3.6811, "step": 78650 }, { "epoch": 0.8001200358072916, "grad_norm": 12.837047576904297, "learning_rate": 4.77843056005409e-07, "loss": 3.3388, "step": 78655 }, { "epoch": 0.8001708984375, "grad_norm": 7.334871292114258, "learning_rate": 4.776079528975913e-07, "loss": 3.1737, "step": 78660 }, { "epoch": 0.8002217610677084, "grad_norm": 11.887138366699219, "learning_rate": 4.773729015321949e-07, "loss": 3.0676, "step": 78665 }, { "epoch": 0.8002726236979166, "grad_norm": 11.066459655761719, "learning_rate": 4.771379019152326e-07, "loss": 3.1375, "step": 78670 }, { "epoch": 0.800323486328125, "grad_norm": 9.975939750671387, "learning_rate": 4.769029540527181e-07, "loss": 3.3405, "step": 78675 }, { "epoch": 0.8003743489583334, "grad_norm": 15.235580444335938, "learning_rate": 4.766680579506613e-07, "loss": 3.0982, "step": 78680 }, { "epoch": 0.8004252115885416, "grad_norm": 9.530317306518555, "learning_rate": 4.7643321361507286e-07, "loss": 3.6487, "step": 78685 }, { "epoch": 0.80047607421875, "grad_norm": 6.792818546295166, "learning_rate": 4.761984210519616e-07, "loss": 3.8031, "step": 78690 }, { "epoch": 0.8005269368489584, "grad_norm": 12.819168090820312, "learning_rate": 4.7596368026733396e-07, "loss": 3.2232, "step": 78695 }, { "epoch": 0.8005777994791666, "grad_norm": 14.127707481384277, "learning_rate": 4.7572899126719615e-07, "loss": 4.0349, "step": 78700 }, { "epoch": 0.800628662109375, "grad_norm": 16.820049285888672, "learning_rate": 4.7549435405755075e-07, "loss": 3.4131, "step": 78705 }, { "epoch": 0.8006795247395834, "grad_norm": 9.99593734741211, "learning_rate": 4.7525976864440354e-07, "loss": 3.117, "step": 78710 }, { "epoch": 0.8007303873697916, "grad_norm": 13.380139350891113, "learning_rate": 4.750252350337556e-07, "loss": 3.4492, "step": 78715 }, { "epoch": 0.80078125, "grad_norm": 13.98039722442627, "learning_rate": 4.7479075323160677e-07, "loss": 3.0948, "step": 78720 }, { "epoch": 0.8008321126302084, "grad_norm": 14.347100257873535, "learning_rate": 4.7455632324395596e-07, "loss": 3.5968, "step": 78725 }, { "epoch": 0.8008829752604166, "grad_norm": 14.859460830688477, "learning_rate": 4.7432194507680163e-07, "loss": 3.1246, "step": 78730 }, { "epoch": 0.800933837890625, "grad_norm": 18.239768981933594, "learning_rate": 4.740876187361407e-07, "loss": 3.5149, "step": 78735 }, { "epoch": 0.8009847005208334, "grad_norm": 10.691304206848145, "learning_rate": 4.738533442279677e-07, "loss": 3.1573, "step": 78740 }, { "epoch": 0.8010355631510416, "grad_norm": 14.137451171875, "learning_rate": 4.7361912155827636e-07, "loss": 3.9191, "step": 78745 }, { "epoch": 0.80108642578125, "grad_norm": 15.178362846374512, "learning_rate": 4.733849507330598e-07, "loss": 3.3544, "step": 78750 }, { "epoch": 0.8011372884114584, "grad_norm": 8.599349975585938, "learning_rate": 4.731508317583086e-07, "loss": 3.3475, "step": 78755 }, { "epoch": 0.8011881510416666, "grad_norm": 16.44230842590332, "learning_rate": 4.729167646400135e-07, "loss": 3.3797, "step": 78760 }, { "epoch": 0.801239013671875, "grad_norm": 14.500465393066406, "learning_rate": 4.7268274938416183e-07, "loss": 3.4236, "step": 78765 }, { "epoch": 0.8012898763020834, "grad_norm": 8.624430656433105, "learning_rate": 4.7244878599674196e-07, "loss": 3.342, "step": 78770 }, { "epoch": 0.8013407389322916, "grad_norm": 10.258706092834473, "learning_rate": 4.722148744837393e-07, "loss": 3.2887, "step": 78775 }, { "epoch": 0.8013916015625, "grad_norm": 7.703892707824707, "learning_rate": 4.719810148511381e-07, "loss": 3.1236, "step": 78780 }, { "epoch": 0.8014424641927084, "grad_norm": 12.779461860656738, "learning_rate": 4.717472071049217e-07, "loss": 3.0516, "step": 78785 }, { "epoch": 0.8014933268229166, "grad_norm": 12.617457389831543, "learning_rate": 4.71513451251073e-07, "loss": 3.6554, "step": 78790 }, { "epoch": 0.801544189453125, "grad_norm": 12.944184303283691, "learning_rate": 4.7127974729557183e-07, "loss": 3.4866, "step": 78795 }, { "epoch": 0.8015950520833334, "grad_norm": 8.754607200622559, "learning_rate": 4.710460952443968e-07, "loss": 3.1968, "step": 78800 }, { "epoch": 0.8016459147135416, "grad_norm": 10.901610374450684, "learning_rate": 4.708124951035273e-07, "loss": 3.5131, "step": 78805 }, { "epoch": 0.80169677734375, "grad_norm": 14.882295608520508, "learning_rate": 4.7057894687893847e-07, "loss": 3.4044, "step": 78810 }, { "epoch": 0.8017476399739584, "grad_norm": 10.60777759552002, "learning_rate": 4.703454505766067e-07, "loss": 3.1125, "step": 78815 }, { "epoch": 0.8017985026041666, "grad_norm": 9.532958984375, "learning_rate": 4.70112006202505e-07, "loss": 3.3332, "step": 78820 }, { "epoch": 0.801849365234375, "grad_norm": 10.766741752624512, "learning_rate": 4.6987861376260713e-07, "loss": 3.0745, "step": 78825 }, { "epoch": 0.8019002278645834, "grad_norm": 12.011417388916016, "learning_rate": 4.696452732628837e-07, "loss": 3.0788, "step": 78830 }, { "epoch": 0.8019510904947916, "grad_norm": 11.187000274658203, "learning_rate": 4.69411984709304e-07, "loss": 3.2608, "step": 78835 }, { "epoch": 0.802001953125, "grad_norm": 16.041290283203125, "learning_rate": 4.691787481078375e-07, "loss": 3.0582, "step": 78840 }, { "epoch": 0.8020528157552084, "grad_norm": 7.748333930969238, "learning_rate": 4.6894556346445185e-07, "loss": 3.5502, "step": 78845 }, { "epoch": 0.8021036783854166, "grad_norm": 15.040388107299805, "learning_rate": 4.6871243078511266e-07, "loss": 3.8123, "step": 78850 }, { "epoch": 0.802154541015625, "grad_norm": 14.229958534240723, "learning_rate": 4.684793500757845e-07, "loss": 3.4445, "step": 78855 }, { "epoch": 0.8022054036458334, "grad_norm": 10.111949920654297, "learning_rate": 4.682463213424293e-07, "loss": 3.1247, "step": 78860 }, { "epoch": 0.8022562662760416, "grad_norm": 13.16586685180664, "learning_rate": 4.6801334459101176e-07, "loss": 2.9999, "step": 78865 }, { "epoch": 0.80230712890625, "grad_norm": 10.070747375488281, "learning_rate": 4.67780419827491e-07, "loss": 3.4165, "step": 78870 }, { "epoch": 0.8023579915364584, "grad_norm": 16.717920303344727, "learning_rate": 4.675475470578267e-07, "loss": 3.3036, "step": 78875 }, { "epoch": 0.8024088541666666, "grad_norm": 15.924241065979004, "learning_rate": 4.6731472628797597e-07, "loss": 3.1925, "step": 78880 }, { "epoch": 0.802459716796875, "grad_norm": 12.359611511230469, "learning_rate": 4.6708195752389605e-07, "loss": 3.4127, "step": 78885 }, { "epoch": 0.8025105794270834, "grad_norm": 15.967865943908691, "learning_rate": 4.66849240771543e-07, "loss": 3.1058, "step": 78890 }, { "epoch": 0.8025614420572916, "grad_norm": 11.010781288146973, "learning_rate": 4.666165760368696e-07, "loss": 3.3866, "step": 78895 }, { "epoch": 0.8026123046875, "grad_norm": 14.635432243347168, "learning_rate": 4.663839633258299e-07, "loss": 3.3891, "step": 78900 }, { "epoch": 0.8026631673177084, "grad_norm": 15.67871379852295, "learning_rate": 4.6615140264437447e-07, "loss": 3.2746, "step": 78905 }, { "epoch": 0.8027140299479166, "grad_norm": 9.079788208007812, "learning_rate": 4.6591889399845267e-07, "loss": 3.31, "step": 78910 }, { "epoch": 0.802764892578125, "grad_norm": 11.785703659057617, "learning_rate": 4.65686437394014e-07, "loss": 3.3784, "step": 78915 }, { "epoch": 0.8028157552083334, "grad_norm": 13.959105491638184, "learning_rate": 4.654540328370061e-07, "loss": 3.1514, "step": 78920 }, { "epoch": 0.8028666178385416, "grad_norm": 10.363253593444824, "learning_rate": 4.652216803333745e-07, "loss": 3.2574, "step": 78925 }, { "epoch": 0.80291748046875, "grad_norm": 11.194170951843262, "learning_rate": 4.6498937988906395e-07, "loss": 3.0131, "step": 78930 }, { "epoch": 0.8029683430989584, "grad_norm": 9.353020668029785, "learning_rate": 4.6475713151001686e-07, "loss": 3.3285, "step": 78935 }, { "epoch": 0.8030192057291666, "grad_norm": 13.347886085510254, "learning_rate": 4.6452493520217664e-07, "loss": 3.42, "step": 78940 }, { "epoch": 0.803070068359375, "grad_norm": 14.261474609375, "learning_rate": 4.6429279097148394e-07, "loss": 3.1498, "step": 78945 }, { "epoch": 0.8031209309895834, "grad_norm": 13.230145454406738, "learning_rate": 4.6406069882387743e-07, "loss": 3.3409, "step": 78950 }, { "epoch": 0.8031717936197916, "grad_norm": 8.211542129516602, "learning_rate": 4.6382865876529516e-07, "loss": 3.0068, "step": 78955 }, { "epoch": 0.80322265625, "grad_norm": 11.118182182312012, "learning_rate": 4.6359667080167427e-07, "loss": 3.0683, "step": 78960 }, { "epoch": 0.8032735188802084, "grad_norm": 13.080451965332031, "learning_rate": 4.633647349389495e-07, "loss": 3.6222, "step": 78965 }, { "epoch": 0.8033243815104166, "grad_norm": 16.388444900512695, "learning_rate": 4.6313285118305574e-07, "loss": 3.1887, "step": 78970 }, { "epoch": 0.803375244140625, "grad_norm": 17.810380935668945, "learning_rate": 4.6290101953992473e-07, "loss": 3.3287, "step": 78975 }, { "epoch": 0.8034261067708334, "grad_norm": 8.733531951904297, "learning_rate": 4.6266924001548863e-07, "loss": 3.4616, "step": 78980 }, { "epoch": 0.8034769694010416, "grad_norm": 9.307930946350098, "learning_rate": 4.624375126156774e-07, "loss": 3.1054, "step": 78985 }, { "epoch": 0.80352783203125, "grad_norm": 11.856342315673828, "learning_rate": 4.6220583734641863e-07, "loss": 3.2163, "step": 78990 }, { "epoch": 0.8035786946614584, "grad_norm": 9.549277305603027, "learning_rate": 4.6197421421364075e-07, "loss": 2.9922, "step": 78995 }, { "epoch": 0.8036295572916666, "grad_norm": 13.3350830078125, "learning_rate": 4.6174264322326995e-07, "loss": 2.7833, "step": 79000 }, { "epoch": 0.803680419921875, "grad_norm": 16.208858489990234, "learning_rate": 4.6151112438123064e-07, "loss": 3.294, "step": 79005 }, { "epoch": 0.8037312825520834, "grad_norm": 12.73587703704834, "learning_rate": 4.612796576934453e-07, "loss": 3.2964, "step": 79010 }, { "epoch": 0.8037821451822916, "grad_norm": 9.596652030944824, "learning_rate": 4.6104824316583666e-07, "loss": 3.4701, "step": 79015 }, { "epoch": 0.8038330078125, "grad_norm": 14.039467811584473, "learning_rate": 4.6081688080432616e-07, "loss": 3.1576, "step": 79020 }, { "epoch": 0.8038838704427084, "grad_norm": 15.404362678527832, "learning_rate": 4.605855706148324e-07, "loss": 3.3063, "step": 79025 }, { "epoch": 0.8039347330729166, "grad_norm": 10.67270565032959, "learning_rate": 4.603543126032728e-07, "loss": 3.6506, "step": 79030 }, { "epoch": 0.803985595703125, "grad_norm": 7.46541166305542, "learning_rate": 4.6012310677556545e-07, "loss": 3.2762, "step": 79035 }, { "epoch": 0.8040364583333334, "grad_norm": 10.64223575592041, "learning_rate": 4.598919531376239e-07, "loss": 3.0405, "step": 79040 }, { "epoch": 0.8040873209635416, "grad_norm": 14.383641242980957, "learning_rate": 4.5966085169536407e-07, "loss": 3.1108, "step": 79045 }, { "epoch": 0.80413818359375, "grad_norm": 12.580687522888184, "learning_rate": 4.59429802454697e-07, "loss": 3.1669, "step": 79050 }, { "epoch": 0.8041890462239584, "grad_norm": 11.952764511108398, "learning_rate": 4.5919880542153546e-07, "loss": 3.3763, "step": 79055 }, { "epoch": 0.8042399088541666, "grad_norm": 12.009092330932617, "learning_rate": 4.589678606017889e-07, "loss": 3.3642, "step": 79060 }, { "epoch": 0.804290771484375, "grad_norm": 15.879464149475098, "learning_rate": 4.587369680013651e-07, "loss": 3.4183, "step": 79065 }, { "epoch": 0.8043416341145834, "grad_norm": 10.191134452819824, "learning_rate": 4.585061276261721e-07, "loss": 3.0245, "step": 79070 }, { "epoch": 0.8043924967447916, "grad_norm": 16.796077728271484, "learning_rate": 4.5827533948211657e-07, "loss": 3.6091, "step": 79075 }, { "epoch": 0.804443359375, "grad_norm": 15.804084777832031, "learning_rate": 4.580446035751024e-07, "loss": 3.3889, "step": 79080 }, { "epoch": 0.8044942220052084, "grad_norm": 13.486974716186523, "learning_rate": 4.578139199110326e-07, "loss": 3.3648, "step": 79085 }, { "epoch": 0.8045450846354166, "grad_norm": 12.026664733886719, "learning_rate": 4.5758328849581006e-07, "loss": 3.1254, "step": 79090 }, { "epoch": 0.804595947265625, "grad_norm": 12.640941619873047, "learning_rate": 4.573527093353347e-07, "loss": 3.3413, "step": 79095 }, { "epoch": 0.8046468098958334, "grad_norm": 7.197507381439209, "learning_rate": 4.571221824355065e-07, "loss": 3.3461, "step": 79100 }, { "epoch": 0.8046976725260416, "grad_norm": 15.275164604187012, "learning_rate": 4.5689170780222227e-07, "loss": 3.4683, "step": 79105 }, { "epoch": 0.80474853515625, "grad_norm": 11.376517295837402, "learning_rate": 4.5666128544138047e-07, "loss": 3.2734, "step": 79110 }, { "epoch": 0.8047993977864584, "grad_norm": 11.782517433166504, "learning_rate": 4.56430915358875e-07, "loss": 3.1015, "step": 79115 }, { "epoch": 0.8048502604166666, "grad_norm": 13.285874366760254, "learning_rate": 4.562005975605996e-07, "loss": 3.0926, "step": 79120 }, { "epoch": 0.804901123046875, "grad_norm": 12.198227882385254, "learning_rate": 4.559703320524475e-07, "loss": 3.2659, "step": 79125 }, { "epoch": 0.8049519856770834, "grad_norm": 8.998311042785645, "learning_rate": 4.557401188403107e-07, "loss": 3.3936, "step": 79130 }, { "epoch": 0.8050028483072916, "grad_norm": 13.996612548828125, "learning_rate": 4.555099579300784e-07, "loss": 3.2987, "step": 79135 }, { "epoch": 0.8050537109375, "grad_norm": 9.745990753173828, "learning_rate": 4.552798493276392e-07, "loss": 3.1222, "step": 79140 }, { "epoch": 0.8051045735677084, "grad_norm": 10.147890090942383, "learning_rate": 4.5504979303887974e-07, "loss": 3.3142, "step": 79145 }, { "epoch": 0.8051554361979166, "grad_norm": 10.35171127319336, "learning_rate": 4.5481978906968675e-07, "loss": 3.2781, "step": 79150 }, { "epoch": 0.805206298828125, "grad_norm": 14.873088836669922, "learning_rate": 4.545898374259453e-07, "loss": 3.2424, "step": 79155 }, { "epoch": 0.8052571614583334, "grad_norm": 13.0719575881958, "learning_rate": 4.543599381135377e-07, "loss": 3.7585, "step": 79160 }, { "epoch": 0.8053080240885416, "grad_norm": 14.930803298950195, "learning_rate": 4.541300911383459e-07, "loss": 3.1064, "step": 79165 }, { "epoch": 0.80535888671875, "grad_norm": 10.585026741027832, "learning_rate": 4.5390029650625086e-07, "loss": 3.4128, "step": 79170 }, { "epoch": 0.8054097493489584, "grad_norm": 15.125510215759277, "learning_rate": 4.5367055422313205e-07, "loss": 3.1273, "step": 79175 }, { "epoch": 0.8054606119791666, "grad_norm": 12.9608793258667, "learning_rate": 4.5344086429486675e-07, "loss": 3.4222, "step": 79180 }, { "epoch": 0.805511474609375, "grad_norm": 12.554879188537598, "learning_rate": 4.5321122672733255e-07, "loss": 3.5969, "step": 79185 }, { "epoch": 0.8055623372395834, "grad_norm": 15.822430610656738, "learning_rate": 4.5298164152640376e-07, "loss": 3.5563, "step": 79190 }, { "epoch": 0.8056131998697916, "grad_norm": 13.774492263793945, "learning_rate": 4.527521086979539e-07, "loss": 3.1992, "step": 79195 }, { "epoch": 0.8056640625, "grad_norm": 12.54838752746582, "learning_rate": 4.525226282478559e-07, "loss": 3.1414, "step": 79200 }, { "epoch": 0.8057149251302084, "grad_norm": 13.793286323547363, "learning_rate": 4.522932001819821e-07, "loss": 2.9914, "step": 79205 }, { "epoch": 0.8057657877604166, "grad_norm": 12.090859413146973, "learning_rate": 4.5206382450620117e-07, "loss": 3.3911, "step": 79210 }, { "epoch": 0.805816650390625, "grad_norm": 16.567838668823242, "learning_rate": 4.518345012263822e-07, "loss": 3.4067, "step": 79215 }, { "epoch": 0.8058675130208334, "grad_norm": 10.558396339416504, "learning_rate": 4.516052303483912e-07, "loss": 3.3806, "step": 79220 }, { "epoch": 0.8059183756510416, "grad_norm": 11.566436767578125, "learning_rate": 4.5137601187809464e-07, "loss": 3.1802, "step": 79225 }, { "epoch": 0.80596923828125, "grad_norm": 14.10775375366211, "learning_rate": 4.511468458213583e-07, "loss": 3.9436, "step": 79230 }, { "epoch": 0.8060201009114584, "grad_norm": 9.806373596191406, "learning_rate": 4.509177321840441e-07, "loss": 3.2196, "step": 79235 }, { "epoch": 0.8060709635416666, "grad_norm": 14.403755187988281, "learning_rate": 4.506886709720132e-07, "loss": 3.3581, "step": 79240 }, { "epoch": 0.806121826171875, "grad_norm": 13.174206733703613, "learning_rate": 4.504596621911278e-07, "loss": 3.0128, "step": 79245 }, { "epoch": 0.8061726888020834, "grad_norm": 9.067151069641113, "learning_rate": 4.502307058472455e-07, "loss": 3.4482, "step": 79250 }, { "epoch": 0.8062235514322916, "grad_norm": 10.918560981750488, "learning_rate": 4.5000180194622517e-07, "loss": 3.2611, "step": 79255 }, { "epoch": 0.8062744140625, "grad_norm": 15.179645538330078, "learning_rate": 4.497729504939224e-07, "loss": 3.4583, "step": 79260 }, { "epoch": 0.8063252766927084, "grad_norm": 14.72915267944336, "learning_rate": 4.4954415149619345e-07, "loss": 3.4114, "step": 79265 }, { "epoch": 0.8063761393229166, "grad_norm": 11.220216751098633, "learning_rate": 4.49315404958891e-07, "loss": 3.2721, "step": 79270 }, { "epoch": 0.806427001953125, "grad_norm": 10.747183799743652, "learning_rate": 4.490867108878677e-07, "loss": 3.2829, "step": 79275 }, { "epoch": 0.8064778645833334, "grad_norm": 11.868279457092285, "learning_rate": 4.4885806928897434e-07, "loss": 3.5199, "step": 79280 }, { "epoch": 0.8065287272135416, "grad_norm": 12.011693954467773, "learning_rate": 4.486294801680621e-07, "loss": 3.5379, "step": 79285 }, { "epoch": 0.80657958984375, "grad_norm": 11.23404312133789, "learning_rate": 4.48400943530978e-07, "loss": 3.3113, "step": 79290 }, { "epoch": 0.8066304524739584, "grad_norm": 13.91883659362793, "learning_rate": 4.481724593835693e-07, "loss": 3.2927, "step": 79295 }, { "epoch": 0.8066813151041666, "grad_norm": 9.497418403625488, "learning_rate": 4.479440277316821e-07, "loss": 3.4381, "step": 79300 }, { "epoch": 0.806732177734375, "grad_norm": 13.011350631713867, "learning_rate": 4.4771564858116014e-07, "loss": 3.3248, "step": 79305 }, { "epoch": 0.8067830403645834, "grad_norm": 12.247965812683105, "learning_rate": 4.4748732193784755e-07, "loss": 2.9754, "step": 79310 }, { "epoch": 0.8068339029947916, "grad_norm": 11.67395305633545, "learning_rate": 4.4725904780758443e-07, "loss": 3.6511, "step": 79315 }, { "epoch": 0.806884765625, "grad_norm": 13.142789840698242, "learning_rate": 4.470308261962131e-07, "loss": 3.4572, "step": 79320 }, { "epoch": 0.8069356282552084, "grad_norm": 8.493226051330566, "learning_rate": 4.4680265710957044e-07, "loss": 3.5911, "step": 79325 }, { "epoch": 0.8069864908854166, "grad_norm": 16.766324996948242, "learning_rate": 4.46574540553496e-07, "loss": 3.3472, "step": 79330 }, { "epoch": 0.807037353515625, "grad_norm": 9.673091888427734, "learning_rate": 4.4634647653382435e-07, "loss": 3.7766, "step": 79335 }, { "epoch": 0.8070882161458334, "grad_norm": 10.032527923583984, "learning_rate": 4.4611846505639214e-07, "loss": 3.3584, "step": 79340 }, { "epoch": 0.8071390787760416, "grad_norm": 10.048256874084473, "learning_rate": 4.4589050612703214e-07, "loss": 3.5576, "step": 79345 }, { "epoch": 0.80718994140625, "grad_norm": 12.669438362121582, "learning_rate": 4.4566259975157624e-07, "loss": 3.1749, "step": 79350 }, { "epoch": 0.8072408040364584, "grad_norm": 13.59167194366455, "learning_rate": 4.4543474593585546e-07, "loss": 3.3033, "step": 79355 }, { "epoch": 0.8072916666666666, "grad_norm": 11.786405563354492, "learning_rate": 4.4520694468570047e-07, "loss": 3.2057, "step": 79360 }, { "epoch": 0.807342529296875, "grad_norm": 8.25817584991455, "learning_rate": 4.4497919600693884e-07, "loss": 3.1303, "step": 79365 }, { "epoch": 0.8073933919270834, "grad_norm": 11.9024019241333, "learning_rate": 4.4475149990539727e-07, "loss": 3.1778, "step": 79370 }, { "epoch": 0.8074442545572916, "grad_norm": 14.694074630737305, "learning_rate": 4.4452385638690085e-07, "loss": 3.5578, "step": 79375 }, { "epoch": 0.8074951171875, "grad_norm": 14.93714714050293, "learning_rate": 4.4429626545727437e-07, "loss": 3.5228, "step": 79380 }, { "epoch": 0.8075459798177084, "grad_norm": 12.39831829071045, "learning_rate": 4.440687271223412e-07, "loss": 3.2921, "step": 79385 }, { "epoch": 0.8075968424479166, "grad_norm": 11.452814102172852, "learning_rate": 4.43841241387922e-07, "loss": 3.7657, "step": 79390 }, { "epoch": 0.807647705078125, "grad_norm": 11.564638137817383, "learning_rate": 4.436138082598376e-07, "loss": 3.2557, "step": 79395 }, { "epoch": 0.8076985677083334, "grad_norm": 13.71728801727295, "learning_rate": 4.433864277439068e-07, "loss": 3.3731, "step": 79400 }, { "epoch": 0.8077494303385416, "grad_norm": 12.129942893981934, "learning_rate": 4.431590998459459e-07, "loss": 3.3616, "step": 79405 }, { "epoch": 0.80780029296875, "grad_norm": 10.588050842285156, "learning_rate": 4.42931824571772e-07, "loss": 3.3958, "step": 79410 }, { "epoch": 0.8078511555989584, "grad_norm": 132.06982421875, "learning_rate": 4.427046019272002e-07, "loss": 3.5826, "step": 79415 }, { "epoch": 0.8079020182291666, "grad_norm": 15.277337074279785, "learning_rate": 4.424774319180439e-07, "loss": 3.3452, "step": 79420 }, { "epoch": 0.807952880859375, "grad_norm": 12.683645248413086, "learning_rate": 4.4225031455011445e-07, "loss": 3.1069, "step": 79425 }, { "epoch": 0.8080037434895834, "grad_norm": 13.64078426361084, "learning_rate": 4.420232498292226e-07, "loss": 3.1959, "step": 79430 }, { "epoch": 0.8080546061197916, "grad_norm": 12.579249382019043, "learning_rate": 4.417962377611781e-07, "loss": 3.429, "step": 79435 }, { "epoch": 0.80810546875, "grad_norm": 15.0173978805542, "learning_rate": 4.4156927835178944e-07, "loss": 3.1877, "step": 79440 }, { "epoch": 0.8081563313802084, "grad_norm": 11.585958480834961, "learning_rate": 4.413423716068632e-07, "loss": 3.4828, "step": 79445 }, { "epoch": 0.8082071940104166, "grad_norm": 11.081127166748047, "learning_rate": 4.411155175322035e-07, "loss": 3.5287, "step": 79450 }, { "epoch": 0.808258056640625, "grad_norm": 11.477083206176758, "learning_rate": 4.408887161336162e-07, "loss": 3.6381, "step": 79455 }, { "epoch": 0.8083089192708334, "grad_norm": 10.804614067077637, "learning_rate": 4.4066196741690217e-07, "loss": 3.1416, "step": 79460 }, { "epoch": 0.8083597819010416, "grad_norm": 14.424979209899902, "learning_rate": 4.404352713878643e-07, "loss": 3.9914, "step": 79465 }, { "epoch": 0.80841064453125, "grad_norm": 10.901899337768555, "learning_rate": 4.40208628052301e-07, "loss": 3.081, "step": 79470 }, { "epoch": 0.8084615071614584, "grad_norm": 8.379731178283691, "learning_rate": 4.399820374160127e-07, "loss": 3.6199, "step": 79475 }, { "epoch": 0.8085123697916666, "grad_norm": 9.010814666748047, "learning_rate": 4.397554994847955e-07, "loss": 3.3494, "step": 79480 }, { "epoch": 0.808563232421875, "grad_norm": 12.562276840209961, "learning_rate": 4.3952901426444426e-07, "loss": 3.9035, "step": 79485 }, { "epoch": 0.8086140950520834, "grad_norm": 13.099066734313965, "learning_rate": 4.3930258176075604e-07, "loss": 3.8992, "step": 79490 }, { "epoch": 0.8086649576822916, "grad_norm": 13.581748962402344, "learning_rate": 4.390762019795228e-07, "loss": 3.4234, "step": 79495 }, { "epoch": 0.8087158203125, "grad_norm": 13.964309692382812, "learning_rate": 4.388498749265366e-07, "loss": 2.767, "step": 79500 }, { "epoch": 0.8087666829427084, "grad_norm": 12.141261100769043, "learning_rate": 4.3862360060758724e-07, "loss": 3.391, "step": 79505 }, { "epoch": 0.8088175455729166, "grad_norm": 15.656390190124512, "learning_rate": 4.383973790284643e-07, "loss": 3.2007, "step": 79510 }, { "epoch": 0.808868408203125, "grad_norm": 14.146927833557129, "learning_rate": 4.381712101949562e-07, "loss": 3.2904, "step": 79515 }, { "epoch": 0.8089192708333334, "grad_norm": 8.907366752624512, "learning_rate": 4.3794509411284937e-07, "loss": 3.2788, "step": 79520 }, { "epoch": 0.8089701334635416, "grad_norm": 11.003146171569824, "learning_rate": 4.377190307879275e-07, "loss": 3.6502, "step": 79525 }, { "epoch": 0.80902099609375, "grad_norm": 13.753011703491211, "learning_rate": 4.374930202259764e-07, "loss": 3.4284, "step": 79530 }, { "epoch": 0.8090718587239584, "grad_norm": 11.551970481872559, "learning_rate": 4.3726706243277685e-07, "loss": 3.1446, "step": 79535 }, { "epoch": 0.8091227213541666, "grad_norm": 9.97728157043457, "learning_rate": 4.370411574141112e-07, "loss": 3.4146, "step": 79540 }, { "epoch": 0.809173583984375, "grad_norm": 13.558149337768555, "learning_rate": 4.368153051757579e-07, "loss": 3.4341, "step": 79545 }, { "epoch": 0.8092244466145834, "grad_norm": 9.130220413208008, "learning_rate": 4.3658950572349645e-07, "loss": 3.4231, "step": 79550 }, { "epoch": 0.8092753092447916, "grad_norm": 6.932345390319824, "learning_rate": 4.3636375906310343e-07, "loss": 3.5729, "step": 79555 }, { "epoch": 0.809326171875, "grad_norm": 15.159601211547852, "learning_rate": 4.361380652003541e-07, "loss": 3.4203, "step": 79560 }, { "epoch": 0.8093770345052084, "grad_norm": 8.320847511291504, "learning_rate": 4.3591242414102315e-07, "loss": 3.2303, "step": 79565 }, { "epoch": 0.8094278971354166, "grad_norm": 11.370145797729492, "learning_rate": 4.356868358908839e-07, "loss": 3.3368, "step": 79570 }, { "epoch": 0.809478759765625, "grad_norm": 7.661952495574951, "learning_rate": 4.3546130045570787e-07, "loss": 3.3427, "step": 79575 }, { "epoch": 0.8095296223958334, "grad_norm": 9.848603248596191, "learning_rate": 4.352358178412647e-07, "loss": 3.3152, "step": 79580 }, { "epoch": 0.8095804850260416, "grad_norm": 10.856739044189453, "learning_rate": 4.3501038805332407e-07, "loss": 3.5946, "step": 79585 }, { "epoch": 0.80963134765625, "grad_norm": 9.38187313079834, "learning_rate": 4.347850110976526e-07, "loss": 3.2375, "step": 79590 }, { "epoch": 0.8096822102864584, "grad_norm": 8.472463607788086, "learning_rate": 4.3455968698001783e-07, "loss": 3.7288, "step": 79595 }, { "epoch": 0.8097330729166666, "grad_norm": 7.300169944763184, "learning_rate": 4.3433441570618325e-07, "loss": 3.8467, "step": 79600 }, { "epoch": 0.809783935546875, "grad_norm": 14.652825355529785, "learning_rate": 4.3410919728191365e-07, "loss": 3.8647, "step": 79605 }, { "epoch": 0.8098347981770834, "grad_norm": 11.837065696716309, "learning_rate": 4.3388403171297053e-07, "loss": 3.1634, "step": 79610 }, { "epoch": 0.8098856608072916, "grad_norm": 12.23351001739502, "learning_rate": 4.3365891900511436e-07, "loss": 3.4731, "step": 79615 }, { "epoch": 0.8099365234375, "grad_norm": 9.871082305908203, "learning_rate": 4.3343385916410465e-07, "loss": 3.2557, "step": 79620 }, { "epoch": 0.8099873860677084, "grad_norm": 12.954231262207031, "learning_rate": 4.3320885219570063e-07, "loss": 3.3369, "step": 79625 }, { "epoch": 0.8100382486979166, "grad_norm": 18.884347915649414, "learning_rate": 4.329838981056583e-07, "loss": 3.1643, "step": 79630 }, { "epoch": 0.810089111328125, "grad_norm": 14.413152694702148, "learning_rate": 4.327589968997328e-07, "loss": 3.3079, "step": 79635 }, { "epoch": 0.8101399739583334, "grad_norm": 7.7837934494018555, "learning_rate": 4.325341485836773e-07, "loss": 3.4568, "step": 79640 }, { "epoch": 0.8101908365885416, "grad_norm": 10.19963550567627, "learning_rate": 4.323093531632466e-07, "loss": 3.3346, "step": 79645 }, { "epoch": 0.81024169921875, "grad_norm": 11.307597160339355, "learning_rate": 4.3208461064419114e-07, "loss": 3.4442, "step": 79650 }, { "epoch": 0.8102925618489584, "grad_norm": 18.645917892456055, "learning_rate": 4.3185992103226077e-07, "loss": 3.3448, "step": 79655 }, { "epoch": 0.8103434244791666, "grad_norm": 12.473796844482422, "learning_rate": 4.316352843332036e-07, "loss": 3.2527, "step": 79660 }, { "epoch": 0.810394287109375, "grad_norm": 11.506953239440918, "learning_rate": 4.3141070055276705e-07, "loss": 3.6653, "step": 79665 }, { "epoch": 0.8104451497395834, "grad_norm": 8.191093444824219, "learning_rate": 4.311861696966985e-07, "loss": 3.0089, "step": 79670 }, { "epoch": 0.8104960123697916, "grad_norm": 15.774288177490234, "learning_rate": 4.309616917707404e-07, "loss": 3.8248, "step": 79675 }, { "epoch": 0.810546875, "grad_norm": 19.564104080200195, "learning_rate": 4.307372667806381e-07, "loss": 3.934, "step": 79680 }, { "epoch": 0.8105977376302084, "grad_norm": 14.84902286529541, "learning_rate": 4.305128947321319e-07, "loss": 2.8391, "step": 79685 }, { "epoch": 0.8106486002604166, "grad_norm": 12.76487922668457, "learning_rate": 4.3028857563096246e-07, "loss": 3.3434, "step": 79690 }, { "epoch": 0.810699462890625, "grad_norm": 16.582035064697266, "learning_rate": 4.3006430948286907e-07, "loss": 3.4959, "step": 79695 }, { "epoch": 0.8107503255208334, "grad_norm": 11.676108360290527, "learning_rate": 4.298400962935903e-07, "loss": 3.3459, "step": 79700 }, { "epoch": 0.8108011881510416, "grad_norm": 12.260275840759277, "learning_rate": 4.2961593606886206e-07, "loss": 3.239, "step": 79705 }, { "epoch": 0.81085205078125, "grad_norm": 8.587767601013184, "learning_rate": 4.2939182881441945e-07, "loss": 3.0153, "step": 79710 }, { "epoch": 0.8109029134114584, "grad_norm": 10.28516674041748, "learning_rate": 4.291677745359954e-07, "loss": 3.3732, "step": 79715 }, { "epoch": 0.8109537760416666, "grad_norm": 13.696601867675781, "learning_rate": 4.2894377323932286e-07, "loss": 3.3326, "step": 79720 }, { "epoch": 0.811004638671875, "grad_norm": 10.845861434936523, "learning_rate": 4.2871982493013385e-07, "loss": 3.3103, "step": 79725 }, { "epoch": 0.8110555013020834, "grad_norm": 13.977005958557129, "learning_rate": 4.284959296141569e-07, "loss": 3.6096, "step": 79730 }, { "epoch": 0.8111063639322916, "grad_norm": 12.174431800842285, "learning_rate": 4.282720872971202e-07, "loss": 3.646, "step": 79735 }, { "epoch": 0.8111572265625, "grad_norm": 9.921180725097656, "learning_rate": 4.2804829798475165e-07, "loss": 3.6864, "step": 79740 }, { "epoch": 0.8112080891927084, "grad_norm": 14.429951667785645, "learning_rate": 4.278245616827756e-07, "loss": 3.0137, "step": 79745 }, { "epoch": 0.8112589518229166, "grad_norm": 9.26318645477295, "learning_rate": 4.276008783969177e-07, "loss": 3.5189, "step": 79750 }, { "epoch": 0.811309814453125, "grad_norm": 8.710182189941406, "learning_rate": 4.2737724813289945e-07, "loss": 3.1991, "step": 79755 }, { "epoch": 0.8113606770833334, "grad_norm": 11.180933952331543, "learning_rate": 4.2715367089644354e-07, "loss": 3.5738, "step": 79760 }, { "epoch": 0.8114115397135416, "grad_norm": 15.223068237304688, "learning_rate": 4.2693014669326986e-07, "loss": 3.4119, "step": 79765 }, { "epoch": 0.81146240234375, "grad_norm": 7.4405999183654785, "learning_rate": 4.267066755290963e-07, "loss": 3.0198, "step": 79770 }, { "epoch": 0.8115132649739584, "grad_norm": 17.034854888916016, "learning_rate": 4.2648325740964086e-07, "loss": 3.0463, "step": 79775 }, { "epoch": 0.8115641276041666, "grad_norm": 11.081181526184082, "learning_rate": 4.2625989234062084e-07, "loss": 3.3451, "step": 79780 }, { "epoch": 0.811614990234375, "grad_norm": 11.616988182067871, "learning_rate": 4.260365803277494e-07, "loss": 3.4363, "step": 79785 }, { "epoch": 0.8116658528645834, "grad_norm": 13.727701187133789, "learning_rate": 4.2581332137674014e-07, "loss": 3.273, "step": 79790 }, { "epoch": 0.8117167154947916, "grad_norm": 10.448540687561035, "learning_rate": 4.2559011549330535e-07, "loss": 3.1385, "step": 79795 }, { "epoch": 0.811767578125, "grad_norm": 9.588068962097168, "learning_rate": 4.2536696268315663e-07, "loss": 3.5569, "step": 79800 }, { "epoch": 0.8118184407552084, "grad_norm": 8.408053398132324, "learning_rate": 4.251438629520022e-07, "loss": 2.8845, "step": 79805 }, { "epoch": 0.8118693033854166, "grad_norm": 13.213644981384277, "learning_rate": 4.249208163055496e-07, "loss": 3.2393, "step": 79810 }, { "epoch": 0.811920166015625, "grad_norm": 15.220480918884277, "learning_rate": 4.2469782274950687e-07, "loss": 3.8526, "step": 79815 }, { "epoch": 0.8119710286458334, "grad_norm": 16.236431121826172, "learning_rate": 4.244748822895775e-07, "loss": 3.4945, "step": 79820 }, { "epoch": 0.8120218912760416, "grad_norm": 8.124288558959961, "learning_rate": 4.242519949314672e-07, "loss": 3.219, "step": 79825 }, { "epoch": 0.81207275390625, "grad_norm": 12.191054344177246, "learning_rate": 4.240291606808769e-07, "loss": 3.271, "step": 79830 }, { "epoch": 0.8121236165364584, "grad_norm": 12.073549270629883, "learning_rate": 4.2380637954350904e-07, "loss": 3.9508, "step": 79835 }, { "epoch": 0.8121744791666666, "grad_norm": 17.343311309814453, "learning_rate": 4.2358365152506296e-07, "loss": 3.6859, "step": 79840 }, { "epoch": 0.812225341796875, "grad_norm": 9.703289985656738, "learning_rate": 4.233609766312363e-07, "loss": 3.0531, "step": 79845 }, { "epoch": 0.8122762044270834, "grad_norm": 14.241864204406738, "learning_rate": 4.2313835486772705e-07, "loss": 3.7319, "step": 79850 }, { "epoch": 0.8123270670572916, "grad_norm": 10.567691802978516, "learning_rate": 4.229157862402311e-07, "loss": 3.0844, "step": 79855 }, { "epoch": 0.8123779296875, "grad_norm": 10.341302871704102, "learning_rate": 4.2269327075444233e-07, "loss": 3.596, "step": 79860 }, { "epoch": 0.8124287923177084, "grad_norm": 12.9301118850708, "learning_rate": 4.224708084160542e-07, "loss": 3.2738, "step": 79865 }, { "epoch": 0.8124796549479166, "grad_norm": 15.44719123840332, "learning_rate": 4.2224839923075716e-07, "loss": 3.6493, "step": 79870 }, { "epoch": 0.812530517578125, "grad_norm": 17.016420364379883, "learning_rate": 4.220260432042422e-07, "loss": 3.4857, "step": 79875 }, { "epoch": 0.8125813802083334, "grad_norm": 9.550246238708496, "learning_rate": 4.218037403421993e-07, "loss": 3.4953, "step": 79880 }, { "epoch": 0.8126322428385416, "grad_norm": 12.058992385864258, "learning_rate": 4.2158149065031436e-07, "loss": 3.3917, "step": 79885 }, { "epoch": 0.81268310546875, "grad_norm": 19.84065055847168, "learning_rate": 4.2135929413427504e-07, "loss": 3.7188, "step": 79890 }, { "epoch": 0.8127339680989584, "grad_norm": 10.726000785827637, "learning_rate": 4.211371507997655e-07, "loss": 3.2962, "step": 79895 }, { "epoch": 0.8127848307291666, "grad_norm": 18.25348663330078, "learning_rate": 4.209150606524684e-07, "loss": 3.3035, "step": 79900 }, { "epoch": 0.812835693359375, "grad_norm": 14.618754386901855, "learning_rate": 4.206930236980669e-07, "loss": 3.2014, "step": 79905 }, { "epoch": 0.8128865559895834, "grad_norm": 14.573039054870605, "learning_rate": 4.204710399422421e-07, "loss": 3.3201, "step": 79910 }, { "epoch": 0.8129374186197916, "grad_norm": 16.528791427612305, "learning_rate": 4.202491093906727e-07, "loss": 3.5633, "step": 79915 }, { "epoch": 0.81298828125, "grad_norm": 9.822667121887207, "learning_rate": 4.20027232049037e-07, "loss": 3.1876, "step": 79920 }, { "epoch": 0.8130391438802084, "grad_norm": 19.91170310974121, "learning_rate": 4.19805407923011e-07, "loss": 3.3017, "step": 79925 }, { "epoch": 0.8130900065104166, "grad_norm": 11.826841354370117, "learning_rate": 4.195836370182704e-07, "loss": 3.24, "step": 79930 }, { "epoch": 0.813140869140625, "grad_norm": 15.935693740844727, "learning_rate": 4.193619193404902e-07, "loss": 3.2237, "step": 79935 }, { "epoch": 0.8131917317708334, "grad_norm": 10.841400146484375, "learning_rate": 4.191402548953419e-07, "loss": 2.9862, "step": 79940 }, { "epoch": 0.8132425944010416, "grad_norm": 14.67307186126709, "learning_rate": 4.189186436884965e-07, "loss": 3.4626, "step": 79945 }, { "epoch": 0.81329345703125, "grad_norm": 12.722312927246094, "learning_rate": 4.1869708572562425e-07, "loss": 3.3784, "step": 79950 }, { "epoch": 0.8133443196614584, "grad_norm": 14.460338592529297, "learning_rate": 4.1847558101239414e-07, "loss": 3.571, "step": 79955 }, { "epoch": 0.8133951822916666, "grad_norm": 6.846340179443359, "learning_rate": 4.1825412955447314e-07, "loss": 3.1522, "step": 79960 }, { "epoch": 0.813446044921875, "grad_norm": 13.961633682250977, "learning_rate": 4.1803273135752636e-07, "loss": 3.4373, "step": 79965 }, { "epoch": 0.8134969075520834, "grad_norm": 10.45997428894043, "learning_rate": 4.17811386427219e-07, "loss": 3.4645, "step": 79970 }, { "epoch": 0.8135477701822916, "grad_norm": 14.717330932617188, "learning_rate": 4.1759009476921327e-07, "loss": 3.3381, "step": 79975 }, { "epoch": 0.8135986328125, "grad_norm": 13.302783966064453, "learning_rate": 4.173688563891715e-07, "loss": 3.1185, "step": 79980 }, { "epoch": 0.8136494954427084, "grad_norm": 10.639537811279297, "learning_rate": 4.171476712927544e-07, "loss": 3.1504, "step": 79985 }, { "epoch": 0.8137003580729166, "grad_norm": 12.81106185913086, "learning_rate": 4.1692653948562035e-07, "loss": 3.3925, "step": 79990 }, { "epoch": 0.813751220703125, "grad_norm": 12.533110618591309, "learning_rate": 4.1670546097342723e-07, "loss": 3.5072, "step": 79995 }, { "epoch": 0.8138020833333334, "grad_norm": 8.067879676818848, "learning_rate": 4.164844357618303e-07, "loss": 3.2179, "step": 80000 }, { "epoch": 0.8138529459635416, "grad_norm": 15.618395805358887, "learning_rate": 4.1626346385648553e-07, "loss": 3.1992, "step": 80005 }, { "epoch": 0.81390380859375, "grad_norm": 9.165993690490723, "learning_rate": 4.160425452630465e-07, "loss": 3.168, "step": 80010 }, { "epoch": 0.8139546712239584, "grad_norm": 6.94345760345459, "learning_rate": 4.1582167998716505e-07, "loss": 2.9606, "step": 80015 }, { "epoch": 0.8140055338541666, "grad_norm": 12.507560729980469, "learning_rate": 4.156008680344914e-07, "loss": 3.3534, "step": 80020 }, { "epoch": 0.814056396484375, "grad_norm": 17.14674949645996, "learning_rate": 4.1538010941067575e-07, "loss": 3.5355, "step": 80025 }, { "epoch": 0.8141072591145834, "grad_norm": 12.378942489624023, "learning_rate": 4.151594041213655e-07, "loss": 3.1759, "step": 80030 }, { "epoch": 0.8141581217447916, "grad_norm": 8.899665832519531, "learning_rate": 4.149387521722084e-07, "loss": 3.3154, "step": 80035 }, { "epoch": 0.814208984375, "grad_norm": 13.174806594848633, "learning_rate": 4.1471815356884855e-07, "loss": 3.2039, "step": 80040 }, { "epoch": 0.8142598470052084, "grad_norm": 10.049018859863281, "learning_rate": 4.144976083169311e-07, "loss": 3.5286, "step": 80045 }, { "epoch": 0.8143107096354166, "grad_norm": 10.214993476867676, "learning_rate": 4.1427711642209795e-07, "loss": 3.1437, "step": 80050 }, { "epoch": 0.814361572265625, "grad_norm": 14.105937004089355, "learning_rate": 4.140566778899896e-07, "loss": 3.1775, "step": 80055 }, { "epoch": 0.8144124348958334, "grad_norm": 12.205314636230469, "learning_rate": 4.138362927262471e-07, "loss": 3.3314, "step": 80060 }, { "epoch": 0.8144632975260416, "grad_norm": 9.415129661560059, "learning_rate": 4.13615960936509e-07, "loss": 3.3394, "step": 80065 }, { "epoch": 0.81451416015625, "grad_norm": 14.456554412841797, "learning_rate": 4.133956825264118e-07, "loss": 3.2737, "step": 80070 }, { "epoch": 0.8145650227864584, "grad_norm": 13.304035186767578, "learning_rate": 4.1317545750159114e-07, "loss": 3.7365, "step": 80075 }, { "epoch": 0.8146158854166666, "grad_norm": 11.47937297821045, "learning_rate": 4.1295528586768245e-07, "loss": 3.5942, "step": 80080 }, { "epoch": 0.814666748046875, "grad_norm": 11.948128700256348, "learning_rate": 4.1273516763031736e-07, "loss": 3.3358, "step": 80085 }, { "epoch": 0.8147176106770834, "grad_norm": 14.301944732666016, "learning_rate": 4.125151027951288e-07, "loss": 3.3043, "step": 80090 }, { "epoch": 0.8147684733072916, "grad_norm": 10.94615364074707, "learning_rate": 4.122950913677459e-07, "loss": 3.3121, "step": 80095 }, { "epoch": 0.8148193359375, "grad_norm": 9.940673828125, "learning_rate": 4.1207513335379893e-07, "loss": 3.0426, "step": 80100 }, { "epoch": 0.8148701985677084, "grad_norm": 11.303471565246582, "learning_rate": 4.118552287589142e-07, "loss": 3.0874, "step": 80105 }, { "epoch": 0.8149210611979166, "grad_norm": 15.60290813446045, "learning_rate": 4.1163537758871914e-07, "loss": 3.4599, "step": 80110 }, { "epoch": 0.814971923828125, "grad_norm": 9.669926643371582, "learning_rate": 4.114155798488373e-07, "loss": 3.395, "step": 80115 }, { "epoch": 0.8150227864583334, "grad_norm": 12.754854202270508, "learning_rate": 4.1119583554489334e-07, "loss": 3.4149, "step": 80120 }, { "epoch": 0.8150736490885416, "grad_norm": 8.297149658203125, "learning_rate": 4.109761446825089e-07, "loss": 3.0322, "step": 80125 }, { "epoch": 0.81512451171875, "grad_norm": 8.285845756530762, "learning_rate": 4.107565072673042e-07, "loss": 3.4565, "step": 80130 }, { "epoch": 0.8151753743489584, "grad_norm": 12.29454231262207, "learning_rate": 4.105369233048989e-07, "loss": 3.6493, "step": 80135 }, { "epoch": 0.8152262369791666, "grad_norm": 12.848197937011719, "learning_rate": 4.103173928009116e-07, "loss": 3.1917, "step": 80140 }, { "epoch": 0.815277099609375, "grad_norm": 14.257293701171875, "learning_rate": 4.1009791576095884e-07, "loss": 3.3103, "step": 80145 }, { "epoch": 0.8153279622395834, "grad_norm": 7.8373847007751465, "learning_rate": 4.098784921906554e-07, "loss": 3.8208, "step": 80150 }, { "epoch": 0.8153788248697916, "grad_norm": 14.487406730651855, "learning_rate": 4.096591220956145e-07, "loss": 3.3984, "step": 80155 }, { "epoch": 0.8154296875, "grad_norm": 16.87422752380371, "learning_rate": 4.094398054814497e-07, "loss": 3.8078, "step": 80160 }, { "epoch": 0.8154805501302084, "grad_norm": 13.352436065673828, "learning_rate": 4.0922054235377264e-07, "loss": 3.6581, "step": 80165 }, { "epoch": 0.8155314127604166, "grad_norm": 8.779556274414062, "learning_rate": 4.090013327181916e-07, "loss": 3.0287, "step": 80170 }, { "epoch": 0.815582275390625, "grad_norm": 12.838428497314453, "learning_rate": 4.0878217658031656e-07, "loss": 3.0189, "step": 80175 }, { "epoch": 0.8156331380208334, "grad_norm": 13.048858642578125, "learning_rate": 4.085630739457538e-07, "loss": 3.0629, "step": 80180 }, { "epoch": 0.8156840006510416, "grad_norm": 12.09554386138916, "learning_rate": 4.083440248201087e-07, "loss": 3.3387, "step": 80185 }, { "epoch": 0.81573486328125, "grad_norm": 9.71069049835205, "learning_rate": 4.0812502920898585e-07, "loss": 3.4874, "step": 80190 }, { "epoch": 0.8157857259114584, "grad_norm": 11.654038429260254, "learning_rate": 4.079060871179888e-07, "loss": 3.252, "step": 80195 }, { "epoch": 0.8158365885416666, "grad_norm": 10.531092643737793, "learning_rate": 4.0768719855271866e-07, "loss": 3.2504, "step": 80200 }, { "epoch": 0.815887451171875, "grad_norm": 13.387083053588867, "learning_rate": 4.074683635187757e-07, "loss": 3.0339, "step": 80205 }, { "epoch": 0.8159383138020834, "grad_norm": 10.929668426513672, "learning_rate": 4.0724958202175786e-07, "loss": 3.2993, "step": 80210 }, { "epoch": 0.8159891764322916, "grad_norm": 13.87834358215332, "learning_rate": 4.070308540672635e-07, "loss": 3.1624, "step": 80215 }, { "epoch": 0.8160400390625, "grad_norm": 9.980636596679688, "learning_rate": 4.0681217966088956e-07, "loss": 3.4164, "step": 80220 }, { "epoch": 0.8160909016927084, "grad_norm": 15.008100509643555, "learning_rate": 4.0659355880822955e-07, "loss": 3.0908, "step": 80225 }, { "epoch": 0.8161417643229166, "grad_norm": 8.804240226745605, "learning_rate": 4.0637499151487655e-07, "loss": 3.4293, "step": 80230 }, { "epoch": 0.816192626953125, "grad_norm": 10.993620872497559, "learning_rate": 4.0615647778642383e-07, "loss": 3.4002, "step": 80235 }, { "epoch": 0.8162434895833334, "grad_norm": 11.344161987304688, "learning_rate": 4.059380176284608e-07, "loss": 3.3614, "step": 80240 }, { "epoch": 0.8162943522135416, "grad_norm": 12.82261848449707, "learning_rate": 4.0571961104657773e-07, "loss": 3.1997, "step": 80245 }, { "epoch": 0.81634521484375, "grad_norm": 16.50777816772461, "learning_rate": 4.055012580463613e-07, "loss": 3.7641, "step": 80250 }, { "epoch": 0.8163960774739584, "grad_norm": 8.51815414428711, "learning_rate": 4.052829586333995e-07, "loss": 3.4647, "step": 80255 }, { "epoch": 0.8164469401041666, "grad_norm": 15.575868606567383, "learning_rate": 4.050647128132762e-07, "loss": 3.4649, "step": 80260 }, { "epoch": 0.816497802734375, "grad_norm": 7.848937034606934, "learning_rate": 4.0484652059157554e-07, "loss": 3.2815, "step": 80265 }, { "epoch": 0.8165486653645834, "grad_norm": 15.685480117797852, "learning_rate": 4.0462838197388057e-07, "loss": 3.3155, "step": 80270 }, { "epoch": 0.8165995279947916, "grad_norm": 14.504178047180176, "learning_rate": 4.044102969657718e-07, "loss": 3.155, "step": 80275 }, { "epoch": 0.816650390625, "grad_norm": 14.9198637008667, "learning_rate": 4.0419226557282893e-07, "loss": 3.355, "step": 80280 }, { "epoch": 0.8167012532552084, "grad_norm": 11.11740779876709, "learning_rate": 4.0397428780062975e-07, "loss": 3.098, "step": 80285 }, { "epoch": 0.8167521158854166, "grad_norm": 10.391766548156738, "learning_rate": 4.0375636365475144e-07, "loss": 3.2861, "step": 80290 }, { "epoch": 0.816802978515625, "grad_norm": 11.091429710388184, "learning_rate": 4.0353849314077016e-07, "loss": 3.2244, "step": 80295 }, { "epoch": 0.8168538411458334, "grad_norm": 12.310245513916016, "learning_rate": 4.0332067626425967e-07, "loss": 3.8048, "step": 80300 }, { "epoch": 0.8169047037760416, "grad_norm": 7.162967681884766, "learning_rate": 4.031029130307923e-07, "loss": 3.0874, "step": 80305 }, { "epoch": 0.81695556640625, "grad_norm": 13.751084327697754, "learning_rate": 4.0288520344594045e-07, "loss": 3.2759, "step": 80310 }, { "epoch": 0.8170064290364584, "grad_norm": 10.215845108032227, "learning_rate": 4.0266754751527305e-07, "loss": 2.9698, "step": 80315 }, { "epoch": 0.8170572916666666, "grad_norm": 10.31053638458252, "learning_rate": 4.0244994524435976e-07, "loss": 2.9662, "step": 80320 }, { "epoch": 0.817108154296875, "grad_norm": 8.255980491638184, "learning_rate": 4.0223239663876674e-07, "loss": 3.9405, "step": 80325 }, { "epoch": 0.8171590169270834, "grad_norm": 11.991215705871582, "learning_rate": 4.0201490170406144e-07, "loss": 3.0397, "step": 80330 }, { "epoch": 0.8172098795572916, "grad_norm": 14.226103782653809, "learning_rate": 4.017974604458075e-07, "loss": 3.1728, "step": 80335 }, { "epoch": 0.8172607421875, "grad_norm": 9.138517379760742, "learning_rate": 4.0158007286956765e-07, "loss": 2.9813, "step": 80340 }, { "epoch": 0.8173116048177084, "grad_norm": 9.908602714538574, "learning_rate": 4.013627389809044e-07, "loss": 3.0246, "step": 80345 }, { "epoch": 0.8173624674479166, "grad_norm": 16.149280548095703, "learning_rate": 4.011454587853783e-07, "loss": 3.2871, "step": 80350 }, { "epoch": 0.817413330078125, "grad_norm": 13.814882278442383, "learning_rate": 4.0092823228854857e-07, "loss": 3.459, "step": 80355 }, { "epoch": 0.8174641927083334, "grad_norm": 9.240151405334473, "learning_rate": 4.007110594959715e-07, "loss": 3.6859, "step": 80360 }, { "epoch": 0.8175150553385416, "grad_norm": 14.212241172790527, "learning_rate": 4.0049394041320523e-07, "loss": 3.6496, "step": 80365 }, { "epoch": 0.81756591796875, "grad_norm": 15.584053993225098, "learning_rate": 4.0027687504580333e-07, "loss": 3.3167, "step": 80370 }, { "epoch": 0.8176167805989584, "grad_norm": 9.203264236450195, "learning_rate": 4.000598633993202e-07, "loss": 3.4071, "step": 80375 }, { "epoch": 0.8176676432291666, "grad_norm": 11.013337135314941, "learning_rate": 3.9984290547930737e-07, "loss": 3.8117, "step": 80380 }, { "epoch": 0.817718505859375, "grad_norm": 8.93172550201416, "learning_rate": 3.9962600129131643e-07, "loss": 3.1961, "step": 80385 }, { "epoch": 0.8177693684895834, "grad_norm": 14.087814331054688, "learning_rate": 3.994091508408965e-07, "loss": 3.1039, "step": 80390 }, { "epoch": 0.8178202311197916, "grad_norm": 14.619377136230469, "learning_rate": 3.9919235413359486e-07, "loss": 3.6241, "step": 80395 }, { "epoch": 0.81787109375, "grad_norm": 14.753565788269043, "learning_rate": 3.9897561117495905e-07, "loss": 3.3395, "step": 80400 }, { "epoch": 0.8179219563802084, "grad_norm": 18.728551864624023, "learning_rate": 3.9875892197053486e-07, "loss": 3.2673, "step": 80405 }, { "epoch": 0.8179728190104166, "grad_norm": 11.525137901306152, "learning_rate": 3.9854228652586537e-07, "loss": 3.6199, "step": 80410 }, { "epoch": 0.818023681640625, "grad_norm": 10.524356842041016, "learning_rate": 3.983257048464928e-07, "loss": 3.6003, "step": 80415 }, { "epoch": 0.8180745442708334, "grad_norm": 16.000789642333984, "learning_rate": 3.9810917693795917e-07, "loss": 3.3061, "step": 80420 }, { "epoch": 0.8181254069010416, "grad_norm": 13.808717727661133, "learning_rate": 3.978927028058044e-07, "loss": 3.1418, "step": 80425 }, { "epoch": 0.81817626953125, "grad_norm": 14.330815315246582, "learning_rate": 3.976762824555666e-07, "loss": 3.0111, "step": 80430 }, { "epoch": 0.8182271321614584, "grad_norm": 15.074409484863281, "learning_rate": 3.97459915892783e-07, "loss": 3.6294, "step": 80435 }, { "epoch": 0.8182779947916666, "grad_norm": 11.594634056091309, "learning_rate": 3.972436031229884e-07, "loss": 3.4432, "step": 80440 }, { "epoch": 0.818328857421875, "grad_norm": 11.404607772827148, "learning_rate": 3.970273441517178e-07, "loss": 3.7119, "step": 80445 }, { "epoch": 0.8183797200520834, "grad_norm": 10.453607559204102, "learning_rate": 3.968111389845047e-07, "loss": 3.321, "step": 80450 }, { "epoch": 0.8184305826822916, "grad_norm": 12.711167335510254, "learning_rate": 3.9659498762688016e-07, "loss": 3.3319, "step": 80455 }, { "epoch": 0.8184814453125, "grad_norm": 11.754729270935059, "learning_rate": 3.9637889008437375e-07, "loss": 3.0155, "step": 80460 }, { "epoch": 0.8185323079427084, "grad_norm": 10.359018325805664, "learning_rate": 3.961628463625153e-07, "loss": 3.2291, "step": 80465 }, { "epoch": 0.8185831705729166, "grad_norm": 12.879046440124512, "learning_rate": 3.9594685646683117e-07, "loss": 3.2591, "step": 80470 }, { "epoch": 0.818634033203125, "grad_norm": 11.944828033447266, "learning_rate": 3.957309204028481e-07, "loss": 3.0912, "step": 80475 }, { "epoch": 0.8186848958333334, "grad_norm": 11.149519920349121, "learning_rate": 3.955150381760911e-07, "loss": 3.2856, "step": 80480 }, { "epoch": 0.8187357584635416, "grad_norm": 9.422290802001953, "learning_rate": 3.95299209792083e-07, "loss": 3.38, "step": 80485 }, { "epoch": 0.81878662109375, "grad_norm": 10.456183433532715, "learning_rate": 3.95083435256346e-07, "loss": 3.3319, "step": 80490 }, { "epoch": 0.8188374837239584, "grad_norm": 13.086915969848633, "learning_rate": 3.948677145743995e-07, "loss": 3.7862, "step": 80495 }, { "epoch": 0.8188883463541666, "grad_norm": 10.651822090148926, "learning_rate": 3.9465204775176373e-07, "loss": 3.6872, "step": 80500 }, { "epoch": 0.818939208984375, "grad_norm": 9.868151664733887, "learning_rate": 3.944364347939569e-07, "loss": 3.0285, "step": 80505 }, { "epoch": 0.8189900716145834, "grad_norm": 11.998753547668457, "learning_rate": 3.94220875706495e-07, "loss": 3.2442, "step": 80510 }, { "epoch": 0.8190409342447916, "grad_norm": 11.459403991699219, "learning_rate": 3.940053704948921e-07, "loss": 3.3359, "step": 80515 }, { "epoch": 0.819091796875, "grad_norm": 14.966922760009766, "learning_rate": 3.9378991916466295e-07, "loss": 3.3292, "step": 80520 }, { "epoch": 0.8191426595052084, "grad_norm": 10.536090850830078, "learning_rate": 3.9357452172131937e-07, "loss": 2.9818, "step": 80525 }, { "epoch": 0.8191935221354166, "grad_norm": 8.744247436523438, "learning_rate": 3.9335917817037274e-07, "loss": 3.3249, "step": 80530 }, { "epoch": 0.819244384765625, "grad_norm": 11.743062973022461, "learning_rate": 3.9314388851733136e-07, "loss": 3.2735, "step": 80535 }, { "epoch": 0.8192952473958334, "grad_norm": 13.098162651062012, "learning_rate": 3.929286527677051e-07, "loss": 3.361, "step": 80540 }, { "epoch": 0.8193461100260416, "grad_norm": 15.775094985961914, "learning_rate": 3.9271347092699975e-07, "loss": 3.0978, "step": 80545 }, { "epoch": 0.81939697265625, "grad_norm": 10.93581485748291, "learning_rate": 3.924983430007204e-07, "loss": 3.2549, "step": 80550 }, { "epoch": 0.8194478352864584, "grad_norm": 7.797806739807129, "learning_rate": 3.922832689943712e-07, "loss": 3.5114, "step": 80555 }, { "epoch": 0.8194986979166666, "grad_norm": 14.676947593688965, "learning_rate": 3.9206824891345556e-07, "loss": 3.2431, "step": 80560 }, { "epoch": 0.819549560546875, "grad_norm": 13.802629470825195, "learning_rate": 3.918532827634741e-07, "loss": 3.2136, "step": 80565 }, { "epoch": 0.8196004231770834, "grad_norm": 16.6387939453125, "learning_rate": 3.9163837054992634e-07, "loss": 3.5163, "step": 80570 }, { "epoch": 0.8196512858072916, "grad_norm": 14.772869110107422, "learning_rate": 3.914235122783108e-07, "loss": 3.9268, "step": 80575 }, { "epoch": 0.8197021484375, "grad_norm": 9.712909698486328, "learning_rate": 3.9120870795412575e-07, "loss": 3.4206, "step": 80580 }, { "epoch": 0.8197530110677084, "grad_norm": 7.7602691650390625, "learning_rate": 3.9099395758286586e-07, "loss": 3.282, "step": 80585 }, { "epoch": 0.8198038736979166, "grad_norm": 12.946903228759766, "learning_rate": 3.907792611700253e-07, "loss": 3.226, "step": 80590 }, { "epoch": 0.819854736328125, "grad_norm": 15.746535301208496, "learning_rate": 3.9056461872109794e-07, "loss": 3.3232, "step": 80595 }, { "epoch": 0.8199055989583334, "grad_norm": 11.906881332397461, "learning_rate": 3.903500302415741e-07, "loss": 3.2913, "step": 80600 }, { "epoch": 0.8199564615885416, "grad_norm": 10.969463348388672, "learning_rate": 3.901354957369455e-07, "loss": 3.8135, "step": 80605 }, { "epoch": 0.82000732421875, "grad_norm": 10.680028915405273, "learning_rate": 3.8992101521269947e-07, "loss": 3.3671, "step": 80610 }, { "epoch": 0.8200581868489584, "grad_norm": 14.710232734680176, "learning_rate": 3.8970658867432454e-07, "loss": 3.5764, "step": 80615 }, { "epoch": 0.8201090494791666, "grad_norm": 9.277506828308105, "learning_rate": 3.8949221612730653e-07, "loss": 3.781, "step": 80620 }, { "epoch": 0.820159912109375, "grad_norm": 9.739691734313965, "learning_rate": 3.892778975771294e-07, "loss": 3.6042, "step": 80625 }, { "epoch": 0.8202107747395834, "grad_norm": 10.505010604858398, "learning_rate": 3.8906363302927686e-07, "loss": 3.2156, "step": 80630 }, { "epoch": 0.8202616373697916, "grad_norm": 9.160713195800781, "learning_rate": 3.888494224892314e-07, "loss": 3.11, "step": 80635 }, { "epoch": 0.8203125, "grad_norm": 10.293058395385742, "learning_rate": 3.886352659624734e-07, "loss": 3.5203, "step": 80640 }, { "epoch": 0.8203633626302084, "grad_norm": 8.177082061767578, "learning_rate": 3.8842116345448126e-07, "loss": 3.4225, "step": 80645 }, { "epoch": 0.8204142252604166, "grad_norm": 16.60538101196289, "learning_rate": 3.882071149707328e-07, "loss": 3.0571, "step": 80650 }, { "epoch": 0.820465087890625, "grad_norm": 9.453953742980957, "learning_rate": 3.8799312051670477e-07, "loss": 3.1622, "step": 80655 }, { "epoch": 0.8205159505208334, "grad_norm": 11.494791984558105, "learning_rate": 3.877791800978728e-07, "loss": 3.3414, "step": 80660 }, { "epoch": 0.8205668131510416, "grad_norm": 12.629426002502441, "learning_rate": 3.875652937197091e-07, "loss": 3.4787, "step": 80665 }, { "epoch": 0.82061767578125, "grad_norm": 13.683670997619629, "learning_rate": 3.873514613876872e-07, "loss": 3.5224, "step": 80670 }, { "epoch": 0.8206685384114584, "grad_norm": 12.38894271850586, "learning_rate": 3.871376831072776e-07, "loss": 3.3603, "step": 80675 }, { "epoch": 0.8207194010416666, "grad_norm": 10.823607444763184, "learning_rate": 3.869239588839488e-07, "loss": 3.3912, "step": 80680 }, { "epoch": 0.820770263671875, "grad_norm": 12.35561466217041, "learning_rate": 3.8671028872317e-07, "loss": 3.4972, "step": 80685 }, { "epoch": 0.8208211263020834, "grad_norm": 10.105222702026367, "learning_rate": 3.8649667263040784e-07, "loss": 3.5993, "step": 80690 }, { "epoch": 0.8208719889322916, "grad_norm": 15.339810371398926, "learning_rate": 3.862831106111273e-07, "loss": 3.2245, "step": 80695 }, { "epoch": 0.8209228515625, "grad_norm": 10.440420150756836, "learning_rate": 3.8606960267079244e-07, "loss": 3.1923, "step": 80700 }, { "epoch": 0.8209737141927084, "grad_norm": 15.502533912658691, "learning_rate": 3.858561488148646e-07, "loss": 3.3615, "step": 80705 }, { "epoch": 0.8210245768229166, "grad_norm": 17.10973358154297, "learning_rate": 3.856427490488074e-07, "loss": 3.0419, "step": 80710 }, { "epoch": 0.821075439453125, "grad_norm": 7.994612216949463, "learning_rate": 3.8542940337807923e-07, "loss": 3.5715, "step": 80715 }, { "epoch": 0.8211263020833334, "grad_norm": 13.796808242797852, "learning_rate": 3.852161118081385e-07, "loss": 3.35, "step": 80720 }, { "epoch": 0.8211771647135416, "grad_norm": 14.857696533203125, "learning_rate": 3.8500287434444173e-07, "loss": 3.1631, "step": 80725 }, { "epoch": 0.82122802734375, "grad_norm": 10.685078620910645, "learning_rate": 3.847896909924451e-07, "loss": 3.1853, "step": 80730 }, { "epoch": 0.8212788899739584, "grad_norm": 7.9912285804748535, "learning_rate": 3.8457656175760335e-07, "loss": 3.4768, "step": 80735 }, { "epoch": 0.8213297526041666, "grad_norm": 11.736037254333496, "learning_rate": 3.843634866453691e-07, "loss": 3.1898, "step": 80740 }, { "epoch": 0.821380615234375, "grad_norm": 9.419626235961914, "learning_rate": 3.8415046566119276e-07, "loss": 3.0104, "step": 80745 }, { "epoch": 0.8214314778645834, "grad_norm": 12.664139747619629, "learning_rate": 3.83937498810526e-07, "loss": 3.0973, "step": 80750 }, { "epoch": 0.8214823404947916, "grad_norm": 17.065969467163086, "learning_rate": 3.8372458609881593e-07, "loss": 3.1934, "step": 80755 }, { "epoch": 0.821533203125, "grad_norm": 11.615386962890625, "learning_rate": 3.835117275315109e-07, "loss": 3.2291, "step": 80760 }, { "epoch": 0.8215840657552084, "grad_norm": 8.41677188873291, "learning_rate": 3.8329892311405744e-07, "loss": 3.2941, "step": 80765 }, { "epoch": 0.8216349283854166, "grad_norm": 9.951393127441406, "learning_rate": 3.830861728518989e-07, "loss": 3.3313, "step": 80770 }, { "epoch": 0.821685791015625, "grad_norm": 13.11707878112793, "learning_rate": 3.828734767504791e-07, "loss": 3.159, "step": 80775 }, { "epoch": 0.8217366536458334, "grad_norm": 12.716404914855957, "learning_rate": 3.826608348152391e-07, "loss": 3.4763, "step": 80780 }, { "epoch": 0.8217875162760416, "grad_norm": 10.998289108276367, "learning_rate": 3.824482470516197e-07, "loss": 2.9195, "step": 80785 }, { "epoch": 0.82183837890625, "grad_norm": 9.000075340270996, "learning_rate": 3.8223571346506034e-07, "loss": 3.261, "step": 80790 }, { "epoch": 0.8218892415364584, "grad_norm": 10.649809837341309, "learning_rate": 3.820232340609986e-07, "loss": 3.6669, "step": 80795 }, { "epoch": 0.8219401041666666, "grad_norm": 16.897676467895508, "learning_rate": 3.8181080884486965e-07, "loss": 3.5245, "step": 80800 }, { "epoch": 0.821990966796875, "grad_norm": 15.520227432250977, "learning_rate": 3.8159843782210966e-07, "loss": 3.2351, "step": 80805 }, { "epoch": 0.8220418294270834, "grad_norm": 12.184919357299805, "learning_rate": 3.813861209981512e-07, "loss": 3.4723, "step": 80810 }, { "epoch": 0.8220926920572916, "grad_norm": 9.80728530883789, "learning_rate": 3.8117385837842693e-07, "loss": 3.0831, "step": 80815 }, { "epoch": 0.8221435546875, "grad_norm": 9.381410598754883, "learning_rate": 3.8096164996836686e-07, "loss": 3.2771, "step": 80820 }, { "epoch": 0.8221944173177084, "grad_norm": 12.89774227142334, "learning_rate": 3.807494957734012e-07, "loss": 3.1174, "step": 80825 }, { "epoch": 0.8222452799479166, "grad_norm": 8.189101219177246, "learning_rate": 3.805373957989575e-07, "loss": 3.1917, "step": 80830 }, { "epoch": 0.822296142578125, "grad_norm": 18.067596435546875, "learning_rate": 3.803253500504614e-07, "loss": 3.3046, "step": 80835 }, { "epoch": 0.8223470052083334, "grad_norm": 13.102996826171875, "learning_rate": 3.801133585333389e-07, "loss": 3.4559, "step": 80840 }, { "epoch": 0.8223978678385416, "grad_norm": 12.395398139953613, "learning_rate": 3.7990142125301417e-07, "loss": 3.0941, "step": 80845 }, { "epoch": 0.82244873046875, "grad_norm": 13.059562683105469, "learning_rate": 3.7968953821490904e-07, "loss": 3.542, "step": 80850 }, { "epoch": 0.8224995930989584, "grad_norm": 13.447836875915527, "learning_rate": 3.7947770942444425e-07, "loss": 3.4232, "step": 80855 }, { "epoch": 0.8225504557291666, "grad_norm": 13.841771125793457, "learning_rate": 3.792659348870392e-07, "loss": 3.1639, "step": 80860 }, { "epoch": 0.822601318359375, "grad_norm": 9.623292922973633, "learning_rate": 3.790542146081133e-07, "loss": 2.8842, "step": 80865 }, { "epoch": 0.8226521809895834, "grad_norm": 16.11116600036621, "learning_rate": 3.7884254859308273e-07, "loss": 3.4362, "step": 80870 }, { "epoch": 0.8227030436197916, "grad_norm": 9.370688438415527, "learning_rate": 3.78630936847362e-07, "loss": 3.2373, "step": 80875 }, { "epoch": 0.82275390625, "grad_norm": 17.88738250732422, "learning_rate": 3.784193793763666e-07, "loss": 3.5616, "step": 80880 }, { "epoch": 0.8228047688802084, "grad_norm": 13.496973991394043, "learning_rate": 3.782078761855079e-07, "loss": 3.4115, "step": 80885 }, { "epoch": 0.8228556315104166, "grad_norm": 9.512796401977539, "learning_rate": 3.7799642728019837e-07, "loss": 3.0619, "step": 80890 }, { "epoch": 0.822906494140625, "grad_norm": 10.060030937194824, "learning_rate": 3.7778503266584665e-07, "loss": 3.4031, "step": 80895 }, { "epoch": 0.8229573567708334, "grad_norm": 13.565192222595215, "learning_rate": 3.775736923478626e-07, "loss": 3.1807, "step": 80900 }, { "epoch": 0.8230082194010416, "grad_norm": 13.113893508911133, "learning_rate": 3.773624063316525e-07, "loss": 3.4714, "step": 80905 }, { "epoch": 0.82305908203125, "grad_norm": 14.605281829833984, "learning_rate": 3.771511746226214e-07, "loss": 3.3566, "step": 80910 }, { "epoch": 0.8231099446614584, "grad_norm": 8.636752128601074, "learning_rate": 3.7693999722617445e-07, "loss": 3.2208, "step": 80915 }, { "epoch": 0.8231608072916666, "grad_norm": 9.907641410827637, "learning_rate": 3.7672887414771514e-07, "loss": 3.4945, "step": 80920 }, { "epoch": 0.823211669921875, "grad_norm": 13.11463451385498, "learning_rate": 3.765178053926441e-07, "loss": 3.6002, "step": 80925 }, { "epoch": 0.8232625325520834, "grad_norm": 14.705850601196289, "learning_rate": 3.7630679096636175e-07, "loss": 3.4234, "step": 80930 }, { "epoch": 0.8233133951822916, "grad_norm": 9.67563533782959, "learning_rate": 3.760958308742663e-07, "loss": 3.1482, "step": 80935 }, { "epoch": 0.8233642578125, "grad_norm": 13.453156471252441, "learning_rate": 3.758849251217553e-07, "loss": 3.4172, "step": 80940 }, { "epoch": 0.8234151204427084, "grad_norm": 13.665534973144531, "learning_rate": 3.756740737142259e-07, "loss": 3.4066, "step": 80945 }, { "epoch": 0.8234659830729166, "grad_norm": 17.34937858581543, "learning_rate": 3.7546327665707175e-07, "loss": 3.3283, "step": 80950 }, { "epoch": 0.823516845703125, "grad_norm": 11.575714111328125, "learning_rate": 3.7525253395568536e-07, "loss": 3.3866, "step": 80955 }, { "epoch": 0.8235677083333334, "grad_norm": 8.186203956604004, "learning_rate": 3.7504184561546004e-07, "loss": 3.1128, "step": 80960 }, { "epoch": 0.8236185709635416, "grad_norm": 11.50184440612793, "learning_rate": 3.748312116417846e-07, "loss": 3.1947, "step": 80965 }, { "epoch": 0.82366943359375, "grad_norm": 9.66376781463623, "learning_rate": 3.7462063204004917e-07, "loss": 3.4334, "step": 80970 }, { "epoch": 0.8237202962239584, "grad_norm": 15.963284492492676, "learning_rate": 3.744101068156417e-07, "loss": 3.4715, "step": 80975 }, { "epoch": 0.8237711588541666, "grad_norm": 8.475472450256348, "learning_rate": 3.741996359739475e-07, "loss": 3.3516, "step": 80980 }, { "epoch": 0.823822021484375, "grad_norm": 14.08765983581543, "learning_rate": 3.7398921952035203e-07, "loss": 3.1434, "step": 80985 }, { "epoch": 0.8238728841145834, "grad_norm": 10.457538604736328, "learning_rate": 3.737788574602377e-07, "loss": 3.2368, "step": 80990 }, { "epoch": 0.8239237467447916, "grad_norm": 9.279627799987793, "learning_rate": 3.7356854979898745e-07, "loss": 3.2903, "step": 80995 }, { "epoch": 0.823974609375, "grad_norm": 8.412238121032715, "learning_rate": 3.733582965419824e-07, "loss": 3.2409, "step": 81000 }, { "epoch": 0.8240254720052084, "grad_norm": 12.581768989562988, "learning_rate": 3.731480976946011e-07, "loss": 3.2264, "step": 81005 }, { "epoch": 0.8240763346354166, "grad_norm": 9.827341079711914, "learning_rate": 3.7293795326222127e-07, "loss": 3.1994, "step": 81010 }, { "epoch": 0.824127197265625, "grad_norm": 10.232128143310547, "learning_rate": 3.727278632502196e-07, "loss": 3.208, "step": 81015 }, { "epoch": 0.8241780598958334, "grad_norm": 27.526899337768555, "learning_rate": 3.725178276639721e-07, "loss": 3.2577, "step": 81020 }, { "epoch": 0.8242289225260416, "grad_norm": 12.687592506408691, "learning_rate": 3.7230784650885144e-07, "loss": 3.5804, "step": 81025 }, { "epoch": 0.82427978515625, "grad_norm": 17.577566146850586, "learning_rate": 3.7209791979022967e-07, "loss": 3.3725, "step": 81030 }, { "epoch": 0.8243306477864584, "grad_norm": 8.21080493927002, "learning_rate": 3.7188804751347893e-07, "loss": 3.6424, "step": 81035 }, { "epoch": 0.8243815104166666, "grad_norm": 13.241853713989258, "learning_rate": 3.716782296839677e-07, "loss": 3.5274, "step": 81040 }, { "epoch": 0.824432373046875, "grad_norm": 9.96329116821289, "learning_rate": 3.714684663070647e-07, "loss": 3.1033, "step": 81045 }, { "epoch": 0.8244832356770834, "grad_norm": 9.271282196044922, "learning_rate": 3.71258757388136e-07, "loss": 3.7882, "step": 81050 }, { "epoch": 0.8245340983072916, "grad_norm": 13.86997127532959, "learning_rate": 3.71049102932548e-07, "loss": 3.495, "step": 81055 }, { "epoch": 0.8245849609375, "grad_norm": 8.317121505737305, "learning_rate": 3.708395029456643e-07, "loss": 3.3285, "step": 81060 }, { "epoch": 0.8246358235677084, "grad_norm": 15.157517433166504, "learning_rate": 3.7062995743284647e-07, "loss": 3.3673, "step": 81065 }, { "epoch": 0.8246866861979166, "grad_norm": 9.469864845275879, "learning_rate": 3.704204663994565e-07, "loss": 3.3481, "step": 81070 }, { "epoch": 0.824737548828125, "grad_norm": 11.107138633728027, "learning_rate": 3.702110298508546e-07, "loss": 3.301, "step": 81075 }, { "epoch": 0.8247884114583334, "grad_norm": 11.196867942810059, "learning_rate": 3.70001647792399e-07, "loss": 3.3571, "step": 81080 }, { "epoch": 0.8248392740885416, "grad_norm": 17.41468620300293, "learning_rate": 3.697923202294457e-07, "loss": 3.4212, "step": 81085 }, { "epoch": 0.82489013671875, "grad_norm": 14.888216018676758, "learning_rate": 3.6958304716735115e-07, "loss": 3.0231, "step": 81090 }, { "epoch": 0.8249409993489584, "grad_norm": 15.072734832763672, "learning_rate": 3.6937382861146927e-07, "loss": 3.4232, "step": 81095 }, { "epoch": 0.8249918619791666, "grad_norm": 17.388551712036133, "learning_rate": 3.6916466456715344e-07, "loss": 3.3821, "step": 81100 }, { "epoch": 0.825042724609375, "grad_norm": 12.327610969543457, "learning_rate": 3.6895555503975386e-07, "loss": 3.184, "step": 81105 }, { "epoch": 0.8250935872395834, "grad_norm": 15.96401596069336, "learning_rate": 3.6874650003462186e-07, "loss": 3.3028, "step": 81110 }, { "epoch": 0.8251444498697916, "grad_norm": 13.771220207214355, "learning_rate": 3.685374995571056e-07, "loss": 3.6253, "step": 81115 }, { "epoch": 0.8251953125, "grad_norm": 7.984499931335449, "learning_rate": 3.6832855361255163e-07, "loss": 3.4847, "step": 81120 }, { "epoch": 0.8252461751302084, "grad_norm": 14.82887077331543, "learning_rate": 3.681196622063063e-07, "loss": 3.193, "step": 81125 }, { "epoch": 0.8252970377604166, "grad_norm": 9.447796821594238, "learning_rate": 3.6791082534371495e-07, "loss": 3.6481, "step": 81130 }, { "epoch": 0.825347900390625, "grad_norm": 9.615379333496094, "learning_rate": 3.6770204303011953e-07, "loss": 3.0771, "step": 81135 }, { "epoch": 0.8253987630208334, "grad_norm": 10.152069091796875, "learning_rate": 3.674933152708618e-07, "loss": 3.3728, "step": 81140 }, { "epoch": 0.8254496256510416, "grad_norm": 7.463944911956787, "learning_rate": 3.672846420712817e-07, "loss": 3.2594, "step": 81145 }, { "epoch": 0.82550048828125, "grad_norm": 20.239410400390625, "learning_rate": 3.670760234367185e-07, "loss": 3.9383, "step": 81150 }, { "epoch": 0.8255513509114584, "grad_norm": 16.32979393005371, "learning_rate": 3.6686745937251053e-07, "loss": 3.2126, "step": 81155 }, { "epoch": 0.8256022135416666, "grad_norm": 10.634185791015625, "learning_rate": 3.6665894988399204e-07, "loss": 3.1509, "step": 81160 }, { "epoch": 0.825653076171875, "grad_norm": 8.721284866333008, "learning_rate": 3.664504949764994e-07, "loss": 3.1079, "step": 81165 }, { "epoch": 0.8257039388020834, "grad_norm": 7.4027276039123535, "learning_rate": 3.6624209465536464e-07, "loss": 3.3191, "step": 81170 }, { "epoch": 0.8257548014322916, "grad_norm": 8.177949905395508, "learning_rate": 3.660337489259205e-07, "loss": 3.2868, "step": 81175 }, { "epoch": 0.8258056640625, "grad_norm": 13.622292518615723, "learning_rate": 3.658254577934967e-07, "loss": 2.9148, "step": 81180 }, { "epoch": 0.8258565266927084, "grad_norm": 15.886625289916992, "learning_rate": 3.656172212634232e-07, "loss": 3.1127, "step": 81185 }, { "epoch": 0.8259073893229166, "grad_norm": 12.290112495422363, "learning_rate": 3.6540903934102727e-07, "loss": 3.1256, "step": 81190 }, { "epoch": 0.825958251953125, "grad_norm": 15.316094398498535, "learning_rate": 3.652009120316344e-07, "loss": 3.6454, "step": 81195 }, { "epoch": 0.8260091145833334, "grad_norm": 13.505681037902832, "learning_rate": 3.649928393405702e-07, "loss": 3.288, "step": 81200 }, { "epoch": 0.8260599772135416, "grad_norm": 10.823572158813477, "learning_rate": 3.647848212731589e-07, "loss": 3.7845, "step": 81205 }, { "epoch": 0.82611083984375, "grad_norm": 9.712890625, "learning_rate": 3.645768578347217e-07, "loss": 3.23, "step": 81210 }, { "epoch": 0.8261617024739584, "grad_norm": 12.620218276977539, "learning_rate": 3.643689490305796e-07, "loss": 3.3941, "step": 81215 }, { "epoch": 0.8262125651041666, "grad_norm": 14.910798072814941, "learning_rate": 3.6416109486605095e-07, "loss": 3.096, "step": 81220 }, { "epoch": 0.826263427734375, "grad_norm": 9.891182899475098, "learning_rate": 3.6395329534645444e-07, "loss": 3.4, "step": 81225 }, { "epoch": 0.8263142903645834, "grad_norm": 8.116037368774414, "learning_rate": 3.637455504771073e-07, "loss": 3.2759, "step": 81230 }, { "epoch": 0.8263651529947916, "grad_norm": 10.094236373901367, "learning_rate": 3.635378602633238e-07, "loss": 3.9303, "step": 81235 }, { "epoch": 0.826416015625, "grad_norm": 14.345175743103027, "learning_rate": 3.633302247104173e-07, "loss": 3.1749, "step": 81240 }, { "epoch": 0.8264668782552084, "grad_norm": 17.376415252685547, "learning_rate": 3.631226438237012e-07, "loss": 3.2547, "step": 81245 }, { "epoch": 0.8265177408854166, "grad_norm": 15.243380546569824, "learning_rate": 3.6291511760848496e-07, "loss": 3.6868, "step": 81250 }, { "epoch": 0.826568603515625, "grad_norm": 11.07956600189209, "learning_rate": 3.6270764607007904e-07, "loss": 3.4299, "step": 81255 }, { "epoch": 0.8266194661458334, "grad_norm": 10.591047286987305, "learning_rate": 3.625002292137919e-07, "loss": 3.2476, "step": 81260 }, { "epoch": 0.8266703287760416, "grad_norm": 19.205686569213867, "learning_rate": 3.6229286704492967e-07, "loss": 3.316, "step": 81265 }, { "epoch": 0.82672119140625, "grad_norm": 9.71511173248291, "learning_rate": 3.620855595687978e-07, "loss": 3.5159, "step": 81270 }, { "epoch": 0.8267720540364584, "grad_norm": 9.56340217590332, "learning_rate": 3.6187830679069976e-07, "loss": 3.3286, "step": 81275 }, { "epoch": 0.8268229166666666, "grad_norm": 16.750654220581055, "learning_rate": 3.616711087159383e-07, "loss": 3.3161, "step": 81280 }, { "epoch": 0.826873779296875, "grad_norm": 7.3144001960754395, "learning_rate": 3.61463965349815e-07, "loss": 3.5281, "step": 81285 }, { "epoch": 0.8269246419270834, "grad_norm": 9.85859203338623, "learning_rate": 3.612568766976296e-07, "loss": 3.8354, "step": 81290 }, { "epoch": 0.8269755045572916, "grad_norm": 10.094393730163574, "learning_rate": 3.610498427646794e-07, "loss": 3.3545, "step": 81295 }, { "epoch": 0.8270263671875, "grad_norm": 9.70475959777832, "learning_rate": 3.6084286355626274e-07, "loss": 3.2521, "step": 81300 }, { "epoch": 0.8270772298177084, "grad_norm": 15.505373001098633, "learning_rate": 3.606359390776734e-07, "loss": 3.2219, "step": 81305 }, { "epoch": 0.8271280924479166, "grad_norm": 7.791288375854492, "learning_rate": 3.6042906933420745e-07, "loss": 2.8279, "step": 81310 }, { "epoch": 0.827178955078125, "grad_norm": 14.234278678894043, "learning_rate": 3.6022225433115595e-07, "loss": 3.6315, "step": 81315 }, { "epoch": 0.8272298177083334, "grad_norm": 7.739132404327393, "learning_rate": 3.600154940738113e-07, "loss": 3.2901, "step": 81320 }, { "epoch": 0.8272806803385416, "grad_norm": 13.4205904006958, "learning_rate": 3.598087885674631e-07, "loss": 3.1889, "step": 81325 }, { "epoch": 0.82733154296875, "grad_norm": 13.76759147644043, "learning_rate": 3.5960213781739917e-07, "loss": 3.3293, "step": 81330 }, { "epoch": 0.8273824055989584, "grad_norm": 10.153120040893555, "learning_rate": 3.5939554182890716e-07, "loss": 3.208, "step": 81335 }, { "epoch": 0.8274332682291666, "grad_norm": 10.668922424316406, "learning_rate": 3.5918900060727376e-07, "loss": 3.1456, "step": 81340 }, { "epoch": 0.827484130859375, "grad_norm": 10.668487548828125, "learning_rate": 3.5898251415778187e-07, "loss": 3.4589, "step": 81345 }, { "epoch": 0.8275349934895834, "grad_norm": 13.005240440368652, "learning_rate": 3.587760824857145e-07, "loss": 2.8548, "step": 81350 }, { "epoch": 0.8275858561197916, "grad_norm": 13.068918228149414, "learning_rate": 3.5856970559635355e-07, "loss": 3.222, "step": 81355 }, { "epoch": 0.82763671875, "grad_norm": 9.316664695739746, "learning_rate": 3.5836338349497983e-07, "loss": 3.4893, "step": 81360 }, { "epoch": 0.8276875813802084, "grad_norm": 7.974001884460449, "learning_rate": 3.58157116186871e-07, "loss": 3.3725, "step": 81365 }, { "epoch": 0.8277384440104166, "grad_norm": 11.986618995666504, "learning_rate": 3.579509036773043e-07, "loss": 3.3193, "step": 81370 }, { "epoch": 0.827789306640625, "grad_norm": 14.938953399658203, "learning_rate": 3.577447459715566e-07, "loss": 3.2821, "step": 81375 }, { "epoch": 0.8278401692708334, "grad_norm": 16.224943161010742, "learning_rate": 3.575386430749012e-07, "loss": 3.3977, "step": 81380 }, { "epoch": 0.8278910319010416, "grad_norm": 11.836395263671875, "learning_rate": 3.5733259499261236e-07, "loss": 3.4964, "step": 81385 }, { "epoch": 0.82794189453125, "grad_norm": 8.097487449645996, "learning_rate": 3.5712660172996074e-07, "loss": 3.3076, "step": 81390 }, { "epoch": 0.8279927571614584, "grad_norm": 11.223922729492188, "learning_rate": 3.569206632922176e-07, "loss": 3.2277, "step": 81395 }, { "epoch": 0.8280436197916666, "grad_norm": 11.929049491882324, "learning_rate": 3.567147796846512e-07, "loss": 3.4273, "step": 81400 }, { "epoch": 0.828094482421875, "grad_norm": 8.872072219848633, "learning_rate": 3.5650895091252856e-07, "loss": 3.1472, "step": 81405 }, { "epoch": 0.8281453450520834, "grad_norm": 14.635814666748047, "learning_rate": 3.563031769811165e-07, "loss": 3.5105, "step": 81410 }, { "epoch": 0.8281962076822916, "grad_norm": 12.586482048034668, "learning_rate": 3.5609745789567996e-07, "loss": 3.1589, "step": 81415 }, { "epoch": 0.8282470703125, "grad_norm": 15.408994674682617, "learning_rate": 3.5589179366148195e-07, "loss": 3.3757, "step": 81420 }, { "epoch": 0.8282979329427084, "grad_norm": 8.373656272888184, "learning_rate": 3.5568618428378415e-07, "loss": 3.2097, "step": 81425 }, { "epoch": 0.8283487955729166, "grad_norm": 12.243661880493164, "learning_rate": 3.554806297678465e-07, "loss": 3.3773, "step": 81430 }, { "epoch": 0.828399658203125, "grad_norm": 15.660725593566895, "learning_rate": 3.552751301189286e-07, "loss": 3.6567, "step": 81435 }, { "epoch": 0.8284505208333334, "grad_norm": 15.802828788757324, "learning_rate": 3.5506968534228886e-07, "loss": 3.3132, "step": 81440 }, { "epoch": 0.8285013834635416, "grad_norm": 12.42456340789795, "learning_rate": 3.548642954431822e-07, "loss": 3.5158, "step": 81445 }, { "epoch": 0.82855224609375, "grad_norm": 14.236919403076172, "learning_rate": 3.546589604268644e-07, "loss": 3.9095, "step": 81450 }, { "epoch": 0.8286031087239584, "grad_norm": 11.435016632080078, "learning_rate": 3.544536802985887e-07, "loss": 3.2436, "step": 81455 }, { "epoch": 0.8286539713541666, "grad_norm": 10.88861083984375, "learning_rate": 3.542484550636066e-07, "loss": 3.4391, "step": 81460 }, { "epoch": 0.828704833984375, "grad_norm": 9.29944896697998, "learning_rate": 3.540432847271694e-07, "loss": 3.2777, "step": 81465 }, { "epoch": 0.8287556966145834, "grad_norm": 13.211068153381348, "learning_rate": 3.5383816929452614e-07, "loss": 3.3127, "step": 81470 }, { "epoch": 0.8288065592447916, "grad_norm": 11.630187034606934, "learning_rate": 3.5363310877092494e-07, "loss": 3.1737, "step": 81475 }, { "epoch": 0.828857421875, "grad_norm": 13.085993766784668, "learning_rate": 3.5342810316161183e-07, "loss": 3.4868, "step": 81480 }, { "epoch": 0.8289082845052084, "grad_norm": 8.661873817443848, "learning_rate": 3.5322315247183074e-07, "loss": 3.5383, "step": 81485 }, { "epoch": 0.8289591471354166, "grad_norm": 12.713909149169922, "learning_rate": 3.530182567068277e-07, "loss": 3.2469, "step": 81490 }, { "epoch": 0.829010009765625, "grad_norm": 12.60568618774414, "learning_rate": 3.528134158718438e-07, "loss": 3.2449, "step": 81495 }, { "epoch": 0.8290608723958334, "grad_norm": 15.496371269226074, "learning_rate": 3.526086299721193e-07, "loss": 3.4259, "step": 81500 }, { "epoch": 0.8291117350260416, "grad_norm": 8.93329906463623, "learning_rate": 3.524038990128939e-07, "loss": 3.0466, "step": 81505 }, { "epoch": 0.82916259765625, "grad_norm": 16.786598205566406, "learning_rate": 3.5219922299940536e-07, "loss": 3.6377, "step": 81510 }, { "epoch": 0.8292134602864584, "grad_norm": 11.137445449829102, "learning_rate": 3.5199460193689137e-07, "loss": 2.959, "step": 81515 }, { "epoch": 0.8292643229166666, "grad_norm": 17.52461814880371, "learning_rate": 3.517900358305862e-07, "loss": 3.3231, "step": 81520 }, { "epoch": 0.829315185546875, "grad_norm": 10.99956226348877, "learning_rate": 3.515855246857233e-07, "loss": 2.9984, "step": 81525 }, { "epoch": 0.8293660481770834, "grad_norm": 11.812445640563965, "learning_rate": 3.5138106850753613e-07, "loss": 3.4031, "step": 81530 }, { "epoch": 0.8294169108072916, "grad_norm": 12.35213565826416, "learning_rate": 3.5117666730125455e-07, "loss": 3.3872, "step": 81535 }, { "epoch": 0.8294677734375, "grad_norm": 14.233818054199219, "learning_rate": 3.5097232107210916e-07, "loss": 3.4455, "step": 81540 }, { "epoch": 0.8295186360677084, "grad_norm": 8.840458869934082, "learning_rate": 3.507680298253269e-07, "loss": 3.3539, "step": 81545 }, { "epoch": 0.8295694986979166, "grad_norm": 12.002241134643555, "learning_rate": 3.5056379356613573e-07, "loss": 3.0824, "step": 81550 }, { "epoch": 0.829620361328125, "grad_norm": 13.244787216186523, "learning_rate": 3.5035961229976045e-07, "loss": 3.6522, "step": 81555 }, { "epoch": 0.8296712239583334, "grad_norm": 9.030220985412598, "learning_rate": 3.5015548603142433e-07, "loss": 3.3114, "step": 81560 }, { "epoch": 0.8297220865885416, "grad_norm": 8.869832992553711, "learning_rate": 3.499514147663505e-07, "loss": 3.1669, "step": 81565 }, { "epoch": 0.82977294921875, "grad_norm": 12.157954216003418, "learning_rate": 3.4974739850976074e-07, "loss": 3.5634, "step": 81570 }, { "epoch": 0.8298238118489584, "grad_norm": 14.200581550598145, "learning_rate": 3.495434372668743e-07, "loss": 3.2098, "step": 81575 }, { "epoch": 0.8298746744791666, "grad_norm": 9.806853294372559, "learning_rate": 3.493395310429085e-07, "loss": 3.3559, "step": 81580 }, { "epoch": 0.829925537109375, "grad_norm": 12.339767456054688, "learning_rate": 3.491356798430817e-07, "loss": 3.3705, "step": 81585 }, { "epoch": 0.8299763997395834, "grad_norm": 15.809764862060547, "learning_rate": 3.4893188367260826e-07, "loss": 3.6926, "step": 81590 }, { "epoch": 0.8300272623697916, "grad_norm": 14.739584922790527, "learning_rate": 3.487281425367031e-07, "loss": 3.2144, "step": 81595 }, { "epoch": 0.830078125, "grad_norm": 6.578415393829346, "learning_rate": 3.485244564405779e-07, "loss": 3.2765, "step": 81600 }, { "epoch": 0.8301289876302084, "grad_norm": 7.773172378540039, "learning_rate": 3.4832082538944543e-07, "loss": 3.7121, "step": 81605 }, { "epoch": 0.8301798502604166, "grad_norm": 13.908849716186523, "learning_rate": 3.481172493885143e-07, "loss": 3.075, "step": 81610 }, { "epoch": 0.830230712890625, "grad_norm": 13.709275245666504, "learning_rate": 3.4791372844299285e-07, "loss": 3.3412, "step": 81615 }, { "epoch": 0.8302815755208334, "grad_norm": 10.29182243347168, "learning_rate": 3.477102625580886e-07, "loss": 3.5131, "step": 81620 }, { "epoch": 0.8303324381510416, "grad_norm": 9.028854370117188, "learning_rate": 3.4750685173900764e-07, "loss": 3.4103, "step": 81625 }, { "epoch": 0.83038330078125, "grad_norm": 16.737489700317383, "learning_rate": 3.473034959909538e-07, "loss": 3.5828, "step": 81630 }, { "epoch": 0.8304341634114584, "grad_norm": 10.40755558013916, "learning_rate": 3.471001953191294e-07, "loss": 3.1235, "step": 81635 }, { "epoch": 0.8304850260416666, "grad_norm": 9.349138259887695, "learning_rate": 3.4689694972873554e-07, "loss": 3.6267, "step": 81640 }, { "epoch": 0.830535888671875, "grad_norm": 15.301726341247559, "learning_rate": 3.466937592249739e-07, "loss": 3.4729, "step": 81645 }, { "epoch": 0.8305867513020834, "grad_norm": 12.60606575012207, "learning_rate": 3.4649062381304195e-07, "loss": 3.5103, "step": 81650 }, { "epoch": 0.8306376139322916, "grad_norm": 6.902255535125732, "learning_rate": 3.4628754349813636e-07, "loss": 3.4327, "step": 81655 }, { "epoch": 0.8306884765625, "grad_norm": 12.60377311706543, "learning_rate": 3.460845182854539e-07, "loss": 3.351, "step": 81660 }, { "epoch": 0.8307393391927084, "grad_norm": 8.240616798400879, "learning_rate": 3.4588154818018834e-07, "loss": 3.3058, "step": 81665 }, { "epoch": 0.8307902018229166, "grad_norm": 11.642782211303711, "learning_rate": 3.4567863318753307e-07, "loss": 3.171, "step": 81670 }, { "epoch": 0.830841064453125, "grad_norm": 10.349153518676758, "learning_rate": 3.4547577331267873e-07, "loss": 3.2889, "step": 81675 }, { "epoch": 0.8308919270833334, "grad_norm": 7.751547336578369, "learning_rate": 3.4527296856081664e-07, "loss": 3.5357, "step": 81680 }, { "epoch": 0.8309427897135416, "grad_norm": 11.443355560302734, "learning_rate": 3.4507021893713523e-07, "loss": 3.2525, "step": 81685 }, { "epoch": 0.83099365234375, "grad_norm": 9.293235778808594, "learning_rate": 3.4486752444682053e-07, "loss": 3.2354, "step": 81690 }, { "epoch": 0.8310445149739584, "grad_norm": 13.506486892700195, "learning_rate": 3.446648850950596e-07, "loss": 3.6791, "step": 81695 }, { "epoch": 0.8310953776041666, "grad_norm": 8.528145790100098, "learning_rate": 3.4446230088703734e-07, "loss": 3.48, "step": 81700 }, { "epoch": 0.831146240234375, "grad_norm": 7.487433433532715, "learning_rate": 3.442597718279361e-07, "loss": 3.3777, "step": 81705 }, { "epoch": 0.8311971028645834, "grad_norm": 10.32982349395752, "learning_rate": 3.440572979229376e-07, "loss": 3.3119, "step": 81710 }, { "epoch": 0.8312479654947916, "grad_norm": 10.598235130310059, "learning_rate": 3.4385487917722144e-07, "loss": 3.519, "step": 81715 }, { "epoch": 0.831298828125, "grad_norm": 13.667901992797852, "learning_rate": 3.4365251559596746e-07, "loss": 3.3822, "step": 81720 }, { "epoch": 0.8313496907552084, "grad_norm": 16.000396728515625, "learning_rate": 3.434502071843532e-07, "loss": 3.3401, "step": 81725 }, { "epoch": 0.8314005533854166, "grad_norm": 11.901004791259766, "learning_rate": 3.4324795394755415e-07, "loss": 3.1247, "step": 81730 }, { "epoch": 0.831451416015625, "grad_norm": 10.683159828186035, "learning_rate": 3.4304575589074435e-07, "loss": 3.8819, "step": 81735 }, { "epoch": 0.8315022786458334, "grad_norm": 9.5502290725708, "learning_rate": 3.428436130190982e-07, "loss": 3.1455, "step": 81740 }, { "epoch": 0.8315531412760416, "grad_norm": 13.432830810546875, "learning_rate": 3.4264152533778655e-07, "loss": 3.2658, "step": 81745 }, { "epoch": 0.83160400390625, "grad_norm": 15.859672546386719, "learning_rate": 3.4243949285198004e-07, "loss": 3.0781, "step": 81750 }, { "epoch": 0.8316548665364584, "grad_norm": 12.149288177490234, "learning_rate": 3.4223751556684807e-07, "loss": 3.6371, "step": 81755 }, { "epoch": 0.8317057291666666, "grad_norm": 11.400646209716797, "learning_rate": 3.4203559348755826e-07, "loss": 3.6175, "step": 81760 }, { "epoch": 0.831756591796875, "grad_norm": 12.680248260498047, "learning_rate": 3.418337266192759e-07, "loss": 3.3646, "step": 81765 }, { "epoch": 0.8318074544270834, "grad_norm": 13.315074920654297, "learning_rate": 3.416319149671657e-07, "loss": 3.2721, "step": 81770 }, { "epoch": 0.8318583170572916, "grad_norm": 8.332613945007324, "learning_rate": 3.4143015853639134e-07, "loss": 3.8566, "step": 81775 }, { "epoch": 0.8319091796875, "grad_norm": 10.389419555664062, "learning_rate": 3.412284573321153e-07, "loss": 3.3517, "step": 81780 }, { "epoch": 0.8319600423177084, "grad_norm": 10.17501449584961, "learning_rate": 3.410268113594975e-07, "loss": 3.6708, "step": 81785 }, { "epoch": 0.8320109049479166, "grad_norm": 14.361525535583496, "learning_rate": 3.4082522062369645e-07, "loss": 3.9283, "step": 81790 }, { "epoch": 0.832061767578125, "grad_norm": 9.244938850402832, "learning_rate": 3.406236851298703e-07, "loss": 3.2875, "step": 81795 }, { "epoch": 0.8321126302083334, "grad_norm": 17.720565795898438, "learning_rate": 3.40422204883176e-07, "loss": 3.5046, "step": 81800 }, { "epoch": 0.8321634928385416, "grad_norm": 9.65694808959961, "learning_rate": 3.4022077988876746e-07, "loss": 3.2818, "step": 81805 }, { "epoch": 0.83221435546875, "grad_norm": 15.131839752197266, "learning_rate": 3.400194101517981e-07, "loss": 3.471, "step": 81810 }, { "epoch": 0.8322652180989584, "grad_norm": 15.426616668701172, "learning_rate": 3.3981809567742054e-07, "loss": 3.2494, "step": 81815 }, { "epoch": 0.8323160807291666, "grad_norm": 12.537252426147461, "learning_rate": 3.3961683647078444e-07, "loss": 3.2033, "step": 81820 }, { "epoch": 0.832366943359375, "grad_norm": 18.45199203491211, "learning_rate": 3.394156325370401e-07, "loss": 3.3196, "step": 81825 }, { "epoch": 0.8324178059895834, "grad_norm": 10.200485229492188, "learning_rate": 3.392144838813341e-07, "loss": 3.448, "step": 81830 }, { "epoch": 0.8324686686197916, "grad_norm": 10.197514533996582, "learning_rate": 3.3901339050881415e-07, "loss": 3.6044, "step": 81835 }, { "epoch": 0.83251953125, "grad_norm": 10.978058815002441, "learning_rate": 3.38812352424624e-07, "loss": 3.9155, "step": 81840 }, { "epoch": 0.8325703938802084, "grad_norm": 20.063888549804688, "learning_rate": 3.3861136963390715e-07, "loss": 3.902, "step": 81845 }, { "epoch": 0.8326212565104166, "grad_norm": 13.886176109313965, "learning_rate": 3.384104421418063e-07, "loss": 3.1731, "step": 81850 }, { "epoch": 0.832672119140625, "grad_norm": 8.191770553588867, "learning_rate": 3.382095699534624e-07, "loss": 3.0654, "step": 81855 }, { "epoch": 0.8327229817708334, "grad_norm": 12.459845542907715, "learning_rate": 3.380087530740142e-07, "loss": 4.0921, "step": 81860 }, { "epoch": 0.8327738444010416, "grad_norm": 12.545857429504395, "learning_rate": 3.378079915085988e-07, "loss": 3.1966, "step": 81865 }, { "epoch": 0.83282470703125, "grad_norm": 16.804519653320312, "learning_rate": 3.376072852623544e-07, "loss": 3.0937, "step": 81870 }, { "epoch": 0.8328755696614584, "grad_norm": 13.775388717651367, "learning_rate": 3.374066343404142e-07, "loss": 2.9983, "step": 81875 }, { "epoch": 0.8329264322916666, "grad_norm": 17.29413604736328, "learning_rate": 3.3720603874791343e-07, "loss": 3.4973, "step": 81880 }, { "epoch": 0.832977294921875, "grad_norm": 9.611454963684082, "learning_rate": 3.370054984899829e-07, "loss": 3.333, "step": 81885 }, { "epoch": 0.8330281575520834, "grad_norm": 13.825835227966309, "learning_rate": 3.368050135717546e-07, "loss": 3.1104, "step": 81890 }, { "epoch": 0.8330790201822916, "grad_norm": 10.248576164245605, "learning_rate": 3.366045839983573e-07, "loss": 3.4458, "step": 81895 }, { "epoch": 0.8331298828125, "grad_norm": 9.065658569335938, "learning_rate": 3.3640420977491846e-07, "loss": 2.9167, "step": 81900 }, { "epoch": 0.8331807454427084, "grad_norm": 10.88503646850586, "learning_rate": 3.3620389090656484e-07, "loss": 3.2034, "step": 81905 }, { "epoch": 0.8332316080729166, "grad_norm": 9.16987133026123, "learning_rate": 3.360036273984224e-07, "loss": 3.2866, "step": 81910 }, { "epoch": 0.833282470703125, "grad_norm": 17.341602325439453, "learning_rate": 3.3580341925561447e-07, "loss": 3.5577, "step": 81915 }, { "epoch": 0.8333333333333334, "grad_norm": 12.902583122253418, "learning_rate": 3.3560326648326285e-07, "loss": 3.5347, "step": 81920 }, { "epoch": 0.8333841959635416, "grad_norm": 8.747199058532715, "learning_rate": 3.354031690864881e-07, "loss": 3.2654, "step": 81925 }, { "epoch": 0.83343505859375, "grad_norm": 13.0546875, "learning_rate": 3.3520312707041035e-07, "loss": 3.6446, "step": 81930 }, { "epoch": 0.8334859212239584, "grad_norm": 8.177398681640625, "learning_rate": 3.35003140440148e-07, "loss": 3.5208, "step": 81935 }, { "epoch": 0.8335367838541666, "grad_norm": 10.24907398223877, "learning_rate": 3.348032092008166e-07, "loss": 3.251, "step": 81940 }, { "epoch": 0.833587646484375, "grad_norm": 11.564193725585938, "learning_rate": 3.3460333335753214e-07, "loss": 3.0913, "step": 81945 }, { "epoch": 0.8336385091145834, "grad_norm": 12.961506843566895, "learning_rate": 3.34403512915408e-07, "loss": 3.3601, "step": 81950 }, { "epoch": 0.8336893717447916, "grad_norm": 9.499430656433105, "learning_rate": 3.3420374787955706e-07, "loss": 3.0469, "step": 81955 }, { "epoch": 0.833740234375, "grad_norm": 7.4130988121032715, "learning_rate": 3.3400403825508936e-07, "loss": 3.1784, "step": 81960 }, { "epoch": 0.8337910970052084, "grad_norm": 12.141097068786621, "learning_rate": 3.338043840471153e-07, "loss": 3.6559, "step": 81965 }, { "epoch": 0.8338419596354166, "grad_norm": 15.333015441894531, "learning_rate": 3.3360478526074296e-07, "loss": 3.5068, "step": 81970 }, { "epoch": 0.833892822265625, "grad_norm": 11.44824504852295, "learning_rate": 3.3340524190107774e-07, "loss": 3.1621, "step": 81975 }, { "epoch": 0.8339436848958334, "grad_norm": 10.404541969299316, "learning_rate": 3.3320575397322614e-07, "loss": 2.9386, "step": 81980 }, { "epoch": 0.8339945475260416, "grad_norm": 9.791139602661133, "learning_rate": 3.3300632148229234e-07, "loss": 3.7692, "step": 81985 }, { "epoch": 0.83404541015625, "grad_norm": 15.392584800720215, "learning_rate": 3.328069444333781e-07, "loss": 3.3095, "step": 81990 }, { "epoch": 0.8340962727864584, "grad_norm": 10.879815101623535, "learning_rate": 3.3260762283158446e-07, "loss": 3.1776, "step": 81995 }, { "epoch": 0.8341471354166666, "grad_norm": 14.615254402160645, "learning_rate": 3.324083566820105e-07, "loss": 3.5583, "step": 82000 }, { "epoch": 0.834197998046875, "grad_norm": 11.408792495727539, "learning_rate": 3.32209145989755e-07, "loss": 3.1966, "step": 82005 }, { "epoch": 0.8342488606770834, "grad_norm": 11.398858070373535, "learning_rate": 3.3200999075991533e-07, "loss": 3.0101, "step": 82010 }, { "epoch": 0.8342997233072916, "grad_norm": 7.233194828033447, "learning_rate": 3.318108909975859e-07, "loss": 3.0912, "step": 82015 }, { "epoch": 0.8343505859375, "grad_norm": 10.705859184265137, "learning_rate": 3.316118467078605e-07, "loss": 3.4198, "step": 82020 }, { "epoch": 0.8344014485677084, "grad_norm": 12.724525451660156, "learning_rate": 3.314128578958328e-07, "loss": 3.3804, "step": 82025 }, { "epoch": 0.8344523111979166, "grad_norm": 11.468843460083008, "learning_rate": 3.312139245665924e-07, "loss": 3.0083, "step": 82030 }, { "epoch": 0.834503173828125, "grad_norm": 16.455656051635742, "learning_rate": 3.310150467252302e-07, "loss": 3.3906, "step": 82035 }, { "epoch": 0.8345540364583334, "grad_norm": 10.620047569274902, "learning_rate": 3.308162243768334e-07, "loss": 3.2541, "step": 82040 }, { "epoch": 0.8346048990885416, "grad_norm": 11.70545768737793, "learning_rate": 3.3061745752648976e-07, "loss": 3.1841, "step": 82045 }, { "epoch": 0.83465576171875, "grad_norm": 9.349608421325684, "learning_rate": 3.3041874617928434e-07, "loss": 3.1505, "step": 82050 }, { "epoch": 0.8347066243489584, "grad_norm": 8.12730598449707, "learning_rate": 3.302200903403008e-07, "loss": 3.3881, "step": 82055 }, { "epoch": 0.8347574869791666, "grad_norm": 12.16651725769043, "learning_rate": 3.300214900146215e-07, "loss": 3.0127, "step": 82060 }, { "epoch": 0.834808349609375, "grad_norm": 12.703459739685059, "learning_rate": 3.29822945207329e-07, "loss": 3.3549, "step": 82065 }, { "epoch": 0.8348592122395834, "grad_norm": 15.172588348388672, "learning_rate": 3.296244559235018e-07, "loss": 3.3399, "step": 82070 }, { "epoch": 0.8349100748697916, "grad_norm": 8.584330558776855, "learning_rate": 3.29426022168218e-07, "loss": 3.672, "step": 82075 }, { "epoch": 0.8349609375, "grad_norm": 11.948514938354492, "learning_rate": 3.2922764394655526e-07, "loss": 3.4415, "step": 82080 }, { "epoch": 0.8350118001302084, "grad_norm": 9.167913436889648, "learning_rate": 3.290293212635884e-07, "loss": 3.4968, "step": 82085 }, { "epoch": 0.8350626627604166, "grad_norm": 10.437028884887695, "learning_rate": 3.288310541243925e-07, "loss": 3.5139, "step": 82090 }, { "epoch": 0.835113525390625, "grad_norm": 10.218053817749023, "learning_rate": 3.286328425340385e-07, "loss": 3.1128, "step": 82095 }, { "epoch": 0.8351643880208334, "grad_norm": 9.678133964538574, "learning_rate": 3.284346864975993e-07, "loss": 3.3178, "step": 82100 }, { "epoch": 0.8352152506510416, "grad_norm": 12.035988807678223, "learning_rate": 3.282365860201434e-07, "loss": 3.5213, "step": 82105 }, { "epoch": 0.83526611328125, "grad_norm": 8.763233184814453, "learning_rate": 3.280385411067402e-07, "loss": 3.4057, "step": 82110 }, { "epoch": 0.8353169759114584, "grad_norm": 15.29818058013916, "learning_rate": 3.2784055176245553e-07, "loss": 3.1658, "step": 82115 }, { "epoch": 0.8353678385416666, "grad_norm": 11.715181350708008, "learning_rate": 3.276426179923561e-07, "loss": 3.4415, "step": 82120 }, { "epoch": 0.835418701171875, "grad_norm": 13.8848295211792, "learning_rate": 3.274447398015054e-07, "loss": 3.4918, "step": 82125 }, { "epoch": 0.8354695638020834, "grad_norm": 14.780017852783203, "learning_rate": 3.272469171949655e-07, "loss": 3.0801, "step": 82130 }, { "epoch": 0.8355204264322916, "grad_norm": 15.15134334564209, "learning_rate": 3.2704915017779835e-07, "loss": 3.405, "step": 82135 }, { "epoch": 0.8355712890625, "grad_norm": 16.6488094329834, "learning_rate": 3.268514387550642e-07, "loss": 3.2162, "step": 82140 }, { "epoch": 0.8356221516927084, "grad_norm": 8.617156982421875, "learning_rate": 3.266537829318206e-07, "loss": 3.4536, "step": 82145 }, { "epoch": 0.8356730143229166, "grad_norm": 14.403000831604004, "learning_rate": 3.2645618271312434e-07, "loss": 2.9412, "step": 82150 }, { "epoch": 0.835723876953125, "grad_norm": 8.742593765258789, "learning_rate": 3.2625863810403226e-07, "loss": 3.2624, "step": 82155 }, { "epoch": 0.8357747395833334, "grad_norm": 14.44861888885498, "learning_rate": 3.260611491095969e-07, "loss": 3.5661, "step": 82160 }, { "epoch": 0.8358256022135416, "grad_norm": 11.183387756347656, "learning_rate": 3.258637157348726e-07, "loss": 3.425, "step": 82165 }, { "epoch": 0.83587646484375, "grad_norm": 14.68026351928711, "learning_rate": 3.25666337984909e-07, "loss": 3.0987, "step": 82170 }, { "epoch": 0.8359273274739584, "grad_norm": 9.858868598937988, "learning_rate": 3.254690158647572e-07, "loss": 3.5059, "step": 82175 }, { "epoch": 0.8359781901041666, "grad_norm": 7.8813629150390625, "learning_rate": 3.2527174937946546e-07, "loss": 3.2584, "step": 82180 }, { "epoch": 0.836029052734375, "grad_norm": 10.828450202941895, "learning_rate": 3.2507453853408004e-07, "loss": 3.7391, "step": 82185 }, { "epoch": 0.8360799153645834, "grad_norm": 7.974329471588135, "learning_rate": 3.248773833336469e-07, "loss": 3.5249, "step": 82190 }, { "epoch": 0.8361307779947916, "grad_norm": 15.77200984954834, "learning_rate": 3.246802837832108e-07, "loss": 3.2638, "step": 82195 }, { "epoch": 0.836181640625, "grad_norm": 13.883968353271484, "learning_rate": 3.244832398878142e-07, "loss": 3.8104, "step": 82200 }, { "epoch": 0.8362325032552084, "grad_norm": 8.318225860595703, "learning_rate": 3.2428625165249834e-07, "loss": 3.2504, "step": 82205 }, { "epoch": 0.8362833658854166, "grad_norm": 11.81405258178711, "learning_rate": 3.2408931908230217e-07, "loss": 3.3791, "step": 82210 }, { "epoch": 0.836334228515625, "grad_norm": 9.700438499450684, "learning_rate": 3.2389244218226533e-07, "loss": 3.2398, "step": 82215 }, { "epoch": 0.8363850911458334, "grad_norm": 14.125505447387695, "learning_rate": 3.236956209574249e-07, "loss": 3.2274, "step": 82220 }, { "epoch": 0.8364359537760416, "grad_norm": 10.842809677124023, "learning_rate": 3.2349885541281626e-07, "loss": 3.0955, "step": 82225 }, { "epoch": 0.83648681640625, "grad_norm": 10.028032302856445, "learning_rate": 3.2330214555347297e-07, "loss": 3.1531, "step": 82230 }, { "epoch": 0.8365376790364584, "grad_norm": 10.2706298828125, "learning_rate": 3.231054913844289e-07, "loss": 3.6072, "step": 82235 }, { "epoch": 0.8365885416666666, "grad_norm": 12.325729370117188, "learning_rate": 3.2290889291071434e-07, "loss": 3.5087, "step": 82240 }, { "epoch": 0.836639404296875, "grad_norm": 11.384655952453613, "learning_rate": 3.227123501373597e-07, "loss": 3.5067, "step": 82245 }, { "epoch": 0.8366902669270834, "grad_norm": 7.226029396057129, "learning_rate": 3.225158630693939e-07, "loss": 3.604, "step": 82250 }, { "epoch": 0.8367411295572916, "grad_norm": 7.987247943878174, "learning_rate": 3.223194317118439e-07, "loss": 3.1628, "step": 82255 }, { "epoch": 0.8367919921875, "grad_norm": 9.990367889404297, "learning_rate": 3.221230560697344e-07, "loss": 3.2523, "step": 82260 }, { "epoch": 0.8368428548177084, "grad_norm": 11.045588493347168, "learning_rate": 3.2192673614809015e-07, "loss": 3.1911, "step": 82265 }, { "epoch": 0.8368937174479166, "grad_norm": 9.236793518066406, "learning_rate": 3.217304719519349e-07, "loss": 3.6594, "step": 82270 }, { "epoch": 0.836944580078125, "grad_norm": 7.034839153289795, "learning_rate": 3.215342634862889e-07, "loss": 3.1625, "step": 82275 }, { "epoch": 0.8369954427083334, "grad_norm": 12.31274700164795, "learning_rate": 3.2133811075617275e-07, "loss": 3.5175, "step": 82280 }, { "epoch": 0.8370463053385416, "grad_norm": 15.286792755126953, "learning_rate": 3.2114201376660407e-07, "loss": 3.272, "step": 82285 }, { "epoch": 0.83709716796875, "grad_norm": 8.54918384552002, "learning_rate": 3.2094597252260024e-07, "loss": 3.3357, "step": 82290 }, { "epoch": 0.8371480305989584, "grad_norm": 13.110342025756836, "learning_rate": 3.20749987029178e-07, "loss": 3.361, "step": 82295 }, { "epoch": 0.8371988932291666, "grad_norm": 15.439437866210938, "learning_rate": 3.205540572913507e-07, "loss": 3.4519, "step": 82300 }, { "epoch": 0.837249755859375, "grad_norm": 8.734394073486328, "learning_rate": 3.203581833141306e-07, "loss": 3.4643, "step": 82305 }, { "epoch": 0.8373006184895834, "grad_norm": 15.3256196975708, "learning_rate": 3.2016236510253067e-07, "loss": 3.2861, "step": 82310 }, { "epoch": 0.8373514811197916, "grad_norm": 12.538545608520508, "learning_rate": 3.1996660266155913e-07, "loss": 3.1708, "step": 82315 }, { "epoch": 0.83740234375, "grad_norm": 13.305954933166504, "learning_rate": 3.197708959962259e-07, "loss": 3.5401, "step": 82320 }, { "epoch": 0.8374532063802084, "grad_norm": 9.767401695251465, "learning_rate": 3.1957524511153694e-07, "loss": 3.1989, "step": 82325 }, { "epoch": 0.8375040690104166, "grad_norm": 12.346323013305664, "learning_rate": 3.19379650012499e-07, "loss": 3.5382, "step": 82330 }, { "epoch": 0.837554931640625, "grad_norm": 7.60872745513916, "learning_rate": 3.1918411070411605e-07, "loss": 4.027, "step": 82335 }, { "epoch": 0.8376057942708334, "grad_norm": 12.669442176818848, "learning_rate": 3.1898862719139003e-07, "loss": 3.7596, "step": 82340 }, { "epoch": 0.8376566569010416, "grad_norm": 8.977251052856445, "learning_rate": 3.187931994793231e-07, "loss": 3.2094, "step": 82345 }, { "epoch": 0.83770751953125, "grad_norm": 10.287897109985352, "learning_rate": 3.185978275729157e-07, "loss": 3.2141, "step": 82350 }, { "epoch": 0.8377583821614584, "grad_norm": 13.242105484008789, "learning_rate": 3.184025114771658e-07, "loss": 3.4793, "step": 82355 }, { "epoch": 0.8378092447916666, "grad_norm": 13.227606773376465, "learning_rate": 3.182072511970702e-07, "loss": 3.2601, "step": 82360 }, { "epoch": 0.837860107421875, "grad_norm": 7.591468811035156, "learning_rate": 3.180120467376252e-07, "loss": 3.4581, "step": 82365 }, { "epoch": 0.8379109700520834, "grad_norm": 12.946093559265137, "learning_rate": 3.178168981038243e-07, "loss": 3.2214, "step": 82370 }, { "epoch": 0.8379618326822916, "grad_norm": 11.933862686157227, "learning_rate": 3.176218053006616e-07, "loss": 3.2006, "step": 82375 }, { "epoch": 0.8380126953125, "grad_norm": 8.173795700073242, "learning_rate": 3.17426768333127e-07, "loss": 3.1671, "step": 82380 }, { "epoch": 0.8380635579427084, "grad_norm": 10.22460651397705, "learning_rate": 3.1723178720621167e-07, "loss": 2.9694, "step": 82385 }, { "epoch": 0.8381144205729166, "grad_norm": 10.599739074707031, "learning_rate": 3.1703686192490395e-07, "loss": 3.0268, "step": 82390 }, { "epoch": 0.838165283203125, "grad_norm": 13.632969856262207, "learning_rate": 3.1684199249418983e-07, "loss": 3.4566, "step": 82395 }, { "epoch": 0.8382161458333334, "grad_norm": 10.012470245361328, "learning_rate": 3.1664717891905616e-07, "loss": 3.1663, "step": 82400 }, { "epoch": 0.8382670084635416, "grad_norm": 17.0555419921875, "learning_rate": 3.1645242120448725e-07, "loss": 3.4753, "step": 82405 }, { "epoch": 0.83831787109375, "grad_norm": 13.731001853942871, "learning_rate": 3.1625771935546557e-07, "loss": 3.2863, "step": 82410 }, { "epoch": 0.8383687337239584, "grad_norm": 9.11953067779541, "learning_rate": 3.1606307337697235e-07, "loss": 3.403, "step": 82415 }, { "epoch": 0.8384195963541666, "grad_norm": 10.7665433883667, "learning_rate": 3.1586848327398724e-07, "loss": 3.2105, "step": 82420 }, { "epoch": 0.838470458984375, "grad_norm": 14.06084156036377, "learning_rate": 3.156739490514901e-07, "loss": 3.635, "step": 82425 }, { "epoch": 0.8385213216145834, "grad_norm": 13.289153099060059, "learning_rate": 3.1547947071445736e-07, "loss": 3.1906, "step": 82430 }, { "epoch": 0.8385721842447916, "grad_norm": 16.027151107788086, "learning_rate": 3.152850482678638e-07, "loss": 3.6236, "step": 82435 }, { "epoch": 0.838623046875, "grad_norm": 11.879664421081543, "learning_rate": 3.1509068171668514e-07, "loss": 3.5951, "step": 82440 }, { "epoch": 0.8386739095052084, "grad_norm": 14.067399978637695, "learning_rate": 3.148963710658928e-07, "loss": 3.3605, "step": 82445 }, { "epoch": 0.8387247721354166, "grad_norm": 8.084772109985352, "learning_rate": 3.1470211632045944e-07, "loss": 3.5825, "step": 82450 }, { "epoch": 0.838775634765625, "grad_norm": 7.3616766929626465, "learning_rate": 3.145079174853541e-07, "loss": 3.0249, "step": 82455 }, { "epoch": 0.8388264973958334, "grad_norm": 15.187037467956543, "learning_rate": 3.143137745655461e-07, "loss": 3.3028, "step": 82460 }, { "epoch": 0.8388773600260416, "grad_norm": 13.803275108337402, "learning_rate": 3.141196875660021e-07, "loss": 3.3048, "step": 82465 }, { "epoch": 0.83892822265625, "grad_norm": 15.948885917663574, "learning_rate": 3.1392565649168735e-07, "loss": 3.3256, "step": 82470 }, { "epoch": 0.8389790852864584, "grad_norm": 8.560640335083008, "learning_rate": 3.137316813475663e-07, "loss": 3.5241, "step": 82475 }, { "epoch": 0.8390299479166666, "grad_norm": 9.086495399475098, "learning_rate": 3.1353776213860267e-07, "loss": 3.7053, "step": 82480 }, { "epoch": 0.839080810546875, "grad_norm": 9.937238693237305, "learning_rate": 3.1334389886975707e-07, "loss": 3.1616, "step": 82485 }, { "epoch": 0.8391316731770834, "grad_norm": 15.993388175964355, "learning_rate": 3.131500915459898e-07, "loss": 3.5369, "step": 82490 }, { "epoch": 0.8391825358072916, "grad_norm": 16.628915786743164, "learning_rate": 3.129563401722582e-07, "loss": 3.3434, "step": 82495 }, { "epoch": 0.8392333984375, "grad_norm": 7.18716287612915, "learning_rate": 3.1276264475352036e-07, "loss": 3.1887, "step": 82500 }, { "epoch": 0.8392842610677084, "grad_norm": 12.53756046295166, "learning_rate": 3.125690052947325e-07, "loss": 3.2412, "step": 82505 }, { "epoch": 0.8393351236979166, "grad_norm": 11.504670143127441, "learning_rate": 3.1237542180084817e-07, "loss": 3.1316, "step": 82510 }, { "epoch": 0.839385986328125, "grad_norm": 9.89035415649414, "learning_rate": 3.121818942768193e-07, "loss": 3.5028, "step": 82515 }, { "epoch": 0.8394368489583334, "grad_norm": 9.171295166015625, "learning_rate": 3.1198842272759905e-07, "loss": 3.3716, "step": 82520 }, { "epoch": 0.8394877115885416, "grad_norm": 12.68211841583252, "learning_rate": 3.117950071581355e-07, "loss": 3.3988, "step": 82525 }, { "epoch": 0.83953857421875, "grad_norm": 7.9228739738464355, "learning_rate": 3.1160164757337826e-07, "loss": 3.2639, "step": 82530 }, { "epoch": 0.8395894368489584, "grad_norm": 16.38813591003418, "learning_rate": 3.114083439782745e-07, "loss": 3.4395, "step": 82535 }, { "epoch": 0.8396402994791666, "grad_norm": 7.744754314422607, "learning_rate": 3.1121509637776966e-07, "loss": 3.4227, "step": 82540 }, { "epoch": 0.839691162109375, "grad_norm": 11.593255043029785, "learning_rate": 3.1102190477680763e-07, "loss": 3.1681, "step": 82545 }, { "epoch": 0.8397420247395834, "grad_norm": 9.948272705078125, "learning_rate": 3.1082876918033053e-07, "loss": 3.2039, "step": 82550 }, { "epoch": 0.8397928873697916, "grad_norm": 14.423352241516113, "learning_rate": 3.106356895932813e-07, "loss": 3.664, "step": 82555 }, { "epoch": 0.83984375, "grad_norm": 9.781453132629395, "learning_rate": 3.104426660205989e-07, "loss": 3.2269, "step": 82560 }, { "epoch": 0.8398946126302084, "grad_norm": 12.1110200881958, "learning_rate": 3.1024969846722235e-07, "loss": 3.458, "step": 82565 }, { "epoch": 0.8399454752604166, "grad_norm": 14.258819580078125, "learning_rate": 3.1005678693808745e-07, "loss": 3.3119, "step": 82570 }, { "epoch": 0.839996337890625, "grad_norm": 9.788232803344727, "learning_rate": 3.098639314381305e-07, "loss": 3.3446, "step": 82575 }, { "epoch": 0.8400472005208334, "grad_norm": 12.888335227966309, "learning_rate": 3.0967113197228643e-07, "loss": 3.2827, "step": 82580 }, { "epoch": 0.8400980631510416, "grad_norm": 16.238554000854492, "learning_rate": 3.0947838854548717e-07, "loss": 3.9365, "step": 82585 }, { "epoch": 0.84014892578125, "grad_norm": 14.492205619812012, "learning_rate": 3.092857011626635e-07, "loss": 3.6415, "step": 82590 }, { "epoch": 0.8401997884114584, "grad_norm": 9.4275541305542, "learning_rate": 3.090930698287467e-07, "loss": 3.8095, "step": 82595 }, { "epoch": 0.8402506510416666, "grad_norm": 12.679570198059082, "learning_rate": 3.089004945486637e-07, "loss": 3.2866, "step": 82600 }, { "epoch": 0.840301513671875, "grad_norm": 12.28504467010498, "learning_rate": 3.087079753273428e-07, "loss": 3.4753, "step": 82605 }, { "epoch": 0.8403523763020834, "grad_norm": 7.86800479888916, "learning_rate": 3.085155121697084e-07, "loss": 3.365, "step": 82610 }, { "epoch": 0.8404032389322916, "grad_norm": 7.812675952911377, "learning_rate": 3.0832310508068564e-07, "loss": 3.4058, "step": 82615 }, { "epoch": 0.8404541015625, "grad_norm": 14.575812339782715, "learning_rate": 3.0813075406519705e-07, "loss": 3.2456, "step": 82620 }, { "epoch": 0.8405049641927084, "grad_norm": 12.685293197631836, "learning_rate": 3.079384591281628e-07, "loss": 3.446, "step": 82625 }, { "epoch": 0.8405558268229166, "grad_norm": 9.814604759216309, "learning_rate": 3.077462202745035e-07, "loss": 3.2492, "step": 82630 }, { "epoch": 0.840606689453125, "grad_norm": 12.434163093566895, "learning_rate": 3.075540375091379e-07, "loss": 3.3287, "step": 82635 }, { "epoch": 0.8406575520833334, "grad_norm": 15.110447883605957, "learning_rate": 3.0736191083698297e-07, "loss": 3.2399, "step": 82640 }, { "epoch": 0.8407084147135416, "grad_norm": 16.645370483398438, "learning_rate": 3.0716984026295303e-07, "loss": 3.5208, "step": 82645 }, { "epoch": 0.84075927734375, "grad_norm": 12.585023880004883, "learning_rate": 3.069778257919637e-07, "loss": 3.171, "step": 82650 }, { "epoch": 0.8408101399739584, "grad_norm": 8.575360298156738, "learning_rate": 3.067858674289262e-07, "loss": 3.3535, "step": 82655 }, { "epoch": 0.8408610026041666, "grad_norm": 7.247581481933594, "learning_rate": 3.0659396517875305e-07, "loss": 3.3298, "step": 82660 }, { "epoch": 0.840911865234375, "grad_norm": 9.335526466369629, "learning_rate": 3.0640211904635287e-07, "loss": 3.0408, "step": 82665 }, { "epoch": 0.8409627278645834, "grad_norm": 9.049152374267578, "learning_rate": 3.06210329036635e-07, "loss": 3.067, "step": 82670 }, { "epoch": 0.8410135904947916, "grad_norm": 10.849244117736816, "learning_rate": 3.060185951545061e-07, "loss": 3.3345, "step": 82675 }, { "epoch": 0.841064453125, "grad_norm": 12.523625373840332, "learning_rate": 3.058269174048706e-07, "loss": 3.2685, "step": 82680 }, { "epoch": 0.8411153157552084, "grad_norm": 14.451581001281738, "learning_rate": 3.056352957926334e-07, "loss": 3.2992, "step": 82685 }, { "epoch": 0.8411661783854166, "grad_norm": 14.779975891113281, "learning_rate": 3.054437303226976e-07, "loss": 3.5794, "step": 82690 }, { "epoch": 0.841217041015625, "grad_norm": 13.709236145019531, "learning_rate": 3.052522209999639e-07, "loss": 3.1511, "step": 82695 }, { "epoch": 0.8412679036458334, "grad_norm": 7.6075358390808105, "learning_rate": 3.0506076782933185e-07, "loss": 3.5767, "step": 82700 }, { "epoch": 0.8413187662760416, "grad_norm": 12.561664581298828, "learning_rate": 3.0486937081569855e-07, "loss": 3.5576, "step": 82705 }, { "epoch": 0.84136962890625, "grad_norm": 12.769579887390137, "learning_rate": 3.0467802996396346e-07, "loss": 3.3144, "step": 82710 }, { "epoch": 0.8414204915364584, "grad_norm": 13.259459495544434, "learning_rate": 3.044867452790204e-07, "loss": 3.2626, "step": 82715 }, { "epoch": 0.8414713541666666, "grad_norm": 14.529406547546387, "learning_rate": 3.042955167657635e-07, "loss": 3.5059, "step": 82720 }, { "epoch": 0.841522216796875, "grad_norm": 10.720898628234863, "learning_rate": 3.0410434442908504e-07, "loss": 3.2629, "step": 82725 }, { "epoch": 0.8415730794270834, "grad_norm": 7.874776363372803, "learning_rate": 3.039132282738766e-07, "loss": 3.2861, "step": 82730 }, { "epoch": 0.8416239420572916, "grad_norm": 11.8858642578125, "learning_rate": 3.037221683050279e-07, "loss": 3.2717, "step": 82735 }, { "epoch": 0.8416748046875, "grad_norm": 13.591537475585938, "learning_rate": 3.0353116452742635e-07, "loss": 3.1717, "step": 82740 }, { "epoch": 0.8417256673177084, "grad_norm": 9.644363403320312, "learning_rate": 3.033402169459601e-07, "loss": 2.9345, "step": 82745 }, { "epoch": 0.8417765299479166, "grad_norm": 19.541770935058594, "learning_rate": 3.031493255655138e-07, "loss": 3.3933, "step": 82750 }, { "epoch": 0.841827392578125, "grad_norm": 10.669909477233887, "learning_rate": 3.029584903909705e-07, "loss": 3.119, "step": 82755 }, { "epoch": 0.8418782552083334, "grad_norm": 14.149001121520996, "learning_rate": 3.0276771142721373e-07, "loss": 3.4355, "step": 82760 }, { "epoch": 0.8419291178385416, "grad_norm": 11.106964111328125, "learning_rate": 3.025769886791247e-07, "loss": 3.2454, "step": 82765 }, { "epoch": 0.84197998046875, "grad_norm": 14.55089282989502, "learning_rate": 3.023863221515824e-07, "loss": 3.1274, "step": 82770 }, { "epoch": 0.8420308430989584, "grad_norm": 11.762307167053223, "learning_rate": 3.0219571184946536e-07, "loss": 3.5242, "step": 82775 }, { "epoch": 0.8420817057291666, "grad_norm": 8.462861061096191, "learning_rate": 3.020051577776495e-07, "loss": 3.2378, "step": 82780 }, { "epoch": 0.842132568359375, "grad_norm": 7.629974365234375, "learning_rate": 3.018146599410107e-07, "loss": 3.2623, "step": 82785 }, { "epoch": 0.8421834309895834, "grad_norm": 12.82613468170166, "learning_rate": 3.0162421834442316e-07, "loss": 3.6228, "step": 82790 }, { "epoch": 0.8422342936197916, "grad_norm": 12.538959503173828, "learning_rate": 3.014338329927591e-07, "loss": 3.0027, "step": 82795 }, { "epoch": 0.84228515625, "grad_norm": 15.235918998718262, "learning_rate": 3.0124350389088894e-07, "loss": 3.3562, "step": 82800 }, { "epoch": 0.8423360188802084, "grad_norm": 14.518644332885742, "learning_rate": 3.0105323104368285e-07, "loss": 3.6012, "step": 82805 }, { "epoch": 0.8423868815104166, "grad_norm": 8.101212501525879, "learning_rate": 3.0086301445600816e-07, "loss": 3.3002, "step": 82810 }, { "epoch": 0.842437744140625, "grad_norm": 7.306506633758545, "learning_rate": 3.006728541327325e-07, "loss": 3.3596, "step": 82815 }, { "epoch": 0.8424886067708334, "grad_norm": 11.812324523925781, "learning_rate": 3.0048275007872e-07, "loss": 3.365, "step": 82820 }, { "epoch": 0.8425394694010416, "grad_norm": 12.635832786560059, "learning_rate": 3.002927022988358e-07, "loss": 3.4852, "step": 82825 }, { "epoch": 0.84259033203125, "grad_norm": 14.342005729675293, "learning_rate": 3.001027107979412e-07, "loss": 3.2962, "step": 82830 }, { "epoch": 0.8426411946614584, "grad_norm": 10.07796859741211, "learning_rate": 2.9991277558089673e-07, "loss": 3.4087, "step": 82835 }, { "epoch": 0.8426920572916666, "grad_norm": 13.13611888885498, "learning_rate": 2.9972289665256256e-07, "loss": 3.1798, "step": 82840 }, { "epoch": 0.842742919921875, "grad_norm": 12.30323314666748, "learning_rate": 2.995330740177971e-07, "loss": 2.9818, "step": 82845 }, { "epoch": 0.8427937825520834, "grad_norm": 13.685778617858887, "learning_rate": 2.993433076814561e-07, "loss": 3.4816, "step": 82850 }, { "epoch": 0.8428446451822916, "grad_norm": 10.468883514404297, "learning_rate": 2.991535976483947e-07, "loss": 3.3311, "step": 82855 }, { "epoch": 0.8428955078125, "grad_norm": 12.333459854125977, "learning_rate": 2.989639439234668e-07, "loss": 3.3499, "step": 82860 }, { "epoch": 0.8429463704427084, "grad_norm": 12.511208534240723, "learning_rate": 2.9877434651152506e-07, "loss": 3.5325, "step": 82865 }, { "epoch": 0.8429972330729166, "grad_norm": 11.679605484008789, "learning_rate": 2.985848054174201e-07, "loss": 3.553, "step": 82870 }, { "epoch": 0.843048095703125, "grad_norm": 8.571442604064941, "learning_rate": 2.983953206460005e-07, "loss": 3.3191, "step": 82875 }, { "epoch": 0.8430989583333334, "grad_norm": 9.685628890991211, "learning_rate": 2.982058922021153e-07, "loss": 3.2059, "step": 82880 }, { "epoch": 0.8431498209635416, "grad_norm": 12.202717781066895, "learning_rate": 2.980165200906102e-07, "loss": 3.248, "step": 82885 }, { "epoch": 0.84320068359375, "grad_norm": 12.813880920410156, "learning_rate": 2.978272043163308e-07, "loss": 3.4659, "step": 82890 }, { "epoch": 0.8432515462239584, "grad_norm": 13.448853492736816, "learning_rate": 2.976379448841199e-07, "loss": 3.2986, "step": 82895 }, { "epoch": 0.8433024088541666, "grad_norm": 14.669266700744629, "learning_rate": 2.974487417988206e-07, "loss": 3.674, "step": 82900 }, { "epoch": 0.843353271484375, "grad_norm": 8.112249374389648, "learning_rate": 2.9725959506527317e-07, "loss": 3.2246, "step": 82905 }, { "epoch": 0.8434041341145834, "grad_norm": 14.91462230682373, "learning_rate": 2.970705046883166e-07, "loss": 3.377, "step": 82910 }, { "epoch": 0.8434549967447916, "grad_norm": 14.564872741699219, "learning_rate": 2.968814706727888e-07, "loss": 4.0069, "step": 82915 }, { "epoch": 0.843505859375, "grad_norm": 10.619250297546387, "learning_rate": 2.966924930235268e-07, "loss": 3.4485, "step": 82920 }, { "epoch": 0.8435567220052084, "grad_norm": 8.611906051635742, "learning_rate": 2.965035717453654e-07, "loss": 3.3972, "step": 82925 }, { "epoch": 0.8436075846354166, "grad_norm": 8.428669929504395, "learning_rate": 2.9631470684313713e-07, "loss": 3.6412, "step": 82930 }, { "epoch": 0.843658447265625, "grad_norm": 11.347962379455566, "learning_rate": 2.961258983216755e-07, "loss": 3.332, "step": 82935 }, { "epoch": 0.8437093098958334, "grad_norm": 10.147064208984375, "learning_rate": 2.959371461858096e-07, "loss": 3.2402, "step": 82940 }, { "epoch": 0.8437601725260416, "grad_norm": 14.61273193359375, "learning_rate": 2.9574845044037013e-07, "loss": 3.3754, "step": 82945 }, { "epoch": 0.84381103515625, "grad_norm": 10.914712905883789, "learning_rate": 2.955598110901833e-07, "loss": 3.3198, "step": 82950 }, { "epoch": 0.8438618977864584, "grad_norm": 8.582925796508789, "learning_rate": 2.9537122814007695e-07, "loss": 3.4709, "step": 82955 }, { "epoch": 0.8439127604166666, "grad_norm": 14.4943208694458, "learning_rate": 2.9518270159487524e-07, "loss": 3.2319, "step": 82960 }, { "epoch": 0.843963623046875, "grad_norm": 12.431747436523438, "learning_rate": 2.9499423145940067e-07, "loss": 3.3942, "step": 82965 }, { "epoch": 0.8440144856770834, "grad_norm": 15.191832542419434, "learning_rate": 2.9480581773847636e-07, "loss": 3.448, "step": 82970 }, { "epoch": 0.8440653483072916, "grad_norm": 11.43300724029541, "learning_rate": 2.9461746043692286e-07, "loss": 3.5234, "step": 82975 }, { "epoch": 0.8441162109375, "grad_norm": 11.807929992675781, "learning_rate": 2.9442915955955885e-07, "loss": 3.2456, "step": 82980 }, { "epoch": 0.8441670735677084, "grad_norm": 11.288966178894043, "learning_rate": 2.942409151112022e-07, "loss": 3.5194, "step": 82985 }, { "epoch": 0.8442179361979166, "grad_norm": 16.285200119018555, "learning_rate": 2.940527270966684e-07, "loss": 3.1871, "step": 82990 }, { "epoch": 0.844268798828125, "grad_norm": 16.906225204467773, "learning_rate": 2.9386459552077254e-07, "loss": 3.2949, "step": 82995 }, { "epoch": 0.8443196614583334, "grad_norm": 11.90079116821289, "learning_rate": 2.9367652038832876e-07, "loss": 3.2382, "step": 83000 }, { "epoch": 0.8443705240885416, "grad_norm": 17.509187698364258, "learning_rate": 2.9348850170414805e-07, "loss": 3.4055, "step": 83005 }, { "epoch": 0.84442138671875, "grad_norm": 12.756876945495605, "learning_rate": 2.9330053947304065e-07, "loss": 3.3672, "step": 83010 }, { "epoch": 0.8444722493489584, "grad_norm": 13.759655952453613, "learning_rate": 2.9311263369981606e-07, "loss": 3.3343, "step": 83015 }, { "epoch": 0.8445231119791666, "grad_norm": 11.43967342376709, "learning_rate": 2.9292478438928183e-07, "loss": 3.0752, "step": 83020 }, { "epoch": 0.844573974609375, "grad_norm": 9.05441951751709, "learning_rate": 2.9273699154624356e-07, "loss": 3.0076, "step": 83025 }, { "epoch": 0.8446248372395834, "grad_norm": 14.203214645385742, "learning_rate": 2.925492551755066e-07, "loss": 3.6563, "step": 83030 }, { "epoch": 0.8446756998697916, "grad_norm": 14.050749778747559, "learning_rate": 2.92361575281874e-07, "loss": 3.8481, "step": 83035 }, { "epoch": 0.8447265625, "grad_norm": 8.36119270324707, "learning_rate": 2.9217395187014635e-07, "loss": 3.4725, "step": 83040 }, { "epoch": 0.8447774251302084, "grad_norm": 14.630526542663574, "learning_rate": 2.919863849451249e-07, "loss": 3.7519, "step": 83045 }, { "epoch": 0.8448282877604166, "grad_norm": 13.809406280517578, "learning_rate": 2.9179887451160906e-07, "loss": 2.903, "step": 83050 }, { "epoch": 0.844879150390625, "grad_norm": 12.206207275390625, "learning_rate": 2.9161142057439556e-07, "loss": 3.3143, "step": 83055 }, { "epoch": 0.8449300130208334, "grad_norm": 10.303620338439941, "learning_rate": 2.9142402313828033e-07, "loss": 3.3072, "step": 83060 }, { "epoch": 0.8449808756510416, "grad_norm": 11.616304397583008, "learning_rate": 2.912366822080576e-07, "loss": 3.3516, "step": 83065 }, { "epoch": 0.84503173828125, "grad_norm": 11.960399627685547, "learning_rate": 2.910493977885209e-07, "loss": 3.0442, "step": 83070 }, { "epoch": 0.8450826009114584, "grad_norm": 8.009824752807617, "learning_rate": 2.9086216988446215e-07, "loss": 3.3228, "step": 83075 }, { "epoch": 0.8451334635416666, "grad_norm": 17.964433670043945, "learning_rate": 2.9067499850067124e-07, "loss": 3.5728, "step": 83080 }, { "epoch": 0.845184326171875, "grad_norm": 9.637298583984375, "learning_rate": 2.9048788364193615e-07, "loss": 3.3517, "step": 83085 }, { "epoch": 0.8452351888020834, "grad_norm": 16.583690643310547, "learning_rate": 2.9030082531304546e-07, "loss": 3.2254, "step": 83090 }, { "epoch": 0.8452860514322916, "grad_norm": 8.117792129516602, "learning_rate": 2.901138235187839e-07, "loss": 3.1893, "step": 83095 }, { "epoch": 0.8453369140625, "grad_norm": 9.406477928161621, "learning_rate": 2.8992687826393696e-07, "loss": 3.3288, "step": 83100 }, { "epoch": 0.8453877766927084, "grad_norm": 11.14752197265625, "learning_rate": 2.897399895532863e-07, "loss": 3.5397, "step": 83105 }, { "epoch": 0.8454386393229166, "grad_norm": 8.81459903717041, "learning_rate": 2.895531573916147e-07, "loss": 3.3393, "step": 83110 }, { "epoch": 0.845489501953125, "grad_norm": 14.247279167175293, "learning_rate": 2.893663817837017e-07, "loss": 3.4098, "step": 83115 }, { "epoch": 0.8455403645833334, "grad_norm": 7.800309658050537, "learning_rate": 2.891796627343252e-07, "loss": 3.3938, "step": 83120 }, { "epoch": 0.8455912272135416, "grad_norm": 14.212621688842773, "learning_rate": 2.8899300024826326e-07, "loss": 3.0912, "step": 83125 }, { "epoch": 0.84564208984375, "grad_norm": 12.19606876373291, "learning_rate": 2.8880639433029155e-07, "loss": 3.509, "step": 83130 }, { "epoch": 0.8456929524739584, "grad_norm": 13.947073936462402, "learning_rate": 2.886198449851843e-07, "loss": 2.942, "step": 83135 }, { "epoch": 0.8457438151041666, "grad_norm": 13.300604820251465, "learning_rate": 2.884333522177135e-07, "loss": 3.3743, "step": 83140 }, { "epoch": 0.845794677734375, "grad_norm": 10.905712127685547, "learning_rate": 2.8824691603265205e-07, "loss": 5.0671, "step": 83145 }, { "epoch": 0.8458455403645834, "grad_norm": 13.009864807128906, "learning_rate": 2.880605364347683e-07, "loss": 3.2609, "step": 83150 }, { "epoch": 0.8458964029947916, "grad_norm": 13.033794403076172, "learning_rate": 2.8787421342883174e-07, "loss": 3.1639, "step": 83155 }, { "epoch": 0.845947265625, "grad_norm": 15.466184616088867, "learning_rate": 2.876879470196087e-07, "loss": 3.0835, "step": 83160 }, { "epoch": 0.8459981282552084, "grad_norm": 10.184174537658691, "learning_rate": 2.8750173721186576e-07, "loss": 3.2319, "step": 83165 }, { "epoch": 0.8460489908854166, "grad_norm": 12.413190841674805, "learning_rate": 2.873155840103664e-07, "loss": 3.7967, "step": 83170 }, { "epoch": 0.846099853515625, "grad_norm": 12.316962242126465, "learning_rate": 2.8712948741987296e-07, "loss": 3.5576, "step": 83175 }, { "epoch": 0.8461507161458334, "grad_norm": 13.641777992248535, "learning_rate": 2.869434474451471e-07, "loss": 3.7668, "step": 83180 }, { "epoch": 0.8462015787760416, "grad_norm": 15.345667839050293, "learning_rate": 2.86757464090949e-07, "loss": 3.6061, "step": 83185 }, { "epoch": 0.84625244140625, "grad_norm": 8.986593246459961, "learning_rate": 2.8657153736203646e-07, "loss": 3.1737, "step": 83190 }, { "epoch": 0.8463033040364584, "grad_norm": 8.112719535827637, "learning_rate": 2.8638566726316595e-07, "loss": 3.4327, "step": 83195 }, { "epoch": 0.8463541666666666, "grad_norm": 6.413035869598389, "learning_rate": 2.861998537990937e-07, "loss": 3.2719, "step": 83200 }, { "epoch": 0.846405029296875, "grad_norm": 16.672090530395508, "learning_rate": 2.860140969745739e-07, "loss": 3.1371, "step": 83205 }, { "epoch": 0.8464558919270834, "grad_norm": 7.789152145385742, "learning_rate": 2.858283967943587e-07, "loss": 3.3255, "step": 83210 }, { "epoch": 0.8465067545572916, "grad_norm": 10.188570976257324, "learning_rate": 2.8564275326319917e-07, "loss": 3.5444, "step": 83215 }, { "epoch": 0.8465576171875, "grad_norm": 8.872726440429688, "learning_rate": 2.8545716638584437e-07, "loss": 3.2315, "step": 83220 }, { "epoch": 0.8466084798177084, "grad_norm": 10.287165641784668, "learning_rate": 2.8527163616704296e-07, "loss": 3.4485, "step": 83225 }, { "epoch": 0.8466593424479166, "grad_norm": 11.121027946472168, "learning_rate": 2.8508616261154224e-07, "loss": 3.1061, "step": 83230 }, { "epoch": 0.846710205078125, "grad_norm": 9.999377250671387, "learning_rate": 2.849007457240868e-07, "loss": 3.4964, "step": 83235 }, { "epoch": 0.8467610677083334, "grad_norm": 10.139492988586426, "learning_rate": 2.847153855094212e-07, "loss": 3.1607, "step": 83240 }, { "epoch": 0.8468119303385416, "grad_norm": 10.248892784118652, "learning_rate": 2.8453008197228736e-07, "loss": 2.9952, "step": 83245 }, { "epoch": 0.84686279296875, "grad_norm": 13.99163818359375, "learning_rate": 2.843448351174255e-07, "loss": 2.7576, "step": 83250 }, { "epoch": 0.8469136555989584, "grad_norm": 13.969670295715332, "learning_rate": 2.841596449495762e-07, "loss": 3.3207, "step": 83255 }, { "epoch": 0.8469645182291666, "grad_norm": 12.393842697143555, "learning_rate": 2.839745114734777e-07, "loss": 3.2999, "step": 83260 }, { "epoch": 0.847015380859375, "grad_norm": 10.31478214263916, "learning_rate": 2.8378943469386577e-07, "loss": 3.0767, "step": 83265 }, { "epoch": 0.8470662434895834, "grad_norm": 14.564029693603516, "learning_rate": 2.8360441461547595e-07, "loss": 3.316, "step": 83270 }, { "epoch": 0.8471171061197916, "grad_norm": 9.800872802734375, "learning_rate": 2.8341945124304133e-07, "loss": 3.4844, "step": 83275 }, { "epoch": 0.84716796875, "grad_norm": 15.539414405822754, "learning_rate": 2.832345445812948e-07, "loss": 3.5376, "step": 83280 }, { "epoch": 0.8472188313802084, "grad_norm": 40.019187927246094, "learning_rate": 2.8304969463496754e-07, "loss": 3.3367, "step": 83285 }, { "epoch": 0.8472696940104166, "grad_norm": 9.034164428710938, "learning_rate": 2.828649014087881e-07, "loss": 3.3207, "step": 83290 }, { "epoch": 0.847320556640625, "grad_norm": 7.02910041809082, "learning_rate": 2.826801649074845e-07, "loss": 3.1843, "step": 83295 }, { "epoch": 0.8473714192708334, "grad_norm": 14.586396217346191, "learning_rate": 2.8249548513578364e-07, "loss": 3.238, "step": 83300 }, { "epoch": 0.8474222819010416, "grad_norm": 8.197888374328613, "learning_rate": 2.823108620984097e-07, "loss": 3.5281, "step": 83305 }, { "epoch": 0.84747314453125, "grad_norm": 9.780033111572266, "learning_rate": 2.82126295800087e-07, "loss": 3.3656, "step": 83310 }, { "epoch": 0.8475240071614584, "grad_norm": 14.232123374938965, "learning_rate": 2.81941786245537e-07, "loss": 3.7687, "step": 83315 }, { "epoch": 0.8475748697916666, "grad_norm": 12.66684341430664, "learning_rate": 2.817573334394813e-07, "loss": 3.492, "step": 83320 }, { "epoch": 0.847625732421875, "grad_norm": 9.312463760375977, "learning_rate": 2.8157293738663826e-07, "loss": 3.2143, "step": 83325 }, { "epoch": 0.8476765950520834, "grad_norm": 9.992286682128906, "learning_rate": 2.8138859809172467e-07, "loss": 3.3887, "step": 83330 }, { "epoch": 0.8477274576822916, "grad_norm": 19.722196578979492, "learning_rate": 2.812043155594593e-07, "loss": 3.2264, "step": 83335 }, { "epoch": 0.8477783203125, "grad_norm": 10.095829963684082, "learning_rate": 2.810200897945553e-07, "loss": 3.257, "step": 83340 }, { "epoch": 0.8478291829427084, "grad_norm": 14.524836540222168, "learning_rate": 2.808359208017264e-07, "loss": 3.3492, "step": 83345 }, { "epoch": 0.8478800455729166, "grad_norm": 10.989709854125977, "learning_rate": 2.8065180858568406e-07, "loss": 3.4612, "step": 83350 }, { "epoch": 0.847930908203125, "grad_norm": 13.342979431152344, "learning_rate": 2.804677531511391e-07, "loss": 3.2874, "step": 83355 }, { "epoch": 0.8479817708333334, "grad_norm": 15.804058074951172, "learning_rate": 2.8028375450280117e-07, "loss": 3.1215, "step": 83360 }, { "epoch": 0.8480326334635416, "grad_norm": 9.274304389953613, "learning_rate": 2.8009981264537715e-07, "loss": 3.5346, "step": 83365 }, { "epoch": 0.84808349609375, "grad_norm": 10.4225492477417, "learning_rate": 2.799159275835728e-07, "loss": 3.4584, "step": 83370 }, { "epoch": 0.8481343587239584, "grad_norm": 9.473432540893555, "learning_rate": 2.7973209932209367e-07, "loss": 3.2326, "step": 83375 }, { "epoch": 0.8481852213541666, "grad_norm": 13.394980430603027, "learning_rate": 2.7954832786564216e-07, "loss": 3.5046, "step": 83380 }, { "epoch": 0.848236083984375, "grad_norm": 12.589401245117188, "learning_rate": 2.793646132189207e-07, "loss": 3.2171, "step": 83385 }, { "epoch": 0.8482869466145834, "grad_norm": 14.52203369140625, "learning_rate": 2.7918095538662873e-07, "loss": 3.263, "step": 83390 }, { "epoch": 0.8483378092447916, "grad_norm": 7.75312614440918, "learning_rate": 2.7899735437346644e-07, "loss": 3.1735, "step": 83395 }, { "epoch": 0.848388671875, "grad_norm": 12.05152416229248, "learning_rate": 2.788138101841301e-07, "loss": 3.3721, "step": 83400 }, { "epoch": 0.8484395345052084, "grad_norm": 15.198909759521484, "learning_rate": 2.786303228233156e-07, "loss": 3.2584, "step": 83405 }, { "epoch": 0.8484903971354166, "grad_norm": 12.124019622802734, "learning_rate": 2.784468922957176e-07, "loss": 3.3842, "step": 83410 }, { "epoch": 0.848541259765625, "grad_norm": 11.108325004577637, "learning_rate": 2.782635186060298e-07, "loss": 3.3118, "step": 83415 }, { "epoch": 0.8485921223958334, "grad_norm": 8.87848949432373, "learning_rate": 2.780802017589435e-07, "loss": 3.3771, "step": 83420 }, { "epoch": 0.8486429850260416, "grad_norm": 8.800379753112793, "learning_rate": 2.7789694175914787e-07, "loss": 3.3291, "step": 83425 }, { "epoch": 0.84869384765625, "grad_norm": 10.400461196899414, "learning_rate": 2.777137386113329e-07, "loss": 3.203, "step": 83430 }, { "epoch": 0.8487447102864584, "grad_norm": 13.805158615112305, "learning_rate": 2.775305923201846e-07, "loss": 3.357, "step": 83435 }, { "epoch": 0.8487955729166666, "grad_norm": 12.790278434753418, "learning_rate": 2.7734750289038994e-07, "loss": 3.2673, "step": 83440 }, { "epoch": 0.848846435546875, "grad_norm": 6.896450996398926, "learning_rate": 2.7716447032663215e-07, "loss": 3.4602, "step": 83445 }, { "epoch": 0.8488972981770834, "grad_norm": 13.737564086914062, "learning_rate": 2.769814946335947e-07, "loss": 3.5267, "step": 83450 }, { "epoch": 0.8489481608072916, "grad_norm": 8.333404541015625, "learning_rate": 2.7679857581595913e-07, "loss": 3.6422, "step": 83455 }, { "epoch": 0.8489990234375, "grad_norm": 20.389053344726562, "learning_rate": 2.7661571387840425e-07, "loss": 3.3173, "step": 83460 }, { "epoch": 0.8490498860677084, "grad_norm": 14.098817825317383, "learning_rate": 2.764329088256096e-07, "loss": 3.1836, "step": 83465 }, { "epoch": 0.8491007486979166, "grad_norm": 10.861899375915527, "learning_rate": 2.7625016066225223e-07, "loss": 3.4802, "step": 83470 }, { "epoch": 0.849151611328125, "grad_norm": 13.729740142822266, "learning_rate": 2.7606746939300766e-07, "loss": 3.28, "step": 83475 }, { "epoch": 0.8492024739583334, "grad_norm": 8.617941856384277, "learning_rate": 2.758848350225496e-07, "loss": 3.288, "step": 83480 }, { "epoch": 0.8492533365885416, "grad_norm": 8.380363464355469, "learning_rate": 2.7570225755554985e-07, "loss": 3.4407, "step": 83485 }, { "epoch": 0.84930419921875, "grad_norm": 8.086506843566895, "learning_rate": 2.7551973699668166e-07, "loss": 3.5749, "step": 83490 }, { "epoch": 0.8493550618489584, "grad_norm": 11.313240051269531, "learning_rate": 2.753372733506135e-07, "loss": 3.5966, "step": 83495 }, { "epoch": 0.8494059244791666, "grad_norm": 11.246660232543945, "learning_rate": 2.7515486662201414e-07, "loss": 3.0105, "step": 83500 }, { "epoch": 0.849456787109375, "grad_norm": 10.222160339355469, "learning_rate": 2.749725168155498e-07, "loss": 3.5749, "step": 83505 }, { "epoch": 0.8495076497395834, "grad_norm": 11.909274101257324, "learning_rate": 2.74790223935886e-07, "loss": 3.535, "step": 83510 }, { "epoch": 0.8495585123697916, "grad_norm": 18.18555450439453, "learning_rate": 2.746079879876876e-07, "loss": 3.3431, "step": 83515 }, { "epoch": 0.849609375, "grad_norm": 8.408985137939453, "learning_rate": 2.744258089756155e-07, "loss": 3.1151, "step": 83520 }, { "epoch": 0.8496602376302084, "grad_norm": 12.356221199035645, "learning_rate": 2.742436869043324e-07, "loss": 3.3734, "step": 83525 }, { "epoch": 0.8497111002604166, "grad_norm": 14.428342819213867, "learning_rate": 2.7406162177849693e-07, "loss": 3.3936, "step": 83530 }, { "epoch": 0.849761962890625, "grad_norm": 7.772458076477051, "learning_rate": 2.738796136027666e-07, "loss": 3.1878, "step": 83535 }, { "epoch": 0.8498128255208334, "grad_norm": 10.488436698913574, "learning_rate": 2.7369766238179884e-07, "loss": 3.2562, "step": 83540 }, { "epoch": 0.8498636881510416, "grad_norm": 10.883633613586426, "learning_rate": 2.735157681202491e-07, "loss": 3.2546, "step": 83545 }, { "epoch": 0.84991455078125, "grad_norm": 11.330202102661133, "learning_rate": 2.733339308227709e-07, "loss": 3.3776, "step": 83550 }, { "epoch": 0.8499654134114584, "grad_norm": 10.628450393676758, "learning_rate": 2.73152150494016e-07, "loss": 3.3326, "step": 83555 }, { "epoch": 0.8500162760416666, "grad_norm": 11.719513893127441, "learning_rate": 2.729704271386352e-07, "loss": 3.2302, "step": 83560 }, { "epoch": 0.850067138671875, "grad_norm": 13.615124702453613, "learning_rate": 2.727887607612784e-07, "loss": 3.3346, "step": 83565 }, { "epoch": 0.8501180013020834, "grad_norm": 14.910679817199707, "learning_rate": 2.726071513665934e-07, "loss": 3.3568, "step": 83570 }, { "epoch": 0.8501688639322916, "grad_norm": 12.558643341064453, "learning_rate": 2.724255989592264e-07, "loss": 3.2009, "step": 83575 }, { "epoch": 0.8502197265625, "grad_norm": 18.387767791748047, "learning_rate": 2.7224410354382227e-07, "loss": 3.0721, "step": 83580 }, { "epoch": 0.8502705891927084, "grad_norm": 14.17619514465332, "learning_rate": 2.720626651250249e-07, "loss": 2.9736, "step": 83585 }, { "epoch": 0.8503214518229166, "grad_norm": 9.583733558654785, "learning_rate": 2.718812837074761e-07, "loss": 3.5239, "step": 83590 }, { "epoch": 0.850372314453125, "grad_norm": 7.749961853027344, "learning_rate": 2.7169995929581666e-07, "loss": 3.2825, "step": 83595 }, { "epoch": 0.8504231770833334, "grad_norm": 7.960093021392822, "learning_rate": 2.715186918946852e-07, "loss": 3.2317, "step": 83600 }, { "epoch": 0.8504740397135416, "grad_norm": 12.65415096282959, "learning_rate": 2.713374815087202e-07, "loss": 3.2379, "step": 83605 }, { "epoch": 0.85052490234375, "grad_norm": 11.380934715270996, "learning_rate": 2.711563281425578e-07, "loss": 3.315, "step": 83610 }, { "epoch": 0.8505757649739584, "grad_norm": 10.991366386413574, "learning_rate": 2.7097523180083156e-07, "loss": 3.0801, "step": 83615 }, { "epoch": 0.8506266276041666, "grad_norm": 14.657071113586426, "learning_rate": 2.70794192488176e-07, "loss": 3.34, "step": 83620 }, { "epoch": 0.850677490234375, "grad_norm": 8.607914924621582, "learning_rate": 2.706132102092229e-07, "loss": 3.1656, "step": 83625 }, { "epoch": 0.8507283528645834, "grad_norm": 14.747588157653809, "learning_rate": 2.704322849686025e-07, "loss": 3.0968, "step": 83630 }, { "epoch": 0.8507792154947916, "grad_norm": 12.232775688171387, "learning_rate": 2.702514167709433e-07, "loss": 3.1602, "step": 83635 }, { "epoch": 0.850830078125, "grad_norm": 8.508930206298828, "learning_rate": 2.7007060562087293e-07, "loss": 3.322, "step": 83640 }, { "epoch": 0.8508809407552084, "grad_norm": 9.501651763916016, "learning_rate": 2.6988985152301805e-07, "loss": 3.1027, "step": 83645 }, { "epoch": 0.8509318033854166, "grad_norm": 15.47403621673584, "learning_rate": 2.6970915448200267e-07, "loss": 3.0541, "step": 83650 }, { "epoch": 0.850982666015625, "grad_norm": 12.417950630187988, "learning_rate": 2.6952851450244953e-07, "loss": 3.3576, "step": 83655 }, { "epoch": 0.8510335286458334, "grad_norm": 12.438616752624512, "learning_rate": 2.6934793158898105e-07, "loss": 3.5725, "step": 83660 }, { "epoch": 0.8510843912760416, "grad_norm": 13.789573669433594, "learning_rate": 2.691674057462165e-07, "loss": 3.4432, "step": 83665 }, { "epoch": 0.85113525390625, "grad_norm": 10.428175926208496, "learning_rate": 2.6898693697877593e-07, "loss": 3.5053, "step": 83670 }, { "epoch": 0.8511861165364584, "grad_norm": 9.609519004821777, "learning_rate": 2.6880652529127497e-07, "loss": 3.2183, "step": 83675 }, { "epoch": 0.8512369791666666, "grad_norm": 14.939981460571289, "learning_rate": 2.6862617068833086e-07, "loss": 3.2356, "step": 83680 }, { "epoch": 0.851287841796875, "grad_norm": 15.604842185974121, "learning_rate": 2.6844587317455704e-07, "loss": 3.3283, "step": 83685 }, { "epoch": 0.8513387044270834, "grad_norm": 8.777827262878418, "learning_rate": 2.682656327545663e-07, "loss": 3.263, "step": 83690 }, { "epoch": 0.8513895670572916, "grad_norm": 13.913224220275879, "learning_rate": 2.6808544943297044e-07, "loss": 3.1008, "step": 83695 }, { "epoch": 0.8514404296875, "grad_norm": 11.493755340576172, "learning_rate": 2.679053232143797e-07, "loss": 3.2256, "step": 83700 }, { "epoch": 0.8514912923177084, "grad_norm": 15.015609741210938, "learning_rate": 2.6772525410340225e-07, "loss": 3.3691, "step": 83705 }, { "epoch": 0.8515421549479166, "grad_norm": 13.246819496154785, "learning_rate": 2.6754524210464513e-07, "loss": 3.3471, "step": 83710 }, { "epoch": 0.851593017578125, "grad_norm": 14.715752601623535, "learning_rate": 2.673652872227131e-07, "loss": 3.4959, "step": 83715 }, { "epoch": 0.8516438802083334, "grad_norm": 11.939118385314941, "learning_rate": 2.6718538946221116e-07, "loss": 3.5762, "step": 83720 }, { "epoch": 0.8516947428385416, "grad_norm": 11.789021492004395, "learning_rate": 2.670055488277423e-07, "loss": 3.1403, "step": 83725 }, { "epoch": 0.85174560546875, "grad_norm": 18.092769622802734, "learning_rate": 2.6682576532390673e-07, "loss": 3.8434, "step": 83730 }, { "epoch": 0.8517964680989584, "grad_norm": 10.054718971252441, "learning_rate": 2.666460389553052e-07, "loss": 3.3368, "step": 83735 }, { "epoch": 0.8518473307291666, "grad_norm": 11.558505058288574, "learning_rate": 2.6646636972653546e-07, "loss": 3.333, "step": 83740 }, { "epoch": 0.851898193359375, "grad_norm": 7.389967918395996, "learning_rate": 2.6628675764219346e-07, "loss": 3.3784, "step": 83745 }, { "epoch": 0.8519490559895834, "grad_norm": 14.015900611877441, "learning_rate": 2.6610720270687563e-07, "loss": 3.8593, "step": 83750 }, { "epoch": 0.8519999186197916, "grad_norm": 12.08255672454834, "learning_rate": 2.6592770492517574e-07, "loss": 3.3945, "step": 83755 }, { "epoch": 0.85205078125, "grad_norm": 8.277338981628418, "learning_rate": 2.657482643016862e-07, "loss": 3.1407, "step": 83760 }, { "epoch": 0.8521016438802084, "grad_norm": 16.168481826782227, "learning_rate": 2.655688808409976e-07, "loss": 3.4876, "step": 83765 }, { "epoch": 0.8521525065104166, "grad_norm": 13.869182586669922, "learning_rate": 2.6538955454769896e-07, "loss": 3.3737, "step": 83770 }, { "epoch": 0.852203369140625, "grad_norm": 7.083217144012451, "learning_rate": 2.6521028542637913e-07, "loss": 3.1822, "step": 83775 }, { "epoch": 0.8522542317708334, "grad_norm": 12.952024459838867, "learning_rate": 2.65031073481625e-07, "loss": 3.4927, "step": 83780 }, { "epoch": 0.8523050944010416, "grad_norm": 11.621052742004395, "learning_rate": 2.6485191871802095e-07, "loss": 3.1839, "step": 83785 }, { "epoch": 0.85235595703125, "grad_norm": 11.824081420898438, "learning_rate": 2.646728211401503e-07, "loss": 3.0807, "step": 83790 }, { "epoch": 0.8524068196614584, "grad_norm": 13.243668556213379, "learning_rate": 2.644937807525957e-07, "loss": 3.215, "step": 83795 }, { "epoch": 0.8524576822916666, "grad_norm": 12.352242469787598, "learning_rate": 2.6431479755993833e-07, "loss": 3.1282, "step": 83800 }, { "epoch": 0.852508544921875, "grad_norm": 9.239322662353516, "learning_rate": 2.6413587156675693e-07, "loss": 3.2456, "step": 83805 }, { "epoch": 0.8525594075520834, "grad_norm": 11.568145751953125, "learning_rate": 2.6395700277762894e-07, "loss": 3.4627, "step": 83810 }, { "epoch": 0.8526102701822916, "grad_norm": 13.08260440826416, "learning_rate": 2.6377819119713125e-07, "loss": 3.4344, "step": 83815 }, { "epoch": 0.8526611328125, "grad_norm": 12.213603019714355, "learning_rate": 2.635994368298381e-07, "loss": 3.6741, "step": 83820 }, { "epoch": 0.8527119954427084, "grad_norm": 12.419811248779297, "learning_rate": 2.6342073968032346e-07, "loss": 3.2684, "step": 83825 }, { "epoch": 0.8527628580729166, "grad_norm": 6.682579040527344, "learning_rate": 2.632420997531596e-07, "loss": 3.0465, "step": 83830 }, { "epoch": 0.852813720703125, "grad_norm": 11.076421737670898, "learning_rate": 2.630635170529161e-07, "loss": 3.2169, "step": 83835 }, { "epoch": 0.8528645833333334, "grad_norm": 10.50160026550293, "learning_rate": 2.628849915841625e-07, "loss": 3.3344, "step": 83840 }, { "epoch": 0.8529154459635416, "grad_norm": 12.040009498596191, "learning_rate": 2.6270652335146556e-07, "loss": 3.4984, "step": 83845 }, { "epoch": 0.85296630859375, "grad_norm": 13.960234642028809, "learning_rate": 2.62528112359392e-07, "loss": 3.0605, "step": 83850 }, { "epoch": 0.8530171712239584, "grad_norm": 9.576654434204102, "learning_rate": 2.6234975861250703e-07, "loss": 3.4375, "step": 83855 }, { "epoch": 0.8530680338541666, "grad_norm": 9.949191093444824, "learning_rate": 2.6217146211537286e-07, "loss": 3.7202, "step": 83860 }, { "epoch": 0.853118896484375, "grad_norm": 10.487893104553223, "learning_rate": 2.6199322287255105e-07, "loss": 3.2605, "step": 83865 }, { "epoch": 0.8531697591145834, "grad_norm": 10.049798965454102, "learning_rate": 2.6181504088860247e-07, "loss": 3.539, "step": 83870 }, { "epoch": 0.8532206217447916, "grad_norm": 13.798386573791504, "learning_rate": 2.616369161680851e-07, "loss": 3.3636, "step": 83875 }, { "epoch": 0.853271484375, "grad_norm": 12.284562110900879, "learning_rate": 2.6145884871555725e-07, "loss": 3.4647, "step": 83880 }, { "epoch": 0.8533223470052084, "grad_norm": 14.121574401855469, "learning_rate": 2.612808385355739e-07, "loss": 3.2157, "step": 83885 }, { "epoch": 0.8533732096354166, "grad_norm": 12.580548286437988, "learning_rate": 2.611028856326897e-07, "loss": 3.4041, "step": 83890 }, { "epoch": 0.853424072265625, "grad_norm": 8.253116607666016, "learning_rate": 2.609249900114577e-07, "loss": 3.7125, "step": 83895 }, { "epoch": 0.8534749348958334, "grad_norm": 15.323606491088867, "learning_rate": 2.607471516764287e-07, "loss": 3.099, "step": 83900 }, { "epoch": 0.8535257975260416, "grad_norm": 12.911646842956543, "learning_rate": 2.6056937063215297e-07, "loss": 3.4895, "step": 83905 }, { "epoch": 0.85357666015625, "grad_norm": 13.993356704711914, "learning_rate": 2.6039164688317966e-07, "loss": 3.4062, "step": 83910 }, { "epoch": 0.8536275227864584, "grad_norm": 12.202286720275879, "learning_rate": 2.602139804340553e-07, "loss": 3.5569, "step": 83915 }, { "epoch": 0.8536783854166666, "grad_norm": 21.464672088623047, "learning_rate": 2.6003637128932476e-07, "loss": 3.174, "step": 83920 }, { "epoch": 0.853729248046875, "grad_norm": 12.035834312438965, "learning_rate": 2.5985881945353315e-07, "loss": 3.4139, "step": 83925 }, { "epoch": 0.8537801106770834, "grad_norm": 8.957596778869629, "learning_rate": 2.596813249312222e-07, "loss": 3.1844, "step": 83930 }, { "epoch": 0.8538309733072916, "grad_norm": 8.745210647583008, "learning_rate": 2.5950388772693403e-07, "loss": 3.3869, "step": 83935 }, { "epoch": 0.8538818359375, "grad_norm": 14.097525596618652, "learning_rate": 2.5932650784520755e-07, "loss": 3.2003, "step": 83940 }, { "epoch": 0.8539326985677084, "grad_norm": 11.521891593933105, "learning_rate": 2.591491852905814e-07, "loss": 3.1345, "step": 83945 }, { "epoch": 0.8539835611979166, "grad_norm": 11.581704139709473, "learning_rate": 2.5897192006759193e-07, "loss": 3.2405, "step": 83950 }, { "epoch": 0.854034423828125, "grad_norm": 8.957106590270996, "learning_rate": 2.587947121807752e-07, "loss": 3.6836, "step": 83955 }, { "epoch": 0.8540852864583334, "grad_norm": 9.746102333068848, "learning_rate": 2.5861756163466423e-07, "loss": 3.3639, "step": 83960 }, { "epoch": 0.8541361490885416, "grad_norm": 15.997204780578613, "learning_rate": 2.584404684337918e-07, "loss": 3.0967, "step": 83965 }, { "epoch": 0.85418701171875, "grad_norm": 14.250624656677246, "learning_rate": 2.5826343258268883e-07, "loss": 3.0947, "step": 83970 }, { "epoch": 0.8542378743489584, "grad_norm": 13.187626838684082, "learning_rate": 2.5808645408588406e-07, "loss": 3.3481, "step": 83975 }, { "epoch": 0.8542887369791666, "grad_norm": 7.7368645668029785, "learning_rate": 2.5790953294790616e-07, "loss": 2.9572, "step": 83980 }, { "epoch": 0.854339599609375, "grad_norm": 9.206607818603516, "learning_rate": 2.577326691732818e-07, "loss": 3.1199, "step": 83985 }, { "epoch": 0.8543904622395834, "grad_norm": 14.619028091430664, "learning_rate": 2.575558627665356e-07, "loss": 3.4817, "step": 83990 }, { "epoch": 0.8544413248697916, "grad_norm": 11.724611282348633, "learning_rate": 2.57379113732191e-07, "loss": 3.3051, "step": 83995 }, { "epoch": 0.8544921875, "grad_norm": 17.9774227142334, "learning_rate": 2.5720242207476987e-07, "loss": 3.3657, "step": 84000 }, { "epoch": 0.8545430501302084, "grad_norm": 14.093949317932129, "learning_rate": 2.570257877987931e-07, "loss": 3.3961, "step": 84005 }, { "epoch": 0.8545939127604166, "grad_norm": 7.811964988708496, "learning_rate": 2.5684921090878056e-07, "loss": 3.2081, "step": 84010 }, { "epoch": 0.854644775390625, "grad_norm": 9.574098587036133, "learning_rate": 2.566726914092488e-07, "loss": 3.3077, "step": 84015 }, { "epoch": 0.8546956380208334, "grad_norm": 10.139022827148438, "learning_rate": 2.5649622930471463e-07, "loss": 3.5255, "step": 84020 }, { "epoch": 0.8547465006510416, "grad_norm": 15.366785049438477, "learning_rate": 2.5631982459969314e-07, "loss": 3.4556, "step": 84025 }, { "epoch": 0.85479736328125, "grad_norm": 9.718144416809082, "learning_rate": 2.561434772986962e-07, "loss": 3.3304, "step": 84030 }, { "epoch": 0.8548482259114584, "grad_norm": 14.18233585357666, "learning_rate": 2.5596718740623694e-07, "loss": 3.207, "step": 84035 }, { "epoch": 0.8548990885416666, "grad_norm": 10.997235298156738, "learning_rate": 2.557909549268253e-07, "loss": 3.3991, "step": 84040 }, { "epoch": 0.854949951171875, "grad_norm": 7.477236270904541, "learning_rate": 2.5561477986497053e-07, "loss": 3.3484, "step": 84045 }, { "epoch": 0.8550008138020834, "grad_norm": 10.705188751220703, "learning_rate": 2.554386622251795e-07, "loss": 3.2581, "step": 84050 }, { "epoch": 0.8550516764322916, "grad_norm": 13.796873092651367, "learning_rate": 2.552626020119578e-07, "loss": 3.6276, "step": 84055 }, { "epoch": 0.8551025390625, "grad_norm": 12.384038925170898, "learning_rate": 2.5508659922981046e-07, "loss": 3.3599, "step": 84060 }, { "epoch": 0.8551534016927084, "grad_norm": 14.427067756652832, "learning_rate": 2.549106538832408e-07, "loss": 3.4612, "step": 84065 }, { "epoch": 0.8552042643229166, "grad_norm": 11.042256355285645, "learning_rate": 2.547347659767499e-07, "loss": 3.1627, "step": 84070 }, { "epoch": 0.855255126953125, "grad_norm": 9.276700019836426, "learning_rate": 2.545589355148373e-07, "loss": 3.139, "step": 84075 }, { "epoch": 0.8553059895833334, "grad_norm": 10.519864082336426, "learning_rate": 2.5438316250200267e-07, "loss": 3.8058, "step": 84080 }, { "epoch": 0.8553568522135416, "grad_norm": 15.285600662231445, "learning_rate": 2.5420744694274216e-07, "loss": 3.1438, "step": 84085 }, { "epoch": 0.85540771484375, "grad_norm": 15.675111770629883, "learning_rate": 2.540317888415522e-07, "loss": 3.4444, "step": 84090 }, { "epoch": 0.8554585774739584, "grad_norm": 44.94129180908203, "learning_rate": 2.5385618820292613e-07, "loss": 3.3156, "step": 84095 }, { "epoch": 0.8555094401041666, "grad_norm": 16.871110916137695, "learning_rate": 2.536806450313575e-07, "loss": 3.1729, "step": 84100 }, { "epoch": 0.855560302734375, "grad_norm": 10.780709266662598, "learning_rate": 2.5350515933133643e-07, "loss": 3.9411, "step": 84105 }, { "epoch": 0.8556111653645834, "grad_norm": 14.381935119628906, "learning_rate": 2.533297311073538e-07, "loss": 3.2177, "step": 84110 }, { "epoch": 0.8556620279947916, "grad_norm": 9.522446632385254, "learning_rate": 2.531543603638978e-07, "loss": 3.5338, "step": 84115 }, { "epoch": 0.855712890625, "grad_norm": 13.749747276306152, "learning_rate": 2.5297904710545493e-07, "loss": 3.1861, "step": 84120 }, { "epoch": 0.8557637532552084, "grad_norm": 9.4033784866333, "learning_rate": 2.5280379133651055e-07, "loss": 3.4675, "step": 84125 }, { "epoch": 0.8558146158854166, "grad_norm": 14.979850769042969, "learning_rate": 2.52628593061548e-07, "loss": 3.5098, "step": 84130 }, { "epoch": 0.855865478515625, "grad_norm": 12.472001075744629, "learning_rate": 2.5245345228505024e-07, "loss": 3.4768, "step": 84135 }, { "epoch": 0.8559163411458334, "grad_norm": 14.229353904724121, "learning_rate": 2.5227836901149857e-07, "loss": 3.4148, "step": 84140 }, { "epoch": 0.8559672037760416, "grad_norm": 9.634638786315918, "learning_rate": 2.5210334324537204e-07, "loss": 3.5641, "step": 84145 }, { "epoch": 0.85601806640625, "grad_norm": 12.365317344665527, "learning_rate": 2.519283749911483e-07, "loss": 3.316, "step": 84150 }, { "epoch": 0.8560689290364584, "grad_norm": 11.534818649291992, "learning_rate": 2.5175346425330455e-07, "loss": 3.3737, "step": 84155 }, { "epoch": 0.8561197916666666, "grad_norm": 8.3718843460083, "learning_rate": 2.5157861103631524e-07, "loss": 3.1727, "step": 84160 }, { "epoch": 0.856170654296875, "grad_norm": 9.538040161132812, "learning_rate": 2.5140381534465465e-07, "loss": 3.3974, "step": 84165 }, { "epoch": 0.8562215169270834, "grad_norm": 14.558609962463379, "learning_rate": 2.512290771827941e-07, "loss": 3.2332, "step": 84170 }, { "epoch": 0.8562723795572916, "grad_norm": 7.958302021026611, "learning_rate": 2.510543965552048e-07, "loss": 3.1759, "step": 84175 }, { "epoch": 0.8563232421875, "grad_norm": 16.645124435424805, "learning_rate": 2.5087977346635596e-07, "loss": 3.3306, "step": 84180 }, { "epoch": 0.8563741048177084, "grad_norm": 8.358590126037598, "learning_rate": 2.507052079207145e-07, "loss": 3.3807, "step": 84185 }, { "epoch": 0.8564249674479166, "grad_norm": 10.768516540527344, "learning_rate": 2.505306999227472e-07, "loss": 3.2633, "step": 84190 }, { "epoch": 0.856475830078125, "grad_norm": 12.022039413452148, "learning_rate": 2.5035624947691907e-07, "loss": 3.7514, "step": 84195 }, { "epoch": 0.8565266927083334, "grad_norm": 15.358153343200684, "learning_rate": 2.5018185658769315e-07, "loss": 3.2351, "step": 84200 }, { "epoch": 0.8565775553385416, "grad_norm": 10.9464750289917, "learning_rate": 2.500075212595307e-07, "loss": 3.4272, "step": 84205 }, { "epoch": 0.85662841796875, "grad_norm": 14.145750045776367, "learning_rate": 2.498332434968928e-07, "loss": 3.379, "step": 84210 }, { "epoch": 0.8566792805989584, "grad_norm": 10.068707466125488, "learning_rate": 2.4965902330423785e-07, "loss": 3.0267, "step": 84215 }, { "epoch": 0.8567301432291666, "grad_norm": 17.326412200927734, "learning_rate": 2.4948486068602367e-07, "loss": 3.482, "step": 84220 }, { "epoch": 0.856781005859375, "grad_norm": 14.676739692687988, "learning_rate": 2.493107556467056e-07, "loss": 3.2957, "step": 84225 }, { "epoch": 0.8568318684895834, "grad_norm": 10.21787166595459, "learning_rate": 2.4913670819073863e-07, "loss": 2.9701, "step": 84230 }, { "epoch": 0.8568827311197916, "grad_norm": 10.94861888885498, "learning_rate": 2.489627183225757e-07, "loss": 3.6475, "step": 84235 }, { "epoch": 0.85693359375, "grad_norm": 14.115302085876465, "learning_rate": 2.4878878604666726e-07, "loss": 3.4225, "step": 84240 }, { "epoch": 0.8569844563802084, "grad_norm": 8.742629051208496, "learning_rate": 2.4861491136746436e-07, "loss": 3.5919, "step": 84245 }, { "epoch": 0.8570353190104166, "grad_norm": 15.903121948242188, "learning_rate": 2.484410942894158e-07, "loss": 3.4988, "step": 84250 }, { "epoch": 0.857086181640625, "grad_norm": 11.232239723205566, "learning_rate": 2.4826733481696815e-07, "loss": 3.3979, "step": 84255 }, { "epoch": 0.8571370442708334, "grad_norm": 12.156452178955078, "learning_rate": 2.480936329545666e-07, "loss": 3.6659, "step": 84260 }, { "epoch": 0.8571879069010416, "grad_norm": 8.747230529785156, "learning_rate": 2.479199887066555e-07, "loss": 3.8577, "step": 84265 }, { "epoch": 0.85723876953125, "grad_norm": 13.07475471496582, "learning_rate": 2.477464020776782e-07, "loss": 3.8486, "step": 84270 }, { "epoch": 0.8572896321614584, "grad_norm": 12.68109130859375, "learning_rate": 2.4757287307207555e-07, "loss": 3.4064, "step": 84275 }, { "epoch": 0.8573404947916666, "grad_norm": 15.027631759643555, "learning_rate": 2.473994016942868e-07, "loss": 3.2732, "step": 84280 }, { "epoch": 0.857391357421875, "grad_norm": 10.339942932128906, "learning_rate": 2.4722598794875014e-07, "loss": 3.049, "step": 84285 }, { "epoch": 0.8574422200520834, "grad_norm": 13.480106353759766, "learning_rate": 2.470526318399027e-07, "loss": 3.4281, "step": 84290 }, { "epoch": 0.8574930826822916, "grad_norm": 13.331635475158691, "learning_rate": 2.4687933337218e-07, "loss": 3.5205, "step": 84295 }, { "epoch": 0.8575439453125, "grad_norm": 10.807036399841309, "learning_rate": 2.467060925500156e-07, "loss": 3.108, "step": 84300 }, { "epoch": 0.8575948079427084, "grad_norm": 10.52746295928955, "learning_rate": 2.4653290937784127e-07, "loss": 3.3692, "step": 84305 }, { "epoch": 0.8576456705729166, "grad_norm": 11.065303802490234, "learning_rate": 2.463597838600887e-07, "loss": 3.4338, "step": 84310 }, { "epoch": 0.857696533203125, "grad_norm": 14.478635787963867, "learning_rate": 2.4618671600118633e-07, "loss": 3.6296, "step": 84315 }, { "epoch": 0.8577473958333334, "grad_norm": 11.326279640197754, "learning_rate": 2.460137058055628e-07, "loss": 3.1279, "step": 84320 }, { "epoch": 0.8577982584635416, "grad_norm": 11.119710922241211, "learning_rate": 2.4584075327764466e-07, "loss": 3.1166, "step": 84325 }, { "epoch": 0.85784912109375, "grad_norm": 10.756710052490234, "learning_rate": 2.456678584218566e-07, "loss": 3.1323, "step": 84330 }, { "epoch": 0.8578999837239584, "grad_norm": 9.484225273132324, "learning_rate": 2.4549502124262217e-07, "loss": 3.3545, "step": 84335 }, { "epoch": 0.8579508463541666, "grad_norm": 12.128405570983887, "learning_rate": 2.4532224174436266e-07, "loss": 3.3032, "step": 84340 }, { "epoch": 0.858001708984375, "grad_norm": 11.937442779541016, "learning_rate": 2.4514951993149933e-07, "loss": 3.6189, "step": 84345 }, { "epoch": 0.8580525716145834, "grad_norm": 8.71423053741455, "learning_rate": 2.4497685580845143e-07, "loss": 3.3089, "step": 84350 }, { "epoch": 0.8581034342447916, "grad_norm": 11.5895414352417, "learning_rate": 2.4480424937963596e-07, "loss": 3.1087, "step": 84355 }, { "epoch": 0.858154296875, "grad_norm": 16.375810623168945, "learning_rate": 2.446317006494692e-07, "loss": 3.3446, "step": 84360 }, { "epoch": 0.8582051595052084, "grad_norm": 7.415759086608887, "learning_rate": 2.444592096223658e-07, "loss": 3.6263, "step": 84365 }, { "epoch": 0.8582560221354166, "grad_norm": 9.510431289672852, "learning_rate": 2.442867763027387e-07, "loss": 3.3406, "step": 84370 }, { "epoch": 0.858306884765625, "grad_norm": 15.061565399169922, "learning_rate": 2.4411440069500023e-07, "loss": 3.4792, "step": 84375 }, { "epoch": 0.8583577473958334, "grad_norm": 17.945556640625, "learning_rate": 2.4394208280355965e-07, "loss": 3.63, "step": 84380 }, { "epoch": 0.8584086100260416, "grad_norm": 14.503132820129395, "learning_rate": 2.437698226328267e-07, "loss": 3.1014, "step": 84385 }, { "epoch": 0.85845947265625, "grad_norm": 11.974645614624023, "learning_rate": 2.435976201872078e-07, "loss": 3.0912, "step": 84390 }, { "epoch": 0.8585103352864584, "grad_norm": 11.591052055358887, "learning_rate": 2.4342547547110877e-07, "loss": 3.1476, "step": 84395 }, { "epoch": 0.8585611979166666, "grad_norm": 11.908485412597656, "learning_rate": 2.4325338848893375e-07, "loss": 3.0585, "step": 84400 }, { "epoch": 0.858612060546875, "grad_norm": 13.596232414245605, "learning_rate": 2.430813592450865e-07, "loss": 3.4634, "step": 84405 }, { "epoch": 0.8586629231770834, "grad_norm": 8.452485084533691, "learning_rate": 2.4290938774396783e-07, "loss": 3.201, "step": 84410 }, { "epoch": 0.8587137858072916, "grad_norm": 8.984891891479492, "learning_rate": 2.427374739899771e-07, "loss": 3.2724, "step": 84415 }, { "epoch": 0.8587646484375, "grad_norm": 9.335774421691895, "learning_rate": 2.425656179875127e-07, "loss": 3.6574, "step": 84420 }, { "epoch": 0.8588155110677084, "grad_norm": 13.307247161865234, "learning_rate": 2.4239381974097254e-07, "loss": 3.5403, "step": 84425 }, { "epoch": 0.8588663736979166, "grad_norm": 7.800132751464844, "learning_rate": 2.4222207925475165e-07, "loss": 3.3321, "step": 84430 }, { "epoch": 0.858917236328125, "grad_norm": 16.67194366455078, "learning_rate": 2.4205039653324296e-07, "loss": 3.0226, "step": 84435 }, { "epoch": 0.8589680989583334, "grad_norm": 13.30135726928711, "learning_rate": 2.4187877158084017e-07, "loss": 3.4109, "step": 84440 }, { "epoch": 0.8590189615885416, "grad_norm": 12.091148376464844, "learning_rate": 2.4170720440193337e-07, "loss": 3.1486, "step": 84445 }, { "epoch": 0.85906982421875, "grad_norm": 12.820271492004395, "learning_rate": 2.415356950009129e-07, "loss": 3.3555, "step": 84450 }, { "epoch": 0.8591206868489584, "grad_norm": 9.892502784729004, "learning_rate": 2.4136424338216583e-07, "loss": 3.3226, "step": 84455 }, { "epoch": 0.8591715494791666, "grad_norm": 10.605794906616211, "learning_rate": 2.4119284955007983e-07, "loss": 3.2693, "step": 84460 }, { "epoch": 0.859222412109375, "grad_norm": 15.961316108703613, "learning_rate": 2.4102151350903933e-07, "loss": 3.9426, "step": 84465 }, { "epoch": 0.8592732747395834, "grad_norm": 8.051385879516602, "learning_rate": 2.408502352634276e-07, "loss": 3.3205, "step": 84470 }, { "epoch": 0.8593241373697916, "grad_norm": 10.661064147949219, "learning_rate": 2.406790148176269e-07, "loss": 3.4702, "step": 84475 }, { "epoch": 0.859375, "grad_norm": 13.357624053955078, "learning_rate": 2.4050785217601876e-07, "loss": 3.3406, "step": 84480 }, { "epoch": 0.8594258626302084, "grad_norm": 12.120694160461426, "learning_rate": 2.403367473429816e-07, "loss": 3.1657, "step": 84485 }, { "epoch": 0.8594767252604166, "grad_norm": 10.15761947631836, "learning_rate": 2.40165700322893e-07, "loss": 3.0561, "step": 84490 }, { "epoch": 0.859527587890625, "grad_norm": 10.528076171875, "learning_rate": 2.3999471112012924e-07, "loss": 3.1222, "step": 84495 }, { "epoch": 0.8595784505208334, "grad_norm": 14.525897979736328, "learning_rate": 2.3982377973906487e-07, "loss": 2.9979, "step": 84500 }, { "epoch": 0.8596293131510416, "grad_norm": 9.03371524810791, "learning_rate": 2.396529061840738e-07, "loss": 3.4179, "step": 84505 }, { "epoch": 0.85968017578125, "grad_norm": 13.414257049560547, "learning_rate": 2.3948209045952684e-07, "loss": 3.2412, "step": 84510 }, { "epoch": 0.8597310384114584, "grad_norm": 7.893692493438721, "learning_rate": 2.393113325697954e-07, "loss": 3.0921, "step": 84515 }, { "epoch": 0.8597819010416666, "grad_norm": 16.511638641357422, "learning_rate": 2.3914063251924774e-07, "loss": 3.3977, "step": 84520 }, { "epoch": 0.859832763671875, "grad_norm": 8.734496116638184, "learning_rate": 2.3896999031225054e-07, "loss": 3.5417, "step": 84525 }, { "epoch": 0.8598836263020834, "grad_norm": 8.743608474731445, "learning_rate": 2.387994059531701e-07, "loss": 4.0634, "step": 84530 }, { "epoch": 0.8599344889322916, "grad_norm": 14.12103271484375, "learning_rate": 2.3862887944637146e-07, "loss": 3.3827, "step": 84535 }, { "epoch": 0.8599853515625, "grad_norm": 11.939756393432617, "learning_rate": 2.3845841079621704e-07, "loss": 3.5587, "step": 84540 }, { "epoch": 0.8600362141927084, "grad_norm": 10.308426856994629, "learning_rate": 2.382880000070681e-07, "loss": 3.3251, "step": 84545 }, { "epoch": 0.8600870768229166, "grad_norm": 17.604177474975586, "learning_rate": 2.3811764708328357e-07, "loss": 3.7324, "step": 84550 }, { "epoch": 0.860137939453125, "grad_norm": 11.988261222839355, "learning_rate": 2.3794735202922387e-07, "loss": 3.3324, "step": 84555 }, { "epoch": 0.8601888020833334, "grad_norm": 13.497099876403809, "learning_rate": 2.37777114849245e-07, "loss": 3.1458, "step": 84560 }, { "epoch": 0.8602396647135416, "grad_norm": 10.483180046081543, "learning_rate": 2.376069355477026e-07, "loss": 3.0151, "step": 84565 }, { "epoch": 0.86029052734375, "grad_norm": 14.707589149475098, "learning_rate": 2.3743681412894986e-07, "loss": 3.2507, "step": 84570 }, { "epoch": 0.8603413899739584, "grad_norm": 12.384051322937012, "learning_rate": 2.3726675059734e-07, "loss": 3.5616, "step": 84575 }, { "epoch": 0.8603922526041666, "grad_norm": 13.253273963928223, "learning_rate": 2.3709674495722422e-07, "loss": 3.5315, "step": 84580 }, { "epoch": 0.860443115234375, "grad_norm": 11.346092224121094, "learning_rate": 2.3692679721295215e-07, "loss": 3.4492, "step": 84585 }, { "epoch": 0.8604939778645834, "grad_norm": 11.59700870513916, "learning_rate": 2.3675690736887108e-07, "loss": 3.4211, "step": 84590 }, { "epoch": 0.8605448404947916, "grad_norm": 12.86052131652832, "learning_rate": 2.365870754293287e-07, "loss": 3.1789, "step": 84595 }, { "epoch": 0.860595703125, "grad_norm": 8.454562187194824, "learning_rate": 2.364173013986687e-07, "loss": 3.6257, "step": 84600 }, { "epoch": 0.8606465657552084, "grad_norm": 14.786294937133789, "learning_rate": 2.362475852812357e-07, "loss": 3.5912, "step": 84605 }, { "epoch": 0.8606974283854166, "grad_norm": 14.837119102478027, "learning_rate": 2.3607792708137205e-07, "loss": 2.9005, "step": 84610 }, { "epoch": 0.860748291015625, "grad_norm": 8.102725982666016, "learning_rate": 2.3590832680341818e-07, "loss": 3.4289, "step": 84615 }, { "epoch": 0.8607991536458334, "grad_norm": 16.29010772705078, "learning_rate": 2.3573878445171312e-07, "loss": 3.1857, "step": 84620 }, { "epoch": 0.8608500162760416, "grad_norm": 8.154257774353027, "learning_rate": 2.3556930003059392e-07, "loss": 3.3696, "step": 84625 }, { "epoch": 0.86090087890625, "grad_norm": 11.515044212341309, "learning_rate": 2.353998735443974e-07, "loss": 3.8192, "step": 84630 }, { "epoch": 0.8609517415364584, "grad_norm": 12.323447227478027, "learning_rate": 2.3523050499745904e-07, "loss": 3.7585, "step": 84635 }, { "epoch": 0.8610026041666666, "grad_norm": 14.341964721679688, "learning_rate": 2.350611943941114e-07, "loss": 4.0131, "step": 84640 }, { "epoch": 0.861053466796875, "grad_norm": 11.782180786132812, "learning_rate": 2.348919417386855e-07, "loss": 3.4359, "step": 84645 }, { "epoch": 0.8611043294270834, "grad_norm": 12.30263900756836, "learning_rate": 2.3472274703551317e-07, "loss": 3.4441, "step": 84650 }, { "epoch": 0.8611551920572916, "grad_norm": 11.4652099609375, "learning_rate": 2.3455361028892176e-07, "loss": 3.3107, "step": 84655 }, { "epoch": 0.8612060546875, "grad_norm": 8.915072441101074, "learning_rate": 2.3438453150323975e-07, "loss": 3.1885, "step": 84660 }, { "epoch": 0.8612569173177084, "grad_norm": 12.996953964233398, "learning_rate": 2.342155106827923e-07, "loss": 3.14, "step": 84665 }, { "epoch": 0.8613077799479166, "grad_norm": 12.54077434539795, "learning_rate": 2.3404654783190401e-07, "loss": 3.5618, "step": 84670 }, { "epoch": 0.861358642578125, "grad_norm": 10.259812355041504, "learning_rate": 2.3387764295489806e-07, "loss": 3.8982, "step": 84675 }, { "epoch": 0.8614095052083334, "grad_norm": 11.128485679626465, "learning_rate": 2.3370879605609464e-07, "loss": 3.0753, "step": 84680 }, { "epoch": 0.8614603678385416, "grad_norm": 12.832159996032715, "learning_rate": 2.3354000713981474e-07, "loss": 2.9776, "step": 84685 }, { "epoch": 0.86151123046875, "grad_norm": 13.322426795959473, "learning_rate": 2.333712762103771e-07, "loss": 3.1568, "step": 84690 }, { "epoch": 0.8615620930989584, "grad_norm": 12.448477745056152, "learning_rate": 2.33202603272098e-07, "loss": 3.298, "step": 84695 }, { "epoch": 0.8616129557291666, "grad_norm": 9.18695068359375, "learning_rate": 2.3303398832929259e-07, "loss": 3.3789, "step": 84700 }, { "epoch": 0.861663818359375, "grad_norm": 9.873669624328613, "learning_rate": 2.328654313862752e-07, "loss": 3.3122, "step": 84705 }, { "epoch": 0.8617146809895834, "grad_norm": 13.608736991882324, "learning_rate": 2.326969324473588e-07, "loss": 3.4122, "step": 84710 } ], "logging_steps": 5, "max_steps": 98304, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 394, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.288882461717299e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }