diff --git "a/checkpoint-200245/trainer_state.json" "b/checkpoint-200245/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-200245/trainer_state.json" @@ -0,0 +1,140201 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 200245, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 4.993882493944918e-05, + "grad_norm": 0.09688733518123627, + "learning_rate": 6.000000000000001e-07, + "loss": 10.7967, + "step": 10 + }, + { + "epoch": 9.987764987889836e-05, + "grad_norm": 0.09242203086614609, + "learning_rate": 1.2000000000000002e-06, + "loss": 10.7959, + "step": 20 + }, + { + "epoch": 0.00014981647481834753, + "grad_norm": 0.09640923142433167, + "learning_rate": 1.8e-06, + "loss": 10.7964, + "step": 30 + }, + { + "epoch": 0.0001997552997577967, + "grad_norm": 0.09338584542274475, + "learning_rate": 2.4000000000000003e-06, + "loss": 10.7963, + "step": 40 + }, + { + "epoch": 0.00024969412469724587, + "grad_norm": 0.09739788621664047, + "learning_rate": 3e-06, + "loss": 10.796, + "step": 50 + }, + { + "epoch": 0.00029963294963669505, + "grad_norm": 0.09315921366214752, + "learning_rate": 3.6e-06, + "loss": 10.7954, + "step": 60 + }, + { + "epoch": 0.00034957177457614424, + "grad_norm": 0.09130595624446869, + "learning_rate": 4.2000000000000004e-06, + "loss": 10.7956, + "step": 70 + }, + { + "epoch": 0.0003995105995155934, + "grad_norm": 0.09694769978523254, + "learning_rate": 4.800000000000001e-06, + "loss": 10.7961, + "step": 80 + }, + { + "epoch": 0.00044944942445504255, + "grad_norm": 0.09814093261957169, + "learning_rate": 5.4e-06, + "loss": 10.7956, + "step": 90 + }, + { + "epoch": 0.0004993882493944917, + "grad_norm": 0.0940699502825737, + "learning_rate": 6e-06, + "loss": 10.7957, + "step": 100 + }, + { + "epoch": 0.000549327074333941, + "grad_norm": 0.09281250834465027, + "learning_rate": 6.6e-06, + "loss": 10.7957, + "step": 110 + }, + { + "epoch": 0.0005992658992733901, + "grad_norm": 0.09708160907030106, + "learning_rate": 7.2e-06, + "loss": 10.7961, + "step": 120 + }, + { + "epoch": 0.0006492047242128392, + "grad_norm": 0.09620024263858795, + "learning_rate": 7.8e-06, + "loss": 10.7957, + "step": 130 + }, + { + "epoch": 0.0006991435491522885, + "grad_norm": 0.09342040121555328, + "learning_rate": 8.400000000000001e-06, + "loss": 10.7958, + "step": 140 + }, + { + "epoch": 0.0007490823740917376, + "grad_norm": 0.09522400051355362, + "learning_rate": 9e-06, + "loss": 10.7951, + "step": 150 + }, + { + "epoch": 0.0007990211990311868, + "grad_norm": 0.09366803616285324, + "learning_rate": 9.600000000000001e-06, + "loss": 10.7963, + "step": 160 + }, + { + "epoch": 0.000848960023970636, + "grad_norm": 0.10423287004232407, + "learning_rate": 1.02e-05, + "loss": 10.796, + "step": 170 + }, + { + "epoch": 0.0008988988489100851, + "grad_norm": 0.09598524123430252, + "learning_rate": 1.08e-05, + "loss": 10.7951, + "step": 180 + }, + { + "epoch": 0.0009488376738495343, + "grad_norm": 0.09360150247812271, + "learning_rate": 1.1400000000000001e-05, + "loss": 10.795, + "step": 190 + }, + { + "epoch": 0.0009987764987889835, + "grad_norm": 0.09207968413829803, + "learning_rate": 1.2e-05, + "loss": 10.7947, + "step": 200 + }, + { + "epoch": 0.0010487153237284327, + "grad_norm": 0.09178674221038818, + "learning_rate": 1.26e-05, + "loss": 10.7944, + "step": 210 + }, + { + "epoch": 0.001098654148667882, + "grad_norm": 0.09687130898237228, + "learning_rate": 1.32e-05, + "loss": 10.7945, + "step": 220 + }, + { + "epoch": 0.001148592973607331, + "grad_norm": 0.08996697515249252, + "learning_rate": 1.3800000000000002e-05, + "loss": 10.7939, + "step": 230 + }, + { + "epoch": 0.0011985317985467802, + "grad_norm": 0.09358258545398712, + "learning_rate": 1.44e-05, + "loss": 10.7943, + "step": 240 + }, + { + "epoch": 0.0012484706234862294, + "grad_norm": 0.09620951116085052, + "learning_rate": 1.5e-05, + "loss": 12.5216, + "step": 250 + }, + { + "epoch": 0.0012984094484256785, + "grad_norm": 12312.0439453125, + "learning_rate": 1.56e-05, + "loss": 15.3101, + "step": 260 + }, + { + "epoch": 0.0013483482733651277, + "grad_norm": 4373971.5, + "learning_rate": 1.62e-05, + "loss": 11.4047, + "step": 270 + }, + { + "epoch": 0.001398287098304577, + "grad_norm": 0.09267476201057434, + "learning_rate": 1.6800000000000002e-05, + "loss": 13.7008, + "step": 280 + }, + { + "epoch": 0.001448225923244026, + "grad_norm": 0.09616652131080627, + "learning_rate": 1.74e-05, + "loss": 10.7943, + "step": 290 + }, + { + "epoch": 0.0014981647481834752, + "grad_norm": 0.09366879612207413, + "learning_rate": 1.8e-05, + "loss": 10.7936, + "step": 300 + }, + { + "epoch": 0.0015481035731229244, + "grad_norm": 0.10064131766557693, + "learning_rate": 1.86e-05, + "loss": 10.7939, + "step": 310 + }, + { + "epoch": 0.0015980423980623737, + "grad_norm": 0.09544051438570023, + "learning_rate": 1.9200000000000003e-05, + "loss": 10.7934, + "step": 320 + }, + { + "epoch": 0.0016479812230018227, + "grad_norm": 0.09638150781393051, + "learning_rate": 1.98e-05, + "loss": 10.7934, + "step": 330 + }, + { + "epoch": 0.001697920047941272, + "grad_norm": 0.09321698546409607, + "learning_rate": 2.04e-05, + "loss": 10.793, + "step": 340 + }, + { + "epoch": 0.0017478588728807212, + "grad_norm": 0.09490924328565598, + "learning_rate": 2.1e-05, + "loss": 10.7924, + "step": 350 + }, + { + "epoch": 0.0017977976978201702, + "grad_norm": 0.09169238060712814, + "learning_rate": 2.16e-05, + "loss": 10.7926, + "step": 360 + }, + { + "epoch": 0.0018477365227596194, + "grad_norm": 0.09536827355623245, + "learning_rate": 2.22e-05, + "loss": 10.7916, + "step": 370 + }, + { + "epoch": 0.0018976753476990687, + "grad_norm": 0.0935748964548111, + "learning_rate": 2.2800000000000002e-05, + "loss": 10.7922, + "step": 380 + }, + { + "epoch": 0.001947614172638518, + "grad_norm": 0.09191051870584488, + "learning_rate": 2.3400000000000003e-05, + "loss": 10.7917, + "step": 390 + }, + { + "epoch": 0.001997552997577967, + "grad_norm": 0.09062574058771133, + "learning_rate": 2.4e-05, + "loss": 10.7913, + "step": 400 + }, + { + "epoch": 0.002047491822517416, + "grad_norm": 0.09686335176229477, + "learning_rate": 2.4599999999999998e-05, + "loss": 10.7911, + "step": 410 + }, + { + "epoch": 0.0020974306474568654, + "grad_norm": 0.09458820521831512, + "learning_rate": 2.52e-05, + "loss": 10.7904, + "step": 420 + }, + { + "epoch": 0.0021473694723963144, + "grad_norm": 0.09392788261175156, + "learning_rate": 2.58e-05, + "loss": 10.79, + "step": 430 + }, + { + "epoch": 0.002197308297335764, + "grad_norm": 0.09329679608345032, + "learning_rate": 2.64e-05, + "loss": 10.7903, + "step": 440 + }, + { + "epoch": 0.002247247122275213, + "grad_norm": 0.09585896879434586, + "learning_rate": 2.7000000000000002e-05, + "loss": 10.7897, + "step": 450 + }, + { + "epoch": 0.002297185947214662, + "grad_norm": 0.0919712483882904, + "learning_rate": 2.7600000000000003e-05, + "loss": 10.7897, + "step": 460 + }, + { + "epoch": 0.0023471247721541114, + "grad_norm": 0.09430967271327972, + "learning_rate": 2.8199999999999998e-05, + "loss": 10.7888, + "step": 470 + }, + { + "epoch": 0.0023970635970935604, + "grad_norm": 0.09424952417612076, + "learning_rate": 2.88e-05, + "loss": 10.7897, + "step": 480 + }, + { + "epoch": 0.0024470024220330094, + "grad_norm": 0.09542497992515564, + "learning_rate": 2.94e-05, + "loss": 10.7901, + "step": 490 + }, + { + "epoch": 0.002496941246972459, + "grad_norm": 0.09828152507543564, + "learning_rate": 3e-05, + "loss": 11.0404, + "step": 500 + }, + { + "epoch": 0.002546880071911908, + "grad_norm": 0.09262736141681671, + "learning_rate": 2.9998498085058448e-05, + "loss": 10.7884, + "step": 510 + }, + { + "epoch": 0.002596818896851357, + "grad_norm": 0.0968865230679512, + "learning_rate": 2.99969961701169e-05, + "loss": 10.7878, + "step": 520 + }, + { + "epoch": 0.0026467577217908064, + "grad_norm": 0.0930342897772789, + "learning_rate": 2.9995494255175348e-05, + "loss": 10.7871, + "step": 530 + }, + { + "epoch": 0.0026966965467302554, + "grad_norm": 0.0979408249258995, + "learning_rate": 2.99939923402338e-05, + "loss": 10.7879, + "step": 540 + }, + { + "epoch": 0.0027466353716697044, + "grad_norm": 0.09784050285816193, + "learning_rate": 2.9992490425292248e-05, + "loss": 10.7864, + "step": 550 + }, + { + "epoch": 0.002796574196609154, + "grad_norm": 0.0974036231637001, + "learning_rate": 2.9990988510350695e-05, + "loss": 10.786, + "step": 560 + }, + { + "epoch": 0.002846513021548603, + "grad_norm": 0.09453345090150833, + "learning_rate": 2.998948659540915e-05, + "loss": 10.786, + "step": 570 + }, + { + "epoch": 0.002896451846488052, + "grad_norm": 0.09815464913845062, + "learning_rate": 2.9987984680467595e-05, + "loss": 10.7855, + "step": 580 + }, + { + "epoch": 0.0029463906714275014, + "grad_norm": 0.0883261188864708, + "learning_rate": 2.998648276552605e-05, + "loss": 10.7854, + "step": 590 + }, + { + "epoch": 0.0029963294963669504, + "grad_norm": 0.09347826987504959, + "learning_rate": 2.9984980850584496e-05, + "loss": 10.785, + "step": 600 + }, + { + "epoch": 0.0030462683213064, + "grad_norm": 0.09312307834625244, + "learning_rate": 2.9983478935642943e-05, + "loss": 10.7847, + "step": 610 + }, + { + "epoch": 0.003096207146245849, + "grad_norm": 0.09311032295227051, + "learning_rate": 2.9981977020701396e-05, + "loss": 10.785, + "step": 620 + }, + { + "epoch": 0.003146145971185298, + "grad_norm": 0.09509855508804321, + "learning_rate": 2.9980475105759843e-05, + "loss": 10.7844, + "step": 630 + }, + { + "epoch": 0.0031960847961247474, + "grad_norm": 0.09063342213630676, + "learning_rate": 2.9978973190818296e-05, + "loss": 10.7836, + "step": 640 + }, + { + "epoch": 0.0032460236210641964, + "grad_norm": 0.095396488904953, + "learning_rate": 2.9977471275876743e-05, + "loss": 10.7838, + "step": 650 + }, + { + "epoch": 0.0032959624460036454, + "grad_norm": 0.09543643146753311, + "learning_rate": 2.997596936093519e-05, + "loss": 10.7835, + "step": 660 + }, + { + "epoch": 0.003345901270943095, + "grad_norm": 11308.7890625, + "learning_rate": 2.9974467445993644e-05, + "loss": 11.5043, + "step": 670 + }, + { + "epoch": 0.003395840095882544, + "grad_norm": 0.09934733808040619, + "learning_rate": 2.997296553105209e-05, + "loss": 17.0433, + "step": 680 + }, + { + "epoch": 0.003445778920821993, + "grad_norm": 0.09415256977081299, + "learning_rate": 2.9971463616110544e-05, + "loss": 10.9281, + "step": 690 + }, + { + "epoch": 0.0034957177457614424, + "grad_norm": 0.09561091661453247, + "learning_rate": 2.996996170116899e-05, + "loss": 12.0749, + "step": 700 + }, + { + "epoch": 0.0035456565707008914, + "grad_norm": 308366.0625, + "learning_rate": 2.996845978622744e-05, + "loss": 65.9464, + "step": 710 + }, + { + "epoch": 0.0035955953956403404, + "grad_norm": 36107.77734375, + "learning_rate": 2.996695787128589e-05, + "loss": 87.2685, + "step": 720 + }, + { + "epoch": 0.00364553422057979, + "grad_norm": 0.09354925155639648, + "learning_rate": 2.9965455956344338e-05, + "loss": 10.9749, + "step": 730 + }, + { + "epoch": 0.003695473045519239, + "grad_norm": 0.09494537115097046, + "learning_rate": 2.996395404140279e-05, + "loss": 10.7808, + "step": 740 + }, + { + "epoch": 0.003745411870458688, + "grad_norm": 0.10302335023880005, + "learning_rate": 2.9962452126461238e-05, + "loss": 13.2444, + "step": 750 + }, + { + "epoch": 0.0037953506953981374, + "grad_norm": 0.09412359446287155, + "learning_rate": 2.996095021151969e-05, + "loss": 11.111, + "step": 760 + }, + { + "epoch": 0.0038452895203375864, + "grad_norm": 0.10018070042133331, + "learning_rate": 2.995944829657814e-05, + "loss": 10.7808, + "step": 770 + }, + { + "epoch": 0.003895228345277036, + "grad_norm": 0.09139321744441986, + "learning_rate": 2.9957946381636585e-05, + "loss": 10.7802, + "step": 780 + }, + { + "epoch": 0.003945167170216484, + "grad_norm": 0.09905160218477249, + "learning_rate": 2.995644446669504e-05, + "loss": 10.7798, + "step": 790 + }, + { + "epoch": 0.003995105995155934, + "grad_norm": 0.09502265602350235, + "learning_rate": 2.9954942551753486e-05, + "loss": 10.7806, + "step": 800 + }, + { + "epoch": 0.004045044820095383, + "grad_norm": 0.09315773844718933, + "learning_rate": 2.9953440636811936e-05, + "loss": 10.7789, + "step": 810 + }, + { + "epoch": 0.004094983645034832, + "grad_norm": 0.09650130569934845, + "learning_rate": 2.9951938721870386e-05, + "loss": 10.7788, + "step": 820 + }, + { + "epoch": 0.004144922469974281, + "grad_norm": 0.09284522384405136, + "learning_rate": 2.9950436806928833e-05, + "loss": 10.7786, + "step": 830 + }, + { + "epoch": 0.004194861294913731, + "grad_norm": 0.09788000583648682, + "learning_rate": 2.9948934891987286e-05, + "loss": 10.7783, + "step": 840 + }, + { + "epoch": 0.004244800119853179, + "grad_norm": 0.09668333828449249, + "learning_rate": 2.9947432977045733e-05, + "loss": 10.7779, + "step": 850 + }, + { + "epoch": 0.004294738944792629, + "grad_norm": 0.09662320464849472, + "learning_rate": 2.9945931062104183e-05, + "loss": 10.7769, + "step": 860 + }, + { + "epoch": 0.004344677769732078, + "grad_norm": 0.09260370582342148, + "learning_rate": 2.9944429147162634e-05, + "loss": 10.7772, + "step": 870 + }, + { + "epoch": 0.004394616594671528, + "grad_norm": 0.09133943170309067, + "learning_rate": 2.994292723222108e-05, + "loss": 10.7774, + "step": 880 + }, + { + "epoch": 0.004444555419610976, + "grad_norm": 0.09417100250720978, + "learning_rate": 2.9941425317279534e-05, + "loss": 10.7768, + "step": 890 + }, + { + "epoch": 0.004494494244550426, + "grad_norm": 0.09618920087814331, + "learning_rate": 2.993992340233798e-05, + "loss": 10.776, + "step": 900 + }, + { + "epoch": 0.004544433069489875, + "grad_norm": 0.09578631818294525, + "learning_rate": 2.993842148739643e-05, + "loss": 10.7759, + "step": 910 + }, + { + "epoch": 0.004594371894429324, + "grad_norm": 0.09697435051202774, + "learning_rate": 2.993691957245488e-05, + "loss": 10.7744, + "step": 920 + }, + { + "epoch": 0.004644310719368773, + "grad_norm": 0.09273876994848251, + "learning_rate": 2.9935417657513328e-05, + "loss": 10.7755, + "step": 930 + }, + { + "epoch": 0.004694249544308223, + "grad_norm": 0.0970180332660675, + "learning_rate": 2.993391574257178e-05, + "loss": 10.7751, + "step": 940 + }, + { + "epoch": 0.004744188369247671, + "grad_norm": 0.0976632609963417, + "learning_rate": 2.9932413827630228e-05, + "loss": 10.7745, + "step": 950 + }, + { + "epoch": 0.004794127194187121, + "grad_norm": 0.09234705567359924, + "learning_rate": 2.993091191268868e-05, + "loss": 10.7746, + "step": 960 + }, + { + "epoch": 0.00484406601912657, + "grad_norm": 0.09593399614095688, + "learning_rate": 2.992940999774713e-05, + "loss": 10.7742, + "step": 970 + }, + { + "epoch": 0.004894004844066019, + "grad_norm": 0.09624140709638596, + "learning_rate": 2.9927908082805575e-05, + "loss": 10.7732, + "step": 980 + }, + { + "epoch": 0.004943943669005468, + "grad_norm": 0.09578274935483932, + "learning_rate": 2.992640616786403e-05, + "loss": 10.7728, + "step": 990 + }, + { + "epoch": 0.004993882493944918, + "grad_norm": 0.10335574299097061, + "learning_rate": 2.9924904252922476e-05, + "loss": 10.7731, + "step": 1000 + }, + { + "epoch": 0.005043821318884366, + "grad_norm": 0.09355924278497696, + "learning_rate": 2.9923402337980926e-05, + "loss": 10.7723, + "step": 1010 + }, + { + "epoch": 0.005093760143823816, + "grad_norm": 0.09353188425302505, + "learning_rate": 2.9921900423039376e-05, + "loss": 10.773, + "step": 1020 + }, + { + "epoch": 0.005143698968763265, + "grad_norm": 0.09189878404140472, + "learning_rate": 2.9920398508097826e-05, + "loss": 10.7719, + "step": 1030 + }, + { + "epoch": 0.005193637793702714, + "grad_norm": 0.09627330303192139, + "learning_rate": 2.9918896593156277e-05, + "loss": 10.7721, + "step": 1040 + }, + { + "epoch": 0.005243576618642163, + "grad_norm": 0.09078870713710785, + "learning_rate": 2.9917394678214723e-05, + "loss": 10.7711, + "step": 1050 + }, + { + "epoch": 0.005293515443581613, + "grad_norm": 0.09166533499956131, + "learning_rate": 2.9915892763273173e-05, + "loss": 10.7711, + "step": 1060 + }, + { + "epoch": 0.005343454268521061, + "grad_norm": 0.0925443097949028, + "learning_rate": 2.9914390848331624e-05, + "loss": 10.771, + "step": 1070 + }, + { + "epoch": 0.005393393093460511, + "grad_norm": 0.09320785850286484, + "learning_rate": 2.9912888933390074e-05, + "loss": 10.7703, + "step": 1080 + }, + { + "epoch": 0.00544333191839996, + "grad_norm": 0.09314128011465073, + "learning_rate": 2.9911387018448524e-05, + "loss": 10.7707, + "step": 1090 + }, + { + "epoch": 0.005493270743339409, + "grad_norm": 0.09605088829994202, + "learning_rate": 2.990988510350697e-05, + "loss": 10.7705, + "step": 1100 + }, + { + "epoch": 0.005543209568278858, + "grad_norm": 0.08931813389062881, + "learning_rate": 2.990838318856542e-05, + "loss": 10.7689, + "step": 1110 + }, + { + "epoch": 0.005593148393218308, + "grad_norm": 0.09207310527563095, + "learning_rate": 2.990688127362387e-05, + "loss": 10.7691, + "step": 1120 + }, + { + "epoch": 0.005643087218157756, + "grad_norm": 0.09695383906364441, + "learning_rate": 2.990537935868232e-05, + "loss": 10.7687, + "step": 1130 + }, + { + "epoch": 0.005693026043097206, + "grad_norm": 0.09819135814905167, + "learning_rate": 2.990387744374077e-05, + "loss": 10.7679, + "step": 1140 + }, + { + "epoch": 0.005742964868036655, + "grad_norm": 0.09095334261655807, + "learning_rate": 2.990237552879922e-05, + "loss": 10.7685, + "step": 1150 + }, + { + "epoch": 0.005792903692976104, + "grad_norm": 0.08971603959798813, + "learning_rate": 2.990087361385767e-05, + "loss": 10.7678, + "step": 1160 + }, + { + "epoch": 0.005842842517915553, + "grad_norm": 0.0930616483092308, + "learning_rate": 2.989937169891612e-05, + "loss": 10.7679, + "step": 1170 + }, + { + "epoch": 0.005892781342855003, + "grad_norm": 0.09826360642910004, + "learning_rate": 2.989786978397457e-05, + "loss": 10.767, + "step": 1180 + }, + { + "epoch": 0.005942720167794451, + "grad_norm": 0.09297499060630798, + "learning_rate": 2.989636786903302e-05, + "loss": 10.7671, + "step": 1190 + }, + { + "epoch": 0.005992658992733901, + "grad_norm": 0.09357722103595734, + "learning_rate": 2.9894865954091466e-05, + "loss": 10.7661, + "step": 1200 + }, + { + "epoch": 0.00604259781767335, + "grad_norm": 0.09486209601163864, + "learning_rate": 2.9893364039149916e-05, + "loss": 10.7658, + "step": 1210 + }, + { + "epoch": 0.0060925366426128, + "grad_norm": 0.09341169148683548, + "learning_rate": 2.9891862124208366e-05, + "loss": 10.7663, + "step": 1220 + }, + { + "epoch": 0.006142475467552248, + "grad_norm": 0.09570655226707458, + "learning_rate": 2.9890360209266816e-05, + "loss": 10.7658, + "step": 1230 + }, + { + "epoch": 0.006192414292491698, + "grad_norm": 0.09547128528356552, + "learning_rate": 2.9888858294325267e-05, + "loss": 10.764, + "step": 1240 + }, + { + "epoch": 0.006242353117431147, + "grad_norm": 0.08968691527843475, + "learning_rate": 2.9887356379383713e-05, + "loss": 10.7649, + "step": 1250 + }, + { + "epoch": 0.006292291942370596, + "grad_norm": 0.09444384276866913, + "learning_rate": 2.9885854464442163e-05, + "loss": 10.7647, + "step": 1260 + }, + { + "epoch": 0.006342230767310045, + "grad_norm": 0.0966639518737793, + "learning_rate": 2.9884352549500614e-05, + "loss": 10.764, + "step": 1270 + }, + { + "epoch": 0.006392169592249495, + "grad_norm": 0.09153666347265244, + "learning_rate": 2.9882850634559064e-05, + "loss": 10.7646, + "step": 1280 + }, + { + "epoch": 0.006442108417188943, + "grad_norm": 0.09613633900880814, + "learning_rate": 2.9881348719617514e-05, + "loss": 10.763, + "step": 1290 + }, + { + "epoch": 0.006492047242128393, + "grad_norm": 0.09370370209217072, + "learning_rate": 2.987984680467596e-05, + "loss": 10.7634, + "step": 1300 + }, + { + "epoch": 0.006541986067067842, + "grad_norm": 0.09263869374990463, + "learning_rate": 2.987834488973441e-05, + "loss": 10.7635, + "step": 1310 + }, + { + "epoch": 0.006591924892007291, + "grad_norm": 0.09146302193403244, + "learning_rate": 2.987684297479286e-05, + "loss": 10.7622, + "step": 1320 + }, + { + "epoch": 0.00664186371694674, + "grad_norm": 0.09400025755167007, + "learning_rate": 2.987534105985131e-05, + "loss": 10.7627, + "step": 1330 + }, + { + "epoch": 0.00669180254188619, + "grad_norm": 0.09636477380990982, + "learning_rate": 2.987383914490976e-05, + "loss": 10.762, + "step": 1340 + }, + { + "epoch": 0.006741741366825638, + "grad_norm": 0.08832681179046631, + "learning_rate": 2.9872337229968212e-05, + "loss": 10.7615, + "step": 1350 + }, + { + "epoch": 0.006791680191765088, + "grad_norm": 0.10007186979055405, + "learning_rate": 2.987083531502666e-05, + "loss": 10.7612, + "step": 1360 + }, + { + "epoch": 0.006841619016704537, + "grad_norm": 0.09131281077861786, + "learning_rate": 2.986933340008511e-05, + "loss": 10.761, + "step": 1370 + }, + { + "epoch": 0.006891557841643986, + "grad_norm": 0.0982031375169754, + "learning_rate": 2.986783148514356e-05, + "loss": 10.7597, + "step": 1380 + }, + { + "epoch": 0.006941496666583435, + "grad_norm": 0.10071565955877304, + "learning_rate": 2.986632957020201e-05, + "loss": 10.7601, + "step": 1390 + }, + { + "epoch": 0.006991435491522885, + "grad_norm": 0.09393281489610672, + "learning_rate": 2.986482765526046e-05, + "loss": 10.7598, + "step": 1400 + }, + { + "epoch": 0.007041374316462333, + "grad_norm": 0.09201990813016891, + "learning_rate": 2.9863325740318906e-05, + "loss": 10.7588, + "step": 1410 + }, + { + "epoch": 0.007091313141401783, + "grad_norm": 0.09910114109516144, + "learning_rate": 2.9861823825377356e-05, + "loss": 10.7597, + "step": 1420 + }, + { + "epoch": 0.007141251966341232, + "grad_norm": 0.09161806851625443, + "learning_rate": 2.9860321910435806e-05, + "loss": 10.7587, + "step": 1430 + }, + { + "epoch": 0.007191190791280681, + "grad_norm": 0.09380662441253662, + "learning_rate": 2.9858819995494257e-05, + "loss": 10.7589, + "step": 1440 + }, + { + "epoch": 0.00724112961622013, + "grad_norm": 0.09290491044521332, + "learning_rate": 2.9857318080552707e-05, + "loss": 10.7591, + "step": 1450 + }, + { + "epoch": 0.00729106844115958, + "grad_norm": 0.09262260794639587, + "learning_rate": 2.9855816165611154e-05, + "loss": 10.7581, + "step": 1460 + }, + { + "epoch": 0.007341007266099028, + "grad_norm": 698.4036865234375, + "learning_rate": 2.9854314250669604e-05, + "loss": 10.758, + "step": 1470 + }, + { + "epoch": 0.007390946091038478, + "grad_norm": 0.09542746841907501, + "learning_rate": 2.9852812335728054e-05, + "loss": 12.0635, + "step": 1480 + }, + { + "epoch": 0.007440884915977927, + "grad_norm": 0.09986045211553574, + "learning_rate": 2.9851310420786504e-05, + "loss": 10.7564, + "step": 1490 + }, + { + "epoch": 0.007490823740917376, + "grad_norm": 0.09409373998641968, + "learning_rate": 2.9849808505844954e-05, + "loss": 10.7568, + "step": 1500 + }, + { + "epoch": 0.007540762565856825, + "grad_norm": 0.092925526201725, + "learning_rate": 2.98483065909034e-05, + "loss": 10.7565, + "step": 1510 + }, + { + "epoch": 0.007590701390796275, + "grad_norm": 0.0927615836262703, + "learning_rate": 2.984680467596185e-05, + "loss": 10.7556, + "step": 1520 + }, + { + "epoch": 0.007640640215735723, + "grad_norm": 0.10118461400270462, + "learning_rate": 2.98453027610203e-05, + "loss": 10.7564, + "step": 1530 + }, + { + "epoch": 0.007690579040675173, + "grad_norm": 0.09674585610628128, + "learning_rate": 2.984380084607875e-05, + "loss": 10.7554, + "step": 1540 + }, + { + "epoch": 0.007740517865614622, + "grad_norm": 0.09508625417947769, + "learning_rate": 2.9842298931137202e-05, + "loss": 10.7555, + "step": 1550 + }, + { + "epoch": 0.007790456690554072, + "grad_norm": 0.0912444218993187, + "learning_rate": 2.984079701619565e-05, + "loss": 10.7547, + "step": 1560 + }, + { + "epoch": 0.007840395515493521, + "grad_norm": 0.09958932548761368, + "learning_rate": 2.98392951012541e-05, + "loss": 10.7539, + "step": 1570 + }, + { + "epoch": 0.007890334340432969, + "grad_norm": 0.0945759117603302, + "learning_rate": 2.983779318631255e-05, + "loss": 10.7541, + "step": 1580 + }, + { + "epoch": 0.007940273165372418, + "grad_norm": 0.09051518142223358, + "learning_rate": 2.9836291271371e-05, + "loss": 10.7533, + "step": 1590 + }, + { + "epoch": 0.007990211990311868, + "grad_norm": 0.09735298156738281, + "learning_rate": 2.983478935642945e-05, + "loss": 10.753, + "step": 1600 + }, + { + "epoch": 0.008040150815251317, + "grad_norm": 0.09062410145998001, + "learning_rate": 2.9833287441487896e-05, + "loss": 10.7529, + "step": 1610 + }, + { + "epoch": 0.008090089640190767, + "grad_norm": 0.091838039457798, + "learning_rate": 2.9831785526546346e-05, + "loss": 10.7524, + "step": 1620 + }, + { + "epoch": 0.008140028465130216, + "grad_norm": 0.0958361029624939, + "learning_rate": 2.9830283611604796e-05, + "loss": 10.7529, + "step": 1630 + }, + { + "epoch": 0.008189967290069664, + "grad_norm": 0.09625451266765594, + "learning_rate": 2.9828781696663247e-05, + "loss": 10.7514, + "step": 1640 + }, + { + "epoch": 0.008239906115009113, + "grad_norm": 0.0954735055565834, + "learning_rate": 2.9827279781721697e-05, + "loss": 10.7526, + "step": 1650 + }, + { + "epoch": 0.008289844939948563, + "grad_norm": 0.09449174255132675, + "learning_rate": 2.9825777866780144e-05, + "loss": 10.7519, + "step": 1660 + }, + { + "epoch": 0.008339783764888012, + "grad_norm": 0.08916933834552765, + "learning_rate": 2.9824275951838597e-05, + "loss": 10.7517, + "step": 1670 + }, + { + "epoch": 0.008389722589827462, + "grad_norm": 0.0955243706703186, + "learning_rate": 2.9822774036897044e-05, + "loss": 10.7513, + "step": 1680 + }, + { + "epoch": 0.008439661414766911, + "grad_norm": 0.0943785309791565, + "learning_rate": 2.9821272121955494e-05, + "loss": 10.7504, + "step": 1690 + }, + { + "epoch": 0.008489600239706359, + "grad_norm": 0.09509387612342834, + "learning_rate": 2.9819770207013944e-05, + "loss": 10.7505, + "step": 1700 + }, + { + "epoch": 0.008539539064645808, + "grad_norm": 0.09808336943387985, + "learning_rate": 2.981826829207239e-05, + "loss": 10.7496, + "step": 1710 + }, + { + "epoch": 0.008589477889585258, + "grad_norm": 0.0928177759051323, + "learning_rate": 2.9816766377130845e-05, + "loss": 10.7501, + "step": 1720 + }, + { + "epoch": 0.008639416714524707, + "grad_norm": 0.09282363951206207, + "learning_rate": 2.981526446218929e-05, + "loss": 10.7484, + "step": 1730 + }, + { + "epoch": 0.008689355539464157, + "grad_norm": 0.09041490405797958, + "learning_rate": 2.981376254724774e-05, + "loss": 10.7492, + "step": 1740 + }, + { + "epoch": 0.008739294364403606, + "grad_norm": 0.09924125671386719, + "learning_rate": 2.9812260632306192e-05, + "loss": 10.7486, + "step": 1750 + }, + { + "epoch": 0.008789233189343056, + "grad_norm": 0.09147768467664719, + "learning_rate": 2.981075871736464e-05, + "loss": 10.7488, + "step": 1760 + }, + { + "epoch": 0.008839172014282503, + "grad_norm": 0.09582570195198059, + "learning_rate": 2.9809256802423092e-05, + "loss": 10.7485, + "step": 1770 + }, + { + "epoch": 0.008889110839221953, + "grad_norm": 0.09464222192764282, + "learning_rate": 2.980775488748154e-05, + "loss": 10.7473, + "step": 1780 + }, + { + "epoch": 0.008939049664161402, + "grad_norm": 0.09430133551359177, + "learning_rate": 2.980625297253999e-05, + "loss": 10.7484, + "step": 1790 + }, + { + "epoch": 0.008988988489100852, + "grad_norm": 0.09573784470558167, + "learning_rate": 2.980475105759844e-05, + "loss": 10.7467, + "step": 1800 + }, + { + "epoch": 0.009038927314040301, + "grad_norm": 0.09440425038337708, + "learning_rate": 2.9803249142656886e-05, + "loss": 10.7473, + "step": 1810 + }, + { + "epoch": 0.00908886613897975, + "grad_norm": 0.098334401845932, + "learning_rate": 2.980174722771534e-05, + "loss": 10.7465, + "step": 1820 + }, + { + "epoch": 0.009138804963919198, + "grad_norm": 0.095046766102314, + "learning_rate": 2.9800245312773786e-05, + "loss": 10.7463, + "step": 1830 + }, + { + "epoch": 0.009188743788858648, + "grad_norm": 0.09811218827962875, + "learning_rate": 2.9798743397832237e-05, + "loss": 10.7457, + "step": 1840 + }, + { + "epoch": 0.009238682613798097, + "grad_norm": 0.09796342253684998, + "learning_rate": 2.9797241482890687e-05, + "loss": 10.7463, + "step": 1850 + }, + { + "epoch": 0.009288621438737547, + "grad_norm": 0.09163960069417953, + "learning_rate": 2.9795739567949134e-05, + "loss": 10.7455, + "step": 1860 + }, + { + "epoch": 0.009338560263676996, + "grad_norm": 0.09211383759975433, + "learning_rate": 2.9794237653007587e-05, + "loss": 10.7452, + "step": 1870 + }, + { + "epoch": 0.009388499088616446, + "grad_norm": 0.1004691869020462, + "learning_rate": 2.9792735738066034e-05, + "loss": 10.7444, + "step": 1880 + }, + { + "epoch": 0.009438437913555893, + "grad_norm": 0.09318966418504715, + "learning_rate": 2.9791233823124484e-05, + "loss": 10.744, + "step": 1890 + }, + { + "epoch": 0.009488376738495343, + "grad_norm": 0.09770355373620987, + "learning_rate": 2.9789731908182934e-05, + "loss": 10.7435, + "step": 1900 + }, + { + "epoch": 0.009538315563434792, + "grad_norm": 0.09747663885354996, + "learning_rate": 2.978822999324138e-05, + "loss": 10.7435, + "step": 1910 + }, + { + "epoch": 0.009588254388374242, + "grad_norm": 0.0939902812242508, + "learning_rate": 2.9786728078299835e-05, + "loss": 10.7433, + "step": 1920 + }, + { + "epoch": 0.009638193213313691, + "grad_norm": 0.09048355370759964, + "learning_rate": 2.978522616335828e-05, + "loss": 10.7432, + "step": 1930 + }, + { + "epoch": 0.00968813203825314, + "grad_norm": 0.09295009821653366, + "learning_rate": 2.978372424841673e-05, + "loss": 10.743, + "step": 1940 + }, + { + "epoch": 0.009738070863192588, + "grad_norm": 0.09584958851337433, + "learning_rate": 2.9782222333475182e-05, + "loss": 10.7431, + "step": 1950 + }, + { + "epoch": 0.009788009688132038, + "grad_norm": 0.09165474027395248, + "learning_rate": 2.978072041853363e-05, + "loss": 10.7419, + "step": 1960 + }, + { + "epoch": 0.009837948513071487, + "grad_norm": 0.0949409231543541, + "learning_rate": 2.9779218503592082e-05, + "loss": 10.7418, + "step": 1970 + }, + { + "epoch": 0.009887887338010937, + "grad_norm": 0.09680484980344772, + "learning_rate": 2.977771658865053e-05, + "loss": 10.7418, + "step": 1980 + }, + { + "epoch": 0.009937826162950386, + "grad_norm": 0.10440608114004135, + "learning_rate": 2.9776214673708982e-05, + "loss": 10.7418, + "step": 1990 + }, + { + "epoch": 0.009987764987889836, + "grad_norm": 0.09684424102306366, + "learning_rate": 2.977471275876743e-05, + "loss": 10.741, + "step": 2000 + }, + { + "epoch": 0.010037703812829283, + "grad_norm": 0.09417276084423065, + "learning_rate": 2.9773210843825876e-05, + "loss": 10.7413, + "step": 2010 + }, + { + "epoch": 0.010087642637768733, + "grad_norm": 0.09294068813323975, + "learning_rate": 2.977170892888433e-05, + "loss": 10.74, + "step": 2020 + }, + { + "epoch": 0.010137581462708182, + "grad_norm": 0.09408864378929138, + "learning_rate": 2.9770207013942776e-05, + "loss": 10.7388, + "step": 2030 + }, + { + "epoch": 0.010187520287647632, + "grad_norm": 0.09455525130033493, + "learning_rate": 2.976870509900123e-05, + "loss": 10.7401, + "step": 2040 + }, + { + "epoch": 0.010237459112587081, + "grad_norm": 0.09249377250671387, + "learning_rate": 2.9767203184059677e-05, + "loss": 10.7395, + "step": 2050 + }, + { + "epoch": 0.01028739793752653, + "grad_norm": 0.09711704403162003, + "learning_rate": 2.9765701269118124e-05, + "loss": 10.7387, + "step": 2060 + }, + { + "epoch": 0.01033733676246598, + "grad_norm": 0.09462075680494308, + "learning_rate": 2.9764199354176577e-05, + "loss": 10.7389, + "step": 2070 + }, + { + "epoch": 0.010387275587405428, + "grad_norm": 0.09138345718383789, + "learning_rate": 2.9762697439235024e-05, + "loss": 10.7381, + "step": 2080 + }, + { + "epoch": 0.010437214412344877, + "grad_norm": 0.0979338064789772, + "learning_rate": 2.9761195524293477e-05, + "loss": 10.7373, + "step": 2090 + }, + { + "epoch": 0.010487153237284327, + "grad_norm": 428.1760559082031, + "learning_rate": 2.9759693609351924e-05, + "loss": 10.7368, + "step": 2100 + }, + { + "epoch": 0.010537092062223776, + "grad_norm": 0.09210320562124252, + "learning_rate": 2.975819169441037e-05, + "loss": 11.136, + "step": 2110 + }, + { + "epoch": 0.010587030887163226, + "grad_norm": 0.09596727043390274, + "learning_rate": 2.9756689779468825e-05, + "loss": 12.8231, + "step": 2120 + }, + { + "epoch": 0.010636969712102675, + "grad_norm": 0.10231532901525497, + "learning_rate": 2.975518786452727e-05, + "loss": 10.8774, + "step": 2130 + }, + { + "epoch": 0.010686908537042123, + "grad_norm": 0.09213022887706757, + "learning_rate": 2.9753685949585725e-05, + "loss": 10.7361, + "step": 2140 + }, + { + "epoch": 0.010736847361981572, + "grad_norm": 0.0937543660402298, + "learning_rate": 2.9752184034644172e-05, + "loss": 10.7364, + "step": 2150 + }, + { + "epoch": 0.010786786186921022, + "grad_norm": 0.09011025726795197, + "learning_rate": 2.975068211970262e-05, + "loss": 10.7361, + "step": 2160 + }, + { + "epoch": 0.010836725011860471, + "grad_norm": 0.09514773637056351, + "learning_rate": 2.9749180204761072e-05, + "loss": 10.7348, + "step": 2170 + }, + { + "epoch": 0.01088666383679992, + "grad_norm": 0.0912405401468277, + "learning_rate": 2.974767828981952e-05, + "loss": 10.7349, + "step": 2180 + }, + { + "epoch": 0.01093660266173937, + "grad_norm": 0.10206656157970428, + "learning_rate": 2.9746176374877972e-05, + "loss": 10.7357, + "step": 2190 + }, + { + "epoch": 0.010986541486678818, + "grad_norm": 0.10607360303401947, + "learning_rate": 2.974467445993642e-05, + "loss": 10.7347, + "step": 2200 + }, + { + "epoch": 0.011036480311618267, + "grad_norm": 0.09714030474424362, + "learning_rate": 2.9743172544994866e-05, + "loss": 10.7333, + "step": 2210 + }, + { + "epoch": 0.011086419136557717, + "grad_norm": 0.09556932747364044, + "learning_rate": 2.974167063005332e-05, + "loss": 10.7338, + "step": 2220 + }, + { + "epoch": 0.011136357961497166, + "grad_norm": 0.09247563034296036, + "learning_rate": 2.9740168715111766e-05, + "loss": 10.7334, + "step": 2230 + }, + { + "epoch": 0.011186296786436616, + "grad_norm": 0.08850020170211792, + "learning_rate": 2.973866680017022e-05, + "loss": 10.7334, + "step": 2240 + }, + { + "epoch": 0.011236235611376065, + "grad_norm": 0.09381990134716034, + "learning_rate": 2.9737164885228667e-05, + "loss": 10.7324, + "step": 2250 + }, + { + "epoch": 0.011286174436315513, + "grad_norm": 0.09170781821012497, + "learning_rate": 2.9735662970287114e-05, + "loss": 10.7327, + "step": 2260 + }, + { + "epoch": 0.011336113261254962, + "grad_norm": 0.09936770796775818, + "learning_rate": 2.9734161055345567e-05, + "loss": 10.7321, + "step": 2270 + }, + { + "epoch": 0.011386052086194412, + "grad_norm": 0.09585506469011307, + "learning_rate": 2.9732659140404014e-05, + "loss": 10.7322, + "step": 2280 + }, + { + "epoch": 0.011435990911133861, + "grad_norm": 0.0958356037735939, + "learning_rate": 2.9731157225462467e-05, + "loss": 10.7323, + "step": 2290 + }, + { + "epoch": 0.01148592973607331, + "grad_norm": 0.09194158017635345, + "learning_rate": 2.9729655310520914e-05, + "loss": 10.7318, + "step": 2300 + }, + { + "epoch": 0.01153586856101276, + "grad_norm": 0.09054262191057205, + "learning_rate": 2.9728153395579364e-05, + "loss": 10.7313, + "step": 2310 + }, + { + "epoch": 0.011585807385952208, + "grad_norm": 0.09786096960306168, + "learning_rate": 2.9726651480637815e-05, + "loss": 10.7306, + "step": 2320 + }, + { + "epoch": 0.011635746210891657, + "grad_norm": 0.09685620665550232, + "learning_rate": 2.972514956569626e-05, + "loss": 10.731, + "step": 2330 + }, + { + "epoch": 0.011685685035831107, + "grad_norm": 0.09197534620761871, + "learning_rate": 2.9723647650754715e-05, + "loss": 10.7293, + "step": 2340 + }, + { + "epoch": 0.011735623860770556, + "grad_norm": 0.09799861162900925, + "learning_rate": 2.9722145735813162e-05, + "loss": 10.7295, + "step": 2350 + }, + { + "epoch": 0.011785562685710006, + "grad_norm": 0.09268220514059067, + "learning_rate": 2.9720643820871612e-05, + "loss": 10.7298, + "step": 2360 + }, + { + "epoch": 0.011835501510649455, + "grad_norm": 0.09487041085958481, + "learning_rate": 2.9719141905930062e-05, + "loss": 10.7286, + "step": 2370 + }, + { + "epoch": 0.011885440335588903, + "grad_norm": 0.09200552850961685, + "learning_rate": 2.971763999098851e-05, + "loss": 10.7287, + "step": 2380 + }, + { + "epoch": 0.011935379160528352, + "grad_norm": 0.09358713030815125, + "learning_rate": 2.9716138076046962e-05, + "loss": 10.7276, + "step": 2390 + }, + { + "epoch": 0.011985317985467802, + "grad_norm": 0.09549114853143692, + "learning_rate": 2.971463616110541e-05, + "loss": 10.7284, + "step": 2400 + }, + { + "epoch": 0.012035256810407251, + "grad_norm": 0.09079451113939285, + "learning_rate": 2.971313424616386e-05, + "loss": 10.7274, + "step": 2410 + }, + { + "epoch": 0.0120851956353467, + "grad_norm": 0.09617462754249573, + "learning_rate": 2.971163233122231e-05, + "loss": 10.7274, + "step": 2420 + }, + { + "epoch": 0.01213513446028615, + "grad_norm": 0.09577368944883347, + "learning_rate": 2.9710130416280756e-05, + "loss": 10.7275, + "step": 2430 + }, + { + "epoch": 0.0121850732852256, + "grad_norm": 0.09303394705057144, + "learning_rate": 2.970862850133921e-05, + "loss": 10.7257, + "step": 2440 + }, + { + "epoch": 0.012235012110165047, + "grad_norm": 0.09505324810743332, + "learning_rate": 2.9707126586397657e-05, + "loss": 10.726, + "step": 2450 + }, + { + "epoch": 0.012284950935104497, + "grad_norm": 0.0933798998594284, + "learning_rate": 2.9705624671456107e-05, + "loss": 10.7261, + "step": 2460 + }, + { + "epoch": 0.012334889760043946, + "grad_norm": 0.09893235564231873, + "learning_rate": 2.9704122756514557e-05, + "loss": 10.7264, + "step": 2470 + }, + { + "epoch": 0.012384828584983396, + "grad_norm": 0.08968345075845718, + "learning_rate": 2.9702620841573004e-05, + "loss": 10.7259, + "step": 2480 + }, + { + "epoch": 0.012434767409922845, + "grad_norm": 0.09330223500728607, + "learning_rate": 2.9701118926631458e-05, + "loss": 10.7266, + "step": 2490 + }, + { + "epoch": 0.012484706234862294, + "grad_norm": 0.10579249262809753, + "learning_rate": 2.9699617011689904e-05, + "loss": 10.7249, + "step": 2500 + }, + { + "epoch": 0.012534645059801742, + "grad_norm": 0.0928342342376709, + "learning_rate": 2.9698115096748354e-05, + "loss": 10.7238, + "step": 2510 + }, + { + "epoch": 0.012584583884741192, + "grad_norm": 0.09858880937099457, + "learning_rate": 2.9696613181806805e-05, + "loss": 10.7247, + "step": 2520 + }, + { + "epoch": 0.012634522709680641, + "grad_norm": 0.09675079584121704, + "learning_rate": 2.969511126686525e-05, + "loss": 10.7236, + "step": 2530 + }, + { + "epoch": 0.01268446153462009, + "grad_norm": 0.09549619257450104, + "learning_rate": 2.9693609351923705e-05, + "loss": 10.7238, + "step": 2540 + }, + { + "epoch": 0.01273440035955954, + "grad_norm": 0.09920501708984375, + "learning_rate": 2.9692107436982152e-05, + "loss": 10.7236, + "step": 2550 + }, + { + "epoch": 0.01278433918449899, + "grad_norm": 0.09226509928703308, + "learning_rate": 2.9690605522040602e-05, + "loss": 10.7233, + "step": 2560 + }, + { + "epoch": 0.012834278009438437, + "grad_norm": 0.09456513077020645, + "learning_rate": 2.9689103607099052e-05, + "loss": 10.7237, + "step": 2570 + }, + { + "epoch": 0.012884216834377887, + "grad_norm": 0.09502000361680984, + "learning_rate": 2.96876016921575e-05, + "loss": 10.7231, + "step": 2580 + }, + { + "epoch": 0.012934155659317336, + "grad_norm": 0.09463300555944443, + "learning_rate": 2.9686099777215953e-05, + "loss": 10.722, + "step": 2590 + }, + { + "epoch": 0.012984094484256786, + "grad_norm": 0.09156177937984467, + "learning_rate": 2.96845978622744e-05, + "loss": 10.722, + "step": 2600 + }, + { + "epoch": 0.013034033309196235, + "grad_norm": 0.09233105927705765, + "learning_rate": 2.968309594733285e-05, + "loss": 10.7217, + "step": 2610 + }, + { + "epoch": 0.013083972134135684, + "grad_norm": 0.0961083248257637, + "learning_rate": 2.96815940323913e-05, + "loss": 10.7208, + "step": 2620 + }, + { + "epoch": 0.013133910959075132, + "grad_norm": 0.0997881218791008, + "learning_rate": 2.968009211744975e-05, + "loss": 10.7205, + "step": 2630 + }, + { + "epoch": 0.013183849784014582, + "grad_norm": 0.09116201847791672, + "learning_rate": 2.96785902025082e-05, + "loss": 10.72, + "step": 2640 + }, + { + "epoch": 0.013233788608954031, + "grad_norm": 0.0893169716000557, + "learning_rate": 2.9677088287566647e-05, + "loss": 10.7206, + "step": 2650 + }, + { + "epoch": 0.01328372743389348, + "grad_norm": 0.09925134479999542, + "learning_rate": 2.9675586372625097e-05, + "loss": 31.1372, + "step": 2660 + }, + { + "epoch": 0.01333366625883293, + "grad_norm": 0.09098075330257416, + "learning_rate": 2.9674084457683547e-05, + "loss": 10.7191, + "step": 2670 + }, + { + "epoch": 0.01338360508377238, + "grad_norm": 0.09981559216976166, + "learning_rate": 2.9672582542741997e-05, + "loss": 18.3812, + "step": 2680 + }, + { + "epoch": 0.013433543908711827, + "grad_norm": 0.10291223227977753, + "learning_rate": 2.9671080627800448e-05, + "loss": 10.7193, + "step": 2690 + }, + { + "epoch": 0.013483482733651277, + "grad_norm": 0.09718188643455505, + "learning_rate": 2.9669578712858894e-05, + "loss": 10.789, + "step": 2700 + }, + { + "epoch": 0.013533421558590726, + "grad_norm": 0.1061011552810669, + "learning_rate": 2.9668076797917344e-05, + "loss": 10.7188, + "step": 2710 + }, + { + "epoch": 0.013583360383530176, + "grad_norm": 65854.7578125, + "learning_rate": 2.9666574882975795e-05, + "loss": 11.0129, + "step": 2720 + }, + { + "epoch": 0.013633299208469625, + "grad_norm": 743.2807006835938, + "learning_rate": 2.9665072968034245e-05, + "loss": 10.7305, + "step": 2730 + }, + { + "epoch": 0.013683238033409074, + "grad_norm": 0.09718185663223267, + "learning_rate": 2.9663571053092695e-05, + "loss": 11.708, + "step": 2740 + }, + { + "epoch": 0.013733176858348524, + "grad_norm": 0.09672287851572037, + "learning_rate": 2.9662069138151142e-05, + "loss": 10.7177, + "step": 2750 + }, + { + "epoch": 0.013783115683287972, + "grad_norm": 0.093637615442276, + "learning_rate": 2.9660567223209595e-05, + "loss": 10.8841, + "step": 2760 + }, + { + "epoch": 0.013833054508227421, + "grad_norm": 0.10001079738140106, + "learning_rate": 2.9659065308268042e-05, + "loss": 10.8078, + "step": 2770 + }, + { + "epoch": 0.01388299333316687, + "grad_norm": 0.09189160168170929, + "learning_rate": 2.9657563393326492e-05, + "loss": 11.6597, + "step": 2780 + }, + { + "epoch": 0.01393293215810632, + "grad_norm": 9842.095703125, + "learning_rate": 2.9656061478384943e-05, + "loss": 13.5412, + "step": 2790 + }, + { + "epoch": 0.01398287098304577, + "grad_norm": 0.09831225126981735, + "learning_rate": 2.965455956344339e-05, + "loss": 10.7168, + "step": 2800 + }, + { + "epoch": 0.014032809807985219, + "grad_norm": 0.09885016083717346, + "learning_rate": 2.9653057648501843e-05, + "loss": 10.7171, + "step": 2810 + }, + { + "epoch": 0.014082748632924667, + "grad_norm": 0.09697610884904861, + "learning_rate": 2.965155573356029e-05, + "loss": 10.7158, + "step": 2820 + }, + { + "epoch": 0.014132687457864116, + "grad_norm": 0.09410693496465683, + "learning_rate": 2.965005381861874e-05, + "loss": 10.7153, + "step": 2830 + }, + { + "epoch": 0.014182626282803566, + "grad_norm": 0.09180265665054321, + "learning_rate": 2.964855190367719e-05, + "loss": 10.7161, + "step": 2840 + }, + { + "epoch": 0.014232565107743015, + "grad_norm": 0.10008688271045685, + "learning_rate": 2.9647049988735637e-05, + "loss": 10.7156, + "step": 2850 + }, + { + "epoch": 0.014282503932682464, + "grad_norm": 0.09007236361503601, + "learning_rate": 2.964554807379409e-05, + "loss": 10.7152, + "step": 2860 + }, + { + "epoch": 0.014332442757621914, + "grad_norm": 0.09538665413856506, + "learning_rate": 2.9644046158852537e-05, + "loss": 10.7139, + "step": 2870 + }, + { + "epoch": 0.014382381582561362, + "grad_norm": 0.09514635801315308, + "learning_rate": 2.9642544243910987e-05, + "loss": 10.7136, + "step": 2880 + }, + { + "epoch": 0.014432320407500811, + "grad_norm": 0.09610595554113388, + "learning_rate": 2.9641042328969438e-05, + "loss": 10.7138, + "step": 2890 + }, + { + "epoch": 0.01448225923244026, + "grad_norm": 0.09452015161514282, + "learning_rate": 2.9639540414027884e-05, + "loss": 10.7137, + "step": 2900 + }, + { + "epoch": 0.01453219805737971, + "grad_norm": 0.09626973420381546, + "learning_rate": 2.9638038499086338e-05, + "loss": 10.7121, + "step": 2910 + }, + { + "epoch": 0.01458213688231916, + "grad_norm": 0.0968800038099289, + "learning_rate": 2.9636536584144785e-05, + "loss": 10.7127, + "step": 2920 + }, + { + "epoch": 0.014632075707258609, + "grad_norm": 0.09562117606401443, + "learning_rate": 2.9635034669203235e-05, + "loss": 10.7123, + "step": 2930 + }, + { + "epoch": 0.014682014532198057, + "grad_norm": 0.09600751847028732, + "learning_rate": 2.9633532754261685e-05, + "loss": 10.7121, + "step": 2940 + }, + { + "epoch": 0.014731953357137506, + "grad_norm": 0.09042752534151077, + "learning_rate": 2.9632030839320135e-05, + "loss": 10.7112, + "step": 2950 + }, + { + "epoch": 0.014781892182076956, + "grad_norm": 0.09635340422391891, + "learning_rate": 2.9630528924378585e-05, + "loss": 10.7109, + "step": 2960 + }, + { + "epoch": 0.014831831007016405, + "grad_norm": 0.09323673695325851, + "learning_rate": 2.9629027009437032e-05, + "loss": 10.7115, + "step": 2970 + }, + { + "epoch": 0.014881769831955854, + "grad_norm": 0.09715006500482559, + "learning_rate": 2.9627525094495482e-05, + "loss": 10.7108, + "step": 2980 + }, + { + "epoch": 0.014931708656895304, + "grad_norm": 0.09540782868862152, + "learning_rate": 2.9626023179553933e-05, + "loss": 10.7107, + "step": 2990 + }, + { + "epoch": 0.014981647481834752, + "grad_norm": 0.09565411508083344, + "learning_rate": 2.9624521264612383e-05, + "loss": 10.7098, + "step": 3000 + }, + { + "epoch": 0.015031586306774201, + "grad_norm": 0.09608177095651627, + "learning_rate": 2.9623019349670833e-05, + "loss": 10.7095, + "step": 3010 + }, + { + "epoch": 0.01508152513171365, + "grad_norm": 0.09020178765058517, + "learning_rate": 2.962151743472928e-05, + "loss": 10.7091, + "step": 3020 + }, + { + "epoch": 0.0151314639566531, + "grad_norm": 0.09340853989124298, + "learning_rate": 2.962001551978773e-05, + "loss": 10.7083, + "step": 3030 + }, + { + "epoch": 0.01518140278159255, + "grad_norm": 0.09446629136800766, + "learning_rate": 2.961851360484618e-05, + "loss": 10.7087, + "step": 3040 + }, + { + "epoch": 0.015231341606531999, + "grad_norm": 0.0978376492857933, + "learning_rate": 2.961701168990463e-05, + "loss": 10.7081, + "step": 3050 + }, + { + "epoch": 0.015281280431471447, + "grad_norm": 0.09334233403205872, + "learning_rate": 2.961550977496308e-05, + "loss": 10.709, + "step": 3060 + }, + { + "epoch": 0.015331219256410896, + "grad_norm": 0.098800890147686, + "learning_rate": 2.9614007860021527e-05, + "loss": 10.7079, + "step": 3070 + }, + { + "epoch": 0.015381158081350346, + "grad_norm": 0.09233418852090836, + "learning_rate": 2.9612505945079977e-05, + "loss": 10.7071, + "step": 3080 + }, + { + "epoch": 0.015431096906289795, + "grad_norm": 0.0931033119559288, + "learning_rate": 2.9611004030138428e-05, + "loss": 10.7077, + "step": 3090 + }, + { + "epoch": 0.015481035731229244, + "grad_norm": 0.08983912318944931, + "learning_rate": 2.9609502115196878e-05, + "loss": 10.7069, + "step": 3100 + }, + { + "epoch": 0.015530974556168694, + "grad_norm": 0.09569084644317627, + "learning_rate": 2.9608000200255328e-05, + "loss": 10.7061, + "step": 3110 + }, + { + "epoch": 0.015580913381108143, + "grad_norm": 0.09349966049194336, + "learning_rate": 2.9606498285313775e-05, + "loss": 10.7061, + "step": 3120 + }, + { + "epoch": 0.015630852206047593, + "grad_norm": 0.09354298561811447, + "learning_rate": 2.9604996370372225e-05, + "loss": 10.7061, + "step": 3130 + }, + { + "epoch": 0.015680791030987042, + "grad_norm": 0.09295152127742767, + "learning_rate": 2.9603494455430675e-05, + "loss": 10.7059, + "step": 3140 + }, + { + "epoch": 0.01573072985592649, + "grad_norm": 0.09449790418148041, + "learning_rate": 2.9601992540489125e-05, + "loss": 10.7059, + "step": 3150 + }, + { + "epoch": 0.015780668680865938, + "grad_norm": 0.0972711518406868, + "learning_rate": 2.9600490625547575e-05, + "loss": 10.7049, + "step": 3160 + }, + { + "epoch": 0.015830607505805387, + "grad_norm": 0.09353794902563095, + "learning_rate": 2.9598988710606022e-05, + "loss": 10.7047, + "step": 3170 + }, + { + "epoch": 0.015880546330744837, + "grad_norm": 0.09542354941368103, + "learning_rate": 2.9597486795664472e-05, + "loss": 10.7048, + "step": 3180 + }, + { + "epoch": 0.015930485155684286, + "grad_norm": 0.09505563974380493, + "learning_rate": 2.9595984880722923e-05, + "loss": 10.7042, + "step": 3190 + }, + { + "epoch": 0.015980423980623736, + "grad_norm": 0.09707723557949066, + "learning_rate": 2.9594482965781373e-05, + "loss": 10.7039, + "step": 3200 + }, + { + "epoch": 0.016030362805563185, + "grad_norm": 0.09463075548410416, + "learning_rate": 2.9592981050839823e-05, + "loss": 10.7039, + "step": 3210 + }, + { + "epoch": 0.016080301630502634, + "grad_norm": 0.09552239626646042, + "learning_rate": 2.959147913589827e-05, + "loss": 10.7023, + "step": 3220 + }, + { + "epoch": 0.016130240455442084, + "grad_norm": 0.0979151502251625, + "learning_rate": 2.958997722095672e-05, + "loss": 10.7026, + "step": 3230 + }, + { + "epoch": 0.016180179280381533, + "grad_norm": 0.0954020619392395, + "learning_rate": 2.958847530601517e-05, + "loss": 10.702, + "step": 3240 + }, + { + "epoch": 0.016230118105320983, + "grad_norm": 0.09606318175792694, + "learning_rate": 2.958697339107362e-05, + "loss": 10.702, + "step": 3250 + }, + { + "epoch": 0.016280056930260432, + "grad_norm": 0.09881041944026947, + "learning_rate": 2.958547147613207e-05, + "loss": 10.7023, + "step": 3260 + }, + { + "epoch": 0.01632999575519988, + "grad_norm": 0.09216438978910446, + "learning_rate": 2.958396956119052e-05, + "loss": 10.7006, + "step": 3270 + }, + { + "epoch": 0.016379934580139328, + "grad_norm": 0.09119243174791336, + "learning_rate": 2.9582467646248967e-05, + "loss": 10.7009, + "step": 3280 + }, + { + "epoch": 0.016429873405078777, + "grad_norm": 0.09380321204662323, + "learning_rate": 2.9580965731307418e-05, + "loss": 10.7007, + "step": 3290 + }, + { + "epoch": 0.016479812230018227, + "grad_norm": 0.0956210270524025, + "learning_rate": 2.9579463816365868e-05, + "loss": 10.7, + "step": 3300 + }, + { + "epoch": 0.016529751054957676, + "grad_norm": 0.0973009392619133, + "learning_rate": 2.9577961901424318e-05, + "loss": 10.6998, + "step": 3310 + }, + { + "epoch": 0.016579689879897126, + "grad_norm": 0.09392161667346954, + "learning_rate": 2.9576459986482768e-05, + "loss": 10.7002, + "step": 3320 + }, + { + "epoch": 0.016629628704836575, + "grad_norm": 0.09009403735399246, + "learning_rate": 2.9574958071541215e-05, + "loss": 10.7001, + "step": 3330 + }, + { + "epoch": 0.016679567529776024, + "grad_norm": 0.09653884917497635, + "learning_rate": 2.9573456156599665e-05, + "loss": 10.6986, + "step": 3340 + }, + { + "epoch": 0.016729506354715474, + "grad_norm": 0.09588049352169037, + "learning_rate": 2.9571954241658115e-05, + "loss": 10.6994, + "step": 3350 + }, + { + "epoch": 0.016779445179654923, + "grad_norm": 0.09776197373867035, + "learning_rate": 2.9570452326716565e-05, + "loss": 10.6991, + "step": 3360 + }, + { + "epoch": 0.016829384004594373, + "grad_norm": 0.09142053127288818, + "learning_rate": 2.9568950411775016e-05, + "loss": 10.6981, + "step": 3370 + }, + { + "epoch": 0.016879322829533822, + "grad_norm": 0.09754452109336853, + "learning_rate": 2.9567448496833462e-05, + "loss": 10.6978, + "step": 3380 + }, + { + "epoch": 0.01692926165447327, + "grad_norm": 0.09043591469526291, + "learning_rate": 2.9565946581891913e-05, + "loss": 10.6968, + "step": 3390 + }, + { + "epoch": 0.016979200479412718, + "grad_norm": 0.09461095184087753, + "learning_rate": 2.9564444666950363e-05, + "loss": 10.6971, + "step": 3400 + }, + { + "epoch": 0.017029139304352167, + "grad_norm": 0.10008252412080765, + "learning_rate": 2.9562942752008813e-05, + "loss": 10.6968, + "step": 3410 + }, + { + "epoch": 0.017079078129291617, + "grad_norm": 0.09465460479259491, + "learning_rate": 2.9561440837067263e-05, + "loss": 10.6963, + "step": 3420 + }, + { + "epoch": 0.017129016954231066, + "grad_norm": 0.09323673695325851, + "learning_rate": 2.955993892212571e-05, + "loss": 10.6959, + "step": 3430 + }, + { + "epoch": 0.017178955779170516, + "grad_norm": 0.09518439322710037, + "learning_rate": 2.955843700718416e-05, + "loss": 10.6961, + "step": 3440 + }, + { + "epoch": 0.017228894604109965, + "grad_norm": 0.09833721816539764, + "learning_rate": 2.955693509224261e-05, + "loss": 10.6954, + "step": 3450 + }, + { + "epoch": 0.017278833429049414, + "grad_norm": 0.09518902748823166, + "learning_rate": 2.955543317730106e-05, + "loss": 10.6944, + "step": 3460 + }, + { + "epoch": 0.017328772253988864, + "grad_norm": 0.10006536543369293, + "learning_rate": 2.955393126235951e-05, + "loss": 10.6956, + "step": 3470 + }, + { + "epoch": 0.017378711078928313, + "grad_norm": 0.08888301998376846, + "learning_rate": 2.9552429347417957e-05, + "loss": 10.6955, + "step": 3480 + }, + { + "epoch": 0.017428649903867763, + "grad_norm": 0.09507909417152405, + "learning_rate": 2.9550927432476408e-05, + "loss": 10.695, + "step": 3490 + }, + { + "epoch": 0.017478588728807212, + "grad_norm": 0.09857828170061111, + "learning_rate": 2.9549425517534858e-05, + "loss": 10.6974, + "step": 3500 + }, + { + "epoch": 0.01752852755374666, + "grad_norm": 0.09427110850811005, + "learning_rate": 2.9547923602593308e-05, + "loss": 10.6932, + "step": 3510 + }, + { + "epoch": 0.01757846637868611, + "grad_norm": 0.08865749090909958, + "learning_rate": 2.9546421687651758e-05, + "loss": 10.6936, + "step": 3520 + }, + { + "epoch": 0.017628405203625557, + "grad_norm": 0.09774231165647507, + "learning_rate": 2.9544919772710205e-05, + "loss": 10.6936, + "step": 3530 + }, + { + "epoch": 0.017678344028565007, + "grad_norm": 0.10259677469730377, + "learning_rate": 2.9543417857768655e-05, + "loss": 10.6929, + "step": 3540 + }, + { + "epoch": 0.017728282853504456, + "grad_norm": 0.09243781864643097, + "learning_rate": 2.9541915942827105e-05, + "loss": 10.6924, + "step": 3550 + }, + { + "epoch": 0.017778221678443906, + "grad_norm": 0.08847206085920334, + "learning_rate": 2.9540414027885555e-05, + "loss": 10.6917, + "step": 3560 + }, + { + "epoch": 0.017828160503383355, + "grad_norm": 0.09328203648328781, + "learning_rate": 2.9538912112944006e-05, + "loss": 10.692, + "step": 3570 + }, + { + "epoch": 0.017878099328322804, + "grad_norm": 0.09655408561229706, + "learning_rate": 2.9537410198002452e-05, + "loss": 10.6916, + "step": 3580 + }, + { + "epoch": 0.017928038153262254, + "grad_norm": 0.09533986449241638, + "learning_rate": 2.9535908283060903e-05, + "loss": 10.6912, + "step": 3590 + }, + { + "epoch": 0.017977976978201703, + "grad_norm": 0.09806837886571884, + "learning_rate": 2.9534406368119353e-05, + "loss": 10.6905, + "step": 3600 + }, + { + "epoch": 0.018027915803141153, + "grad_norm": 0.09532346576452255, + "learning_rate": 2.9532904453177803e-05, + "loss": 10.6909, + "step": 3610 + }, + { + "epoch": 0.018077854628080602, + "grad_norm": 0.09362402558326721, + "learning_rate": 2.9531402538236253e-05, + "loss": 10.6905, + "step": 3620 + }, + { + "epoch": 0.01812779345302005, + "grad_norm": 0.09821821749210358, + "learning_rate": 2.95299006232947e-05, + "loss": 10.6909, + "step": 3630 + }, + { + "epoch": 0.0181777322779595, + "grad_norm": 0.09857241809368134, + "learning_rate": 2.9528398708353153e-05, + "loss": 10.6896, + "step": 3640 + }, + { + "epoch": 0.018227671102898947, + "grad_norm": 0.0939745232462883, + "learning_rate": 2.95268967934116e-05, + "loss": 10.6893, + "step": 3650 + }, + { + "epoch": 0.018277609927838397, + "grad_norm": 0.09561046957969666, + "learning_rate": 2.952539487847005e-05, + "loss": 10.6889, + "step": 3660 + }, + { + "epoch": 0.018327548752777846, + "grad_norm": 0.09186465293169022, + "learning_rate": 2.95238929635285e-05, + "loss": 10.6892, + "step": 3670 + }, + { + "epoch": 0.018377487577717296, + "grad_norm": 0.1008329913020134, + "learning_rate": 2.9522391048586947e-05, + "loss": 10.6882, + "step": 3680 + }, + { + "epoch": 0.018427426402656745, + "grad_norm": 0.09180609881877899, + "learning_rate": 2.95208891336454e-05, + "loss": 10.6886, + "step": 3690 + }, + { + "epoch": 0.018477365227596194, + "grad_norm": 0.09365356713533401, + "learning_rate": 2.9519387218703848e-05, + "loss": 10.6877, + "step": 3700 + }, + { + "epoch": 0.018527304052535644, + "grad_norm": 0.09670042246580124, + "learning_rate": 2.9517885303762298e-05, + "loss": 10.6865, + "step": 3710 + }, + { + "epoch": 0.018577242877475093, + "grad_norm": 0.1001424565911293, + "learning_rate": 2.9516383388820748e-05, + "loss": 10.6872, + "step": 3720 + }, + { + "epoch": 0.018627181702414543, + "grad_norm": 0.0908576175570488, + "learning_rate": 2.9514881473879195e-05, + "loss": 10.6879, + "step": 3730 + }, + { + "epoch": 0.018677120527353992, + "grad_norm": 0.0932522565126419, + "learning_rate": 2.951337955893765e-05, + "loss": 10.6877, + "step": 3740 + }, + { + "epoch": 0.01872705935229344, + "grad_norm": 0.0923110842704773, + "learning_rate": 2.9511877643996095e-05, + "loss": 10.6855, + "step": 3750 + }, + { + "epoch": 0.01877699817723289, + "grad_norm": 0.09654710441827774, + "learning_rate": 2.9510375729054545e-05, + "loss": 10.6861, + "step": 3760 + }, + { + "epoch": 0.01882693700217234, + "grad_norm": 0.09733393788337708, + "learning_rate": 2.9508873814112996e-05, + "loss": 10.6865, + "step": 3770 + }, + { + "epoch": 0.018876875827111787, + "grad_norm": 0.09079433232545853, + "learning_rate": 2.9507371899171442e-05, + "loss": 10.6857, + "step": 3780 + }, + { + "epoch": 0.018926814652051236, + "grad_norm": 0.09449440240859985, + "learning_rate": 2.9505869984229896e-05, + "loss": 10.6846, + "step": 3790 + }, + { + "epoch": 0.018976753476990686, + "grad_norm": 0.0952952429652214, + "learning_rate": 2.9504368069288343e-05, + "loss": 10.6846, + "step": 3800 + }, + { + "epoch": 0.019026692301930135, + "grad_norm": 0.09154447913169861, + "learning_rate": 2.9502866154346793e-05, + "loss": 10.6848, + "step": 3810 + }, + { + "epoch": 0.019076631126869584, + "grad_norm": 0.09125672280788422, + "learning_rate": 2.9501364239405243e-05, + "loss": 10.6841, + "step": 3820 + }, + { + "epoch": 0.019126569951809034, + "grad_norm": 0.09873983263969421, + "learning_rate": 2.949986232446369e-05, + "loss": 10.6839, + "step": 3830 + }, + { + "epoch": 0.019176508776748483, + "grad_norm": 0.09625153988599777, + "learning_rate": 2.9498360409522143e-05, + "loss": 10.6842, + "step": 3840 + }, + { + "epoch": 0.019226447601687933, + "grad_norm": 0.09592974185943604, + "learning_rate": 2.949685849458059e-05, + "loss": 10.6827, + "step": 3850 + }, + { + "epoch": 0.019276386426627382, + "grad_norm": 0.10091127455234528, + "learning_rate": 2.949535657963904e-05, + "loss": 10.684, + "step": 3860 + }, + { + "epoch": 0.01932632525156683, + "grad_norm": 0.09118268638849258, + "learning_rate": 2.949385466469749e-05, + "loss": 10.6836, + "step": 3870 + }, + { + "epoch": 0.01937626407650628, + "grad_norm": 0.09394898265600204, + "learning_rate": 2.9492352749755937e-05, + "loss": 10.6824, + "step": 3880 + }, + { + "epoch": 0.01942620290144573, + "grad_norm": 0.08936263620853424, + "learning_rate": 2.949085083481439e-05, + "loss": 10.6821, + "step": 3890 + }, + { + "epoch": 0.019476141726385177, + "grad_norm": 0.09612365067005157, + "learning_rate": 2.9489348919872838e-05, + "loss": 10.6827, + "step": 3900 + }, + { + "epoch": 0.019526080551324626, + "grad_norm": 0.09695883840322495, + "learning_rate": 2.9487847004931288e-05, + "loss": 10.6824, + "step": 3910 + }, + { + "epoch": 0.019576019376264076, + "grad_norm": 0.0948113203048706, + "learning_rate": 2.9486345089989738e-05, + "loss": 10.6813, + "step": 3920 + }, + { + "epoch": 0.019625958201203525, + "grad_norm": 0.09577417373657227, + "learning_rate": 2.9484843175048185e-05, + "loss": 10.6822, + "step": 3930 + }, + { + "epoch": 0.019675897026142974, + "grad_norm": 0.09592152386903763, + "learning_rate": 2.948334126010664e-05, + "loss": 10.6808, + "step": 3940 + }, + { + "epoch": 0.019725835851082424, + "grad_norm": 0.09353554248809814, + "learning_rate": 2.9481839345165085e-05, + "loss": 10.68, + "step": 3950 + }, + { + "epoch": 0.019775774676021873, + "grad_norm": 0.09733685851097107, + "learning_rate": 2.948033743022354e-05, + "loss": 10.6801, + "step": 3960 + }, + { + "epoch": 0.019825713500961323, + "grad_norm": 0.09616942703723907, + "learning_rate": 2.9478835515281986e-05, + "loss": 10.6782, + "step": 3970 + }, + { + "epoch": 0.019875652325900772, + "grad_norm": 0.091241754591465, + "learning_rate": 2.9477333600340432e-05, + "loss": 10.6792, + "step": 3980 + }, + { + "epoch": 0.01992559115084022, + "grad_norm": 0.09594473242759705, + "learning_rate": 2.9475831685398886e-05, + "loss": 10.6788, + "step": 3990 + }, + { + "epoch": 0.01997552997577967, + "grad_norm": 0.09622420370578766, + "learning_rate": 2.9474329770457333e-05, + "loss": 10.679, + "step": 4000 + }, + { + "epoch": 0.02002546880071912, + "grad_norm": 0.09455501288175583, + "learning_rate": 2.9472827855515786e-05, + "loss": 10.6778, + "step": 4010 + }, + { + "epoch": 0.020075407625658567, + "grad_norm": 0.08896750956773758, + "learning_rate": 2.9471325940574233e-05, + "loss": 10.679, + "step": 4020 + }, + { + "epoch": 0.020125346450598016, + "grad_norm": 0.09691905975341797, + "learning_rate": 2.946982402563268e-05, + "loss": 10.6772, + "step": 4030 + }, + { + "epoch": 0.020175285275537466, + "grad_norm": 0.09754662215709686, + "learning_rate": 2.9468322110691134e-05, + "loss": 10.6766, + "step": 4040 + }, + { + "epoch": 0.020225224100476915, + "grad_norm": 0.09269405156373978, + "learning_rate": 2.946682019574958e-05, + "loss": 10.6776, + "step": 4050 + }, + { + "epoch": 0.020275162925416364, + "grad_norm": 0.08767396956682205, + "learning_rate": 2.9465318280808034e-05, + "loss": 10.6769, + "step": 4060 + }, + { + "epoch": 0.020325101750355814, + "grad_norm": 0.09328394383192062, + "learning_rate": 2.946381636586648e-05, + "loss": 10.6763, + "step": 4070 + }, + { + "epoch": 0.020375040575295263, + "grad_norm": 0.09484425187110901, + "learning_rate": 2.9462314450924927e-05, + "loss": 10.6768, + "step": 4080 + }, + { + "epoch": 0.020424979400234713, + "grad_norm": 0.09627150744199753, + "learning_rate": 2.946081253598338e-05, + "loss": 10.6757, + "step": 4090 + }, + { + "epoch": 0.020474918225174162, + "grad_norm": 0.093561552464962, + "learning_rate": 2.9459310621041828e-05, + "loss": 10.6754, + "step": 4100 + }, + { + "epoch": 0.02052485705011361, + "grad_norm": 0.0967199057340622, + "learning_rate": 2.945780870610028e-05, + "loss": 10.6758, + "step": 4110 + }, + { + "epoch": 0.02057479587505306, + "grad_norm": 0.09370432049036026, + "learning_rate": 2.9456306791158728e-05, + "loss": 10.6746, + "step": 4120 + }, + { + "epoch": 0.02062473469999251, + "grad_norm": 0.10350590944290161, + "learning_rate": 2.9454804876217175e-05, + "loss": 10.6754, + "step": 4130 + }, + { + "epoch": 0.02067467352493196, + "grad_norm": 0.09343244880437851, + "learning_rate": 2.945330296127563e-05, + "loss": 10.6739, + "step": 4140 + }, + { + "epoch": 0.020724612349871406, + "grad_norm": 0.09434296935796738, + "learning_rate": 2.9451801046334075e-05, + "loss": 10.6729, + "step": 4150 + }, + { + "epoch": 0.020774551174810856, + "grad_norm": 0.09708379209041595, + "learning_rate": 2.945029913139253e-05, + "loss": 10.6736, + "step": 4160 + }, + { + "epoch": 0.020824489999750305, + "grad_norm": 0.0962655320763588, + "learning_rate": 2.9448797216450976e-05, + "loss": 10.6728, + "step": 4170 + }, + { + "epoch": 0.020874428824689754, + "grad_norm": 0.10440271347761154, + "learning_rate": 2.9447295301509422e-05, + "loss": 10.6742, + "step": 4180 + }, + { + "epoch": 0.020924367649629204, + "grad_norm": 0.10102055221796036, + "learning_rate": 2.9445793386567876e-05, + "loss": 10.6736, + "step": 4190 + }, + { + "epoch": 0.020974306474568653, + "grad_norm": 0.10021764785051346, + "learning_rate": 2.9444291471626323e-05, + "loss": 10.6728, + "step": 4200 + }, + { + "epoch": 0.021024245299508103, + "grad_norm": 0.09241849929094315, + "learning_rate": 2.9442789556684776e-05, + "loss": 10.6724, + "step": 4210 + }, + { + "epoch": 0.021074184124447552, + "grad_norm": 0.10048867762088776, + "learning_rate": 2.9441287641743223e-05, + "loss": 10.6714, + "step": 4220 + }, + { + "epoch": 0.021124122949387, + "grad_norm": 0.09459193050861359, + "learning_rate": 2.943978572680167e-05, + "loss": 10.6714, + "step": 4230 + }, + { + "epoch": 0.02117406177432645, + "grad_norm": 0.09565653651952744, + "learning_rate": 2.9438283811860124e-05, + "loss": 10.6698, + "step": 4240 + }, + { + "epoch": 0.0212240005992659, + "grad_norm": 0.09747759997844696, + "learning_rate": 2.943678189691857e-05, + "loss": 10.6708, + "step": 4250 + }, + { + "epoch": 0.02127393942420535, + "grad_norm": 0.09385592490434647, + "learning_rate": 2.9435279981977024e-05, + "loss": 10.6712, + "step": 4260 + }, + { + "epoch": 0.021323878249144796, + "grad_norm": 0.09437979012727737, + "learning_rate": 2.943377806703547e-05, + "loss": 10.672, + "step": 4270 + }, + { + "epoch": 0.021373817074084245, + "grad_norm": 0.09702524542808533, + "learning_rate": 2.943227615209392e-05, + "loss": 10.6702, + "step": 4280 + }, + { + "epoch": 0.021423755899023695, + "grad_norm": 0.09239441156387329, + "learning_rate": 2.943077423715237e-05, + "loss": 10.6703, + "step": 4290 + }, + { + "epoch": 0.021473694723963144, + "grad_norm": 0.09539250284433365, + "learning_rate": 2.9429272322210818e-05, + "loss": 10.6698, + "step": 4300 + }, + { + "epoch": 0.021523633548902594, + "grad_norm": 0.09849133342504501, + "learning_rate": 2.942777040726927e-05, + "loss": 10.6696, + "step": 4310 + }, + { + "epoch": 0.021573572373842043, + "grad_norm": 0.09273874014616013, + "learning_rate": 2.9426268492327718e-05, + "loss": 10.6689, + "step": 4320 + }, + { + "epoch": 0.021623511198781493, + "grad_norm": 0.09297140687704086, + "learning_rate": 2.942476657738617e-05, + "loss": 10.6676, + "step": 4330 + }, + { + "epoch": 0.021673450023720942, + "grad_norm": 0.09432685375213623, + "learning_rate": 2.942326466244462e-05, + "loss": 10.6677, + "step": 4340 + }, + { + "epoch": 0.02172338884866039, + "grad_norm": 3660.670654296875, + "learning_rate": 2.9421762747503065e-05, + "loss": 10.674, + "step": 4350 + }, + { + "epoch": 0.02177332767359984, + "grad_norm": 0.09602420032024384, + "learning_rate": 2.942026083256152e-05, + "loss": 16.9632, + "step": 4360 + }, + { + "epoch": 0.02182326649853929, + "grad_norm": 0.09601210057735443, + "learning_rate": 2.9418758917619966e-05, + "loss": 10.6676, + "step": 4370 + }, + { + "epoch": 0.02187320532347874, + "grad_norm": 0.08743233233690262, + "learning_rate": 2.9417257002678416e-05, + "loss": 10.6673, + "step": 4380 + }, + { + "epoch": 0.021923144148418186, + "grad_norm": 0.10627735406160355, + "learning_rate": 2.9415755087736866e-05, + "loss": 10.8175, + "step": 4390 + }, + { + "epoch": 0.021973082973357635, + "grad_norm": 0.09581291675567627, + "learning_rate": 2.9414253172795313e-05, + "loss": 10.6657, + "step": 4400 + }, + { + "epoch": 0.022023021798297085, + "grad_norm": 0.09985919296741486, + "learning_rate": 2.9412751257853766e-05, + "loss": 10.6651, + "step": 4410 + }, + { + "epoch": 0.022072960623236534, + "grad_norm": 0.09798286855220795, + "learning_rate": 2.9411249342912213e-05, + "loss": 10.6658, + "step": 4420 + }, + { + "epoch": 0.022122899448175984, + "grad_norm": 0.09146228432655334, + "learning_rate": 2.9409747427970663e-05, + "loss": 10.6667, + "step": 4430 + }, + { + "epoch": 0.022172838273115433, + "grad_norm": 0.10088124871253967, + "learning_rate": 2.9408245513029114e-05, + "loss": 10.6659, + "step": 4440 + }, + { + "epoch": 0.022222777098054883, + "grad_norm": 0.09959380328655243, + "learning_rate": 2.940674359808756e-05, + "loss": 10.666, + "step": 4450 + }, + { + "epoch": 0.022272715922994332, + "grad_norm": 0.09625295549631119, + "learning_rate": 2.9405241683146014e-05, + "loss": 10.6653, + "step": 4460 + }, + { + "epoch": 0.02232265474793378, + "grad_norm": 0.09174207597970963, + "learning_rate": 2.940373976820446e-05, + "loss": 10.6652, + "step": 4470 + }, + { + "epoch": 0.02237259357287323, + "grad_norm": 0.09853401780128479, + "learning_rate": 2.940223785326291e-05, + "loss": 10.6626, + "step": 4480 + }, + { + "epoch": 0.02242253239781268, + "grad_norm": 0.09681205451488495, + "learning_rate": 2.940073593832136e-05, + "loss": 10.6643, + "step": 4490 + }, + { + "epoch": 0.02247247122275213, + "grad_norm": 0.0938250795006752, + "learning_rate": 2.9399234023379808e-05, + "loss": 10.6627, + "step": 4500 + }, + { + "epoch": 0.02252241004769158, + "grad_norm": 0.09339260309934616, + "learning_rate": 2.939773210843826e-05, + "loss": 10.6642, + "step": 4510 + }, + { + "epoch": 0.022572348872631025, + "grad_norm": 0.0960143581032753, + "learning_rate": 2.9396230193496708e-05, + "loss": 10.6622, + "step": 4520 + }, + { + "epoch": 0.022622287697570475, + "grad_norm": 0.09255195409059525, + "learning_rate": 2.939472827855516e-05, + "loss": 10.6628, + "step": 4530 + }, + { + "epoch": 0.022672226522509924, + "grad_norm": 0.0997820645570755, + "learning_rate": 2.939322636361361e-05, + "loss": 10.661, + "step": 4540 + }, + { + "epoch": 0.022722165347449374, + "grad_norm": 0.09495710581541061, + "learning_rate": 2.9391724448672055e-05, + "loss": 10.6627, + "step": 4550 + }, + { + "epoch": 0.022772104172388823, + "grad_norm": 0.09391613304615021, + "learning_rate": 2.939022253373051e-05, + "loss": 10.6619, + "step": 4560 + }, + { + "epoch": 0.022822042997328273, + "grad_norm": 0.09510276466608047, + "learning_rate": 2.9388720618788956e-05, + "loss": 10.6613, + "step": 4570 + }, + { + "epoch": 0.022871981822267722, + "grad_norm": 0.09492931514978409, + "learning_rate": 2.9387218703847406e-05, + "loss": 10.6618, + "step": 4580 + }, + { + "epoch": 0.02292192064720717, + "grad_norm": 0.0956352949142456, + "learning_rate": 2.9385716788905856e-05, + "loss": 10.6594, + "step": 4590 + }, + { + "epoch": 0.02297185947214662, + "grad_norm": 0.09539540112018585, + "learning_rate": 2.9384214873964306e-05, + "loss": 10.6604, + "step": 4600 + }, + { + "epoch": 0.02302179829708607, + "grad_norm": 0.09625688195228577, + "learning_rate": 2.9382712959022756e-05, + "loss": 10.6606, + "step": 4610 + }, + { + "epoch": 0.02307173712202552, + "grad_norm": 0.09467311948537827, + "learning_rate": 2.9381211044081203e-05, + "loss": 10.6607, + "step": 4620 + }, + { + "epoch": 0.02312167594696497, + "grad_norm": 0.10024222731590271, + "learning_rate": 2.9379709129139653e-05, + "loss": 10.6599, + "step": 4630 + }, + { + "epoch": 0.023171614771904415, + "grad_norm": 0.09454458951950073, + "learning_rate": 2.9378207214198104e-05, + "loss": 10.6598, + "step": 4640 + }, + { + "epoch": 0.023221553596843865, + "grad_norm": 0.10061488300561905, + "learning_rate": 2.9376705299256554e-05, + "loss": 10.6583, + "step": 4650 + }, + { + "epoch": 0.023271492421783314, + "grad_norm": 0.09349951148033142, + "learning_rate": 2.9375203384315004e-05, + "loss": 10.6581, + "step": 4660 + }, + { + "epoch": 0.023321431246722764, + "grad_norm": 0.09272319078445435, + "learning_rate": 2.937370146937345e-05, + "loss": 10.6588, + "step": 4670 + }, + { + "epoch": 0.023371370071662213, + "grad_norm": 0.09603145718574524, + "learning_rate": 2.93721995544319e-05, + "loss": 10.6568, + "step": 4680 + }, + { + "epoch": 0.023421308896601663, + "grad_norm": 0.09523800760507584, + "learning_rate": 2.937069763949035e-05, + "loss": 10.6573, + "step": 4690 + }, + { + "epoch": 0.023471247721541112, + "grad_norm": 0.09391947090625763, + "learning_rate": 2.93691957245488e-05, + "loss": 10.6567, + "step": 4700 + }, + { + "epoch": 0.02352118654648056, + "grad_norm": 0.09547725319862366, + "learning_rate": 2.936769380960725e-05, + "loss": 10.656, + "step": 4710 + }, + { + "epoch": 0.02357112537142001, + "grad_norm": 0.09450241923332214, + "learning_rate": 2.9366191894665698e-05, + "loss": 10.6568, + "step": 4720 + }, + { + "epoch": 0.02362106419635946, + "grad_norm": 0.09558774530887604, + "learning_rate": 2.936468997972415e-05, + "loss": 10.6559, + "step": 4730 + }, + { + "epoch": 0.02367100302129891, + "grad_norm": 0.09340886026620865, + "learning_rate": 2.93631880647826e-05, + "loss": 10.6566, + "step": 4740 + }, + { + "epoch": 0.02372094184623836, + "grad_norm": 0.09513580799102783, + "learning_rate": 2.936168614984105e-05, + "loss": 10.6557, + "step": 4750 + }, + { + "epoch": 0.023770880671177805, + "grad_norm": 0.09321212023496628, + "learning_rate": 2.93601842348995e-05, + "loss": 10.6559, + "step": 4760 + }, + { + "epoch": 0.023820819496117255, + "grad_norm": 0.10149511694908142, + "learning_rate": 2.9358682319957946e-05, + "loss": 10.6545, + "step": 4770 + }, + { + "epoch": 0.023870758321056704, + "grad_norm": 0.09594745188951492, + "learning_rate": 2.9357180405016396e-05, + "loss": 10.6542, + "step": 4780 + }, + { + "epoch": 0.023920697145996154, + "grad_norm": 0.09818349033594131, + "learning_rate": 2.9355678490074846e-05, + "loss": 10.6542, + "step": 4790 + }, + { + "epoch": 0.023970635970935603, + "grad_norm": 0.09233896434307098, + "learning_rate": 2.9354176575133296e-05, + "loss": 10.654, + "step": 4800 + }, + { + "epoch": 0.024020574795875053, + "grad_norm": 0.09461528062820435, + "learning_rate": 2.9352674660191746e-05, + "loss": 10.6537, + "step": 4810 + }, + { + "epoch": 0.024070513620814502, + "grad_norm": 0.10067877173423767, + "learning_rate": 2.9351172745250193e-05, + "loss": 10.6535, + "step": 4820 + }, + { + "epoch": 0.02412045244575395, + "grad_norm": 0.08958308398723602, + "learning_rate": 2.9349670830308643e-05, + "loss": 10.6532, + "step": 4830 + }, + { + "epoch": 0.0241703912706934, + "grad_norm": 0.09203724563121796, + "learning_rate": 2.9348168915367094e-05, + "loss": 10.652, + "step": 4840 + }, + { + "epoch": 0.02422033009563285, + "grad_norm": 0.11031246185302734, + "learning_rate": 2.9346667000425544e-05, + "loss": 10.6523, + "step": 4850 + }, + { + "epoch": 0.0242702689205723, + "grad_norm": 0.09454483538866043, + "learning_rate": 2.9345165085483994e-05, + "loss": 10.6523, + "step": 4860 + }, + { + "epoch": 0.02432020774551175, + "grad_norm": 0.0910816341638565, + "learning_rate": 2.934366317054244e-05, + "loss": 10.6521, + "step": 4870 + }, + { + "epoch": 0.0243701465704512, + "grad_norm": 0.09349098056554794, + "learning_rate": 2.934216125560089e-05, + "loss": 10.6514, + "step": 4880 + }, + { + "epoch": 0.024420085395390645, + "grad_norm": 0.0930526852607727, + "learning_rate": 2.934065934065934e-05, + "loss": 10.6526, + "step": 4890 + }, + { + "epoch": 0.024470024220330094, + "grad_norm": 0.09221765398979187, + "learning_rate": 2.933915742571779e-05, + "loss": 10.652, + "step": 4900 + }, + { + "epoch": 0.024519963045269544, + "grad_norm": 0.08790691196918488, + "learning_rate": 2.933765551077624e-05, + "loss": 10.6504, + "step": 4910 + }, + { + "epoch": 0.024569901870208993, + "grad_norm": 0.09350787848234177, + "learning_rate": 2.933615359583469e-05, + "loss": 10.6491, + "step": 4920 + }, + { + "epoch": 0.024619840695148443, + "grad_norm": 0.10474139451980591, + "learning_rate": 2.933465168089314e-05, + "loss": 10.651, + "step": 4930 + }, + { + "epoch": 0.024669779520087892, + "grad_norm": 0.09355979412794113, + "learning_rate": 2.933314976595159e-05, + "loss": 10.6504, + "step": 4940 + }, + { + "epoch": 0.02471971834502734, + "grad_norm": 0.09748471528291702, + "learning_rate": 2.933164785101004e-05, + "loss": 10.6502, + "step": 4950 + }, + { + "epoch": 0.02476965716996679, + "grad_norm": 0.09395548701286316, + "learning_rate": 2.933014593606849e-05, + "loss": 10.6488, + "step": 4960 + }, + { + "epoch": 0.02481959599490624, + "grad_norm": 0.09552334249019623, + "learning_rate": 2.932864402112694e-05, + "loss": 10.6497, + "step": 4970 + }, + { + "epoch": 0.02486953481984569, + "grad_norm": 0.0992099866271019, + "learning_rate": 2.9327142106185386e-05, + "loss": 10.6502, + "step": 4980 + }, + { + "epoch": 0.02491947364478514, + "grad_norm": 0.09515640139579773, + "learning_rate": 2.9325640191243836e-05, + "loss": 10.6497, + "step": 4990 + }, + { + "epoch": 0.02496941246972459, + "grad_norm": 0.09259581565856934, + "learning_rate": 2.9324138276302286e-05, + "loss": 10.648, + "step": 5000 + }, + { + "epoch": 0.025019351294664035, + "grad_norm": 0.09485942125320435, + "learning_rate": 2.9322636361360736e-05, + "loss": 10.6474, + "step": 5010 + }, + { + "epoch": 0.025069290119603484, + "grad_norm": 0.09455141425132751, + "learning_rate": 2.9321134446419187e-05, + "loss": 10.6474, + "step": 5020 + }, + { + "epoch": 0.025119228944542934, + "grad_norm": 0.09555475413799286, + "learning_rate": 2.9319632531477633e-05, + "loss": 10.6471, + "step": 5030 + }, + { + "epoch": 0.025169167769482383, + "grad_norm": 0.09917616844177246, + "learning_rate": 2.9318130616536084e-05, + "loss": 10.646, + "step": 5040 + }, + { + "epoch": 0.025219106594421833, + "grad_norm": 0.09640449285507202, + "learning_rate": 2.9316628701594534e-05, + "loss": 10.6467, + "step": 5050 + }, + { + "epoch": 0.025269045419361282, + "grad_norm": 0.09314526617527008, + "learning_rate": 2.9315126786652984e-05, + "loss": 10.6474, + "step": 5060 + }, + { + "epoch": 0.02531898424430073, + "grad_norm": 0.09635156393051147, + "learning_rate": 2.9313624871711434e-05, + "loss": 10.6464, + "step": 5070 + }, + { + "epoch": 0.02536892306924018, + "grad_norm": 0.09240912646055222, + "learning_rate": 2.931212295676988e-05, + "loss": 10.6449, + "step": 5080 + }, + { + "epoch": 0.02541886189417963, + "grad_norm": 0.09308675676584244, + "learning_rate": 2.931062104182833e-05, + "loss": 10.6455, + "step": 5090 + }, + { + "epoch": 0.02546880071911908, + "grad_norm": 0.10017043352127075, + "learning_rate": 2.930911912688678e-05, + "loss": 10.6449, + "step": 5100 + }, + { + "epoch": 0.02551873954405853, + "grad_norm": 0.1008182093501091, + "learning_rate": 2.930761721194523e-05, + "loss": 10.6452, + "step": 5110 + }, + { + "epoch": 0.02556867836899798, + "grad_norm": 0.09373480826616287, + "learning_rate": 2.930611529700368e-05, + "loss": 10.6442, + "step": 5120 + }, + { + "epoch": 0.025618617193937425, + "grad_norm": 0.0935322567820549, + "learning_rate": 2.930461338206213e-05, + "loss": 10.6445, + "step": 5130 + }, + { + "epoch": 0.025668556018876874, + "grad_norm": 0.08992544561624527, + "learning_rate": 2.930311146712058e-05, + "loss": 10.6437, + "step": 5140 + }, + { + "epoch": 0.025718494843816324, + "grad_norm": 0.09083853662014008, + "learning_rate": 2.930160955217903e-05, + "loss": 10.6428, + "step": 5150 + }, + { + "epoch": 0.025768433668755773, + "grad_norm": 0.09427718818187714, + "learning_rate": 2.930010763723748e-05, + "loss": 10.642, + "step": 5160 + }, + { + "epoch": 0.025818372493695223, + "grad_norm": 0.09764672070741653, + "learning_rate": 2.929860572229593e-05, + "loss": 10.6418, + "step": 5170 + }, + { + "epoch": 0.025868311318634672, + "grad_norm": 0.09804851561784744, + "learning_rate": 2.9297103807354376e-05, + "loss": 10.6429, + "step": 5180 + }, + { + "epoch": 0.02591825014357412, + "grad_norm": 0.095367930829525, + "learning_rate": 2.9295601892412826e-05, + "loss": 10.6437, + "step": 5190 + }, + { + "epoch": 0.02596818896851357, + "grad_norm": 0.09083083271980286, + "learning_rate": 2.9294099977471276e-05, + "loss": 10.6429, + "step": 5200 + }, + { + "epoch": 0.02601812779345302, + "grad_norm": 0.09228214621543884, + "learning_rate": 2.9292598062529726e-05, + "loss": 10.6408, + "step": 5210 + }, + { + "epoch": 0.02606806661839247, + "grad_norm": 0.09770570695400238, + "learning_rate": 2.9291096147588177e-05, + "loss": 10.6422, + "step": 5220 + }, + { + "epoch": 0.02611800544333192, + "grad_norm": 0.0948592945933342, + "learning_rate": 2.9289594232646623e-05, + "loss": 10.6408, + "step": 5230 + }, + { + "epoch": 0.02616794426827137, + "grad_norm": 0.09406052529811859, + "learning_rate": 2.9288092317705077e-05, + "loss": 10.6412, + "step": 5240 + }, + { + "epoch": 0.02621788309321082, + "grad_norm": 0.10018353909254074, + "learning_rate": 2.9286590402763524e-05, + "loss": 10.6396, + "step": 5250 + }, + { + "epoch": 0.026267821918150264, + "grad_norm": 0.08951601386070251, + "learning_rate": 2.9285088487821974e-05, + "loss": 10.642, + "step": 5260 + }, + { + "epoch": 0.026317760743089714, + "grad_norm": 0.10409606993198395, + "learning_rate": 2.9283586572880424e-05, + "loss": 10.6401, + "step": 5270 + }, + { + "epoch": 0.026367699568029163, + "grad_norm": 0.09470842033624649, + "learning_rate": 2.928208465793887e-05, + "loss": 10.6407, + "step": 5280 + }, + { + "epoch": 0.026417638392968613, + "grad_norm": 0.09823372960090637, + "learning_rate": 2.9280582742997324e-05, + "loss": 10.6392, + "step": 5290 + }, + { + "epoch": 0.026467577217908062, + "grad_norm": 0.10075034201145172, + "learning_rate": 2.927908082805577e-05, + "loss": 10.6392, + "step": 5300 + }, + { + "epoch": 0.02651751604284751, + "grad_norm": 0.09643511474132538, + "learning_rate": 2.927757891311422e-05, + "loss": 10.6394, + "step": 5310 + }, + { + "epoch": 0.02656745486778696, + "grad_norm": 0.09283813089132309, + "learning_rate": 2.927607699817267e-05, + "loss": 10.6378, + "step": 5320 + }, + { + "epoch": 0.02661739369272641, + "grad_norm": 0.09951145201921463, + "learning_rate": 2.927457508323112e-05, + "loss": 10.6374, + "step": 5330 + }, + { + "epoch": 0.02666733251766586, + "grad_norm": 0.0960063636302948, + "learning_rate": 2.9273073168289572e-05, + "loss": 10.6374, + "step": 5340 + }, + { + "epoch": 0.02671727134260531, + "grad_norm": 0.09506009519100189, + "learning_rate": 2.927157125334802e-05, + "loss": 10.6369, + "step": 5350 + }, + { + "epoch": 0.02676721016754476, + "grad_norm": 0.09488440304994583, + "learning_rate": 2.927006933840647e-05, + "loss": 10.6387, + "step": 5360 + }, + { + "epoch": 0.02681714899248421, + "grad_norm": 0.09644606709480286, + "learning_rate": 2.926856742346492e-05, + "loss": 10.6359, + "step": 5370 + }, + { + "epoch": 0.026867087817423654, + "grad_norm": 0.09125857800245285, + "learning_rate": 2.9267065508523366e-05, + "loss": 10.6361, + "step": 5380 + }, + { + "epoch": 0.026917026642363104, + "grad_norm": 0.09280921518802643, + "learning_rate": 2.926556359358182e-05, + "loss": 10.6364, + "step": 5390 + }, + { + "epoch": 0.026966965467302553, + "grad_norm": 0.09589015692472458, + "learning_rate": 2.9264061678640266e-05, + "loss": 10.6376, + "step": 5400 + }, + { + "epoch": 0.027016904292242003, + "grad_norm": 0.0969088077545166, + "learning_rate": 2.9262559763698716e-05, + "loss": 10.6358, + "step": 5410 + }, + { + "epoch": 0.027066843117181452, + "grad_norm": 0.09490292519330978, + "learning_rate": 2.9261057848757167e-05, + "loss": 10.6357, + "step": 5420 + }, + { + "epoch": 0.0271167819421209, + "grad_norm": 0.09488876163959503, + "learning_rate": 2.9259555933815613e-05, + "loss": 10.6352, + "step": 5430 + }, + { + "epoch": 0.02716672076706035, + "grad_norm": 0.09537888318300247, + "learning_rate": 2.9258054018874067e-05, + "loss": 10.6348, + "step": 5440 + }, + { + "epoch": 0.0272166595919998, + "grad_norm": 0.0909004881978035, + "learning_rate": 2.9256552103932514e-05, + "loss": 10.6343, + "step": 5450 + }, + { + "epoch": 0.02726659841693925, + "grad_norm": 0.0989050418138504, + "learning_rate": 2.9255050188990964e-05, + "loss": 10.6339, + "step": 5460 + }, + { + "epoch": 0.0273165372418787, + "grad_norm": 0.09519681334495544, + "learning_rate": 2.9253548274049414e-05, + "loss": 10.6333, + "step": 5470 + }, + { + "epoch": 0.02736647606681815, + "grad_norm": 0.09233660250902176, + "learning_rate": 2.925204635910786e-05, + "loss": 10.6336, + "step": 5480 + }, + { + "epoch": 0.0274164148917576, + "grad_norm": 0.09617756307125092, + "learning_rate": 2.9250544444166314e-05, + "loss": 10.6345, + "step": 5490 + }, + { + "epoch": 0.027466353716697048, + "grad_norm": 0.09878206998109818, + "learning_rate": 2.924904252922476e-05, + "loss": 10.6326, + "step": 5500 + }, + { + "epoch": 0.027516292541636494, + "grad_norm": 0.10213974118232727, + "learning_rate": 2.924754061428321e-05, + "loss": 10.6315, + "step": 5510 + }, + { + "epoch": 0.027566231366575943, + "grad_norm": 0.09810013324022293, + "learning_rate": 2.924603869934166e-05, + "loss": 10.6324, + "step": 5520 + }, + { + "epoch": 0.027616170191515393, + "grad_norm": 0.0984368622303009, + "learning_rate": 2.924453678440011e-05, + "loss": 10.6327, + "step": 5530 + }, + { + "epoch": 0.027666109016454842, + "grad_norm": 0.094744011759758, + "learning_rate": 2.9243034869458562e-05, + "loss": 10.6325, + "step": 5540 + }, + { + "epoch": 0.02771604784139429, + "grad_norm": 0.09556036442518234, + "learning_rate": 2.924153295451701e-05, + "loss": 10.6324, + "step": 5550 + }, + { + "epoch": 0.02776598666633374, + "grad_norm": 0.09775315970182419, + "learning_rate": 2.9240031039575462e-05, + "loss": 10.6308, + "step": 5560 + }, + { + "epoch": 0.02781592549127319, + "grad_norm": 0.09518972039222717, + "learning_rate": 2.923852912463391e-05, + "loss": 10.6312, + "step": 5570 + }, + { + "epoch": 0.02786586431621264, + "grad_norm": 0.10233526676893234, + "learning_rate": 2.9237027209692356e-05, + "loss": 10.631, + "step": 5580 + }, + { + "epoch": 0.02791580314115209, + "grad_norm": 0.09543079882860184, + "learning_rate": 2.923552529475081e-05, + "loss": 10.6295, + "step": 5590 + }, + { + "epoch": 0.02796574196609154, + "grad_norm": 0.08888249844312668, + "learning_rate": 2.9234023379809256e-05, + "loss": 10.6316, + "step": 5600 + }, + { + "epoch": 0.02801568079103099, + "grad_norm": 0.09589431434869766, + "learning_rate": 2.923252146486771e-05, + "loss": 10.6294, + "step": 5610 + }, + { + "epoch": 0.028065619615970438, + "grad_norm": 0.09286142885684967, + "learning_rate": 2.9231019549926157e-05, + "loss": 10.6302, + "step": 5620 + }, + { + "epoch": 0.028115558440909884, + "grad_norm": 0.09479831159114838, + "learning_rate": 2.9229517634984603e-05, + "loss": 10.629, + "step": 5630 + }, + { + "epoch": 0.028165497265849333, + "grad_norm": 0.09502566605806351, + "learning_rate": 2.9228015720043057e-05, + "loss": 10.6301, + "step": 5640 + }, + { + "epoch": 0.028215436090788783, + "grad_norm": 0.0879800021648407, + "learning_rate": 2.9226513805101504e-05, + "loss": 10.6291, + "step": 5650 + }, + { + "epoch": 0.028265374915728232, + "grad_norm": 0.09621425718069077, + "learning_rate": 2.9225011890159957e-05, + "loss": 10.6279, + "step": 5660 + }, + { + "epoch": 0.02831531374066768, + "grad_norm": 0.0952867642045021, + "learning_rate": 2.9223509975218404e-05, + "loss": 10.629, + "step": 5670 + }, + { + "epoch": 0.02836525256560713, + "grad_norm": 0.09745540469884872, + "learning_rate": 2.922200806027685e-05, + "loss": 10.6263, + "step": 5680 + }, + { + "epoch": 0.02841519139054658, + "grad_norm": 0.10703998804092407, + "learning_rate": 2.9220506145335305e-05, + "loss": 10.6267, + "step": 5690 + }, + { + "epoch": 0.02846513021548603, + "grad_norm": 0.09526613354682922, + "learning_rate": 2.921900423039375e-05, + "loss": 10.6286, + "step": 5700 + }, + { + "epoch": 0.02851506904042548, + "grad_norm": 0.09355132281780243, + "learning_rate": 2.9217502315452205e-05, + "loss": 10.6258, + "step": 5710 + }, + { + "epoch": 0.02856500786536493, + "grad_norm": 0.09581465274095535, + "learning_rate": 2.921600040051065e-05, + "loss": 10.6266, + "step": 5720 + }, + { + "epoch": 0.02861494669030438, + "grad_norm": 0.09699860215187073, + "learning_rate": 2.92144984855691e-05, + "loss": 10.6275, + "step": 5730 + }, + { + "epoch": 0.028664885515243828, + "grad_norm": 0.09118512272834778, + "learning_rate": 2.9212996570627552e-05, + "loss": 10.6246, + "step": 5740 + }, + { + "epoch": 0.028714824340183274, + "grad_norm": 0.09603618830442429, + "learning_rate": 2.9211494655686e-05, + "loss": 10.6266, + "step": 5750 + }, + { + "epoch": 0.028764763165122723, + "grad_norm": 0.09384205937385559, + "learning_rate": 2.9209992740744452e-05, + "loss": 10.6246, + "step": 5760 + }, + { + "epoch": 0.028814701990062173, + "grad_norm": 0.09352291375398636, + "learning_rate": 2.92084908258029e-05, + "loss": 10.6245, + "step": 5770 + }, + { + "epoch": 0.028864640815001622, + "grad_norm": 0.09608279168605804, + "learning_rate": 2.9206988910861346e-05, + "loss": 10.6247, + "step": 5780 + }, + { + "epoch": 0.02891457963994107, + "grad_norm": 0.09599608927965164, + "learning_rate": 2.92054869959198e-05, + "loss": 10.6243, + "step": 5790 + }, + { + "epoch": 0.02896451846488052, + "grad_norm": 0.10410738736391068, + "learning_rate": 2.9203985080978246e-05, + "loss": 10.6244, + "step": 5800 + }, + { + "epoch": 0.02901445728981997, + "grad_norm": 0.10078850388526917, + "learning_rate": 2.92024831660367e-05, + "loss": 10.6234, + "step": 5810 + }, + { + "epoch": 0.02906439611475942, + "grad_norm": 0.09706875681877136, + "learning_rate": 2.9200981251095147e-05, + "loss": 10.6225, + "step": 5820 + }, + { + "epoch": 0.02911433493969887, + "grad_norm": 0.09206659346818924, + "learning_rate": 2.9199479336153593e-05, + "loss": 10.6223, + "step": 5830 + }, + { + "epoch": 0.02916427376463832, + "grad_norm": 0.09456048905849457, + "learning_rate": 2.9197977421212047e-05, + "loss": 10.6215, + "step": 5840 + }, + { + "epoch": 0.02921421258957777, + "grad_norm": 0.09819670021533966, + "learning_rate": 2.9196475506270494e-05, + "loss": 10.6221, + "step": 5850 + }, + { + "epoch": 0.029264151414517218, + "grad_norm": 0.09750770032405853, + "learning_rate": 2.9194973591328947e-05, + "loss": 10.6216, + "step": 5860 + }, + { + "epoch": 0.029314090239456667, + "grad_norm": 0.09041731804609299, + "learning_rate": 2.9193471676387394e-05, + "loss": 10.6213, + "step": 5870 + }, + { + "epoch": 0.029364029064396113, + "grad_norm": 0.09223254024982452, + "learning_rate": 2.9191969761445844e-05, + "loss": 10.6214, + "step": 5880 + }, + { + "epoch": 0.029413967889335563, + "grad_norm": 0.09175052493810654, + "learning_rate": 2.9190467846504295e-05, + "loss": 10.62, + "step": 5890 + }, + { + "epoch": 0.029463906714275012, + "grad_norm": 0.0979679524898529, + "learning_rate": 2.918896593156274e-05, + "loss": 10.6207, + "step": 5900 + }, + { + "epoch": 0.02951384553921446, + "grad_norm": 0.0917288288474083, + "learning_rate": 2.9187464016621195e-05, + "loss": 10.6207, + "step": 5910 + }, + { + "epoch": 0.02956378436415391, + "grad_norm": 0.09289643913507462, + "learning_rate": 2.918596210167964e-05, + "loss": 10.6209, + "step": 5920 + }, + { + "epoch": 0.02961372318909336, + "grad_norm": 0.09195461124181747, + "learning_rate": 2.9184460186738092e-05, + "loss": 10.6197, + "step": 5930 + }, + { + "epoch": 0.02966366201403281, + "grad_norm": 0.09683648496866226, + "learning_rate": 2.9182958271796542e-05, + "loss": 10.6192, + "step": 5940 + }, + { + "epoch": 0.02971360083897226, + "grad_norm": 0.08719411492347717, + "learning_rate": 2.918145635685499e-05, + "loss": 10.619, + "step": 5950 + }, + { + "epoch": 0.02976353966391171, + "grad_norm": 0.0998111441731453, + "learning_rate": 2.9179954441913442e-05, + "loss": 10.6193, + "step": 5960 + }, + { + "epoch": 0.02981347848885116, + "grad_norm": 0.09359613060951233, + "learning_rate": 2.917845252697189e-05, + "loss": 10.618, + "step": 5970 + }, + { + "epoch": 0.029863417313790608, + "grad_norm": 0.09427807480096817, + "learning_rate": 2.917695061203034e-05, + "loss": 10.6174, + "step": 5980 + }, + { + "epoch": 0.029913356138730057, + "grad_norm": 0.09695295989513397, + "learning_rate": 2.917544869708879e-05, + "loss": 10.6173, + "step": 5990 + }, + { + "epoch": 0.029963294963669503, + "grad_norm": 0.10022547841072083, + "learning_rate": 2.9173946782147236e-05, + "loss": 10.6184, + "step": 6000 + }, + { + "epoch": 0.030013233788608953, + "grad_norm": 0.09429972618818283, + "learning_rate": 2.917244486720569e-05, + "loss": 10.6166, + "step": 6010 + }, + { + "epoch": 0.030063172613548402, + "grad_norm": 0.09150966256856918, + "learning_rate": 2.9170942952264137e-05, + "loss": 10.6176, + "step": 6020 + }, + { + "epoch": 0.03011311143848785, + "grad_norm": 0.09376371651887894, + "learning_rate": 2.9169441037322587e-05, + "loss": 10.6167, + "step": 6030 + }, + { + "epoch": 0.0301630502634273, + "grad_norm": 0.09400255978107452, + "learning_rate": 2.9167939122381037e-05, + "loss": 10.6147, + "step": 6040 + }, + { + "epoch": 0.03021298908836675, + "grad_norm": 0.10719768702983856, + "learning_rate": 2.9166437207439484e-05, + "loss": 10.6168, + "step": 6050 + }, + { + "epoch": 0.0302629279133062, + "grad_norm": 0.0984666720032692, + "learning_rate": 2.9164935292497937e-05, + "loss": 10.6148, + "step": 6060 + }, + { + "epoch": 0.03031286673824565, + "grad_norm": 0.09107008576393127, + "learning_rate": 2.9163433377556384e-05, + "loss": 10.6148, + "step": 6070 + }, + { + "epoch": 0.0303628055631851, + "grad_norm": 0.09866014868021011, + "learning_rate": 2.9161931462614834e-05, + "loss": 10.6146, + "step": 6080 + }, + { + "epoch": 0.03041274438812455, + "grad_norm": 0.0949750542640686, + "learning_rate": 2.9160429547673285e-05, + "loss": 10.6148, + "step": 6090 + }, + { + "epoch": 0.030462683213063998, + "grad_norm": 0.09634987264871597, + "learning_rate": 2.915892763273173e-05, + "loss": 10.6139, + "step": 6100 + }, + { + "epoch": 0.030512622038003447, + "grad_norm": 0.09872616082429886, + "learning_rate": 2.9157425717790185e-05, + "loss": 10.615, + "step": 6110 + }, + { + "epoch": 0.030562560862942893, + "grad_norm": 0.09585590660572052, + "learning_rate": 2.915592380284863e-05, + "loss": 10.6138, + "step": 6120 + }, + { + "epoch": 0.030612499687882343, + "grad_norm": 0.09388266503810883, + "learning_rate": 2.9154421887907082e-05, + "loss": 10.614, + "step": 6130 + }, + { + "epoch": 0.030662438512821792, + "grad_norm": 0.10067202895879745, + "learning_rate": 2.9152919972965532e-05, + "loss": 10.6142, + "step": 6140 + }, + { + "epoch": 0.03071237733776124, + "grad_norm": 0.09900005161762238, + "learning_rate": 2.915141805802398e-05, + "loss": 10.6135, + "step": 6150 + }, + { + "epoch": 0.03076231616270069, + "grad_norm": 0.09707643836736679, + "learning_rate": 2.9149916143082432e-05, + "loss": 10.6129, + "step": 6160 + }, + { + "epoch": 0.03081225498764014, + "grad_norm": 0.09792651236057281, + "learning_rate": 2.914841422814088e-05, + "loss": 10.6117, + "step": 6170 + }, + { + "epoch": 0.03086219381257959, + "grad_norm": 0.09310603886842728, + "learning_rate": 2.914691231319933e-05, + "loss": 10.6116, + "step": 6180 + }, + { + "epoch": 0.03091213263751904, + "grad_norm": 0.09561890363693237, + "learning_rate": 2.914541039825778e-05, + "loss": 10.6131, + "step": 6190 + }, + { + "epoch": 0.03096207146245849, + "grad_norm": 0.09028071165084839, + "learning_rate": 2.914390848331623e-05, + "loss": 10.6135, + "step": 6200 + }, + { + "epoch": 0.03101201028739794, + "grad_norm": 0.09210582822561264, + "learning_rate": 2.914240656837468e-05, + "loss": 10.6127, + "step": 6210 + }, + { + "epoch": 0.031061949112337388, + "grad_norm": 0.09679680317640305, + "learning_rate": 2.9140904653433127e-05, + "loss": 10.611, + "step": 6220 + }, + { + "epoch": 0.031111887937276837, + "grad_norm": 0.09834344685077667, + "learning_rate": 2.9139402738491577e-05, + "loss": 10.6099, + "step": 6230 + }, + { + "epoch": 0.031161826762216287, + "grad_norm": 0.09306725114583969, + "learning_rate": 2.9137900823550027e-05, + "loss": 10.6113, + "step": 6240 + }, + { + "epoch": 0.031211765587155733, + "grad_norm": 0.09439186751842499, + "learning_rate": 2.9136398908608477e-05, + "loss": 10.6104, + "step": 6250 + }, + { + "epoch": 0.031261704412095186, + "grad_norm": 0.10274600237607956, + "learning_rate": 2.9134896993666927e-05, + "loss": 10.6088, + "step": 6260 + }, + { + "epoch": 0.031311643237034635, + "grad_norm": 0.10353299975395203, + "learning_rate": 2.9133395078725374e-05, + "loss": 10.6096, + "step": 6270 + }, + { + "epoch": 0.031361582061974085, + "grad_norm": 0.09838922321796417, + "learning_rate": 2.9131893163783824e-05, + "loss": 10.6104, + "step": 6280 + }, + { + "epoch": 0.031411520886913534, + "grad_norm": 0.0952482670545578, + "learning_rate": 2.9130391248842275e-05, + "loss": 10.6079, + "step": 6290 + }, + { + "epoch": 0.03146145971185298, + "grad_norm": 0.0897747352719307, + "learning_rate": 2.9128889333900725e-05, + "loss": 10.6092, + "step": 6300 + }, + { + "epoch": 0.031511398536792426, + "grad_norm": 0.08722511678934097, + "learning_rate": 2.9127387418959175e-05, + "loss": 10.611, + "step": 6310 + }, + { + "epoch": 0.031561337361731875, + "grad_norm": 0.09219997376203537, + "learning_rate": 2.9125885504017622e-05, + "loss": 10.6096, + "step": 6320 + }, + { + "epoch": 0.031611276186671325, + "grad_norm": 0.08804293721914291, + "learning_rate": 2.9124383589076072e-05, + "loss": 10.6068, + "step": 6330 + }, + { + "epoch": 0.031661215011610774, + "grad_norm": 0.09116089344024658, + "learning_rate": 2.9122881674134522e-05, + "loss": 10.6073, + "step": 6340 + }, + { + "epoch": 0.031711153836550224, + "grad_norm": 0.10001252591609955, + "learning_rate": 2.9121379759192972e-05, + "loss": 10.6077, + "step": 6350 + }, + { + "epoch": 0.03176109266148967, + "grad_norm": 0.0931568518280983, + "learning_rate": 2.9119877844251422e-05, + "loss": 10.6072, + "step": 6360 + }, + { + "epoch": 0.03181103148642912, + "grad_norm": 0.09584708511829376, + "learning_rate": 2.911837592930987e-05, + "loss": 10.6061, + "step": 6370 + }, + { + "epoch": 0.03186097031136857, + "grad_norm": 0.09385517984628677, + "learning_rate": 2.911687401436832e-05, + "loss": 10.6075, + "step": 6380 + }, + { + "epoch": 0.03191090913630802, + "grad_norm": 0.09303200244903564, + "learning_rate": 2.911537209942677e-05, + "loss": 10.607, + "step": 6390 + }, + { + "epoch": 0.03196084796124747, + "grad_norm": 0.09322622418403625, + "learning_rate": 2.911387018448522e-05, + "loss": 10.6056, + "step": 6400 + }, + { + "epoch": 0.03201078678618692, + "grad_norm": 0.09804125130176544, + "learning_rate": 2.911236826954367e-05, + "loss": 10.6035, + "step": 6410 + }, + { + "epoch": 0.03206072561112637, + "grad_norm": 0.09895630925893784, + "learning_rate": 2.9110866354602117e-05, + "loss": 10.6053, + "step": 6420 + }, + { + "epoch": 0.03211066443606582, + "grad_norm": 0.09591073542833328, + "learning_rate": 2.9109364439660567e-05, + "loss": 10.6044, + "step": 6430 + }, + { + "epoch": 0.03216060326100527, + "grad_norm": 0.08683682233095169, + "learning_rate": 2.9107862524719017e-05, + "loss": 10.6046, + "step": 6440 + }, + { + "epoch": 0.03221054208594472, + "grad_norm": 0.09420903772115707, + "learning_rate": 2.9106360609777467e-05, + "loss": 10.6029, + "step": 6450 + }, + { + "epoch": 0.03226048091088417, + "grad_norm": 0.09442789852619171, + "learning_rate": 2.9104858694835917e-05, + "loss": 10.6037, + "step": 6460 + }, + { + "epoch": 0.03231041973582362, + "grad_norm": 0.09980372339487076, + "learning_rate": 2.9103356779894364e-05, + "loss": 10.6028, + "step": 6470 + }, + { + "epoch": 0.03236035856076307, + "grad_norm": 0.0942731723189354, + "learning_rate": 2.9101854864952814e-05, + "loss": 10.603, + "step": 6480 + }, + { + "epoch": 0.032410297385702516, + "grad_norm": 0.09348214417695999, + "learning_rate": 2.9100352950011265e-05, + "loss": 10.6036, + "step": 6490 + }, + { + "epoch": 0.032460236210641966, + "grad_norm": 0.09484933316707611, + "learning_rate": 2.9098851035069715e-05, + "loss": 10.6022, + "step": 6500 + }, + { + "epoch": 0.032510175035581415, + "grad_norm": 0.09451691806316376, + "learning_rate": 2.9097349120128165e-05, + "loss": 10.6036, + "step": 6510 + }, + { + "epoch": 0.032560113860520865, + "grad_norm": 0.0937938317656517, + "learning_rate": 2.9095847205186612e-05, + "loss": 10.6008, + "step": 6520 + }, + { + "epoch": 0.032610052685460314, + "grad_norm": 0.09523924440145493, + "learning_rate": 2.9094345290245062e-05, + "loss": 10.601, + "step": 6530 + }, + { + "epoch": 0.03265999151039976, + "grad_norm": 0.09146000444889069, + "learning_rate": 2.9092843375303512e-05, + "loss": 10.601, + "step": 6540 + }, + { + "epoch": 0.03270993033533921, + "grad_norm": 0.09606587141752243, + "learning_rate": 2.9091341460361962e-05, + "loss": 10.5993, + "step": 6550 + }, + { + "epoch": 0.032759869160278655, + "grad_norm": 0.09281494468450546, + "learning_rate": 2.9089839545420412e-05, + "loss": 10.6012, + "step": 6560 + }, + { + "epoch": 0.032809807985218105, + "grad_norm": 0.09290701895952225, + "learning_rate": 2.9088337630478863e-05, + "loss": 10.6008, + "step": 6570 + }, + { + "epoch": 0.032859746810157554, + "grad_norm": 0.10137023776769638, + "learning_rate": 2.908683571553731e-05, + "loss": 10.6012, + "step": 6580 + }, + { + "epoch": 0.032909685635097004, + "grad_norm": 0.09537509083747864, + "learning_rate": 2.908533380059576e-05, + "loss": 10.599, + "step": 6590 + }, + { + "epoch": 0.03295962446003645, + "grad_norm": 0.0944240391254425, + "learning_rate": 2.908383188565421e-05, + "loss": 10.6006, + "step": 6600 + }, + { + "epoch": 0.0330095632849759, + "grad_norm": 0.09300459921360016, + "learning_rate": 2.908232997071266e-05, + "loss": 10.6005, + "step": 6610 + }, + { + "epoch": 0.03305950210991535, + "grad_norm": 0.09325224906206131, + "learning_rate": 2.908082805577111e-05, + "loss": 10.5999, + "step": 6620 + }, + { + "epoch": 0.0331094409348548, + "grad_norm": 0.09946674108505249, + "learning_rate": 2.9079326140829557e-05, + "loss": 10.597, + "step": 6630 + }, + { + "epoch": 0.03315937975979425, + "grad_norm": 0.09831472486257553, + "learning_rate": 2.9077824225888007e-05, + "loss": 10.5973, + "step": 6640 + }, + { + "epoch": 0.0332093185847337, + "grad_norm": 0.09227102994918823, + "learning_rate": 2.9076322310946457e-05, + "loss": 10.5992, + "step": 6650 + }, + { + "epoch": 0.03325925740967315, + "grad_norm": 0.094718337059021, + "learning_rate": 2.9074820396004907e-05, + "loss": 10.5978, + "step": 6660 + }, + { + "epoch": 0.0333091962346126, + "grad_norm": 0.09251998364925385, + "learning_rate": 2.9073318481063358e-05, + "loss": 10.5985, + "step": 6670 + }, + { + "epoch": 0.03335913505955205, + "grad_norm": 0.0993405282497406, + "learning_rate": 2.9071816566121804e-05, + "loss": 10.599, + "step": 6680 + }, + { + "epoch": 0.0334090738844915, + "grad_norm": 0.09692016988992691, + "learning_rate": 2.9070314651180255e-05, + "loss": 10.5975, + "step": 6690 + }, + { + "epoch": 0.03345901270943095, + "grad_norm": 0.0930512323975563, + "learning_rate": 2.9068812736238705e-05, + "loss": 10.5965, + "step": 6700 + }, + { + "epoch": 0.0335089515343704, + "grad_norm": 0.09981938451528549, + "learning_rate": 2.9067310821297155e-05, + "loss": 10.5969, + "step": 6710 + }, + { + "epoch": 0.03355889035930985, + "grad_norm": 0.09343113005161285, + "learning_rate": 2.9065808906355605e-05, + "loss": 10.5961, + "step": 6720 + }, + { + "epoch": 0.033608829184249296, + "grad_norm": 0.09517756849527359, + "learning_rate": 2.9064306991414052e-05, + "loss": 10.5965, + "step": 6730 + }, + { + "epoch": 0.033658768009188746, + "grad_norm": 0.090797059237957, + "learning_rate": 2.9062805076472502e-05, + "loss": 10.5962, + "step": 6740 + }, + { + "epoch": 0.033708706834128195, + "grad_norm": 0.09465719759464264, + "learning_rate": 2.9061303161530952e-05, + "loss": 10.5961, + "step": 6750 + }, + { + "epoch": 0.033758645659067645, + "grad_norm": 0.09611517190933228, + "learning_rate": 2.9059801246589402e-05, + "loss": 10.5927, + "step": 6760 + }, + { + "epoch": 0.033808584484007094, + "grad_norm": 0.10133732110261917, + "learning_rate": 2.9058299331647853e-05, + "loss": 10.5945, + "step": 6770 + }, + { + "epoch": 0.03385852330894654, + "grad_norm": 0.09177147597074509, + "learning_rate": 2.90567974167063e-05, + "loss": 10.5955, + "step": 6780 + }, + { + "epoch": 0.03390846213388599, + "grad_norm": 0.09687118232250214, + "learning_rate": 2.905529550176475e-05, + "loss": 10.5931, + "step": 6790 + }, + { + "epoch": 0.033958400958825435, + "grad_norm": 0.09665414690971375, + "learning_rate": 2.90537935868232e-05, + "loss": 10.5954, + "step": 6800 + }, + { + "epoch": 0.034008339783764885, + "grad_norm": 0.09662660956382751, + "learning_rate": 2.905229167188165e-05, + "loss": 10.5924, + "step": 6810 + }, + { + "epoch": 0.034058278608704334, + "grad_norm": 0.09507598727941513, + "learning_rate": 2.90507897569401e-05, + "loss": 10.5926, + "step": 6820 + }, + { + "epoch": 0.034108217433643784, + "grad_norm": 0.09256746619939804, + "learning_rate": 2.9049287841998547e-05, + "loss": 10.5938, + "step": 6830 + }, + { + "epoch": 0.03415815625858323, + "grad_norm": 0.09468525648117065, + "learning_rate": 2.9047785927056997e-05, + "loss": 10.5927, + "step": 6840 + }, + { + "epoch": 0.03420809508352268, + "grad_norm": 0.09714210778474808, + "learning_rate": 2.9046284012115447e-05, + "loss": 10.5932, + "step": 6850 + }, + { + "epoch": 0.03425803390846213, + "grad_norm": 0.09986428171396255, + "learning_rate": 2.9044782097173897e-05, + "loss": 10.5918, + "step": 6860 + }, + { + "epoch": 0.03430797273340158, + "grad_norm": 0.09874702244997025, + "learning_rate": 2.9043280182232348e-05, + "loss": 10.5937, + "step": 6870 + }, + { + "epoch": 0.03435791155834103, + "grad_norm": 0.09810672700405121, + "learning_rate": 2.9041778267290794e-05, + "loss": 10.591, + "step": 6880 + }, + { + "epoch": 0.03440785038328048, + "grad_norm": 0.09897971153259277, + "learning_rate": 2.9040276352349248e-05, + "loss": 10.5932, + "step": 6890 + }, + { + "epoch": 0.03445778920821993, + "grad_norm": 0.09357500821352005, + "learning_rate": 2.9038774437407695e-05, + "loss": 10.5916, + "step": 6900 + }, + { + "epoch": 0.03450772803315938, + "grad_norm": 0.09426924586296082, + "learning_rate": 2.9037272522466145e-05, + "loss": 10.5915, + "step": 6910 + }, + { + "epoch": 0.03455766685809883, + "grad_norm": 0.09487421810626984, + "learning_rate": 2.9035770607524595e-05, + "loss": 10.5889, + "step": 6920 + }, + { + "epoch": 0.03460760568303828, + "grad_norm": 0.08992663025856018, + "learning_rate": 2.9034268692583042e-05, + "loss": 10.5891, + "step": 6930 + }, + { + "epoch": 0.03465754450797773, + "grad_norm": 0.0984937772154808, + "learning_rate": 2.9032766777641495e-05, + "loss": 10.5908, + "step": 6940 + }, + { + "epoch": 0.03470748333291718, + "grad_norm": 0.08962874859571457, + "learning_rate": 2.9031264862699942e-05, + "loss": 10.59, + "step": 6950 + }, + { + "epoch": 0.03475742215785663, + "grad_norm": 0.09817676246166229, + "learning_rate": 2.9029762947758392e-05, + "loss": 10.5901, + "step": 6960 + }, + { + "epoch": 0.034807360982796076, + "grad_norm": 0.09869422018527985, + "learning_rate": 2.9028261032816843e-05, + "loss": 10.589, + "step": 6970 + }, + { + "epoch": 0.034857299807735526, + "grad_norm": 0.09235729277133942, + "learning_rate": 2.9026759117875293e-05, + "loss": 10.588, + "step": 6980 + }, + { + "epoch": 0.034907238632674975, + "grad_norm": 0.0891089141368866, + "learning_rate": 2.9025257202933743e-05, + "loss": 16.5832, + "step": 6990 + }, + { + "epoch": 0.034957177457614425, + "grad_norm": 0.0962633341550827, + "learning_rate": 2.902375528799219e-05, + "loss": 22.1721, + "step": 7000 + }, + { + "epoch": 0.035007116282553874, + "grad_norm": 0.09975840896368027, + "learning_rate": 2.902225337305064e-05, + "loss": 10.604, + "step": 7010 + }, + { + "epoch": 0.03505705510749332, + "grad_norm": 0.09406078606843948, + "learning_rate": 2.902075145810909e-05, + "loss": 12.82, + "step": 7020 + }, + { + "epoch": 0.03510699393243277, + "grad_norm": 0.09256692975759506, + "learning_rate": 2.901924954316754e-05, + "loss": 10.5856, + "step": 7030 + }, + { + "epoch": 0.03515693275737222, + "grad_norm": 0.09028827399015427, + "learning_rate": 2.901774762822599e-05, + "loss": 10.5869, + "step": 7040 + }, + { + "epoch": 0.035206871582311665, + "grad_norm": 0.0957457572221756, + "learning_rate": 2.9016245713284437e-05, + "loss": 10.5873, + "step": 7050 + }, + { + "epoch": 0.035256810407251114, + "grad_norm": 0.09316723793745041, + "learning_rate": 2.9014743798342887e-05, + "loss": 10.5875, + "step": 7060 + }, + { + "epoch": 0.035306749232190564, + "grad_norm": 0.09424072504043579, + "learning_rate": 2.9013241883401338e-05, + "loss": 10.5852, + "step": 7070 + }, + { + "epoch": 0.03535668805713001, + "grad_norm": 0.09654801338911057, + "learning_rate": 2.9011739968459788e-05, + "loss": 12.5143, + "step": 7080 + }, + { + "epoch": 0.03540662688206946, + "grad_norm": 0.10188226401805878, + "learning_rate": 2.9010238053518238e-05, + "loss": 10.5845, + "step": 7090 + }, + { + "epoch": 0.03545656570700891, + "grad_norm": 0.0997641459107399, + "learning_rate": 2.9008736138576685e-05, + "loss": 12.043, + "step": 7100 + }, + { + "epoch": 0.03550650453194836, + "grad_norm": 0.09502726793289185, + "learning_rate": 2.9007234223635135e-05, + "loss": 10.5843, + "step": 7110 + }, + { + "epoch": 0.03555644335688781, + "grad_norm": 0.09691105782985687, + "learning_rate": 2.9005732308693585e-05, + "loss": 10.5821, + "step": 7120 + }, + { + "epoch": 0.03560638218182726, + "grad_norm": 0.09250783175230026, + "learning_rate": 2.9004230393752035e-05, + "loss": 10.5865, + "step": 7130 + }, + { + "epoch": 0.03565632100676671, + "grad_norm": 0.09487821161746979, + "learning_rate": 2.9002728478810486e-05, + "loss": 10.5849, + "step": 7140 + }, + { + "epoch": 0.03570625983170616, + "grad_norm": 3425.232666015625, + "learning_rate": 2.9001226563868932e-05, + "loss": 10.598, + "step": 7150 + }, + { + "epoch": 0.03575619865664561, + "grad_norm": 0.10075180232524872, + "learning_rate": 2.8999724648927382e-05, + "loss": 10.5865, + "step": 7160 + }, + { + "epoch": 0.03580613748158506, + "grad_norm": 0.09755649417638779, + "learning_rate": 2.8998222733985833e-05, + "loss": 10.5843, + "step": 7170 + }, + { + "epoch": 0.03585607630652451, + "grad_norm": 0.09459280222654343, + "learning_rate": 2.8996720819044283e-05, + "loss": 10.5831, + "step": 7180 + }, + { + "epoch": 0.03590601513146396, + "grad_norm": 0.09937462210655212, + "learning_rate": 2.8995218904102733e-05, + "loss": 10.5837, + "step": 7190 + }, + { + "epoch": 0.03595595395640341, + "grad_norm": 0.09443425387144089, + "learning_rate": 2.899371698916118e-05, + "loss": 10.5812, + "step": 7200 + }, + { + "epoch": 0.036005892781342856, + "grad_norm": 0.0933324322104454, + "learning_rate": 2.8992215074219633e-05, + "loss": 10.5829, + "step": 7210 + }, + { + "epoch": 0.036055831606282306, + "grad_norm": 0.09167325496673584, + "learning_rate": 2.899071315927808e-05, + "loss": 10.5823, + "step": 7220 + }, + { + "epoch": 0.036105770431221755, + "grad_norm": 0.09948615729808807, + "learning_rate": 2.898921124433653e-05, + "loss": 10.5817, + "step": 7230 + }, + { + "epoch": 0.036155709256161205, + "grad_norm": 0.09661941975355148, + "learning_rate": 2.898770932939498e-05, + "loss": 10.58, + "step": 7240 + }, + { + "epoch": 0.036205648081100654, + "grad_norm": 0.09533500671386719, + "learning_rate": 2.8986207414453427e-05, + "loss": 10.5822, + "step": 7250 + }, + { + "epoch": 0.0362555869060401, + "grad_norm": 0.09911171346902847, + "learning_rate": 2.898470549951188e-05, + "loss": 10.5812, + "step": 7260 + }, + { + "epoch": 0.03630552573097955, + "grad_norm": 0.09520751982927322, + "learning_rate": 2.8983203584570328e-05, + "loss": 10.5781, + "step": 7270 + }, + { + "epoch": 0.036355464555919, + "grad_norm": 0.09380774945020676, + "learning_rate": 2.8981701669628778e-05, + "loss": 10.5798, + "step": 7280 + }, + { + "epoch": 0.03640540338085845, + "grad_norm": 0.09551588445901871, + "learning_rate": 2.8980199754687228e-05, + "loss": 10.5789, + "step": 7290 + }, + { + "epoch": 0.036455342205797894, + "grad_norm": 0.09562008827924728, + "learning_rate": 2.8978697839745675e-05, + "loss": 10.5788, + "step": 7300 + }, + { + "epoch": 0.036505281030737344, + "grad_norm": 0.09640656411647797, + "learning_rate": 2.897719592480413e-05, + "loss": 10.5793, + "step": 7310 + }, + { + "epoch": 0.03655521985567679, + "grad_norm": 0.09420552849769592, + "learning_rate": 2.8975694009862575e-05, + "loss": 10.5776, + "step": 7320 + }, + { + "epoch": 0.03660515868061624, + "grad_norm": 0.09690862894058228, + "learning_rate": 2.8974192094921025e-05, + "loss": 10.577, + "step": 7330 + }, + { + "epoch": 0.03665509750555569, + "grad_norm": 0.09202788025140762, + "learning_rate": 2.8972690179979476e-05, + "loss": 10.5785, + "step": 7340 + }, + { + "epoch": 0.03670503633049514, + "grad_norm": 0.08765114843845367, + "learning_rate": 2.8971188265037922e-05, + "loss": 10.5781, + "step": 7350 + }, + { + "epoch": 0.03675497515543459, + "grad_norm": 0.09262295067310333, + "learning_rate": 2.8969686350096376e-05, + "loss": 10.5772, + "step": 7360 + }, + { + "epoch": 0.03680491398037404, + "grad_norm": 0.09478264302015305, + "learning_rate": 2.8968184435154823e-05, + "loss": 10.5783, + "step": 7370 + }, + { + "epoch": 0.03685485280531349, + "grad_norm": 0.09246762841939926, + "learning_rate": 2.8966682520213273e-05, + "loss": 10.5775, + "step": 7380 + }, + { + "epoch": 0.03690479163025294, + "grad_norm": 0.0920056626200676, + "learning_rate": 2.8965180605271723e-05, + "loss": 10.5771, + "step": 7390 + }, + { + "epoch": 0.03695473045519239, + "grad_norm": 0.09541013091802597, + "learning_rate": 2.896367869033017e-05, + "loss": 10.5757, + "step": 7400 + }, + { + "epoch": 0.03700466928013184, + "grad_norm": 0.0953700914978981, + "learning_rate": 2.8962176775388623e-05, + "loss": 10.5765, + "step": 7410 + }, + { + "epoch": 0.03705460810507129, + "grad_norm": 0.09888526797294617, + "learning_rate": 2.896067486044707e-05, + "loss": 10.5757, + "step": 7420 + }, + { + "epoch": 0.03710454693001074, + "grad_norm": 0.09751054644584656, + "learning_rate": 2.895917294550552e-05, + "loss": 10.5733, + "step": 7430 + }, + { + "epoch": 0.03715448575495019, + "grad_norm": 0.095875583589077, + "learning_rate": 2.895767103056397e-05, + "loss": 10.5762, + "step": 7440 + }, + { + "epoch": 0.037204424579889636, + "grad_norm": 0.0908399224281311, + "learning_rate": 2.8956169115622417e-05, + "loss": 10.5755, + "step": 7450 + }, + { + "epoch": 0.037254363404829086, + "grad_norm": 0.09418118000030518, + "learning_rate": 2.895466720068087e-05, + "loss": 10.5738, + "step": 7460 + }, + { + "epoch": 0.037304302229768535, + "grad_norm": 0.09714969247579575, + "learning_rate": 2.8953165285739318e-05, + "loss": 10.5742, + "step": 7470 + }, + { + "epoch": 0.037354241054707985, + "grad_norm": 0.09826507419347763, + "learning_rate": 2.8951663370797768e-05, + "loss": 10.5731, + "step": 7480 + }, + { + "epoch": 0.037404179879647434, + "grad_norm": 0.0948546826839447, + "learning_rate": 2.8950161455856218e-05, + "loss": 10.5723, + "step": 7490 + }, + { + "epoch": 0.03745411870458688, + "grad_norm": 0.09687359631061554, + "learning_rate": 2.8948659540914665e-05, + "loss": 10.5733, + "step": 7500 + }, + { + "epoch": 0.03750405752952633, + "grad_norm": 0.10149165242910385, + "learning_rate": 2.894715762597312e-05, + "loss": 10.5732, + "step": 7510 + }, + { + "epoch": 0.03755399635446578, + "grad_norm": 0.09481348842382431, + "learning_rate": 2.8945655711031565e-05, + "loss": 10.5732, + "step": 7520 + }, + { + "epoch": 0.03760393517940523, + "grad_norm": 0.09526333957910538, + "learning_rate": 2.894415379609002e-05, + "loss": 10.5736, + "step": 7530 + }, + { + "epoch": 0.03765387400434468, + "grad_norm": 0.09095079451799393, + "learning_rate": 2.8942651881148466e-05, + "loss": 10.5709, + "step": 7540 + }, + { + "epoch": 0.037703812829284124, + "grad_norm": 0.09532736986875534, + "learning_rate": 2.8941149966206912e-05, + "loss": 10.5712, + "step": 7550 + }, + { + "epoch": 0.03775375165422357, + "grad_norm": 0.09308824688196182, + "learning_rate": 2.8939648051265366e-05, + "loss": 10.5711, + "step": 7560 + }, + { + "epoch": 0.03780369047916302, + "grad_norm": 0.0935669094324112, + "learning_rate": 2.8938146136323813e-05, + "loss": 10.5715, + "step": 7570 + }, + { + "epoch": 0.03785362930410247, + "grad_norm": 0.09036646783351898, + "learning_rate": 2.8936644221382266e-05, + "loss": 10.5704, + "step": 7580 + }, + { + "epoch": 0.03790356812904192, + "grad_norm": 0.09510631859302521, + "learning_rate": 2.8935142306440713e-05, + "loss": 10.5709, + "step": 7590 + }, + { + "epoch": 0.03795350695398137, + "grad_norm": 0.09474785625934601, + "learning_rate": 2.893364039149916e-05, + "loss": 10.571, + "step": 7600 + }, + { + "epoch": 0.03800344577892082, + "grad_norm": 0.09734784811735153, + "learning_rate": 2.8932138476557613e-05, + "loss": 10.5693, + "step": 7610 + }, + { + "epoch": 0.03805338460386027, + "grad_norm": 0.10142108052968979, + "learning_rate": 2.893063656161606e-05, + "loss": 10.5701, + "step": 7620 + }, + { + "epoch": 0.03810332342879972, + "grad_norm": 0.09453453868627548, + "learning_rate": 2.8929134646674514e-05, + "loss": 10.5697, + "step": 7630 + }, + { + "epoch": 0.03815326225373917, + "grad_norm": 0.09844522923231125, + "learning_rate": 2.892763273173296e-05, + "loss": 10.5688, + "step": 7640 + }, + { + "epoch": 0.03820320107867862, + "grad_norm": 0.10033461451530457, + "learning_rate": 2.8926130816791407e-05, + "loss": 10.5689, + "step": 7650 + }, + { + "epoch": 0.03825313990361807, + "grad_norm": 0.10299541801214218, + "learning_rate": 2.892462890184986e-05, + "loss": 10.5693, + "step": 7660 + }, + { + "epoch": 0.03830307872855752, + "grad_norm": 0.09430062025785446, + "learning_rate": 2.8923126986908308e-05, + "loss": 10.5672, + "step": 7670 + }, + { + "epoch": 0.03835301755349697, + "grad_norm": 0.10111190378665924, + "learning_rate": 2.892162507196676e-05, + "loss": 10.5674, + "step": 7680 + }, + { + "epoch": 0.038402956378436416, + "grad_norm": 0.09100598096847534, + "learning_rate": 2.8920123157025208e-05, + "loss": 10.5685, + "step": 7690 + }, + { + "epoch": 0.038452895203375866, + "grad_norm": 0.09059002995491028, + "learning_rate": 2.8918621242083655e-05, + "loss": 10.5676, + "step": 7700 + }, + { + "epoch": 0.038502834028315315, + "grad_norm": 0.09399350732564926, + "learning_rate": 2.891711932714211e-05, + "loss": 10.568, + "step": 7710 + }, + { + "epoch": 0.038552772853254764, + "grad_norm": 0.09312798827886581, + "learning_rate": 2.8915617412200555e-05, + "loss": 10.5672, + "step": 7720 + }, + { + "epoch": 0.038602711678194214, + "grad_norm": 0.0949626937508583, + "learning_rate": 2.891411549725901e-05, + "loss": 10.5663, + "step": 7730 + }, + { + "epoch": 0.03865265050313366, + "grad_norm": 0.08603295683860779, + "learning_rate": 2.8912613582317456e-05, + "loss": 10.5665, + "step": 7740 + }, + { + "epoch": 0.03870258932807311, + "grad_norm": 0.09113872051239014, + "learning_rate": 2.8911111667375902e-05, + "loss": 10.567, + "step": 7750 + }, + { + "epoch": 0.03875252815301256, + "grad_norm": 0.09675870090723038, + "learning_rate": 2.8909609752434356e-05, + "loss": 10.5649, + "step": 7760 + }, + { + "epoch": 0.03880246697795201, + "grad_norm": 0.09332741051912308, + "learning_rate": 2.8908107837492803e-05, + "loss": 10.5656, + "step": 7770 + }, + { + "epoch": 0.03885240580289146, + "grad_norm": 0.09331821650266647, + "learning_rate": 2.8906605922551256e-05, + "loss": 10.5659, + "step": 7780 + }, + { + "epoch": 0.038902344627830904, + "grad_norm": 0.09395716339349747, + "learning_rate": 2.8905104007609703e-05, + "loss": 10.5656, + "step": 7790 + }, + { + "epoch": 0.03895228345277035, + "grad_norm": 0.09877283871173859, + "learning_rate": 2.890360209266815e-05, + "loss": 10.5653, + "step": 7800 + }, + { + "epoch": 0.0390022222777098, + "grad_norm": 0.09474792331457138, + "learning_rate": 2.8902100177726603e-05, + "loss": 10.5637, + "step": 7810 + }, + { + "epoch": 0.03905216110264925, + "grad_norm": 0.09844552725553513, + "learning_rate": 2.890059826278505e-05, + "loss": 10.5633, + "step": 7820 + }, + { + "epoch": 0.0391020999275887, + "grad_norm": 0.09523854404687881, + "learning_rate": 2.8899096347843504e-05, + "loss": 10.5637, + "step": 7830 + }, + { + "epoch": 0.03915203875252815, + "grad_norm": 0.09699324518442154, + "learning_rate": 2.889759443290195e-05, + "loss": 10.5631, + "step": 7840 + }, + { + "epoch": 0.0392019775774676, + "grad_norm": 0.09227604418992996, + "learning_rate": 2.88960925179604e-05, + "loss": 10.5623, + "step": 7850 + }, + { + "epoch": 0.03925191640240705, + "grad_norm": 0.0924190804362297, + "learning_rate": 2.889459060301885e-05, + "loss": 10.5623, + "step": 7860 + }, + { + "epoch": 0.0393018552273465, + "grad_norm": 0.10132279247045517, + "learning_rate": 2.8893088688077298e-05, + "loss": 10.5635, + "step": 7870 + }, + { + "epoch": 0.03935179405228595, + "grad_norm": 0.09170647710561752, + "learning_rate": 2.889158677313575e-05, + "loss": 10.5616, + "step": 7880 + }, + { + "epoch": 0.0394017328772254, + "grad_norm": 0.09620120376348495, + "learning_rate": 2.8890084858194198e-05, + "loss": 10.5616, + "step": 7890 + }, + { + "epoch": 0.03945167170216485, + "grad_norm": 0.09894801676273346, + "learning_rate": 2.8888582943252648e-05, + "loss": 10.5605, + "step": 7900 + }, + { + "epoch": 0.0395016105271043, + "grad_norm": 0.09626492857933044, + "learning_rate": 2.88870810283111e-05, + "loss": 10.5602, + "step": 7910 + }, + { + "epoch": 0.03955154935204375, + "grad_norm": 0.09040050953626633, + "learning_rate": 2.8885579113369545e-05, + "loss": 10.5595, + "step": 7920 + }, + { + "epoch": 0.039601488176983196, + "grad_norm": 0.09787009656429291, + "learning_rate": 2.8884077198428e-05, + "loss": 10.5598, + "step": 7930 + }, + { + "epoch": 0.039651427001922646, + "grad_norm": 0.09217660874128342, + "learning_rate": 2.8882575283486446e-05, + "loss": 10.5596, + "step": 7940 + }, + { + "epoch": 0.039701365826862095, + "grad_norm": 0.09433452039957047, + "learning_rate": 2.8881073368544896e-05, + "loss": 10.56, + "step": 7950 + }, + { + "epoch": 0.039751304651801544, + "grad_norm": 0.09549769759178162, + "learning_rate": 2.8879571453603346e-05, + "loss": 10.5576, + "step": 7960 + }, + { + "epoch": 0.039801243476740994, + "grad_norm": 0.09538113325834274, + "learning_rate": 2.8878069538661793e-05, + "loss": 10.56, + "step": 7970 + }, + { + "epoch": 0.03985118230168044, + "grad_norm": 0.09417406469583511, + "learning_rate": 2.8876567623720246e-05, + "loss": 10.5598, + "step": 7980 + }, + { + "epoch": 0.03990112112661989, + "grad_norm": 0.09565738588571548, + "learning_rate": 2.8875065708778693e-05, + "loss": 10.5599, + "step": 7990 + }, + { + "epoch": 0.03995105995155934, + "grad_norm": 0.09348537772893906, + "learning_rate": 2.8873563793837143e-05, + "loss": 10.5605, + "step": 8000 + }, + { + "epoch": 0.04000099877649879, + "grad_norm": 0.0924496129155159, + "learning_rate": 2.8872061878895593e-05, + "loss": 10.5592, + "step": 8010 + }, + { + "epoch": 0.04005093760143824, + "grad_norm": 0.09314726293087006, + "learning_rate": 2.887055996395404e-05, + "loss": 10.5568, + "step": 8020 + }, + { + "epoch": 0.04010087642637769, + "grad_norm": 0.09801434725522995, + "learning_rate": 2.8869058049012494e-05, + "loss": 10.558, + "step": 8030 + }, + { + "epoch": 0.04015081525131713, + "grad_norm": 0.09441787749528885, + "learning_rate": 2.886755613407094e-05, + "loss": 10.5562, + "step": 8040 + }, + { + "epoch": 0.04020075407625658, + "grad_norm": 0.09265992045402527, + "learning_rate": 2.886605421912939e-05, + "loss": 10.5588, + "step": 8050 + }, + { + "epoch": 0.04025069290119603, + "grad_norm": 0.09449957311153412, + "learning_rate": 2.886455230418784e-05, + "loss": 10.5578, + "step": 8060 + }, + { + "epoch": 0.04030063172613548, + "grad_norm": 0.09760826081037521, + "learning_rate": 2.8863050389246288e-05, + "loss": 10.5543, + "step": 8070 + }, + { + "epoch": 0.04035057055107493, + "grad_norm": 0.09547002613544464, + "learning_rate": 2.886154847430474e-05, + "loss": 10.5575, + "step": 8080 + }, + { + "epoch": 0.04040050937601438, + "grad_norm": 0.09818068146705627, + "learning_rate": 2.8860046559363188e-05, + "loss": 10.5569, + "step": 8090 + }, + { + "epoch": 0.04045044820095383, + "grad_norm": 0.09117206931114197, + "learning_rate": 2.8858544644421638e-05, + "loss": 10.5529, + "step": 8100 + }, + { + "epoch": 0.04050038702589328, + "grad_norm": 0.10111954063177109, + "learning_rate": 2.885704272948009e-05, + "loss": 10.5541, + "step": 8110 + }, + { + "epoch": 0.04055032585083273, + "grad_norm": 0.09272032976150513, + "learning_rate": 2.8855540814538535e-05, + "loss": 10.5528, + "step": 8120 + }, + { + "epoch": 0.04060026467577218, + "grad_norm": 0.09511199593544006, + "learning_rate": 2.885403889959699e-05, + "loss": 10.5561, + "step": 8130 + }, + { + "epoch": 0.04065020350071163, + "grad_norm": 0.09244706481695175, + "learning_rate": 2.8852536984655436e-05, + "loss": 10.5553, + "step": 8140 + }, + { + "epoch": 0.04070014232565108, + "grad_norm": 0.09505173563957214, + "learning_rate": 2.8851035069713886e-05, + "loss": 10.5548, + "step": 8150 + }, + { + "epoch": 0.04075008115059053, + "grad_norm": 0.09145089983940125, + "learning_rate": 2.8849533154772336e-05, + "loss": 10.5544, + "step": 8160 + }, + { + "epoch": 0.040800019975529976, + "grad_norm": 0.09063593298196793, + "learning_rate": 2.8848031239830786e-05, + "loss": 10.5549, + "step": 8170 + }, + { + "epoch": 0.040849958800469426, + "grad_norm": 0.09323187917470932, + "learning_rate": 2.8846529324889236e-05, + "loss": 10.5522, + "step": 8180 + }, + { + "epoch": 0.040899897625408875, + "grad_norm": 0.09502808004617691, + "learning_rate": 2.8845027409947683e-05, + "loss": 10.5537, + "step": 8190 + }, + { + "epoch": 0.040949836450348324, + "grad_norm": 0.10582366585731506, + "learning_rate": 2.8843525495006133e-05, + "loss": 10.5538, + "step": 8200 + }, + { + "epoch": 0.040999775275287774, + "grad_norm": 0.09141421318054199, + "learning_rate": 2.8842023580064583e-05, + "loss": 10.5523, + "step": 8210 + }, + { + "epoch": 0.04104971410022722, + "grad_norm": 0.10094629228115082, + "learning_rate": 2.8840521665123034e-05, + "loss": 10.5521, + "step": 8220 + }, + { + "epoch": 0.04109965292516667, + "grad_norm": 0.08866948634386063, + "learning_rate": 2.8839019750181484e-05, + "loss": 10.5503, + "step": 8230 + }, + { + "epoch": 0.04114959175010612, + "grad_norm": 0.09741657972335815, + "learning_rate": 2.883751783523993e-05, + "loss": 10.5508, + "step": 8240 + }, + { + "epoch": 0.04119953057504557, + "grad_norm": 0.09136499464511871, + "learning_rate": 2.883601592029838e-05, + "loss": 10.5507, + "step": 8250 + }, + { + "epoch": 0.04124946939998502, + "grad_norm": 0.09198024123907089, + "learning_rate": 2.883451400535683e-05, + "loss": 10.5514, + "step": 8260 + }, + { + "epoch": 0.04129940822492447, + "grad_norm": 0.09833436459302902, + "learning_rate": 2.883301209041528e-05, + "loss": 10.5492, + "step": 8270 + }, + { + "epoch": 0.04134934704986392, + "grad_norm": 0.09501312673091888, + "learning_rate": 2.883151017547373e-05, + "loss": 10.5505, + "step": 8280 + }, + { + "epoch": 0.04139928587480336, + "grad_norm": 0.08871445059776306, + "learning_rate": 2.8830008260532178e-05, + "loss": 10.5488, + "step": 8290 + }, + { + "epoch": 0.04144922469974281, + "grad_norm": 0.09673582017421722, + "learning_rate": 2.8828506345590628e-05, + "loss": 10.55, + "step": 8300 + }, + { + "epoch": 0.04149916352468226, + "grad_norm": 0.09481798112392426, + "learning_rate": 2.882700443064908e-05, + "loss": 10.5475, + "step": 8310 + }, + { + "epoch": 0.04154910234962171, + "grad_norm": 0.09376543760299683, + "learning_rate": 2.882550251570753e-05, + "loss": 10.5485, + "step": 8320 + }, + { + "epoch": 0.04159904117456116, + "grad_norm": 0.09397636353969574, + "learning_rate": 2.882400060076598e-05, + "loss": 10.5472, + "step": 8330 + }, + { + "epoch": 0.04164897999950061, + "grad_norm": 0.09375918656587601, + "learning_rate": 2.8822498685824426e-05, + "loss": 10.5469, + "step": 8340 + }, + { + "epoch": 0.04169891882444006, + "grad_norm": 0.09085606038570404, + "learning_rate": 2.8820996770882876e-05, + "loss": 10.5496, + "step": 8350 + }, + { + "epoch": 0.04174885764937951, + "grad_norm": 0.0982041284441948, + "learning_rate": 2.8819494855941326e-05, + "loss": 10.5468, + "step": 8360 + }, + { + "epoch": 0.04179879647431896, + "grad_norm": 0.09510507434606552, + "learning_rate": 2.8817992940999776e-05, + "loss": 10.5458, + "step": 8370 + }, + { + "epoch": 0.04184873529925841, + "grad_norm": 0.09728366136550903, + "learning_rate": 2.8816491026058226e-05, + "loss": 10.5469, + "step": 8380 + }, + { + "epoch": 0.04189867412419786, + "grad_norm": 0.10339502990245819, + "learning_rate": 2.8814989111116673e-05, + "loss": 10.5475, + "step": 8390 + }, + { + "epoch": 0.04194861294913731, + "grad_norm": 0.09696449339389801, + "learning_rate": 2.8813487196175123e-05, + "loss": 10.5457, + "step": 8400 + }, + { + "epoch": 0.041998551774076756, + "grad_norm": 0.09432040899991989, + "learning_rate": 2.8811985281233573e-05, + "loss": 10.5464, + "step": 8410 + }, + { + "epoch": 0.042048490599016206, + "grad_norm": 0.09533160924911499, + "learning_rate": 2.8810483366292024e-05, + "loss": 10.5458, + "step": 8420 + }, + { + "epoch": 0.042098429423955655, + "grad_norm": 0.09491016715765, + "learning_rate": 2.8808981451350474e-05, + "loss": 10.546, + "step": 8430 + }, + { + "epoch": 0.042148368248895104, + "grad_norm": 0.0958162397146225, + "learning_rate": 2.880747953640892e-05, + "loss": 10.5436, + "step": 8440 + }, + { + "epoch": 0.042198307073834554, + "grad_norm": 0.10142121464014053, + "learning_rate": 2.880597762146737e-05, + "loss": 10.5453, + "step": 8450 + }, + { + "epoch": 0.042248245898774, + "grad_norm": 0.09626547992229462, + "learning_rate": 2.880447570652582e-05, + "loss": 10.5443, + "step": 8460 + }, + { + "epoch": 0.04229818472371345, + "grad_norm": 0.09666489064693451, + "learning_rate": 2.880297379158427e-05, + "loss": 10.5445, + "step": 8470 + }, + { + "epoch": 0.0423481235486529, + "grad_norm": 0.0963977724313736, + "learning_rate": 2.880147187664272e-05, + "loss": 10.545, + "step": 8480 + }, + { + "epoch": 0.04239806237359235, + "grad_norm": 0.09442082047462463, + "learning_rate": 2.879996996170117e-05, + "loss": 10.5455, + "step": 8490 + }, + { + "epoch": 0.0424480011985318, + "grad_norm": 0.09085852652788162, + "learning_rate": 2.8798468046759618e-05, + "loss": 10.5439, + "step": 8500 + }, + { + "epoch": 0.04249794002347125, + "grad_norm": 0.0952218547463417, + "learning_rate": 2.879696613181807e-05, + "loss": 10.5411, + "step": 8510 + }, + { + "epoch": 0.0425478788484107, + "grad_norm": 0.09868868440389633, + "learning_rate": 2.879546421687652e-05, + "loss": 10.5424, + "step": 8520 + }, + { + "epoch": 0.04259781767335014, + "grad_norm": 0.09193531423807144, + "learning_rate": 2.879396230193497e-05, + "loss": 10.5434, + "step": 8530 + }, + { + "epoch": 0.04264775649828959, + "grad_norm": 0.09298193454742432, + "learning_rate": 2.879246038699342e-05, + "loss": 10.5418, + "step": 8540 + }, + { + "epoch": 0.04269769532322904, + "grad_norm": 0.09158916771411896, + "learning_rate": 2.8790958472051866e-05, + "loss": 10.5422, + "step": 8550 + }, + { + "epoch": 0.04274763414816849, + "grad_norm": 0.09237027168273926, + "learning_rate": 2.8789456557110316e-05, + "loss": 10.5407, + "step": 8560 + }, + { + "epoch": 0.04279757297310794, + "grad_norm": 0.1070277988910675, + "learning_rate": 2.8787954642168766e-05, + "loss": 10.5417, + "step": 8570 + }, + { + "epoch": 0.04284751179804739, + "grad_norm": 0.0924166664481163, + "learning_rate": 2.8786452727227216e-05, + "loss": 10.5391, + "step": 8580 + }, + { + "epoch": 0.04289745062298684, + "grad_norm": 0.09547102451324463, + "learning_rate": 2.8784950812285667e-05, + "loss": 10.5386, + "step": 8590 + }, + { + "epoch": 0.04294738944792629, + "grad_norm": 0.09805493801832199, + "learning_rate": 2.8783448897344113e-05, + "loss": 10.5404, + "step": 8600 + }, + { + "epoch": 0.04299732827286574, + "grad_norm": 0.09734487533569336, + "learning_rate": 2.8781946982402563e-05, + "loss": 10.5402, + "step": 8610 + }, + { + "epoch": 0.04304726709780519, + "grad_norm": 0.09356056153774261, + "learning_rate": 2.8780445067461014e-05, + "loss": 10.5394, + "step": 8620 + }, + { + "epoch": 0.04309720592274464, + "grad_norm": 0.09781930595636368, + "learning_rate": 2.8778943152519464e-05, + "loss": 10.5389, + "step": 8630 + }, + { + "epoch": 0.04314714474768409, + "grad_norm": 0.0970260426402092, + "learning_rate": 2.8777441237577914e-05, + "loss": 10.5402, + "step": 8640 + }, + { + "epoch": 0.043197083572623536, + "grad_norm": 0.09671913832426071, + "learning_rate": 2.877593932263636e-05, + "loss": 10.5387, + "step": 8650 + }, + { + "epoch": 0.043247022397562986, + "grad_norm": 0.0935794860124588, + "learning_rate": 2.877443740769481e-05, + "loss": 10.5378, + "step": 8660 + }, + { + "epoch": 0.043296961222502435, + "grad_norm": 0.0933162197470665, + "learning_rate": 2.877293549275326e-05, + "loss": 10.5399, + "step": 8670 + }, + { + "epoch": 0.043346900047441884, + "grad_norm": 0.09513531625270844, + "learning_rate": 2.877143357781171e-05, + "loss": 10.5379, + "step": 8680 + }, + { + "epoch": 0.043396838872381334, + "grad_norm": 0.10161089897155762, + "learning_rate": 2.876993166287016e-05, + "loss": 10.5386, + "step": 8690 + }, + { + "epoch": 0.04344677769732078, + "grad_norm": 0.09620557725429535, + "learning_rate": 2.8768429747928608e-05, + "loss": 10.5381, + "step": 8700 + }, + { + "epoch": 0.04349671652226023, + "grad_norm": 0.09468070417642593, + "learning_rate": 2.876692783298706e-05, + "loss": 10.5371, + "step": 8710 + }, + { + "epoch": 0.04354665534719968, + "grad_norm": 0.09541548788547516, + "learning_rate": 2.876542591804551e-05, + "loss": 10.5374, + "step": 8720 + }, + { + "epoch": 0.04359659417213913, + "grad_norm": 0.09611213952302933, + "learning_rate": 2.876392400310396e-05, + "loss": 10.5382, + "step": 8730 + }, + { + "epoch": 0.04364653299707858, + "grad_norm": 0.08991603553295135, + "learning_rate": 2.876242208816241e-05, + "loss": 10.536, + "step": 8740 + }, + { + "epoch": 0.04369647182201803, + "grad_norm": 0.09493478387594223, + "learning_rate": 2.8760920173220856e-05, + "loss": 10.5358, + "step": 8750 + }, + { + "epoch": 0.04374641064695748, + "grad_norm": 0.09500102698802948, + "learning_rate": 2.8759418258279306e-05, + "loss": 10.5345, + "step": 8760 + }, + { + "epoch": 0.04379634947189693, + "grad_norm": 0.09735292196273804, + "learning_rate": 2.8757916343337756e-05, + "loss": 10.5329, + "step": 8770 + }, + { + "epoch": 0.04384628829683637, + "grad_norm": 0.09669680148363113, + "learning_rate": 2.8756414428396206e-05, + "loss": 10.5363, + "step": 8780 + }, + { + "epoch": 0.04389622712177582, + "grad_norm": 0.09384785592556, + "learning_rate": 2.8754912513454657e-05, + "loss": 10.5342, + "step": 8790 + }, + { + "epoch": 0.04394616594671527, + "grad_norm": 0.09973820298910141, + "learning_rate": 2.8753410598513103e-05, + "loss": 10.5346, + "step": 8800 + }, + { + "epoch": 0.04399610477165472, + "grad_norm": 0.09043479710817337, + "learning_rate": 2.8751908683571557e-05, + "loss": 10.5345, + "step": 8810 + }, + { + "epoch": 0.04404604359659417, + "grad_norm": 0.09604961425065994, + "learning_rate": 2.8750406768630004e-05, + "loss": 10.5338, + "step": 8820 + }, + { + "epoch": 0.04409598242153362, + "grad_norm": 0.09182944893836975, + "learning_rate": 2.8748904853688454e-05, + "loss": 10.5345, + "step": 8830 + }, + { + "epoch": 0.04414592124647307, + "grad_norm": 0.09448856860399246, + "learning_rate": 2.8747402938746904e-05, + "loss": 10.5337, + "step": 8840 + }, + { + "epoch": 0.04419586007141252, + "grad_norm": 0.09963658452033997, + "learning_rate": 2.874590102380535e-05, + "loss": 10.5313, + "step": 8850 + }, + { + "epoch": 0.04424579889635197, + "grad_norm": 0.09832189232110977, + "learning_rate": 2.8744399108863804e-05, + "loss": 10.5341, + "step": 8860 + }, + { + "epoch": 0.04429573772129142, + "grad_norm": 0.0896415188908577, + "learning_rate": 2.874289719392225e-05, + "loss": 10.5319, + "step": 8870 + }, + { + "epoch": 0.04434567654623087, + "grad_norm": 0.09824098646640778, + "learning_rate": 2.87413952789807e-05, + "loss": 10.5328, + "step": 8880 + }, + { + "epoch": 0.044395615371170316, + "grad_norm": 0.09288741648197174, + "learning_rate": 2.873989336403915e-05, + "loss": 10.5329, + "step": 8890 + }, + { + "epoch": 0.044445554196109766, + "grad_norm": 0.09383495151996613, + "learning_rate": 2.87383914490976e-05, + "loss": 10.5323, + "step": 8900 + }, + { + "epoch": 0.044495493021049215, + "grad_norm": 0.09476141631603241, + "learning_rate": 2.8736889534156052e-05, + "loss": 10.5316, + "step": 8910 + }, + { + "epoch": 0.044545431845988664, + "grad_norm": 0.0921645388007164, + "learning_rate": 2.87353876192145e-05, + "loss": 10.5313, + "step": 8920 + }, + { + "epoch": 0.044595370670928114, + "grad_norm": 0.09352537989616394, + "learning_rate": 2.873388570427295e-05, + "loss": 10.5309, + "step": 8930 + }, + { + "epoch": 0.04464530949586756, + "grad_norm": 0.08910872787237167, + "learning_rate": 2.87323837893314e-05, + "loss": 10.5303, + "step": 8940 + }, + { + "epoch": 0.04469524832080701, + "grad_norm": 0.09727400541305542, + "learning_rate": 2.8730881874389846e-05, + "loss": 10.5306, + "step": 8950 + }, + { + "epoch": 0.04474518714574646, + "grad_norm": 0.09605511277914047, + "learning_rate": 2.87293799594483e-05, + "loss": 10.5287, + "step": 8960 + }, + { + "epoch": 0.04479512597068591, + "grad_norm": 0.0909186601638794, + "learning_rate": 2.8727878044506746e-05, + "loss": 10.5279, + "step": 8970 + }, + { + "epoch": 0.04484506479562536, + "grad_norm": 0.0998016744852066, + "learning_rate": 2.8726376129565196e-05, + "loss": 10.5307, + "step": 8980 + }, + { + "epoch": 0.04489500362056481, + "grad_norm": 0.09300219267606735, + "learning_rate": 2.8724874214623647e-05, + "loss": 10.5304, + "step": 8990 + }, + { + "epoch": 0.04494494244550426, + "grad_norm": 0.09390337765216827, + "learning_rate": 2.8723372299682093e-05, + "loss": 10.5274, + "step": 9000 + }, + { + "epoch": 0.04499488127044371, + "grad_norm": 0.09505102783441544, + "learning_rate": 2.8721870384740547e-05, + "loss": 10.529, + "step": 9010 + }, + { + "epoch": 0.04504482009538316, + "grad_norm": 0.09317026287317276, + "learning_rate": 2.8720368469798994e-05, + "loss": 10.5281, + "step": 9020 + }, + { + "epoch": 0.0450947589203226, + "grad_norm": 0.09734094887971878, + "learning_rate": 2.8718866554857444e-05, + "loss": 10.5281, + "step": 9030 + }, + { + "epoch": 0.04514469774526205, + "grad_norm": 0.09334099292755127, + "learning_rate": 2.8717364639915894e-05, + "loss": 10.5263, + "step": 9040 + }, + { + "epoch": 0.0451946365702015, + "grad_norm": 0.0930711030960083, + "learning_rate": 2.871586272497434e-05, + "loss": 10.526, + "step": 9050 + }, + { + "epoch": 0.04524457539514095, + "grad_norm": 0.09420018643140793, + "learning_rate": 2.8714360810032794e-05, + "loss": 10.527, + "step": 9060 + }, + { + "epoch": 0.0452945142200804, + "grad_norm": 0.0920024886727333, + "learning_rate": 2.871285889509124e-05, + "loss": 10.5282, + "step": 9070 + }, + { + "epoch": 0.04534445304501985, + "grad_norm": 0.1000896766781807, + "learning_rate": 2.871135698014969e-05, + "loss": 10.5276, + "step": 9080 + }, + { + "epoch": 0.0453943918699593, + "grad_norm": 0.09131770581007004, + "learning_rate": 2.870985506520814e-05, + "loss": 10.5263, + "step": 9090 + }, + { + "epoch": 0.04544433069489875, + "grad_norm": 0.09568016231060028, + "learning_rate": 2.870835315026659e-05, + "loss": 10.5241, + "step": 9100 + }, + { + "epoch": 0.0454942695198382, + "grad_norm": 0.1060924157500267, + "learning_rate": 2.8706851235325042e-05, + "loss": 10.5241, + "step": 9110 + }, + { + "epoch": 0.04554420834477765, + "grad_norm": 0.10000130534172058, + "learning_rate": 2.870534932038349e-05, + "loss": 10.5277, + "step": 9120 + }, + { + "epoch": 0.045594147169717096, + "grad_norm": 0.09147665649652481, + "learning_rate": 2.8703847405441942e-05, + "loss": 10.5265, + "step": 9130 + }, + { + "epoch": 0.045644085994656546, + "grad_norm": 0.09924129396677017, + "learning_rate": 2.870234549050039e-05, + "loss": 10.5251, + "step": 9140 + }, + { + "epoch": 0.045694024819595995, + "grad_norm": 0.09299519658088684, + "learning_rate": 2.8700843575558836e-05, + "loss": 10.5242, + "step": 9150 + }, + { + "epoch": 0.045743963644535444, + "grad_norm": 0.09638933092355728, + "learning_rate": 2.869934166061729e-05, + "loss": 10.5243, + "step": 9160 + }, + { + "epoch": 0.045793902469474894, + "grad_norm": 0.10021718591451645, + "learning_rate": 2.8697839745675736e-05, + "loss": 10.5254, + "step": 9170 + }, + { + "epoch": 0.04584384129441434, + "grad_norm": 0.09364977478981018, + "learning_rate": 2.869633783073419e-05, + "loss": 10.524, + "step": 9180 + }, + { + "epoch": 0.04589378011935379, + "grad_norm": 0.09493537992238998, + "learning_rate": 2.8694835915792637e-05, + "loss": 10.524, + "step": 9190 + }, + { + "epoch": 0.04594371894429324, + "grad_norm": 0.09197970479726791, + "learning_rate": 2.8693334000851083e-05, + "loss": 10.5239, + "step": 9200 + }, + { + "epoch": 0.04599365776923269, + "grad_norm": 0.09489205479621887, + "learning_rate": 2.8691832085909537e-05, + "loss": 10.5234, + "step": 9210 + }, + { + "epoch": 0.04604359659417214, + "grad_norm": 0.09727194160223007, + "learning_rate": 2.8690330170967984e-05, + "loss": 10.5224, + "step": 9220 + }, + { + "epoch": 0.04609353541911159, + "grad_norm": 0.09218050539493561, + "learning_rate": 2.8688828256026437e-05, + "loss": 10.5209, + "step": 9230 + }, + { + "epoch": 0.04614347424405104, + "grad_norm": 0.08994846791028976, + "learning_rate": 2.8687326341084884e-05, + "loss": 10.5225, + "step": 9240 + }, + { + "epoch": 0.04619341306899049, + "grad_norm": 0.09504954516887665, + "learning_rate": 2.868582442614333e-05, + "loss": 10.5213, + "step": 9250 + }, + { + "epoch": 0.04624335189392994, + "grad_norm": 0.0972573459148407, + "learning_rate": 2.8684322511201784e-05, + "loss": 10.522, + "step": 9260 + }, + { + "epoch": 0.04629329071886939, + "grad_norm": 0.09305988252162933, + "learning_rate": 2.868282059626023e-05, + "loss": 10.5211, + "step": 9270 + }, + { + "epoch": 0.04634322954380883, + "grad_norm": 0.09756896644830704, + "learning_rate": 2.8681318681318685e-05, + "loss": 10.5218, + "step": 9280 + }, + { + "epoch": 0.04639316836874828, + "grad_norm": 0.09515320509672165, + "learning_rate": 2.867981676637713e-05, + "loss": 10.5219, + "step": 9290 + }, + { + "epoch": 0.04644310719368773, + "grad_norm": 0.1009177565574646, + "learning_rate": 2.867831485143558e-05, + "loss": 10.5203, + "step": 9300 + }, + { + "epoch": 0.04649304601862718, + "grad_norm": 0.09644551575183868, + "learning_rate": 2.8676812936494032e-05, + "loss": 10.5188, + "step": 9310 + }, + { + "epoch": 0.04654298484356663, + "grad_norm": 0.09817861765623093, + "learning_rate": 2.867531102155248e-05, + "loss": 10.5195, + "step": 9320 + }, + { + "epoch": 0.04659292366850608, + "grad_norm": 0.09904427081346512, + "learning_rate": 2.8673809106610932e-05, + "loss": 10.5177, + "step": 9330 + }, + { + "epoch": 0.04664286249344553, + "grad_norm": 0.09583260118961334, + "learning_rate": 2.867230719166938e-05, + "loss": 10.5209, + "step": 9340 + }, + { + "epoch": 0.04669280131838498, + "grad_norm": 0.09900553524494171, + "learning_rate": 2.8670805276727826e-05, + "loss": 10.5185, + "step": 9350 + }, + { + "epoch": 0.04674274014332443, + "grad_norm": 0.09579353779554367, + "learning_rate": 2.866930336178628e-05, + "loss": 10.5189, + "step": 9360 + }, + { + "epoch": 0.046792678968263876, + "grad_norm": 0.09370560199022293, + "learning_rate": 2.8667801446844726e-05, + "loss": 10.5196, + "step": 9370 + }, + { + "epoch": 0.046842617793203326, + "grad_norm": 0.09705270081758499, + "learning_rate": 2.866629953190318e-05, + "loss": 10.5178, + "step": 9380 + }, + { + "epoch": 0.046892556618142775, + "grad_norm": 0.09159563481807709, + "learning_rate": 2.8664797616961627e-05, + "loss": 10.5194, + "step": 9390 + }, + { + "epoch": 0.046942495443082224, + "grad_norm": 0.09416472911834717, + "learning_rate": 2.8663295702020073e-05, + "loss": 10.5183, + "step": 9400 + }, + { + "epoch": 0.046992434268021674, + "grad_norm": 0.09269614517688751, + "learning_rate": 2.8661793787078527e-05, + "loss": 10.5174, + "step": 9410 + }, + { + "epoch": 0.04704237309296112, + "grad_norm": 0.09696542471647263, + "learning_rate": 2.8660291872136974e-05, + "loss": 10.5164, + "step": 9420 + }, + { + "epoch": 0.04709231191790057, + "grad_norm": 0.09807953983545303, + "learning_rate": 2.8658789957195427e-05, + "loss": 10.5161, + "step": 9430 + }, + { + "epoch": 0.04714225074284002, + "grad_norm": 0.09456553310155869, + "learning_rate": 2.8657288042253874e-05, + "loss": 10.5161, + "step": 9440 + }, + { + "epoch": 0.04719218956777947, + "grad_norm": 0.09571606665849686, + "learning_rate": 2.865578612731232e-05, + "loss": 10.5163, + "step": 9450 + }, + { + "epoch": 0.04724212839271892, + "grad_norm": 0.09193666279315948, + "learning_rate": 2.8654284212370774e-05, + "loss": 10.5135, + "step": 9460 + }, + { + "epoch": 0.04729206721765837, + "grad_norm": 0.09799723327159882, + "learning_rate": 2.865278229742922e-05, + "loss": 10.515, + "step": 9470 + }, + { + "epoch": 0.04734200604259782, + "grad_norm": 0.09496576339006424, + "learning_rate": 2.8651280382487675e-05, + "loss": 10.5143, + "step": 9480 + }, + { + "epoch": 0.04739194486753727, + "grad_norm": 0.09969381988048553, + "learning_rate": 2.864977846754612e-05, + "loss": 10.5162, + "step": 9490 + }, + { + "epoch": 0.04744188369247672, + "grad_norm": 0.08909635990858078, + "learning_rate": 2.8648276552604572e-05, + "loss": 10.5178, + "step": 9500 + }, + { + "epoch": 0.04749182251741617, + "grad_norm": 0.10057427734136581, + "learning_rate": 2.8646774637663022e-05, + "loss": 10.5147, + "step": 9510 + }, + { + "epoch": 0.04754176134235561, + "grad_norm": 0.09253738075494766, + "learning_rate": 2.864527272272147e-05, + "loss": 10.5167, + "step": 9520 + }, + { + "epoch": 0.04759170016729506, + "grad_norm": 0.09274590760469437, + "learning_rate": 2.8643770807779922e-05, + "loss": 10.9066, + "step": 9530 + }, + { + "epoch": 0.04764163899223451, + "grad_norm": 0.09520485997200012, + "learning_rate": 2.864226889283837e-05, + "loss": 10.5115, + "step": 9540 + }, + { + "epoch": 0.04769157781717396, + "grad_norm": 0.09519509971141815, + "learning_rate": 2.864076697789682e-05, + "loss": 10.5245, + "step": 9550 + }, + { + "epoch": 0.04774151664211341, + "grad_norm": 0.09528204053640366, + "learning_rate": 2.863926506295527e-05, + "loss": 10.5133, + "step": 9560 + }, + { + "epoch": 0.04779145546705286, + "grad_norm": 0.09493537992238998, + "learning_rate": 2.8637763148013716e-05, + "loss": 10.5144, + "step": 9570 + }, + { + "epoch": 0.04784139429199231, + "grad_norm": 0.09186563640832901, + "learning_rate": 2.863626123307217e-05, + "loss": 10.5186, + "step": 9580 + }, + { + "epoch": 0.04789133311693176, + "grad_norm": 0.09734216332435608, + "learning_rate": 2.8634759318130617e-05, + "loss": 10.5126, + "step": 9590 + }, + { + "epoch": 0.04794127194187121, + "grad_norm": 0.09098707884550095, + "learning_rate": 2.8633257403189067e-05, + "loss": 11.7738, + "step": 9600 + }, + { + "epoch": 0.047991210766810656, + "grad_norm": 0.09297781437635422, + "learning_rate": 2.8631755488247517e-05, + "loss": 10.6659, + "step": 9610 + }, + { + "epoch": 0.048041149591750106, + "grad_norm": 0.09503110498189926, + "learning_rate": 2.8630253573305964e-05, + "loss": 10.5111, + "step": 9620 + }, + { + "epoch": 0.048091088416689555, + "grad_norm": 0.09521497786045074, + "learning_rate": 2.8628751658364417e-05, + "loss": 10.5093, + "step": 9630 + }, + { + "epoch": 0.048141027241629004, + "grad_norm": 0.09482381492853165, + "learning_rate": 2.8627249743422864e-05, + "loss": 10.5128, + "step": 9640 + }, + { + "epoch": 0.048190966066568454, + "grad_norm": 0.09641779214143753, + "learning_rate": 2.8625747828481314e-05, + "loss": 10.51, + "step": 9650 + }, + { + "epoch": 0.0482409048915079, + "grad_norm": 0.10081770271062851, + "learning_rate": 2.8624245913539764e-05, + "loss": 10.511, + "step": 9660 + }, + { + "epoch": 0.04829084371644735, + "grad_norm": 0.09441135823726654, + "learning_rate": 2.862274399859821e-05, + "loss": 10.5093, + "step": 9670 + }, + { + "epoch": 0.0483407825413868, + "grad_norm": 0.09851568937301636, + "learning_rate": 2.8621242083656665e-05, + "loss": 10.5099, + "step": 9680 + }, + { + "epoch": 0.04839072136632625, + "grad_norm": 0.09381608664989471, + "learning_rate": 2.861974016871511e-05, + "loss": 10.5099, + "step": 9690 + }, + { + "epoch": 0.0484406601912657, + "grad_norm": 0.0943373441696167, + "learning_rate": 2.8618238253773562e-05, + "loss": 10.5086, + "step": 9700 + }, + { + "epoch": 0.04849059901620515, + "grad_norm": 0.09664776921272278, + "learning_rate": 2.8616736338832012e-05, + "loss": 10.51, + "step": 9710 + }, + { + "epoch": 0.0485405378411446, + "grad_norm": 0.09082762151956558, + "learning_rate": 2.861523442389046e-05, + "loss": 10.509, + "step": 9720 + }, + { + "epoch": 0.04859047666608405, + "grad_norm": 0.1018294245004654, + "learning_rate": 2.8613732508948912e-05, + "loss": 10.5095, + "step": 9730 + }, + { + "epoch": 0.0486404154910235, + "grad_norm": 0.09788943082094193, + "learning_rate": 2.861223059400736e-05, + "loss": 10.506, + "step": 9740 + }, + { + "epoch": 0.04869035431596295, + "grad_norm": 0.0945865660905838, + "learning_rate": 2.861072867906581e-05, + "loss": 10.5045, + "step": 9750 + }, + { + "epoch": 0.0487402931409024, + "grad_norm": 0.09245441854000092, + "learning_rate": 2.860922676412426e-05, + "loss": 10.5076, + "step": 9760 + }, + { + "epoch": 0.04879023196584184, + "grad_norm": 0.09692434966564178, + "learning_rate": 2.8607724849182706e-05, + "loss": 10.5044, + "step": 9770 + }, + { + "epoch": 0.04884017079078129, + "grad_norm": 0.10064822435379028, + "learning_rate": 2.860622293424116e-05, + "loss": 10.5054, + "step": 9780 + }, + { + "epoch": 0.04889010961572074, + "grad_norm": 0.09322358667850494, + "learning_rate": 2.8604721019299607e-05, + "loss": 10.5048, + "step": 9790 + }, + { + "epoch": 0.04894004844066019, + "grad_norm": 0.09551732242107391, + "learning_rate": 2.8603219104358057e-05, + "loss": 10.5049, + "step": 9800 + }, + { + "epoch": 0.04898998726559964, + "grad_norm": 0.09352359920740128, + "learning_rate": 2.8601717189416507e-05, + "loss": 10.5055, + "step": 9810 + }, + { + "epoch": 0.04903992609053909, + "grad_norm": 0.09425562620162964, + "learning_rate": 2.8600215274474957e-05, + "loss": 10.5046, + "step": 9820 + }, + { + "epoch": 0.04908986491547854, + "grad_norm": 0.09174810349941254, + "learning_rate": 2.8598713359533407e-05, + "loss": 10.5061, + "step": 9830 + }, + { + "epoch": 0.04913980374041799, + "grad_norm": 0.0945669412612915, + "learning_rate": 2.8597211444591854e-05, + "loss": 10.5056, + "step": 9840 + }, + { + "epoch": 0.049189742565357436, + "grad_norm": 0.1002650186419487, + "learning_rate": 2.8595709529650304e-05, + "loss": 10.5034, + "step": 9850 + }, + { + "epoch": 0.049239681390296886, + "grad_norm": 0.09847228229045868, + "learning_rate": 2.8594207614708754e-05, + "loss": 10.503, + "step": 9860 + }, + { + "epoch": 0.049289620215236335, + "grad_norm": 0.10454359650611877, + "learning_rate": 2.8592705699767205e-05, + "loss": 10.504, + "step": 9870 + }, + { + "epoch": 0.049339559040175784, + "grad_norm": 0.09625834971666336, + "learning_rate": 2.8591203784825655e-05, + "loss": 10.5023, + "step": 9880 + }, + { + "epoch": 0.049389497865115234, + "grad_norm": 0.09287148714065552, + "learning_rate": 2.85897018698841e-05, + "loss": 10.5059, + "step": 9890 + }, + { + "epoch": 0.04943943669005468, + "grad_norm": 0.09204880893230438, + "learning_rate": 2.8588199954942552e-05, + "loss": 10.5014, + "step": 9900 + }, + { + "epoch": 0.04948937551499413, + "grad_norm": 0.09391821175813675, + "learning_rate": 2.8586698040001002e-05, + "loss": 10.5023, + "step": 9910 + }, + { + "epoch": 0.04953931433993358, + "grad_norm": 0.09617898613214493, + "learning_rate": 2.8585196125059452e-05, + "loss": 10.5032, + "step": 9920 + }, + { + "epoch": 0.04958925316487303, + "grad_norm": 0.09573251754045486, + "learning_rate": 2.8583694210117902e-05, + "loss": 10.5024, + "step": 9930 + }, + { + "epoch": 0.04963919198981248, + "grad_norm": 0.09664706140756607, + "learning_rate": 2.858219229517635e-05, + "loss": 10.5018, + "step": 9940 + }, + { + "epoch": 0.04968913081475193, + "grad_norm": 0.09707572311162949, + "learning_rate": 2.85806903802348e-05, + "loss": 10.5026, + "step": 9950 + }, + { + "epoch": 0.04973906963969138, + "grad_norm": 0.09420596808195114, + "learning_rate": 2.857918846529325e-05, + "loss": 10.5004, + "step": 9960 + }, + { + "epoch": 0.04978900846463083, + "grad_norm": 0.09333644807338715, + "learning_rate": 2.85776865503517e-05, + "loss": 10.4996, + "step": 9970 + }, + { + "epoch": 0.04983894728957028, + "grad_norm": 0.09561195969581604, + "learning_rate": 2.857618463541015e-05, + "loss": 10.5005, + "step": 9980 + }, + { + "epoch": 0.04988888611450973, + "grad_norm": 0.09285534173250198, + "learning_rate": 2.8574682720468597e-05, + "loss": 10.5016, + "step": 9990 + }, + { + "epoch": 0.04993882493944918, + "grad_norm": 0.09716643393039703, + "learning_rate": 2.8573180805527047e-05, + "loss": 10.4969, + "step": 10000 + }, + { + "epoch": 0.04998876376438863, + "grad_norm": 0.09244739264249802, + "learning_rate": 2.8571678890585497e-05, + "loss": 10.4983, + "step": 10010 + }, + { + "epoch": 0.05003870258932807, + "grad_norm": 0.099833182990551, + "learning_rate": 2.8570176975643947e-05, + "loss": 10.5007, + "step": 10020 + }, + { + "epoch": 0.05008864141426752, + "grad_norm": 0.09650156646966934, + "learning_rate": 2.8568675060702397e-05, + "loss": 10.4993, + "step": 10030 + }, + { + "epoch": 0.05013858023920697, + "grad_norm": 0.10166086256504059, + "learning_rate": 2.8567173145760844e-05, + "loss": 10.4981, + "step": 10040 + }, + { + "epoch": 0.05018851906414642, + "grad_norm": 0.09234780818223953, + "learning_rate": 2.8565671230819294e-05, + "loss": 10.4969, + "step": 10050 + }, + { + "epoch": 0.05023845788908587, + "grad_norm": 0.0958356112241745, + "learning_rate": 2.8564169315877744e-05, + "loss": 10.4996, + "step": 10060 + }, + { + "epoch": 0.05028839671402532, + "grad_norm": 0.09565756469964981, + "learning_rate": 2.8562667400936195e-05, + "loss": 10.4971, + "step": 10070 + }, + { + "epoch": 0.05033833553896477, + "grad_norm": 0.09427642822265625, + "learning_rate": 2.8561165485994645e-05, + "loss": 10.4966, + "step": 10080 + }, + { + "epoch": 0.050388274363904216, + "grad_norm": 0.0930957943201065, + "learning_rate": 2.855966357105309e-05, + "loss": 10.4987, + "step": 10090 + }, + { + "epoch": 0.050438213188843666, + "grad_norm": 0.09489066898822784, + "learning_rate": 2.8558161656111542e-05, + "loss": 10.496, + "step": 10100 + }, + { + "epoch": 0.050488152013783115, + "grad_norm": 0.09817587584257126, + "learning_rate": 2.8556659741169992e-05, + "loss": 10.4956, + "step": 10110 + }, + { + "epoch": 0.050538090838722564, + "grad_norm": 0.09142786264419556, + "learning_rate": 2.8555157826228442e-05, + "loss": 10.4974, + "step": 10120 + }, + { + "epoch": 0.050588029663662014, + "grad_norm": 0.09748794138431549, + "learning_rate": 2.8553655911286892e-05, + "loss": 10.4959, + "step": 10130 + }, + { + "epoch": 0.05063796848860146, + "grad_norm": 0.09116542339324951, + "learning_rate": 2.8552153996345343e-05, + "loss": 10.498, + "step": 10140 + }, + { + "epoch": 0.05068790731354091, + "grad_norm": 0.09313584864139557, + "learning_rate": 2.855065208140379e-05, + "loss": 10.4932, + "step": 10150 + }, + { + "epoch": 0.05073784613848036, + "grad_norm": 0.09887810051441193, + "learning_rate": 2.854915016646224e-05, + "loss": 10.4945, + "step": 10160 + }, + { + "epoch": 0.05078778496341981, + "grad_norm": 0.09741490334272385, + "learning_rate": 2.854764825152069e-05, + "loss": 10.4943, + "step": 10170 + }, + { + "epoch": 0.05083772378835926, + "grad_norm": 0.09374571591615677, + "learning_rate": 2.854614633657914e-05, + "loss": 10.4943, + "step": 10180 + }, + { + "epoch": 0.05088766261329871, + "grad_norm": 0.09434989094734192, + "learning_rate": 2.854464442163759e-05, + "loss": 10.4963, + "step": 10190 + }, + { + "epoch": 0.05093760143823816, + "grad_norm": 0.09508496522903442, + "learning_rate": 2.8543142506696037e-05, + "loss": 10.4925, + "step": 10200 + }, + { + "epoch": 0.05098754026317761, + "grad_norm": 0.09928717464208603, + "learning_rate": 2.8541640591754487e-05, + "loss": 10.4933, + "step": 10210 + }, + { + "epoch": 0.05103747908811706, + "grad_norm": 0.09969381988048553, + "learning_rate": 2.8540138676812937e-05, + "loss": 10.4932, + "step": 10220 + }, + { + "epoch": 0.05108741791305651, + "grad_norm": 0.0988709032535553, + "learning_rate": 2.8538636761871387e-05, + "loss": 10.4942, + "step": 10230 + }, + { + "epoch": 0.05113735673799596, + "grad_norm": 0.09207888692617416, + "learning_rate": 2.8537134846929838e-05, + "loss": 10.492, + "step": 10240 + }, + { + "epoch": 0.05118729556293541, + "grad_norm": 0.09073358029127121, + "learning_rate": 2.8535632931988284e-05, + "loss": 10.494, + "step": 10250 + }, + { + "epoch": 0.05123723438787485, + "grad_norm": 0.09680426120758057, + "learning_rate": 2.8534131017046734e-05, + "loss": 10.4916, + "step": 10260 + }, + { + "epoch": 0.0512871732128143, + "grad_norm": 0.09444583207368851, + "learning_rate": 2.8532629102105185e-05, + "loss": 10.4934, + "step": 10270 + }, + { + "epoch": 0.05133711203775375, + "grad_norm": 0.09547161310911179, + "learning_rate": 2.8531127187163635e-05, + "loss": 10.4909, + "step": 10280 + }, + { + "epoch": 0.0513870508626932, + "grad_norm": 0.09873054176568985, + "learning_rate": 2.8529625272222085e-05, + "loss": 10.4924, + "step": 10290 + }, + { + "epoch": 0.05143698968763265, + "grad_norm": 0.09690874069929123, + "learning_rate": 2.8528123357280532e-05, + "loss": 10.4925, + "step": 10300 + }, + { + "epoch": 0.0514869285125721, + "grad_norm": 0.09330197423696518, + "learning_rate": 2.8526621442338982e-05, + "loss": 10.4923, + "step": 10310 + }, + { + "epoch": 0.05153686733751155, + "grad_norm": 0.10089316219091415, + "learning_rate": 2.8525119527397432e-05, + "loss": 10.4924, + "step": 10320 + }, + { + "epoch": 0.051586806162450996, + "grad_norm": 0.0995856523513794, + "learning_rate": 2.8523617612455882e-05, + "loss": 10.4916, + "step": 10330 + }, + { + "epoch": 0.051636744987390445, + "grad_norm": 0.09683484584093094, + "learning_rate": 2.8522115697514333e-05, + "loss": 10.4886, + "step": 10340 + }, + { + "epoch": 0.051686683812329895, + "grad_norm": 0.09331785142421722, + "learning_rate": 2.852061378257278e-05, + "loss": 10.4894, + "step": 10350 + }, + { + "epoch": 0.051736622637269344, + "grad_norm": 0.09782253205776215, + "learning_rate": 2.851911186763123e-05, + "loss": 10.4884, + "step": 10360 + }, + { + "epoch": 0.051786561462208794, + "grad_norm": 0.09803865849971771, + "learning_rate": 2.851760995268968e-05, + "loss": 10.4905, + "step": 10370 + }, + { + "epoch": 0.05183650028714824, + "grad_norm": 0.09339244663715363, + "learning_rate": 2.851610803774813e-05, + "loss": 10.4909, + "step": 10380 + }, + { + "epoch": 0.05188643911208769, + "grad_norm": 0.09537281095981598, + "learning_rate": 2.851460612280658e-05, + "loss": 10.4881, + "step": 10390 + }, + { + "epoch": 0.05193637793702714, + "grad_norm": 0.09507238864898682, + "learning_rate": 2.8513104207865027e-05, + "loss": 10.4862, + "step": 10400 + }, + { + "epoch": 0.05198631676196659, + "grad_norm": 0.09395944327116013, + "learning_rate": 2.8511602292923477e-05, + "loss": 10.486, + "step": 10410 + }, + { + "epoch": 0.05203625558690604, + "grad_norm": 0.09412617236375809, + "learning_rate": 2.8510100377981927e-05, + "loss": 10.4888, + "step": 10420 + }, + { + "epoch": 0.05208619441184549, + "grad_norm": 0.10089904814958572, + "learning_rate": 2.8508598463040377e-05, + "loss": 10.488, + "step": 10430 + }, + { + "epoch": 0.05213613323678494, + "grad_norm": 0.09662104398012161, + "learning_rate": 2.8507096548098828e-05, + "loss": 10.4879, + "step": 10440 + }, + { + "epoch": 0.05218607206172439, + "grad_norm": 0.09902186691761017, + "learning_rate": 2.8505594633157274e-05, + "loss": 10.4877, + "step": 10450 + }, + { + "epoch": 0.05223601088666384, + "grad_norm": 0.09871252626180649, + "learning_rate": 2.8504092718215728e-05, + "loss": 10.489, + "step": 10460 + }, + { + "epoch": 0.05228594971160329, + "grad_norm": 0.09363856911659241, + "learning_rate": 2.8502590803274175e-05, + "loss": 10.4855, + "step": 10470 + }, + { + "epoch": 0.05233588853654274, + "grad_norm": 0.0946008712053299, + "learning_rate": 2.8501088888332625e-05, + "loss": 10.4855, + "step": 10480 + }, + { + "epoch": 0.05238582736148219, + "grad_norm": 0.09601443260908127, + "learning_rate": 2.8499586973391075e-05, + "loss": 10.4831, + "step": 10490 + }, + { + "epoch": 0.05243576618642164, + "grad_norm": 0.09569672495126724, + "learning_rate": 2.8498085058449522e-05, + "loss": 10.4833, + "step": 10500 + }, + { + "epoch": 0.05248570501136108, + "grad_norm": 0.09212665259838104, + "learning_rate": 2.8496583143507975e-05, + "loss": 10.4858, + "step": 10510 + }, + { + "epoch": 0.05253564383630053, + "grad_norm": 0.09606766700744629, + "learning_rate": 2.8495081228566422e-05, + "loss": 10.4837, + "step": 10520 + }, + { + "epoch": 0.05258558266123998, + "grad_norm": 0.09896976500749588, + "learning_rate": 2.8493579313624872e-05, + "loss": 10.4854, + "step": 10530 + }, + { + "epoch": 0.05263552148617943, + "grad_norm": 0.0996663048863411, + "learning_rate": 2.8492077398683323e-05, + "loss": 10.4849, + "step": 10540 + }, + { + "epoch": 0.05268546031111888, + "grad_norm": 0.09384378045797348, + "learning_rate": 2.849057548374177e-05, + "loss": 10.4849, + "step": 10550 + }, + { + "epoch": 0.05273539913605833, + "grad_norm": 0.09996800869703293, + "learning_rate": 2.8489073568800223e-05, + "loss": 10.4818, + "step": 10560 + }, + { + "epoch": 0.052785337960997776, + "grad_norm": 0.09955079853534698, + "learning_rate": 2.848757165385867e-05, + "loss": 10.4836, + "step": 10570 + }, + { + "epoch": 0.052835276785937225, + "grad_norm": 0.0904826894402504, + "learning_rate": 2.848606973891712e-05, + "loss": 10.483, + "step": 10580 + }, + { + "epoch": 0.052885215610876675, + "grad_norm": 0.0925852507352829, + "learning_rate": 2.848456782397557e-05, + "loss": 10.4826, + "step": 10590 + }, + { + "epoch": 0.052935154435816124, + "grad_norm": 0.09814697504043579, + "learning_rate": 2.8483065909034017e-05, + "loss": 10.483, + "step": 10600 + }, + { + "epoch": 0.052985093260755574, + "grad_norm": 0.09643968194723129, + "learning_rate": 2.848156399409247e-05, + "loss": 10.4833, + "step": 10610 + }, + { + "epoch": 0.05303503208569502, + "grad_norm": 0.09589682519435883, + "learning_rate": 2.8480062079150917e-05, + "loss": 10.4814, + "step": 10620 + }, + { + "epoch": 0.05308497091063447, + "grad_norm": 0.09177594631910324, + "learning_rate": 2.8478560164209367e-05, + "loss": 10.4801, + "step": 10630 + }, + { + "epoch": 0.05313490973557392, + "grad_norm": 0.08999846875667572, + "learning_rate": 2.8477058249267818e-05, + "loss": 10.4834, + "step": 10640 + }, + { + "epoch": 0.05318484856051337, + "grad_norm": 0.09998583048582077, + "learning_rate": 2.8475556334326264e-05, + "loss": 10.4802, + "step": 10650 + }, + { + "epoch": 0.05323478738545282, + "grad_norm": 0.0951545462012291, + "learning_rate": 2.8474054419384718e-05, + "loss": 10.4797, + "step": 10660 + }, + { + "epoch": 0.05328472621039227, + "grad_norm": 0.09384284913539886, + "learning_rate": 2.8472552504443165e-05, + "loss": 10.4819, + "step": 10670 + }, + { + "epoch": 0.05333466503533172, + "grad_norm": 0.09465983510017395, + "learning_rate": 2.8471050589501615e-05, + "loss": 10.4797, + "step": 10680 + }, + { + "epoch": 0.05338460386027117, + "grad_norm": 0.09355910867452621, + "learning_rate": 2.8469548674560065e-05, + "loss": 10.4793, + "step": 10690 + }, + { + "epoch": 0.05343454268521062, + "grad_norm": 0.10254157334566116, + "learning_rate": 2.8468046759618512e-05, + "loss": 10.481, + "step": 10700 + }, + { + "epoch": 0.05348448151015007, + "grad_norm": 0.09623821824789047, + "learning_rate": 2.8466544844676965e-05, + "loss": 10.4798, + "step": 10710 + }, + { + "epoch": 0.05353442033508952, + "grad_norm": 0.0956728383898735, + "learning_rate": 2.8465042929735412e-05, + "loss": 10.4775, + "step": 10720 + }, + { + "epoch": 0.05358435916002897, + "grad_norm": 0.1043584942817688, + "learning_rate": 2.8463541014793862e-05, + "loss": 10.4797, + "step": 10730 + }, + { + "epoch": 0.05363429798496842, + "grad_norm": 0.09671546518802643, + "learning_rate": 2.8462039099852313e-05, + "loss": 10.4768, + "step": 10740 + }, + { + "epoch": 0.053684236809907866, + "grad_norm": 0.0880408063530922, + "learning_rate": 2.846053718491076e-05, + "loss": 10.4796, + "step": 10750 + }, + { + "epoch": 0.05373417563484731, + "grad_norm": 0.0982058197259903, + "learning_rate": 2.8459035269969213e-05, + "loss": 10.4792, + "step": 10760 + }, + { + "epoch": 0.05378411445978676, + "grad_norm": 0.09881541877985, + "learning_rate": 2.845753335502766e-05, + "loss": 10.4785, + "step": 10770 + }, + { + "epoch": 0.05383405328472621, + "grad_norm": 0.10053160041570663, + "learning_rate": 2.8456031440086113e-05, + "loss": 10.4742, + "step": 10780 + }, + { + "epoch": 0.05388399210966566, + "grad_norm": 0.0990302637219429, + "learning_rate": 2.845452952514456e-05, + "loss": 10.4763, + "step": 10790 + }, + { + "epoch": 0.05393393093460511, + "grad_norm": 0.09606090933084488, + "learning_rate": 2.8453027610203007e-05, + "loss": 10.478, + "step": 10800 + }, + { + "epoch": 0.053983869759544556, + "grad_norm": 0.09462147206068039, + "learning_rate": 2.845152569526146e-05, + "loss": 10.4758, + "step": 10810 + }, + { + "epoch": 0.054033808584484005, + "grad_norm": 0.09580814093351364, + "learning_rate": 2.8450023780319907e-05, + "loss": 10.4743, + "step": 10820 + }, + { + "epoch": 0.054083747409423455, + "grad_norm": 0.09848089516162872, + "learning_rate": 2.844852186537836e-05, + "loss": 10.4749, + "step": 10830 + }, + { + "epoch": 0.054133686234362904, + "grad_norm": 0.09434827417135239, + "learning_rate": 2.8447019950436808e-05, + "loss": 10.4777, + "step": 10840 + }, + { + "epoch": 0.054183625059302354, + "grad_norm": 0.09295500814914703, + "learning_rate": 2.8445518035495254e-05, + "loss": 10.4744, + "step": 10850 + }, + { + "epoch": 0.0542335638842418, + "grad_norm": 0.0959131121635437, + "learning_rate": 2.8444016120553708e-05, + "loss": 10.4755, + "step": 10860 + }, + { + "epoch": 0.05428350270918125, + "grad_norm": 0.09555883705615997, + "learning_rate": 2.8442514205612155e-05, + "loss": 10.4737, + "step": 10870 + }, + { + "epoch": 0.0543334415341207, + "grad_norm": 0.09721937030553818, + "learning_rate": 2.8441012290670608e-05, + "loss": 10.4752, + "step": 10880 + }, + { + "epoch": 0.05438338035906015, + "grad_norm": 0.09354712814092636, + "learning_rate": 2.8439510375729055e-05, + "loss": 10.4721, + "step": 10890 + }, + { + "epoch": 0.0544333191839996, + "grad_norm": 0.09647790342569351, + "learning_rate": 2.8438008460787502e-05, + "loss": 10.4745, + "step": 10900 + }, + { + "epoch": 0.05448325800893905, + "grad_norm": 0.10081273317337036, + "learning_rate": 2.8436506545845955e-05, + "loss": 10.4725, + "step": 10910 + }, + { + "epoch": 0.0545331968338785, + "grad_norm": 0.09475501626729965, + "learning_rate": 2.8435004630904402e-05, + "loss": 10.4723, + "step": 10920 + }, + { + "epoch": 0.05458313565881795, + "grad_norm": 0.09256067872047424, + "learning_rate": 2.8433502715962856e-05, + "loss": 10.4717, + "step": 10930 + }, + { + "epoch": 0.0546330744837574, + "grad_norm": 0.09541960060596466, + "learning_rate": 2.8432000801021303e-05, + "loss": 10.4727, + "step": 10940 + }, + { + "epoch": 0.05468301330869685, + "grad_norm": 0.09717325121164322, + "learning_rate": 2.843049888607975e-05, + "loss": 10.4725, + "step": 10950 + }, + { + "epoch": 0.0547329521336363, + "grad_norm": 0.09780695289373398, + "learning_rate": 2.8428996971138203e-05, + "loss": 10.4726, + "step": 10960 + }, + { + "epoch": 0.05478289095857575, + "grad_norm": 0.09682448208332062, + "learning_rate": 2.842749505619665e-05, + "loss": 10.4728, + "step": 10970 + }, + { + "epoch": 0.0548328297835152, + "grad_norm": 0.0957455188035965, + "learning_rate": 2.8425993141255103e-05, + "loss": 10.4719, + "step": 10980 + }, + { + "epoch": 0.054882768608454646, + "grad_norm": 0.0956406518816948, + "learning_rate": 2.842449122631355e-05, + "loss": 10.4714, + "step": 10990 + }, + { + "epoch": 0.054932707433394096, + "grad_norm": 0.0934106856584549, + "learning_rate": 2.8422989311371997e-05, + "loss": 10.4694, + "step": 11000 + }, + { + "epoch": 0.05498264625833354, + "grad_norm": 0.09818681329488754, + "learning_rate": 2.842148739643045e-05, + "loss": 10.4721, + "step": 11010 + }, + { + "epoch": 0.05503258508327299, + "grad_norm": 0.09489887952804565, + "learning_rate": 2.8419985481488897e-05, + "loss": 10.4708, + "step": 11020 + }, + { + "epoch": 0.05508252390821244, + "grad_norm": 0.09038610011339188, + "learning_rate": 2.841848356654735e-05, + "loss": 10.4731, + "step": 11030 + }, + { + "epoch": 0.05513246273315189, + "grad_norm": 0.0944511890411377, + "learning_rate": 2.8416981651605798e-05, + "loss": 10.4708, + "step": 11040 + }, + { + "epoch": 0.055182401558091336, + "grad_norm": 0.09606794267892838, + "learning_rate": 2.8415479736664244e-05, + "loss": 10.4689, + "step": 11050 + }, + { + "epoch": 0.055232340383030785, + "grad_norm": 0.09804631024599075, + "learning_rate": 2.8413977821722698e-05, + "loss": 10.4698, + "step": 11060 + }, + { + "epoch": 0.055282279207970235, + "grad_norm": 0.10113038122653961, + "learning_rate": 2.8412475906781145e-05, + "loss": 10.4686, + "step": 11070 + }, + { + "epoch": 0.055332218032909684, + "grad_norm": 0.10174177587032318, + "learning_rate": 2.8410973991839598e-05, + "loss": 10.4694, + "step": 11080 + }, + { + "epoch": 0.055382156857849134, + "grad_norm": 0.09673687815666199, + "learning_rate": 2.8409472076898045e-05, + "loss": 10.4677, + "step": 11090 + }, + { + "epoch": 0.05543209568278858, + "grad_norm": 0.09577104449272156, + "learning_rate": 2.8407970161956495e-05, + "loss": 10.4673, + "step": 11100 + }, + { + "epoch": 0.05548203450772803, + "grad_norm": 0.09360501170158386, + "learning_rate": 2.8406468247014945e-05, + "loss": 10.47, + "step": 11110 + }, + { + "epoch": 0.05553197333266748, + "grad_norm": 0.09364285320043564, + "learning_rate": 2.8404966332073392e-05, + "loss": 10.4676, + "step": 11120 + }, + { + "epoch": 0.05558191215760693, + "grad_norm": 0.093747578561306, + "learning_rate": 2.8403464417131846e-05, + "loss": 10.4669, + "step": 11130 + }, + { + "epoch": 0.05563185098254638, + "grad_norm": 0.0995292142033577, + "learning_rate": 2.8401962502190293e-05, + "loss": 10.4677, + "step": 11140 + }, + { + "epoch": 0.05568178980748583, + "grad_norm": 0.0943877249956131, + "learning_rate": 2.8400460587248743e-05, + "loss": 10.4691, + "step": 11150 + }, + { + "epoch": 0.05573172863242528, + "grad_norm": 0.09354741126298904, + "learning_rate": 2.8398958672307193e-05, + "loss": 10.4657, + "step": 11160 + }, + { + "epoch": 0.05578166745736473, + "grad_norm": 0.09523334354162216, + "learning_rate": 2.839745675736564e-05, + "loss": 10.4658, + "step": 11170 + }, + { + "epoch": 0.05583160628230418, + "grad_norm": 0.09460029006004333, + "learning_rate": 2.8395954842424093e-05, + "loss": 10.4631, + "step": 11180 + }, + { + "epoch": 0.05588154510724363, + "grad_norm": 0.09529443830251694, + "learning_rate": 2.839445292748254e-05, + "loss": 10.4652, + "step": 11190 + }, + { + "epoch": 0.05593148393218308, + "grad_norm": 0.09970243275165558, + "learning_rate": 2.839295101254099e-05, + "loss": 10.4644, + "step": 11200 + }, + { + "epoch": 0.05598142275712253, + "grad_norm": 0.09669343382120132, + "learning_rate": 2.839144909759944e-05, + "loss": 10.465, + "step": 11210 + }, + { + "epoch": 0.05603136158206198, + "grad_norm": 0.0948340967297554, + "learning_rate": 2.8389947182657887e-05, + "loss": 10.4656, + "step": 11220 + }, + { + "epoch": 0.056081300407001426, + "grad_norm": 0.09210893511772156, + "learning_rate": 2.838844526771634e-05, + "loss": 10.4647, + "step": 11230 + }, + { + "epoch": 0.056131239231940876, + "grad_norm": 0.09912335872650146, + "learning_rate": 2.8386943352774788e-05, + "loss": 10.4635, + "step": 11240 + }, + { + "epoch": 0.05618117805688032, + "grad_norm": 0.09609910845756531, + "learning_rate": 2.8385441437833238e-05, + "loss": 10.4647, + "step": 11250 + }, + { + "epoch": 0.05623111688181977, + "grad_norm": 0.09431801736354828, + "learning_rate": 2.8383939522891688e-05, + "loss": 10.4616, + "step": 11260 + }, + { + "epoch": 0.05628105570675922, + "grad_norm": 0.09194690734148026, + "learning_rate": 2.8382437607950135e-05, + "loss": 10.4663, + "step": 11270 + }, + { + "epoch": 0.05633099453169867, + "grad_norm": 0.09139011800289154, + "learning_rate": 2.8380935693008588e-05, + "loss": 10.462, + "step": 11280 + }, + { + "epoch": 0.056380933356638116, + "grad_norm": 0.09335776418447495, + "learning_rate": 2.8379433778067035e-05, + "loss": 10.4619, + "step": 11290 + }, + { + "epoch": 0.056430872181577565, + "grad_norm": 0.0932675376534462, + "learning_rate": 2.8377931863125485e-05, + "loss": 10.4618, + "step": 11300 + }, + { + "epoch": 0.056480811006517015, + "grad_norm": 0.09276323765516281, + "learning_rate": 2.8376429948183935e-05, + "loss": 10.4618, + "step": 11310 + }, + { + "epoch": 0.056530749831456464, + "grad_norm": 0.09329287707805634, + "learning_rate": 2.8374928033242382e-05, + "loss": 10.4613, + "step": 11320 + }, + { + "epoch": 0.056580688656395914, + "grad_norm": 0.09470265358686447, + "learning_rate": 2.8373426118300836e-05, + "loss": 10.4618, + "step": 11330 + }, + { + "epoch": 0.05663062748133536, + "grad_norm": 0.09163019806146622, + "learning_rate": 2.8371924203359283e-05, + "loss": 10.4631, + "step": 11340 + }, + { + "epoch": 0.05668056630627481, + "grad_norm": 0.09264067560434341, + "learning_rate": 2.8370422288417733e-05, + "loss": 10.4619, + "step": 11350 + }, + { + "epoch": 0.05673050513121426, + "grad_norm": 0.09891056269407272, + "learning_rate": 2.8368920373476183e-05, + "loss": 10.459, + "step": 11360 + }, + { + "epoch": 0.05678044395615371, + "grad_norm": 0.09387898445129395, + "learning_rate": 2.836741845853463e-05, + "loss": 10.4621, + "step": 11370 + }, + { + "epoch": 0.05683038278109316, + "grad_norm": 0.09117914736270905, + "learning_rate": 2.8365916543593083e-05, + "loss": 10.4607, + "step": 11380 + }, + { + "epoch": 0.05688032160603261, + "grad_norm": 0.09382776916027069, + "learning_rate": 2.836441462865153e-05, + "loss": 10.4601, + "step": 11390 + }, + { + "epoch": 0.05693026043097206, + "grad_norm": 0.09707043319940567, + "learning_rate": 2.836291271370998e-05, + "loss": 10.4598, + "step": 11400 + }, + { + "epoch": 0.05698019925591151, + "grad_norm": 0.09478152543306351, + "learning_rate": 2.836141079876843e-05, + "loss": 10.4594, + "step": 11410 + }, + { + "epoch": 0.05703013808085096, + "grad_norm": 0.09193360805511475, + "learning_rate": 2.835990888382688e-05, + "loss": 10.4586, + "step": 11420 + }, + { + "epoch": 0.05708007690579041, + "grad_norm": 0.09122911840677261, + "learning_rate": 2.835840696888533e-05, + "loss": 10.4587, + "step": 11430 + }, + { + "epoch": 0.05713001573072986, + "grad_norm": 0.09300627559423447, + "learning_rate": 2.8356905053943778e-05, + "loss": 10.4607, + "step": 11440 + }, + { + "epoch": 0.05717995455566931, + "grad_norm": 0.09430982172489166, + "learning_rate": 2.8355403139002228e-05, + "loss": 10.4581, + "step": 11450 + }, + { + "epoch": 0.05722989338060876, + "grad_norm": 0.1006307452917099, + "learning_rate": 2.8353901224060678e-05, + "loss": 10.4581, + "step": 11460 + }, + { + "epoch": 0.057279832205548206, + "grad_norm": 0.09385743737220764, + "learning_rate": 2.8352399309119128e-05, + "loss": 10.4586, + "step": 11470 + }, + { + "epoch": 0.057329771030487656, + "grad_norm": 0.09824958443641663, + "learning_rate": 2.835089739417758e-05, + "loss": 10.4587, + "step": 11480 + }, + { + "epoch": 0.057379709855427105, + "grad_norm": 0.0924108624458313, + "learning_rate": 2.8349395479236025e-05, + "loss": 10.4568, + "step": 11490 + }, + { + "epoch": 0.05742964868036655, + "grad_norm": 0.09391859918832779, + "learning_rate": 2.834789356429448e-05, + "loss": 10.4601, + "step": 11500 + }, + { + "epoch": 0.057479587505306, + "grad_norm": 0.0961366668343544, + "learning_rate": 2.8346391649352925e-05, + "loss": 10.4558, + "step": 11510 + }, + { + "epoch": 0.05752952633024545, + "grad_norm": 0.0933762714266777, + "learning_rate": 2.8344889734411376e-05, + "loss": 10.4547, + "step": 11520 + }, + { + "epoch": 0.057579465155184896, + "grad_norm": 0.09340479224920273, + "learning_rate": 2.8343387819469826e-05, + "loss": 10.4567, + "step": 11530 + }, + { + "epoch": 0.057629403980124345, + "grad_norm": 0.09362339973449707, + "learning_rate": 2.8341885904528273e-05, + "loss": 10.4587, + "step": 11540 + }, + { + "epoch": 0.057679342805063795, + "grad_norm": 0.09768813848495483, + "learning_rate": 2.8340383989586726e-05, + "loss": 10.4558, + "step": 11550 + }, + { + "epoch": 0.057729281630003244, + "grad_norm": 0.09130477160215378, + "learning_rate": 2.8338882074645173e-05, + "loss": 10.4552, + "step": 11560 + }, + { + "epoch": 0.057779220454942694, + "grad_norm": 0.0935993641614914, + "learning_rate": 2.8337380159703623e-05, + "loss": 10.4522, + "step": 11570 + }, + { + "epoch": 0.05782915927988214, + "grad_norm": 0.09301052987575531, + "learning_rate": 2.8335878244762073e-05, + "loss": 10.4558, + "step": 11580 + }, + { + "epoch": 0.05787909810482159, + "grad_norm": 0.09924156963825226, + "learning_rate": 2.833437632982052e-05, + "loss": 10.4555, + "step": 11590 + }, + { + "epoch": 0.05792903692976104, + "grad_norm": 0.09675630927085876, + "learning_rate": 2.8332874414878974e-05, + "loss": 10.4523, + "step": 11600 + }, + { + "epoch": 0.05797897575470049, + "grad_norm": 0.09440577030181885, + "learning_rate": 2.833137249993742e-05, + "loss": 10.4536, + "step": 11610 + }, + { + "epoch": 0.05802891457963994, + "grad_norm": 0.09317108243703842, + "learning_rate": 2.832987058499587e-05, + "loss": 10.4548, + "step": 11620 + }, + { + "epoch": 0.05807885340457939, + "grad_norm": 0.09359714388847351, + "learning_rate": 2.832836867005432e-05, + "loss": 10.4534, + "step": 11630 + }, + { + "epoch": 0.05812879222951884, + "grad_norm": 0.09872293472290039, + "learning_rate": 2.8326866755112768e-05, + "loss": 10.4551, + "step": 11640 + }, + { + "epoch": 0.05817873105445829, + "grad_norm": 0.0959540382027626, + "learning_rate": 2.832536484017122e-05, + "loss": 10.4505, + "step": 11650 + }, + { + "epoch": 0.05822866987939774, + "grad_norm": 0.09325166791677475, + "learning_rate": 2.8323862925229668e-05, + "loss": 10.4514, + "step": 11660 + }, + { + "epoch": 0.05827860870433719, + "grad_norm": 0.0949905589222908, + "learning_rate": 2.8322361010288118e-05, + "loss": 10.4537, + "step": 11670 + }, + { + "epoch": 0.05832854752927664, + "grad_norm": 0.09572743624448776, + "learning_rate": 2.832085909534657e-05, + "loss": 10.4515, + "step": 11680 + }, + { + "epoch": 0.05837848635421609, + "grad_norm": 0.09633267670869827, + "learning_rate": 2.8319357180405015e-05, + "loss": 10.4507, + "step": 11690 + }, + { + "epoch": 0.05842842517915554, + "grad_norm": 0.09619545936584473, + "learning_rate": 2.831785526546347e-05, + "loss": 10.4505, + "step": 11700 + }, + { + "epoch": 0.058478364004094986, + "grad_norm": 0.0950017124414444, + "learning_rate": 2.8316353350521915e-05, + "loss": 10.4486, + "step": 11710 + }, + { + "epoch": 0.058528302829034436, + "grad_norm": 0.09573563188314438, + "learning_rate": 2.8314851435580366e-05, + "loss": 10.45, + "step": 11720 + }, + { + "epoch": 0.058578241653973885, + "grad_norm": 0.09022394567728043, + "learning_rate": 2.8313349520638816e-05, + "loss": 10.4487, + "step": 11730 + }, + { + "epoch": 0.058628180478913335, + "grad_norm": 0.09762822836637497, + "learning_rate": 2.8311847605697266e-05, + "loss": 10.451, + "step": 11740 + }, + { + "epoch": 0.05867811930385278, + "grad_norm": 0.10274951159954071, + "learning_rate": 2.8310345690755716e-05, + "loss": 10.4502, + "step": 11750 + }, + { + "epoch": 0.05872805812879223, + "grad_norm": 0.09342820197343826, + "learning_rate": 2.8308843775814163e-05, + "loss": 10.4512, + "step": 11760 + }, + { + "epoch": 0.058777996953731676, + "grad_norm": 0.09088432788848877, + "learning_rate": 2.8307341860872613e-05, + "loss": 10.4489, + "step": 11770 + }, + { + "epoch": 0.058827935778671125, + "grad_norm": 0.09308407455682755, + "learning_rate": 2.8305839945931063e-05, + "loss": 10.4484, + "step": 11780 + }, + { + "epoch": 0.058877874603610575, + "grad_norm": 0.09679020196199417, + "learning_rate": 2.8304338030989514e-05, + "loss": 10.4507, + "step": 11790 + }, + { + "epoch": 0.058927813428550024, + "grad_norm": 0.09807083010673523, + "learning_rate": 2.8302836116047964e-05, + "loss": 10.4473, + "step": 11800 + }, + { + "epoch": 0.058977752253489474, + "grad_norm": 0.09622442722320557, + "learning_rate": 2.830133420110641e-05, + "loss": 10.4472, + "step": 11810 + }, + { + "epoch": 0.05902769107842892, + "grad_norm": 0.09451454877853394, + "learning_rate": 2.829983228616486e-05, + "loss": 10.4465, + "step": 11820 + }, + { + "epoch": 0.05907762990336837, + "grad_norm": 0.10234472155570984, + "learning_rate": 2.829833037122331e-05, + "loss": 10.4481, + "step": 11830 + }, + { + "epoch": 0.05912756872830782, + "grad_norm": 0.09240438789129257, + "learning_rate": 2.829682845628176e-05, + "loss": 10.4469, + "step": 11840 + }, + { + "epoch": 0.05917750755324727, + "grad_norm": 0.09405849874019623, + "learning_rate": 2.829532654134021e-05, + "loss": 10.4481, + "step": 11850 + }, + { + "epoch": 0.05922744637818672, + "grad_norm": 0.08946629613637924, + "learning_rate": 2.8293824626398658e-05, + "loss": 10.4498, + "step": 11860 + }, + { + "epoch": 0.05927738520312617, + "grad_norm": 0.09469353407621384, + "learning_rate": 2.8292322711457108e-05, + "loss": 10.4447, + "step": 11870 + }, + { + "epoch": 0.05932732402806562, + "grad_norm": 0.09458359330892563, + "learning_rate": 2.829082079651556e-05, + "loss": 10.4443, + "step": 11880 + }, + { + "epoch": 0.05937726285300507, + "grad_norm": 0.08880256861448288, + "learning_rate": 2.828931888157401e-05, + "loss": 10.4459, + "step": 11890 + }, + { + "epoch": 0.05942720167794452, + "grad_norm": 0.09645318239927292, + "learning_rate": 2.828781696663246e-05, + "loss": 10.4466, + "step": 11900 + }, + { + "epoch": 0.05947714050288397, + "grad_norm": 0.0943111777305603, + "learning_rate": 2.8286315051690905e-05, + "loss": 10.4462, + "step": 11910 + }, + { + "epoch": 0.05952707932782342, + "grad_norm": 0.09864623099565506, + "learning_rate": 2.8284813136749356e-05, + "loss": 10.4449, + "step": 11920 + }, + { + "epoch": 0.05957701815276287, + "grad_norm": 0.09035992622375488, + "learning_rate": 2.8283311221807806e-05, + "loss": 10.4435, + "step": 11930 + }, + { + "epoch": 0.05962695697770232, + "grad_norm": 0.09477786719799042, + "learning_rate": 2.8281809306866256e-05, + "loss": 10.4472, + "step": 11940 + }, + { + "epoch": 0.059676895802641766, + "grad_norm": 0.09425050765275955, + "learning_rate": 2.8280307391924706e-05, + "loss": 10.4472, + "step": 11950 + }, + { + "epoch": 0.059726834627581216, + "grad_norm": 0.0916745588183403, + "learning_rate": 2.8278805476983153e-05, + "loss": 10.4452, + "step": 11960 + }, + { + "epoch": 0.059776773452520665, + "grad_norm": 0.09402048587799072, + "learning_rate": 2.8277303562041603e-05, + "loss": 10.4401, + "step": 11970 + }, + { + "epoch": 0.059826712277460115, + "grad_norm": 0.09692014008760452, + "learning_rate": 2.8275801647100053e-05, + "loss": 10.444, + "step": 11980 + }, + { + "epoch": 0.05987665110239956, + "grad_norm": 0.09546181559562683, + "learning_rate": 2.8274299732158504e-05, + "loss": 10.4423, + "step": 11990 + }, + { + "epoch": 0.059926589927339007, + "grad_norm": 0.09214867651462555, + "learning_rate": 2.8272797817216954e-05, + "loss": 10.4401, + "step": 12000 + }, + { + "epoch": 0.059976528752278456, + "grad_norm": 0.09537333995103836, + "learning_rate": 2.82712959022754e-05, + "loss": 10.4422, + "step": 12010 + }, + { + "epoch": 0.060026467577217905, + "grad_norm": 0.0944821685552597, + "learning_rate": 2.826979398733385e-05, + "loss": 10.4448, + "step": 12020 + }, + { + "epoch": 0.060076406402157355, + "grad_norm": 0.08920750021934509, + "learning_rate": 2.82682920723923e-05, + "loss": 10.4432, + "step": 12030 + }, + { + "epoch": 0.060126345227096804, + "grad_norm": 0.09650328755378723, + "learning_rate": 2.826679015745075e-05, + "loss": 10.4419, + "step": 12040 + }, + { + "epoch": 0.060176284052036254, + "grad_norm": 0.09876326471567154, + "learning_rate": 2.82652882425092e-05, + "loss": 10.439, + "step": 12050 + }, + { + "epoch": 0.0602262228769757, + "grad_norm": 0.09975486248731613, + "learning_rate": 2.826378632756765e-05, + "loss": 10.439, + "step": 12060 + }, + { + "epoch": 0.06027616170191515, + "grad_norm": 0.09765785932540894, + "learning_rate": 2.8262284412626098e-05, + "loss": 10.4414, + "step": 12070 + }, + { + "epoch": 0.0603261005268546, + "grad_norm": 0.09784495830535889, + "learning_rate": 2.826078249768455e-05, + "loss": 10.4388, + "step": 12080 + }, + { + "epoch": 0.06037603935179405, + "grad_norm": 0.09981703758239746, + "learning_rate": 2.8259280582743e-05, + "loss": 10.4412, + "step": 12090 + }, + { + "epoch": 0.0604259781767335, + "grad_norm": 0.09435340017080307, + "learning_rate": 2.825777866780145e-05, + "loss": 10.439, + "step": 12100 + }, + { + "epoch": 0.06047591700167295, + "grad_norm": 0.10208732634782791, + "learning_rate": 2.82562767528599e-05, + "loss": 10.4376, + "step": 12110 + }, + { + "epoch": 0.0605258558266124, + "grad_norm": 0.09435919672250748, + "learning_rate": 2.8254774837918346e-05, + "loss": 10.44, + "step": 12120 + }, + { + "epoch": 0.06057579465155185, + "grad_norm": 0.09108801931142807, + "learning_rate": 2.8253272922976796e-05, + "loss": 10.4375, + "step": 12130 + }, + { + "epoch": 0.0606257334764913, + "grad_norm": 0.0965215414762497, + "learning_rate": 2.8251771008035246e-05, + "loss": 10.439, + "step": 12140 + }, + { + "epoch": 0.06067567230143075, + "grad_norm": 0.09519945085048676, + "learning_rate": 2.8250269093093696e-05, + "loss": 10.4363, + "step": 12150 + }, + { + "epoch": 0.0607256111263702, + "grad_norm": 0.09803465753793716, + "learning_rate": 2.8248767178152146e-05, + "loss": 10.4383, + "step": 12160 + }, + { + "epoch": 0.06077554995130965, + "grad_norm": 0.09530342370271683, + "learning_rate": 2.8247265263210593e-05, + "loss": 10.438, + "step": 12170 + }, + { + "epoch": 0.0608254887762491, + "grad_norm": 0.09839750826358795, + "learning_rate": 2.8245763348269043e-05, + "loss": 10.4355, + "step": 12180 + }, + { + "epoch": 0.060875427601188546, + "grad_norm": 0.09567271173000336, + "learning_rate": 2.8244261433327494e-05, + "loss": 10.4368, + "step": 12190 + }, + { + "epoch": 0.060925366426127996, + "grad_norm": 0.09974398463964462, + "learning_rate": 2.8242759518385944e-05, + "loss": 10.4357, + "step": 12200 + }, + { + "epoch": 0.060975305251067445, + "grad_norm": 0.09306304156780243, + "learning_rate": 2.8241257603444394e-05, + "loss": 10.4359, + "step": 12210 + }, + { + "epoch": 0.061025244076006895, + "grad_norm": 0.09435778856277466, + "learning_rate": 2.823975568850284e-05, + "loss": 10.4371, + "step": 12220 + }, + { + "epoch": 0.061075182900946344, + "grad_norm": 0.10123757272958755, + "learning_rate": 2.823825377356129e-05, + "loss": 10.4379, + "step": 12230 + }, + { + "epoch": 0.061125121725885787, + "grad_norm": 0.09351859986782074, + "learning_rate": 2.823675185861974e-05, + "loss": 10.434, + "step": 12240 + }, + { + "epoch": 0.061175060550825236, + "grad_norm": 0.09679336100816727, + "learning_rate": 2.823524994367819e-05, + "loss": 10.4362, + "step": 12250 + }, + { + "epoch": 0.061224999375764685, + "grad_norm": 0.09072909504175186, + "learning_rate": 2.823374802873664e-05, + "loss": 10.4377, + "step": 12260 + }, + { + "epoch": 0.061274938200704135, + "grad_norm": 0.08954602479934692, + "learning_rate": 2.8232246113795088e-05, + "loss": 10.4366, + "step": 12270 + }, + { + "epoch": 0.061324877025643584, + "grad_norm": 0.0955163910984993, + "learning_rate": 2.823074419885354e-05, + "loss": 10.4351, + "step": 12280 + }, + { + "epoch": 0.061374815850583034, + "grad_norm": 0.09503049403429031, + "learning_rate": 2.822924228391199e-05, + "loss": 10.4351, + "step": 12290 + }, + { + "epoch": 0.06142475467552248, + "grad_norm": 0.10008996725082397, + "learning_rate": 2.822774036897044e-05, + "loss": 10.4331, + "step": 12300 + }, + { + "epoch": 0.06147469350046193, + "grad_norm": 0.1014408990740776, + "learning_rate": 2.822623845402889e-05, + "loss": 10.4347, + "step": 12310 + }, + { + "epoch": 0.06152463232540138, + "grad_norm": 0.09109607338905334, + "learning_rate": 2.8224736539087336e-05, + "loss": 10.4333, + "step": 12320 + }, + { + "epoch": 0.06157457115034083, + "grad_norm": 0.09781739115715027, + "learning_rate": 2.8223234624145786e-05, + "loss": 10.4332, + "step": 12330 + }, + { + "epoch": 0.06162450997528028, + "grad_norm": 0.09548245370388031, + "learning_rate": 2.8221732709204236e-05, + "loss": 10.4314, + "step": 12340 + }, + { + "epoch": 0.06167444880021973, + "grad_norm": 0.09480303525924683, + "learning_rate": 2.8220230794262686e-05, + "loss": 10.43, + "step": 12350 + }, + { + "epoch": 0.06172438762515918, + "grad_norm": 0.09702970832586288, + "learning_rate": 2.8218728879321136e-05, + "loss": 10.433, + "step": 12360 + }, + { + "epoch": 0.06177432645009863, + "grad_norm": 0.09529626369476318, + "learning_rate": 2.8217226964379583e-05, + "loss": 10.4315, + "step": 12370 + }, + { + "epoch": 0.06182426527503808, + "grad_norm": 0.09766676276922226, + "learning_rate": 2.8215725049438033e-05, + "loss": 10.4315, + "step": 12380 + }, + { + "epoch": 0.06187420409997753, + "grad_norm": 0.09520022571086884, + "learning_rate": 2.8214223134496484e-05, + "loss": 10.4313, + "step": 12390 + }, + { + "epoch": 0.06192414292491698, + "grad_norm": 0.09235453605651855, + "learning_rate": 2.8212721219554934e-05, + "loss": 10.4308, + "step": 12400 + }, + { + "epoch": 0.06197408174985643, + "grad_norm": 0.09697452187538147, + "learning_rate": 2.8211219304613384e-05, + "loss": 10.4296, + "step": 12410 + }, + { + "epoch": 0.06202402057479588, + "grad_norm": 0.09309589117765427, + "learning_rate": 2.820971738967183e-05, + "loss": 10.4306, + "step": 12420 + }, + { + "epoch": 0.062073959399735326, + "grad_norm": 0.09192889928817749, + "learning_rate": 2.8208215474730284e-05, + "loss": 10.4305, + "step": 12430 + }, + { + "epoch": 0.062123898224674776, + "grad_norm": 0.09932176768779755, + "learning_rate": 2.820671355978873e-05, + "loss": 10.4302, + "step": 12440 + }, + { + "epoch": 0.062173837049614225, + "grad_norm": 0.09811878204345703, + "learning_rate": 2.820521164484718e-05, + "loss": 10.429, + "step": 12450 + }, + { + "epoch": 0.062223775874553675, + "grad_norm": 0.09283413738012314, + "learning_rate": 2.820370972990563e-05, + "loss": 10.4304, + "step": 12460 + }, + { + "epoch": 0.062273714699493124, + "grad_norm": 0.09722374379634857, + "learning_rate": 2.8202207814964078e-05, + "loss": 10.429, + "step": 12470 + }, + { + "epoch": 0.06232365352443257, + "grad_norm": 0.09965924918651581, + "learning_rate": 2.8200705900022532e-05, + "loss": 10.4291, + "step": 12480 + }, + { + "epoch": 0.062373592349372016, + "grad_norm": 0.09636944532394409, + "learning_rate": 2.819920398508098e-05, + "loss": 10.4304, + "step": 12490 + }, + { + "epoch": 0.062423531174311465, + "grad_norm": 0.09442856162786484, + "learning_rate": 2.819770207013943e-05, + "loss": 10.4282, + "step": 12500 + }, + { + "epoch": 0.062473469999250915, + "grad_norm": 0.09579756110906601, + "learning_rate": 2.819620015519788e-05, + "loss": 10.4286, + "step": 12510 + }, + { + "epoch": 0.06252340882419037, + "grad_norm": 0.09269638359546661, + "learning_rate": 2.8194698240256326e-05, + "loss": 10.4284, + "step": 12520 + }, + { + "epoch": 0.06257334764912982, + "grad_norm": 0.0913536548614502, + "learning_rate": 2.819319632531478e-05, + "loss": 10.4262, + "step": 12530 + }, + { + "epoch": 0.06262328647406927, + "grad_norm": 0.09674961864948273, + "learning_rate": 2.8191694410373226e-05, + "loss": 10.4301, + "step": 12540 + }, + { + "epoch": 0.06267322529900872, + "grad_norm": 0.09084776788949966, + "learning_rate": 2.8190192495431676e-05, + "loss": 10.4284, + "step": 12550 + }, + { + "epoch": 0.06272316412394817, + "grad_norm": 0.09603569656610489, + "learning_rate": 2.8188690580490126e-05, + "loss": 10.4262, + "step": 12560 + }, + { + "epoch": 0.06277310294888762, + "grad_norm": 0.100166916847229, + "learning_rate": 2.8187188665548573e-05, + "loss": 10.4253, + "step": 12570 + }, + { + "epoch": 0.06282304177382707, + "grad_norm": 0.09294571727514267, + "learning_rate": 2.8185686750607027e-05, + "loss": 10.4256, + "step": 12580 + }, + { + "epoch": 0.06287298059876652, + "grad_norm": 0.09238424152135849, + "learning_rate": 2.8184184835665474e-05, + "loss": 10.427, + "step": 12590 + }, + { + "epoch": 0.06292291942370597, + "grad_norm": 0.09537918120622635, + "learning_rate": 2.8182682920723924e-05, + "loss": 10.425, + "step": 12600 + }, + { + "epoch": 0.0629728582486454, + "grad_norm": 0.09167756885290146, + "learning_rate": 2.8181181005782374e-05, + "loss": 10.4268, + "step": 12610 + }, + { + "epoch": 0.06302279707358485, + "grad_norm": 0.09255441278219223, + "learning_rate": 2.817967909084082e-05, + "loss": 10.4283, + "step": 12620 + }, + { + "epoch": 0.0630727358985243, + "grad_norm": 0.09267347306013107, + "learning_rate": 2.8178177175899274e-05, + "loss": 10.4236, + "step": 12630 + }, + { + "epoch": 0.06312267472346375, + "grad_norm": 0.09058328717947006, + "learning_rate": 2.817667526095772e-05, + "loss": 10.4257, + "step": 12640 + }, + { + "epoch": 0.0631726135484032, + "grad_norm": 0.09093445539474487, + "learning_rate": 2.817517334601617e-05, + "loss": 10.4247, + "step": 12650 + }, + { + "epoch": 0.06322255237334265, + "grad_norm": 0.09584364295005798, + "learning_rate": 2.817367143107462e-05, + "loss": 10.4212, + "step": 12660 + }, + { + "epoch": 0.0632724911982821, + "grad_norm": 0.09648899734020233, + "learning_rate": 2.8172169516133068e-05, + "loss": 10.4236, + "step": 12670 + }, + { + "epoch": 0.06332243002322155, + "grad_norm": 0.10392139106988907, + "learning_rate": 2.8170667601191522e-05, + "loss": 10.4231, + "step": 12680 + }, + { + "epoch": 0.063372368848161, + "grad_norm": 0.09030050039291382, + "learning_rate": 2.816916568624997e-05, + "loss": 10.424, + "step": 12690 + }, + { + "epoch": 0.06342230767310045, + "grad_norm": 0.0974743589758873, + "learning_rate": 2.816766377130842e-05, + "loss": 10.4226, + "step": 12700 + }, + { + "epoch": 0.0634722464980399, + "grad_norm": 0.09758103638887405, + "learning_rate": 2.816616185636687e-05, + "loss": 10.4236, + "step": 12710 + }, + { + "epoch": 0.06352218532297935, + "grad_norm": 0.09443262219429016, + "learning_rate": 2.8164659941425316e-05, + "loss": 10.4204, + "step": 12720 + }, + { + "epoch": 0.0635721241479188, + "grad_norm": 0.09165245294570923, + "learning_rate": 2.816315802648377e-05, + "loss": 10.4208, + "step": 12730 + }, + { + "epoch": 0.06362206297285825, + "grad_norm": 0.09566860646009445, + "learning_rate": 2.8161656111542216e-05, + "loss": 10.4215, + "step": 12740 + }, + { + "epoch": 0.0636720017977977, + "grad_norm": 0.0996147021651268, + "learning_rate": 2.816015419660067e-05, + "loss": 10.423, + "step": 12750 + }, + { + "epoch": 0.06372194062273714, + "grad_norm": 0.09040533006191254, + "learning_rate": 2.8158652281659116e-05, + "loss": 10.422, + "step": 12760 + }, + { + "epoch": 0.0637718794476766, + "grad_norm": 0.09722060710191727, + "learning_rate": 2.8157150366717563e-05, + "loss": 10.4204, + "step": 12770 + }, + { + "epoch": 0.06382181827261604, + "grad_norm": 0.09629692882299423, + "learning_rate": 2.8155648451776017e-05, + "loss": 10.4208, + "step": 12780 + }, + { + "epoch": 0.06387175709755549, + "grad_norm": 0.08845196664333344, + "learning_rate": 2.8154146536834464e-05, + "loss": 10.4187, + "step": 12790 + }, + { + "epoch": 0.06392169592249494, + "grad_norm": 0.0945558026432991, + "learning_rate": 2.8152644621892917e-05, + "loss": 10.4237, + "step": 12800 + }, + { + "epoch": 0.06397163474743439, + "grad_norm": 0.09783702343702316, + "learning_rate": 2.8151142706951364e-05, + "loss": 10.4185, + "step": 12810 + }, + { + "epoch": 0.06402157357237384, + "grad_norm": 0.09721019119024277, + "learning_rate": 2.814964079200981e-05, + "loss": 10.4202, + "step": 12820 + }, + { + "epoch": 0.06407151239731329, + "grad_norm": 0.09728706628084183, + "learning_rate": 2.8148138877068264e-05, + "loss": 10.4211, + "step": 12830 + }, + { + "epoch": 0.06412145122225274, + "grad_norm": 0.0896829217672348, + "learning_rate": 2.814663696212671e-05, + "loss": 10.4196, + "step": 12840 + }, + { + "epoch": 0.06417139004719219, + "grad_norm": 0.09445061534643173, + "learning_rate": 2.8145135047185165e-05, + "loss": 10.4193, + "step": 12850 + }, + { + "epoch": 0.06422132887213164, + "grad_norm": 0.0961589440703392, + "learning_rate": 2.814363313224361e-05, + "loss": 10.4175, + "step": 12860 + }, + { + "epoch": 0.06427126769707109, + "grad_norm": 0.09850755333900452, + "learning_rate": 2.8142131217302058e-05, + "loss": 10.4181, + "step": 12870 + }, + { + "epoch": 0.06432120652201054, + "grad_norm": 0.093666210770607, + "learning_rate": 2.8140629302360512e-05, + "loss": 10.419, + "step": 12880 + }, + { + "epoch": 0.06437114534694999, + "grad_norm": 0.09768545627593994, + "learning_rate": 2.813912738741896e-05, + "loss": 10.417, + "step": 12890 + }, + { + "epoch": 0.06442108417188944, + "grad_norm": 0.0899498239159584, + "learning_rate": 2.8137625472477412e-05, + "loss": 10.4175, + "step": 12900 + }, + { + "epoch": 0.06447102299682889, + "grad_norm": 0.09297134727239609, + "learning_rate": 2.813612355753586e-05, + "loss": 10.418, + "step": 12910 + }, + { + "epoch": 0.06452096182176834, + "grad_norm": 0.09266901016235352, + "learning_rate": 2.8134621642594306e-05, + "loss": 10.4167, + "step": 12920 + }, + { + "epoch": 0.06457090064670779, + "grad_norm": 0.09560717642307281, + "learning_rate": 2.813311972765276e-05, + "loss": 10.4158, + "step": 12930 + }, + { + "epoch": 0.06462083947164723, + "grad_norm": 0.09199609607458115, + "learning_rate": 2.8131617812711206e-05, + "loss": 10.4151, + "step": 12940 + }, + { + "epoch": 0.06467077829658668, + "grad_norm": 0.08880805224180222, + "learning_rate": 2.813011589776966e-05, + "loss": 10.4152, + "step": 12950 + }, + { + "epoch": 0.06472071712152613, + "grad_norm": 0.09574782103300095, + "learning_rate": 2.8128613982828106e-05, + "loss": 10.4178, + "step": 12960 + }, + { + "epoch": 0.06477065594646558, + "grad_norm": 0.09296387434005737, + "learning_rate": 2.8127112067886553e-05, + "loss": 10.415, + "step": 12970 + }, + { + "epoch": 0.06482059477140503, + "grad_norm": 0.09417938441038132, + "learning_rate": 2.8125610152945007e-05, + "loss": 10.4148, + "step": 12980 + }, + { + "epoch": 0.06487053359634448, + "grad_norm": 0.09101897478103638, + "learning_rate": 2.8124108238003454e-05, + "loss": 10.4149, + "step": 12990 + }, + { + "epoch": 0.06492047242128393, + "grad_norm": 0.09890080988407135, + "learning_rate": 2.8122606323061907e-05, + "loss": 10.4157, + "step": 13000 + }, + { + "epoch": 0.06497041124622338, + "grad_norm": 0.09360602498054504, + "learning_rate": 2.8121104408120354e-05, + "loss": 10.4141, + "step": 13010 + }, + { + "epoch": 0.06502035007116283, + "grad_norm": 0.10021226108074188, + "learning_rate": 2.81196024931788e-05, + "loss": 10.4117, + "step": 13020 + }, + { + "epoch": 0.06507028889610228, + "grad_norm": 0.09726858139038086, + "learning_rate": 2.8118100578237254e-05, + "loss": 10.4117, + "step": 13030 + }, + { + "epoch": 0.06512022772104173, + "grad_norm": 0.09342258423566818, + "learning_rate": 2.81165986632957e-05, + "loss": 10.4155, + "step": 13040 + }, + { + "epoch": 0.06517016654598118, + "grad_norm": 0.09800725430250168, + "learning_rate": 2.8115096748354155e-05, + "loss": 10.4146, + "step": 13050 + }, + { + "epoch": 0.06522010537092063, + "grad_norm": 0.09531977027654648, + "learning_rate": 2.81135948334126e-05, + "loss": 10.4122, + "step": 13060 + }, + { + "epoch": 0.06527004419586008, + "grad_norm": 0.09499124437570572, + "learning_rate": 2.811209291847105e-05, + "loss": 10.4126, + "step": 13070 + }, + { + "epoch": 0.06531998302079953, + "grad_norm": 0.09590071439743042, + "learning_rate": 2.8110591003529502e-05, + "loss": 10.4127, + "step": 13080 + }, + { + "epoch": 0.06536992184573898, + "grad_norm": 0.09835195541381836, + "learning_rate": 2.810908908858795e-05, + "loss": 10.4109, + "step": 13090 + }, + { + "epoch": 0.06541986067067843, + "grad_norm": 0.09182047098875046, + "learning_rate": 2.8107587173646402e-05, + "loss": 10.4132, + "step": 13100 + }, + { + "epoch": 0.06546979949561786, + "grad_norm": 0.09572348743677139, + "learning_rate": 2.810608525870485e-05, + "loss": 10.4111, + "step": 13110 + }, + { + "epoch": 0.06551973832055731, + "grad_norm": 0.09192756563425064, + "learning_rate": 2.81045833437633e-05, + "loss": 10.4094, + "step": 13120 + }, + { + "epoch": 0.06556967714549676, + "grad_norm": 0.09450101852416992, + "learning_rate": 2.810308142882175e-05, + "loss": 10.4124, + "step": 13130 + }, + { + "epoch": 0.06561961597043621, + "grad_norm": 0.09333983063697815, + "learning_rate": 2.8101579513880196e-05, + "loss": 10.4101, + "step": 13140 + }, + { + "epoch": 0.06566955479537566, + "grad_norm": 0.09769093245267868, + "learning_rate": 2.810007759893865e-05, + "loss": 10.4109, + "step": 13150 + }, + { + "epoch": 0.06571949362031511, + "grad_norm": 0.09517354518175125, + "learning_rate": 2.8098575683997096e-05, + "loss": 10.4101, + "step": 13160 + }, + { + "epoch": 0.06576943244525456, + "grad_norm": 0.09823792427778244, + "learning_rate": 2.8097073769055547e-05, + "loss": 10.4095, + "step": 13170 + }, + { + "epoch": 0.06581937127019401, + "grad_norm": 0.09689707309007645, + "learning_rate": 2.8095571854113997e-05, + "loss": 10.4104, + "step": 13180 + }, + { + "epoch": 0.06586931009513346, + "grad_norm": 0.09225734323263168, + "learning_rate": 2.8094069939172444e-05, + "loss": 10.4067, + "step": 13190 + }, + { + "epoch": 0.0659192489200729, + "grad_norm": 0.10383229702711105, + "learning_rate": 2.8092568024230897e-05, + "loss": 10.41, + "step": 13200 + }, + { + "epoch": 0.06596918774501236, + "grad_norm": 0.09270164370536804, + "learning_rate": 2.8091066109289344e-05, + "loss": 10.4071, + "step": 13210 + }, + { + "epoch": 0.0660191265699518, + "grad_norm": 0.09225078672170639, + "learning_rate": 2.8089564194347794e-05, + "loss": 10.4071, + "step": 13220 + }, + { + "epoch": 0.06606906539489125, + "grad_norm": 0.09302686154842377, + "learning_rate": 2.8088062279406244e-05, + "loss": 10.4091, + "step": 13230 + }, + { + "epoch": 0.0661190042198307, + "grad_norm": 0.09300468862056732, + "learning_rate": 2.808656036446469e-05, + "loss": 10.4079, + "step": 13240 + }, + { + "epoch": 0.06616894304477015, + "grad_norm": 0.10051462054252625, + "learning_rate": 2.8085058449523145e-05, + "loss": 10.4058, + "step": 13250 + }, + { + "epoch": 0.0662188818697096, + "grad_norm": 0.10076721012592316, + "learning_rate": 2.808355653458159e-05, + "loss": 10.4052, + "step": 13260 + }, + { + "epoch": 0.06626882069464905, + "grad_norm": 0.09321360290050507, + "learning_rate": 2.808205461964004e-05, + "loss": 10.4091, + "step": 13270 + }, + { + "epoch": 0.0663187595195885, + "grad_norm": 0.09066988527774811, + "learning_rate": 2.8080552704698492e-05, + "loss": 10.4054, + "step": 13280 + }, + { + "epoch": 0.06636869834452795, + "grad_norm": 0.09279420971870422, + "learning_rate": 2.807905078975694e-05, + "loss": 10.4051, + "step": 13290 + }, + { + "epoch": 0.0664186371694674, + "grad_norm": 0.09202752262353897, + "learning_rate": 2.8077548874815392e-05, + "loss": 10.4091, + "step": 13300 + }, + { + "epoch": 0.06646857599440685, + "grad_norm": 0.09188825637102127, + "learning_rate": 2.807604695987384e-05, + "loss": 10.4076, + "step": 13310 + }, + { + "epoch": 0.0665185148193463, + "grad_norm": 0.0955401360988617, + "learning_rate": 2.807454504493229e-05, + "loss": 10.406, + "step": 13320 + }, + { + "epoch": 0.06656845364428575, + "grad_norm": 0.09799564629793167, + "learning_rate": 2.807304312999074e-05, + "loss": 10.4049, + "step": 13330 + }, + { + "epoch": 0.0666183924692252, + "grad_norm": 0.09366542100906372, + "learning_rate": 2.8071541215049186e-05, + "loss": 10.4073, + "step": 13340 + }, + { + "epoch": 0.06666833129416465, + "grad_norm": 0.08726493269205093, + "learning_rate": 2.807003930010764e-05, + "loss": 10.4078, + "step": 13350 + }, + { + "epoch": 0.0667182701191041, + "grad_norm": 0.09408682584762573, + "learning_rate": 2.8068537385166086e-05, + "loss": 10.4018, + "step": 13360 + }, + { + "epoch": 0.06676820894404355, + "grad_norm": 0.1020817831158638, + "learning_rate": 2.8067035470224537e-05, + "loss": 10.4036, + "step": 13370 + }, + { + "epoch": 0.066818147768983, + "grad_norm": 0.0937168076634407, + "learning_rate": 2.8065533555282987e-05, + "loss": 10.405, + "step": 13380 + }, + { + "epoch": 0.06686808659392245, + "grad_norm": 0.08735665678977966, + "learning_rate": 2.8064031640341437e-05, + "loss": 10.4038, + "step": 13390 + }, + { + "epoch": 0.0669180254188619, + "grad_norm": 0.09232959151268005, + "learning_rate": 2.8062529725399887e-05, + "loss": 10.4007, + "step": 13400 + }, + { + "epoch": 0.06696796424380135, + "grad_norm": 0.09150397777557373, + "learning_rate": 2.8061027810458334e-05, + "loss": 10.4059, + "step": 13410 + }, + { + "epoch": 0.0670179030687408, + "grad_norm": 0.09197428822517395, + "learning_rate": 2.8059525895516784e-05, + "loss": 10.4031, + "step": 13420 + }, + { + "epoch": 0.06706784189368024, + "grad_norm": 0.09242592751979828, + "learning_rate": 2.8058023980575234e-05, + "loss": 10.402, + "step": 13430 + }, + { + "epoch": 0.0671177807186197, + "grad_norm": 0.09760529547929764, + "learning_rate": 2.8056522065633685e-05, + "loss": 10.402, + "step": 13440 + }, + { + "epoch": 0.06716771954355914, + "grad_norm": 0.09522455185651779, + "learning_rate": 2.8055020150692135e-05, + "loss": 10.4012, + "step": 13450 + }, + { + "epoch": 0.06721765836849859, + "grad_norm": 0.093946672976017, + "learning_rate": 2.805351823575058e-05, + "loss": 10.4044, + "step": 13460 + }, + { + "epoch": 0.06726759719343804, + "grad_norm": 0.09539136290550232, + "learning_rate": 2.805201632080903e-05, + "loss": 10.4038, + "step": 13470 + }, + { + "epoch": 0.06731753601837749, + "grad_norm": 0.10141439735889435, + "learning_rate": 2.8050514405867482e-05, + "loss": 10.4001, + "step": 13480 + }, + { + "epoch": 0.06736747484331694, + "grad_norm": 0.094613216817379, + "learning_rate": 2.8049012490925932e-05, + "loss": 10.4008, + "step": 13490 + }, + { + "epoch": 0.06741741366825639, + "grad_norm": 0.09187793731689453, + "learning_rate": 2.8047510575984382e-05, + "loss": 10.4017, + "step": 13500 + }, + { + "epoch": 0.06746735249319584, + "grad_norm": 0.10149643570184708, + "learning_rate": 2.804600866104283e-05, + "loss": 10.4028, + "step": 13510 + }, + { + "epoch": 0.06751729131813529, + "grad_norm": 0.09373277425765991, + "learning_rate": 2.804450674610128e-05, + "loss": 10.3996, + "step": 13520 + }, + { + "epoch": 0.06756723014307474, + "grad_norm": 0.09465789794921875, + "learning_rate": 2.804300483115973e-05, + "loss": 10.3994, + "step": 13530 + }, + { + "epoch": 0.06761716896801419, + "grad_norm": 0.08999823033809662, + "learning_rate": 2.804150291621818e-05, + "loss": 10.4001, + "step": 13540 + }, + { + "epoch": 0.06766710779295364, + "grad_norm": 0.09075751155614853, + "learning_rate": 2.804000100127663e-05, + "loss": 10.4001, + "step": 13550 + }, + { + "epoch": 0.06771704661789309, + "grad_norm": 0.09290343523025513, + "learning_rate": 2.8038499086335076e-05, + "loss": 10.3973, + "step": 13560 + }, + { + "epoch": 0.06776698544283254, + "grad_norm": 0.09069476276636124, + "learning_rate": 2.8036997171393527e-05, + "loss": 10.3977, + "step": 13570 + }, + { + "epoch": 0.06781692426777199, + "grad_norm": 0.09564889967441559, + "learning_rate": 2.8035495256451977e-05, + "loss": 10.4, + "step": 13580 + }, + { + "epoch": 0.06786686309271144, + "grad_norm": 0.09241501986980438, + "learning_rate": 2.8033993341510427e-05, + "loss": 10.3973, + "step": 13590 + }, + { + "epoch": 0.06791680191765087, + "grad_norm": 0.09421627223491669, + "learning_rate": 2.8032491426568877e-05, + "loss": 10.3985, + "step": 13600 + }, + { + "epoch": 0.06796674074259032, + "grad_norm": 0.0919337123632431, + "learning_rate": 2.8030989511627324e-05, + "loss": 10.3968, + "step": 13610 + }, + { + "epoch": 0.06801667956752977, + "grad_norm": 0.09475470334291458, + "learning_rate": 2.8029487596685774e-05, + "loss": 10.3991, + "step": 13620 + }, + { + "epoch": 0.06806661839246922, + "grad_norm": 0.09623359888792038, + "learning_rate": 2.8027985681744224e-05, + "loss": 10.3965, + "step": 13630 + }, + { + "epoch": 0.06811655721740867, + "grad_norm": 0.09497568756341934, + "learning_rate": 2.8026483766802675e-05, + "loss": 10.3988, + "step": 13640 + }, + { + "epoch": 0.06816649604234812, + "grad_norm": 0.09501062333583832, + "learning_rate": 2.8024981851861125e-05, + "loss": 10.3959, + "step": 13650 + }, + { + "epoch": 0.06821643486728757, + "grad_norm": 0.09278145432472229, + "learning_rate": 2.802347993691957e-05, + "loss": 10.3948, + "step": 13660 + }, + { + "epoch": 0.06826637369222702, + "grad_norm": 0.09373094141483307, + "learning_rate": 2.802197802197802e-05, + "loss": 10.395, + "step": 13670 + }, + { + "epoch": 0.06831631251716647, + "grad_norm": 0.09346432983875275, + "learning_rate": 2.8020476107036472e-05, + "loss": 10.396, + "step": 13680 + }, + { + "epoch": 0.06836625134210592, + "grad_norm": 0.09329260885715485, + "learning_rate": 2.8018974192094922e-05, + "loss": 10.3962, + "step": 13690 + }, + { + "epoch": 0.06841619016704537, + "grad_norm": 0.09555350244045258, + "learning_rate": 2.8017472277153372e-05, + "loss": 10.3954, + "step": 13700 + }, + { + "epoch": 0.06846612899198481, + "grad_norm": 0.09405256807804108, + "learning_rate": 2.8015970362211822e-05, + "loss": 10.3936, + "step": 13710 + }, + { + "epoch": 0.06851606781692426, + "grad_norm": 0.09874436259269714, + "learning_rate": 2.801446844727027e-05, + "loss": 10.3941, + "step": 13720 + }, + { + "epoch": 0.06856600664186371, + "grad_norm": 0.0999235138297081, + "learning_rate": 2.801296653232872e-05, + "loss": 10.3911, + "step": 13730 + }, + { + "epoch": 0.06861594546680316, + "grad_norm": 0.09541259706020355, + "learning_rate": 2.801146461738717e-05, + "loss": 10.3937, + "step": 13740 + }, + { + "epoch": 0.06866588429174261, + "grad_norm": 0.09852302819490433, + "learning_rate": 2.800996270244562e-05, + "loss": 10.3953, + "step": 13750 + }, + { + "epoch": 0.06871582311668206, + "grad_norm": 0.09277693927288055, + "learning_rate": 2.800846078750407e-05, + "loss": 10.3929, + "step": 13760 + }, + { + "epoch": 0.06876576194162151, + "grad_norm": 0.09327711910009384, + "learning_rate": 2.8006958872562517e-05, + "loss": 10.3907, + "step": 13770 + }, + { + "epoch": 0.06881570076656096, + "grad_norm": 0.08925262838602066, + "learning_rate": 2.8005456957620967e-05, + "loss": 10.3949, + "step": 13780 + }, + { + "epoch": 0.06886563959150041, + "grad_norm": 0.0973198264837265, + "learning_rate": 2.8003955042679417e-05, + "loss": 10.3892, + "step": 13790 + }, + { + "epoch": 0.06891557841643986, + "grad_norm": 0.09705457836389542, + "learning_rate": 2.8002453127737867e-05, + "loss": 10.3911, + "step": 13800 + }, + { + "epoch": 0.06896551724137931, + "grad_norm": 0.09261533617973328, + "learning_rate": 2.8000951212796317e-05, + "loss": 10.3915, + "step": 13810 + }, + { + "epoch": 0.06901545606631876, + "grad_norm": 0.09760327637195587, + "learning_rate": 2.7999449297854764e-05, + "loss": 10.3938, + "step": 13820 + }, + { + "epoch": 0.06906539489125821, + "grad_norm": 0.09349595010280609, + "learning_rate": 2.7997947382913214e-05, + "loss": 10.3904, + "step": 13830 + }, + { + "epoch": 0.06911533371619766, + "grad_norm": 0.0936850905418396, + "learning_rate": 2.7996445467971665e-05, + "loss": 10.3909, + "step": 13840 + }, + { + "epoch": 0.06916527254113711, + "grad_norm": 0.09198077023029327, + "learning_rate": 2.7994943553030115e-05, + "loss": 10.3907, + "step": 13850 + }, + { + "epoch": 0.06921521136607656, + "grad_norm": 0.09293072670698166, + "learning_rate": 2.7993441638088565e-05, + "loss": 10.3906, + "step": 13860 + }, + { + "epoch": 0.069265150191016, + "grad_norm": 0.10718231648206711, + "learning_rate": 2.7991939723147012e-05, + "loss": 10.388, + "step": 13870 + }, + { + "epoch": 0.06931508901595546, + "grad_norm": 0.09268306940793991, + "learning_rate": 2.7990437808205462e-05, + "loss": 10.3907, + "step": 13880 + }, + { + "epoch": 0.0693650278408949, + "grad_norm": 0.09039410948753357, + "learning_rate": 2.7988935893263912e-05, + "loss": 10.3901, + "step": 13890 + }, + { + "epoch": 0.06941496666583435, + "grad_norm": 0.09920943528413773, + "learning_rate": 2.7987433978322362e-05, + "loss": 10.3878, + "step": 13900 + }, + { + "epoch": 0.0694649054907738, + "grad_norm": 0.09512588381767273, + "learning_rate": 2.7985932063380812e-05, + "loss": 10.3883, + "step": 13910 + }, + { + "epoch": 0.06951484431571325, + "grad_norm": 0.0949581190943718, + "learning_rate": 2.798443014843926e-05, + "loss": 10.388, + "step": 13920 + }, + { + "epoch": 0.0695647831406527, + "grad_norm": 0.09540549665689468, + "learning_rate": 2.798292823349771e-05, + "loss": 10.3886, + "step": 13930 + }, + { + "epoch": 0.06961472196559215, + "grad_norm": 0.10153383016586304, + "learning_rate": 2.798142631855616e-05, + "loss": 10.3875, + "step": 13940 + }, + { + "epoch": 0.0696646607905316, + "grad_norm": 0.09448625147342682, + "learning_rate": 2.797992440361461e-05, + "loss": 10.3908, + "step": 13950 + }, + { + "epoch": 0.06971459961547105, + "grad_norm": 0.09156591445207596, + "learning_rate": 2.797842248867306e-05, + "loss": 10.3874, + "step": 13960 + }, + { + "epoch": 0.0697645384404105, + "grad_norm": 0.10002894699573517, + "learning_rate": 2.7976920573731507e-05, + "loss": 10.3878, + "step": 13970 + }, + { + "epoch": 0.06981447726534995, + "grad_norm": 0.1005663126707077, + "learning_rate": 2.7975418658789957e-05, + "loss": 10.3849, + "step": 13980 + }, + { + "epoch": 0.0698644160902894, + "grad_norm": 0.09588824957609177, + "learning_rate": 2.7973916743848407e-05, + "loss": 10.3876, + "step": 13990 + }, + { + "epoch": 0.06991435491522885, + "grad_norm": 0.10445947200059891, + "learning_rate": 2.7972414828906857e-05, + "loss": 10.3845, + "step": 14000 + }, + { + "epoch": 0.0699642937401683, + "grad_norm": 0.09666018187999725, + "learning_rate": 2.7970912913965307e-05, + "loss": 10.3877, + "step": 14010 + }, + { + "epoch": 0.07001423256510775, + "grad_norm": 0.09730076789855957, + "learning_rate": 2.7969410999023754e-05, + "loss": 10.3862, + "step": 14020 + }, + { + "epoch": 0.0700641713900472, + "grad_norm": 0.09373754262924194, + "learning_rate": 2.7967909084082208e-05, + "loss": 10.3866, + "step": 14030 + }, + { + "epoch": 0.07011411021498665, + "grad_norm": 0.09593051671981812, + "learning_rate": 2.7966407169140655e-05, + "loss": 10.3842, + "step": 14040 + }, + { + "epoch": 0.0701640490399261, + "grad_norm": 0.09878380596637726, + "learning_rate": 2.7964905254199105e-05, + "loss": 10.3858, + "step": 14050 + }, + { + "epoch": 0.07021398786486555, + "grad_norm": 0.09511502087116241, + "learning_rate": 2.7963403339257555e-05, + "loss": 10.3866, + "step": 14060 + }, + { + "epoch": 0.070263926689805, + "grad_norm": 0.09328655153512955, + "learning_rate": 2.7961901424316002e-05, + "loss": 10.3844, + "step": 14070 + }, + { + "epoch": 0.07031386551474444, + "grad_norm": 0.09099029004573822, + "learning_rate": 2.7960399509374455e-05, + "loss": 10.3844, + "step": 14080 + }, + { + "epoch": 0.0703638043396839, + "grad_norm": 0.09044034779071808, + "learning_rate": 2.7958897594432902e-05, + "loss": 10.383, + "step": 14090 + }, + { + "epoch": 0.07041374316462333, + "grad_norm": 0.09568076580762863, + "learning_rate": 2.7957395679491352e-05, + "loss": 10.3804, + "step": 14100 + }, + { + "epoch": 0.07046368198956278, + "grad_norm": 0.09366782754659653, + "learning_rate": 2.7955893764549802e-05, + "loss": 10.3835, + "step": 14110 + }, + { + "epoch": 0.07051362081450223, + "grad_norm": 0.0898858904838562, + "learning_rate": 2.795439184960825e-05, + "loss": 10.3813, + "step": 14120 + }, + { + "epoch": 0.07056355963944168, + "grad_norm": 0.09581518173217773, + "learning_rate": 2.7952889934666703e-05, + "loss": 10.3861, + "step": 14130 + }, + { + "epoch": 0.07061349846438113, + "grad_norm": 0.09709804505109787, + "learning_rate": 2.795138801972515e-05, + "loss": 10.3827, + "step": 14140 + }, + { + "epoch": 0.07066343728932058, + "grad_norm": 0.09556079655885696, + "learning_rate": 2.79498861047836e-05, + "loss": 10.3806, + "step": 14150 + }, + { + "epoch": 0.07071337611426003, + "grad_norm": 0.09610017389059067, + "learning_rate": 2.794838418984205e-05, + "loss": 10.3825, + "step": 14160 + }, + { + "epoch": 0.07076331493919948, + "grad_norm": 0.094112828373909, + "learning_rate": 2.7946882274900497e-05, + "loss": 10.3794, + "step": 14170 + }, + { + "epoch": 0.07081325376413893, + "grad_norm": 0.09225358068943024, + "learning_rate": 2.794538035995895e-05, + "loss": 10.3818, + "step": 14180 + }, + { + "epoch": 0.07086319258907837, + "grad_norm": 0.09522094577550888, + "learning_rate": 2.7943878445017397e-05, + "loss": 10.3811, + "step": 14190 + }, + { + "epoch": 0.07091313141401782, + "grad_norm": 0.09772941470146179, + "learning_rate": 2.7942376530075847e-05, + "loss": 10.3835, + "step": 14200 + }, + { + "epoch": 0.07096307023895727, + "grad_norm": 0.09984889626502991, + "learning_rate": 2.7940874615134297e-05, + "loss": 10.3794, + "step": 14210 + }, + { + "epoch": 0.07101300906389672, + "grad_norm": 0.09417369961738586, + "learning_rate": 2.7939372700192744e-05, + "loss": 10.3824, + "step": 14220 + }, + { + "epoch": 0.07106294788883617, + "grad_norm": 0.09816978126764297, + "learning_rate": 2.7937870785251198e-05, + "loss": 10.3796, + "step": 14230 + }, + { + "epoch": 0.07111288671377562, + "grad_norm": 0.10052361339330673, + "learning_rate": 2.7936368870309645e-05, + "loss": 10.3792, + "step": 14240 + }, + { + "epoch": 0.07116282553871507, + "grad_norm": 0.09660011529922485, + "learning_rate": 2.7934866955368095e-05, + "loss": 10.38, + "step": 14250 + }, + { + "epoch": 0.07121276436365452, + "grad_norm": 0.09375877678394318, + "learning_rate": 2.7933365040426545e-05, + "loss": 10.3788, + "step": 14260 + }, + { + "epoch": 0.07126270318859397, + "grad_norm": 0.09189482033252716, + "learning_rate": 2.7931863125484992e-05, + "loss": 10.3798, + "step": 14270 + }, + { + "epoch": 0.07131264201353342, + "grad_norm": 0.09980373829603195, + "learning_rate": 2.7930361210543445e-05, + "loss": 10.381, + "step": 14280 + }, + { + "epoch": 0.07136258083847287, + "grad_norm": 0.08954178541898727, + "learning_rate": 2.7928859295601892e-05, + "loss": 10.3781, + "step": 14290 + }, + { + "epoch": 0.07141251966341232, + "grad_norm": 0.08987992256879807, + "learning_rate": 2.7927357380660342e-05, + "loss": 10.3801, + "step": 14300 + }, + { + "epoch": 0.07146245848835177, + "grad_norm": 0.0954456776380539, + "learning_rate": 2.7925855465718792e-05, + "loss": 10.3775, + "step": 14310 + }, + { + "epoch": 0.07151239731329122, + "grad_norm": 0.09449037164449692, + "learning_rate": 2.792435355077724e-05, + "loss": 10.3813, + "step": 14320 + }, + { + "epoch": 0.07156233613823067, + "grad_norm": 0.09394067525863647, + "learning_rate": 2.7922851635835693e-05, + "loss": 10.3778, + "step": 14330 + }, + { + "epoch": 0.07161227496317012, + "grad_norm": 0.0954408049583435, + "learning_rate": 2.792134972089414e-05, + "loss": 10.3772, + "step": 14340 + }, + { + "epoch": 0.07166221378810957, + "grad_norm": 0.09376630187034607, + "learning_rate": 2.7919847805952593e-05, + "loss": 10.3763, + "step": 14350 + }, + { + "epoch": 0.07171215261304902, + "grad_norm": 0.0952349603176117, + "learning_rate": 2.791834589101104e-05, + "loss": 10.3765, + "step": 14360 + }, + { + "epoch": 0.07176209143798847, + "grad_norm": 0.10054927319288254, + "learning_rate": 2.7916843976069487e-05, + "loss": 10.3729, + "step": 14370 + }, + { + "epoch": 0.07181203026292791, + "grad_norm": 0.09942678362131119, + "learning_rate": 2.791534206112794e-05, + "loss": 10.3755, + "step": 14380 + }, + { + "epoch": 0.07186196908786736, + "grad_norm": 0.09826812148094177, + "learning_rate": 2.7913840146186387e-05, + "loss": 10.3753, + "step": 14390 + }, + { + "epoch": 0.07191190791280681, + "grad_norm": 0.09203541278839111, + "learning_rate": 2.791233823124484e-05, + "loss": 10.3753, + "step": 14400 + }, + { + "epoch": 0.07196184673774626, + "grad_norm": 0.09987717121839523, + "learning_rate": 2.7910836316303287e-05, + "loss": 10.3732, + "step": 14410 + }, + { + "epoch": 0.07201178556268571, + "grad_norm": 0.09702242910861969, + "learning_rate": 2.7909334401361734e-05, + "loss": 10.3748, + "step": 14420 + }, + { + "epoch": 0.07206172438762516, + "grad_norm": 0.09924949705600739, + "learning_rate": 2.7907832486420188e-05, + "loss": 10.3758, + "step": 14430 + }, + { + "epoch": 0.07211166321256461, + "grad_norm": 0.10308583825826645, + "learning_rate": 2.7906330571478635e-05, + "loss": 10.3698, + "step": 14440 + }, + { + "epoch": 0.07216160203750406, + "grad_norm": 0.09504105895757675, + "learning_rate": 2.7904828656537088e-05, + "loss": 10.3751, + "step": 14450 + }, + { + "epoch": 0.07221154086244351, + "grad_norm": 0.0910039022564888, + "learning_rate": 2.7903326741595535e-05, + "loss": 10.3762, + "step": 14460 + }, + { + "epoch": 0.07226147968738296, + "grad_norm": 0.09338394552469254, + "learning_rate": 2.7901824826653982e-05, + "loss": 10.3741, + "step": 14470 + }, + { + "epoch": 0.07231141851232241, + "grad_norm": 0.095545195043087, + "learning_rate": 2.7900322911712435e-05, + "loss": 10.3728, + "step": 14480 + }, + { + "epoch": 0.07236135733726186, + "grad_norm": 0.0895906612277031, + "learning_rate": 2.7898820996770882e-05, + "loss": 10.3754, + "step": 14490 + }, + { + "epoch": 0.07241129616220131, + "grad_norm": 0.09767705202102661, + "learning_rate": 2.7897319081829336e-05, + "loss": 10.3734, + "step": 14500 + }, + { + "epoch": 0.07246123498714076, + "grad_norm": 0.09386033564805984, + "learning_rate": 2.7895817166887782e-05, + "loss": 10.3728, + "step": 14510 + }, + { + "epoch": 0.0725111738120802, + "grad_norm": 0.09241237491369247, + "learning_rate": 2.789431525194623e-05, + "loss": 10.3727, + "step": 14520 + }, + { + "epoch": 0.07256111263701966, + "grad_norm": 0.09265079349279404, + "learning_rate": 2.7892813337004683e-05, + "loss": 10.3714, + "step": 14530 + }, + { + "epoch": 0.0726110514619591, + "grad_norm": 0.09971288591623306, + "learning_rate": 2.789131142206313e-05, + "loss": 10.3708, + "step": 14540 + }, + { + "epoch": 0.07266099028689856, + "grad_norm": 0.09628092497587204, + "learning_rate": 2.7889809507121583e-05, + "loss": 10.3714, + "step": 14550 + }, + { + "epoch": 0.072710929111838, + "grad_norm": 0.09946174919605255, + "learning_rate": 2.788830759218003e-05, + "loss": 10.3706, + "step": 14560 + }, + { + "epoch": 0.07276086793677745, + "grad_norm": 0.09489379823207855, + "learning_rate": 2.7886805677238477e-05, + "loss": 10.3715, + "step": 14570 + }, + { + "epoch": 0.0728108067617169, + "grad_norm": 0.09554824233055115, + "learning_rate": 2.788530376229693e-05, + "loss": 10.3715, + "step": 14580 + }, + { + "epoch": 0.07286074558665634, + "grad_norm": 0.09551879018545151, + "learning_rate": 2.7883801847355377e-05, + "loss": 10.3715, + "step": 14590 + }, + { + "epoch": 0.07291068441159579, + "grad_norm": 0.10136498510837555, + "learning_rate": 2.788229993241383e-05, + "loss": 10.371, + "step": 14600 + }, + { + "epoch": 0.07296062323653524, + "grad_norm": 0.09273940324783325, + "learning_rate": 2.7880798017472277e-05, + "loss": 10.3697, + "step": 14610 + }, + { + "epoch": 0.07301056206147469, + "grad_norm": 0.09797700494527817, + "learning_rate": 2.7879296102530724e-05, + "loss": 10.369, + "step": 14620 + }, + { + "epoch": 0.07306050088641414, + "grad_norm": 0.0974142774939537, + "learning_rate": 2.7877794187589178e-05, + "loss": 10.3689, + "step": 14630 + }, + { + "epoch": 0.07311043971135359, + "grad_norm": 0.10973493754863739, + "learning_rate": 2.7876292272647625e-05, + "loss": 10.3659, + "step": 14640 + }, + { + "epoch": 0.07316037853629304, + "grad_norm": 0.0951044112443924, + "learning_rate": 2.7874790357706078e-05, + "loss": 10.3671, + "step": 14650 + }, + { + "epoch": 0.07321031736123249, + "grad_norm": 0.09662732481956482, + "learning_rate": 2.7873288442764525e-05, + "loss": 10.369, + "step": 14660 + }, + { + "epoch": 0.07326025618617193, + "grad_norm": 0.09085254371166229, + "learning_rate": 2.7871786527822975e-05, + "loss": 10.3666, + "step": 14670 + }, + { + "epoch": 0.07331019501111138, + "grad_norm": 0.09421254694461823, + "learning_rate": 2.7870284612881425e-05, + "loss": 10.3648, + "step": 14680 + }, + { + "epoch": 0.07336013383605083, + "grad_norm": 0.09277543425559998, + "learning_rate": 2.7868782697939872e-05, + "loss": 10.3655, + "step": 14690 + }, + { + "epoch": 0.07341007266099028, + "grad_norm": 0.09704422950744629, + "learning_rate": 2.7867280782998326e-05, + "loss": 10.3657, + "step": 14700 + }, + { + "epoch": 0.07346001148592973, + "grad_norm": 0.09107200056314468, + "learning_rate": 2.7865778868056772e-05, + "loss": 10.3688, + "step": 14710 + }, + { + "epoch": 0.07350995031086918, + "grad_norm": 0.08991669118404388, + "learning_rate": 2.7864276953115223e-05, + "loss": 10.3676, + "step": 14720 + }, + { + "epoch": 0.07355988913580863, + "grad_norm": 0.09315147995948792, + "learning_rate": 2.7862775038173673e-05, + "loss": 10.3692, + "step": 14730 + }, + { + "epoch": 0.07360982796074808, + "grad_norm": 0.09433738887310028, + "learning_rate": 2.786127312323212e-05, + "loss": 10.364, + "step": 14740 + }, + { + "epoch": 0.07365976678568753, + "grad_norm": 0.0911378338932991, + "learning_rate": 2.7859771208290573e-05, + "loss": 10.3655, + "step": 14750 + }, + { + "epoch": 0.07370970561062698, + "grad_norm": 0.09321308135986328, + "learning_rate": 2.785826929334902e-05, + "loss": 10.3665, + "step": 14760 + }, + { + "epoch": 0.07375964443556643, + "grad_norm": 0.09273282438516617, + "learning_rate": 2.785676737840747e-05, + "loss": 10.3633, + "step": 14770 + }, + { + "epoch": 0.07380958326050588, + "grad_norm": 0.0901220440864563, + "learning_rate": 2.785526546346592e-05, + "loss": 10.3613, + "step": 14780 + }, + { + "epoch": 0.07385952208544533, + "grad_norm": 0.09423750638961792, + "learning_rate": 2.7853763548524367e-05, + "loss": 10.3643, + "step": 14790 + }, + { + "epoch": 0.07390946091038478, + "grad_norm": 0.09342154115438461, + "learning_rate": 2.785226163358282e-05, + "loss": 10.366, + "step": 14800 + }, + { + "epoch": 0.07395939973532423, + "grad_norm": 0.09549295902252197, + "learning_rate": 2.7850759718641267e-05, + "loss": 10.3637, + "step": 14810 + }, + { + "epoch": 0.07400933856026368, + "grad_norm": 0.09613852202892303, + "learning_rate": 2.7849257803699718e-05, + "loss": 10.3668, + "step": 14820 + }, + { + "epoch": 0.07405927738520313, + "grad_norm": 0.09182946383953094, + "learning_rate": 2.7847755888758168e-05, + "loss": 10.3647, + "step": 14830 + }, + { + "epoch": 0.07410921621014258, + "grad_norm": 0.08932546526193619, + "learning_rate": 2.7846253973816615e-05, + "loss": 10.3626, + "step": 14840 + }, + { + "epoch": 0.07415915503508202, + "grad_norm": 0.09887928515672684, + "learning_rate": 2.7844752058875068e-05, + "loss": 10.3609, + "step": 14850 + }, + { + "epoch": 0.07420909386002147, + "grad_norm": 0.09238041192293167, + "learning_rate": 2.7843250143933515e-05, + "loss": 10.3625, + "step": 14860 + }, + { + "epoch": 0.07425903268496092, + "grad_norm": 0.08984861522912979, + "learning_rate": 2.7841748228991965e-05, + "loss": 10.3626, + "step": 14870 + }, + { + "epoch": 0.07430897150990037, + "grad_norm": 0.09524226188659668, + "learning_rate": 2.7840246314050415e-05, + "loss": 10.3622, + "step": 14880 + }, + { + "epoch": 0.07435891033483982, + "grad_norm": 0.09836632758378983, + "learning_rate": 2.7838744399108862e-05, + "loss": 10.3668, + "step": 14890 + }, + { + "epoch": 0.07440884915977927, + "grad_norm": 0.08989029377698898, + "learning_rate": 2.7837242484167316e-05, + "loss": 10.364, + "step": 14900 + }, + { + "epoch": 0.07445878798471872, + "grad_norm": 0.09499987214803696, + "learning_rate": 2.7835740569225762e-05, + "loss": 10.3642, + "step": 14910 + }, + { + "epoch": 0.07450872680965817, + "grad_norm": 0.09698102623224258, + "learning_rate": 2.7834238654284213e-05, + "loss": 10.3599, + "step": 14920 + }, + { + "epoch": 0.07455866563459762, + "grad_norm": 0.09677815437316895, + "learning_rate": 2.7832736739342663e-05, + "loss": 10.3597, + "step": 14930 + }, + { + "epoch": 0.07460860445953707, + "grad_norm": 0.09893867373466492, + "learning_rate": 2.783123482440111e-05, + "loss": 10.358, + "step": 14940 + }, + { + "epoch": 0.07465854328447652, + "grad_norm": 0.09698786586523056, + "learning_rate": 2.7829732909459563e-05, + "loss": 10.3576, + "step": 14950 + }, + { + "epoch": 0.07470848210941597, + "grad_norm": 0.08816642314195633, + "learning_rate": 2.782823099451801e-05, + "loss": 10.3598, + "step": 14960 + }, + { + "epoch": 0.07475842093435542, + "grad_norm": 0.09455441683530807, + "learning_rate": 2.782672907957646e-05, + "loss": 10.3586, + "step": 14970 + }, + { + "epoch": 0.07480835975929487, + "grad_norm": 0.09937630593776703, + "learning_rate": 2.782522716463491e-05, + "loss": 10.3571, + "step": 14980 + }, + { + "epoch": 0.07485829858423432, + "grad_norm": 0.09834357351064682, + "learning_rate": 2.782372524969336e-05, + "loss": 10.357, + "step": 14990 + }, + { + "epoch": 0.07490823740917377, + "grad_norm": 0.09351138025522232, + "learning_rate": 2.782222333475181e-05, + "loss": 10.3599, + "step": 15000 + }, + { + "epoch": 0.07495817623411322, + "grad_norm": 0.09158463031053543, + "learning_rate": 2.7820721419810257e-05, + "loss": 10.3608, + "step": 15010 + }, + { + "epoch": 0.07500811505905267, + "grad_norm": 0.09933115541934967, + "learning_rate": 2.7819219504868708e-05, + "loss": 10.3587, + "step": 15020 + }, + { + "epoch": 0.07505805388399212, + "grad_norm": 0.09445559233427048, + "learning_rate": 2.7817717589927158e-05, + "loss": 10.356, + "step": 15030 + }, + { + "epoch": 0.07510799270893156, + "grad_norm": 0.0958174541592598, + "learning_rate": 2.7816215674985608e-05, + "loss": 10.3601, + "step": 15040 + }, + { + "epoch": 0.07515793153387101, + "grad_norm": 0.09285058826208115, + "learning_rate": 2.7814713760044058e-05, + "loss": 10.3572, + "step": 15050 + }, + { + "epoch": 0.07520787035881046, + "grad_norm": 0.09183011949062347, + "learning_rate": 2.7813211845102505e-05, + "loss": 10.3567, + "step": 15060 + }, + { + "epoch": 0.07525780918374991, + "grad_norm": 0.09525202959775925, + "learning_rate": 2.7811709930160955e-05, + "loss": 10.3597, + "step": 15070 + }, + { + "epoch": 0.07530774800868936, + "grad_norm": 0.09245378524065018, + "learning_rate": 2.7810208015219405e-05, + "loss": 10.3559, + "step": 15080 + }, + { + "epoch": 0.0753576868336288, + "grad_norm": 0.09502734988927841, + "learning_rate": 2.7808706100277856e-05, + "loss": 10.3569, + "step": 15090 + }, + { + "epoch": 0.07540762565856825, + "grad_norm": 0.09782931953668594, + "learning_rate": 2.7807204185336306e-05, + "loss": 10.3561, + "step": 15100 + }, + { + "epoch": 0.0754575644835077, + "grad_norm": 0.09502676874399185, + "learning_rate": 2.7805702270394752e-05, + "loss": 10.3553, + "step": 15110 + }, + { + "epoch": 0.07550750330844715, + "grad_norm": 0.09578611701726913, + "learning_rate": 2.7804200355453203e-05, + "loss": 10.3571, + "step": 15120 + }, + { + "epoch": 0.0755574421333866, + "grad_norm": 0.0919770896434784, + "learning_rate": 2.7802698440511653e-05, + "loss": 10.3541, + "step": 15130 + }, + { + "epoch": 0.07560738095832605, + "grad_norm": 0.09363967925310135, + "learning_rate": 2.7801196525570103e-05, + "loss": 10.3542, + "step": 15140 + }, + { + "epoch": 0.0756573197832655, + "grad_norm": 0.09395650029182434, + "learning_rate": 2.7799694610628553e-05, + "loss": 10.3552, + "step": 15150 + }, + { + "epoch": 0.07570725860820494, + "grad_norm": 0.09580358117818832, + "learning_rate": 2.7798192695687e-05, + "loss": 10.3551, + "step": 15160 + }, + { + "epoch": 0.0757571974331444, + "grad_norm": 0.09569013118743896, + "learning_rate": 2.779669078074545e-05, + "loss": 10.3505, + "step": 15170 + }, + { + "epoch": 0.07580713625808384, + "grad_norm": 0.09753712266683578, + "learning_rate": 2.77951888658039e-05, + "loss": 10.3539, + "step": 15180 + }, + { + "epoch": 0.07585707508302329, + "grad_norm": 0.09192202240228653, + "learning_rate": 2.779368695086235e-05, + "loss": 10.354, + "step": 15190 + }, + { + "epoch": 0.07590701390796274, + "grad_norm": 0.09564004093408585, + "learning_rate": 2.77921850359208e-05, + "loss": 10.3533, + "step": 15200 + }, + { + "epoch": 0.07595695273290219, + "grad_norm": 0.08988484740257263, + "learning_rate": 2.7790683120979248e-05, + "loss": 10.3559, + "step": 15210 + }, + { + "epoch": 0.07600689155784164, + "grad_norm": 0.09135802835226059, + "learning_rate": 2.7789181206037698e-05, + "loss": 10.355, + "step": 15220 + }, + { + "epoch": 0.07605683038278109, + "grad_norm": 0.10438718646764755, + "learning_rate": 2.7787679291096148e-05, + "loss": 10.3533, + "step": 15230 + }, + { + "epoch": 0.07610676920772054, + "grad_norm": 0.0949469655752182, + "learning_rate": 2.7786177376154598e-05, + "loss": 10.3526, + "step": 15240 + }, + { + "epoch": 0.07615670803265999, + "grad_norm": 0.09750612825155258, + "learning_rate": 2.7784675461213048e-05, + "loss": 10.3543, + "step": 15250 + }, + { + "epoch": 0.07620664685759944, + "grad_norm": 0.09662304818630219, + "learning_rate": 2.7783173546271495e-05, + "loss": 10.3492, + "step": 15260 + }, + { + "epoch": 0.07625658568253889, + "grad_norm": 0.09173522889614105, + "learning_rate": 2.7781671631329945e-05, + "loss": 10.3505, + "step": 15270 + }, + { + "epoch": 0.07630652450747834, + "grad_norm": 0.09761576354503632, + "learning_rate": 2.7780169716388395e-05, + "loss": 10.35, + "step": 15280 + }, + { + "epoch": 0.07635646333241779, + "grad_norm": 0.091497503221035, + "learning_rate": 2.7778667801446846e-05, + "loss": 10.353, + "step": 15290 + }, + { + "epoch": 0.07640640215735724, + "grad_norm": 0.0867045447230339, + "learning_rate": 2.7777165886505296e-05, + "loss": 10.3486, + "step": 15300 + }, + { + "epoch": 0.07645634098229669, + "grad_norm": 0.09160704165697098, + "learning_rate": 2.7775663971563743e-05, + "loss": 10.3531, + "step": 15310 + }, + { + "epoch": 0.07650627980723614, + "grad_norm": 0.09530753642320633, + "learning_rate": 2.7774162056622193e-05, + "loss": 10.3502, + "step": 15320 + }, + { + "epoch": 0.07655621863217558, + "grad_norm": 0.09408596903085709, + "learning_rate": 2.7772660141680643e-05, + "loss": 10.3514, + "step": 15330 + }, + { + "epoch": 0.07660615745711503, + "grad_norm": 0.10033749043941498, + "learning_rate": 2.7771158226739093e-05, + "loss": 10.3467, + "step": 15340 + }, + { + "epoch": 0.07665609628205448, + "grad_norm": 0.08937489241361618, + "learning_rate": 2.7769656311797543e-05, + "loss": 10.3505, + "step": 15350 + }, + { + "epoch": 0.07670603510699393, + "grad_norm": 0.09332039952278137, + "learning_rate": 2.7768154396855993e-05, + "loss": 10.349, + "step": 15360 + }, + { + "epoch": 0.07675597393193338, + "grad_norm": 0.10080984979867935, + "learning_rate": 2.776665248191444e-05, + "loss": 10.3472, + "step": 15370 + }, + { + "epoch": 0.07680591275687283, + "grad_norm": 0.09363698959350586, + "learning_rate": 2.776515056697289e-05, + "loss": 10.3491, + "step": 15380 + }, + { + "epoch": 0.07685585158181228, + "grad_norm": 0.09421728551387787, + "learning_rate": 2.776364865203134e-05, + "loss": 10.3457, + "step": 15390 + }, + { + "epoch": 0.07690579040675173, + "grad_norm": 0.09391890466213226, + "learning_rate": 2.776214673708979e-05, + "loss": 10.3481, + "step": 15400 + }, + { + "epoch": 0.07695572923169118, + "grad_norm": 0.09497596323490143, + "learning_rate": 2.776064482214824e-05, + "loss": 10.3486, + "step": 15410 + }, + { + "epoch": 0.07700566805663063, + "grad_norm": 0.09952494502067566, + "learning_rate": 2.7759142907206688e-05, + "loss": 10.3471, + "step": 15420 + }, + { + "epoch": 0.07705560688157008, + "grad_norm": 0.09322735667228699, + "learning_rate": 2.7757640992265138e-05, + "loss": 10.346, + "step": 15430 + }, + { + "epoch": 0.07710554570650953, + "grad_norm": 0.09606785327196121, + "learning_rate": 2.7756139077323588e-05, + "loss": 10.3519, + "step": 15440 + }, + { + "epoch": 0.07715548453144898, + "grad_norm": 0.09380047023296356, + "learning_rate": 2.7754637162382038e-05, + "loss": 10.3466, + "step": 15450 + }, + { + "epoch": 0.07720542335638843, + "grad_norm": 0.09241063892841339, + "learning_rate": 2.775313524744049e-05, + "loss": 10.3461, + "step": 15460 + }, + { + "epoch": 0.07725536218132788, + "grad_norm": 0.09774025529623032, + "learning_rate": 2.7751633332498935e-05, + "loss": 10.3434, + "step": 15470 + }, + { + "epoch": 0.07730530100626733, + "grad_norm": 0.09376423805952072, + "learning_rate": 2.7750131417557385e-05, + "loss": 10.3487, + "step": 15480 + }, + { + "epoch": 0.07735523983120678, + "grad_norm": 0.08905985951423645, + "learning_rate": 2.7748629502615836e-05, + "loss": 10.3459, + "step": 15490 + }, + { + "epoch": 0.07740517865614623, + "grad_norm": 0.09236487746238708, + "learning_rate": 2.7747127587674286e-05, + "loss": 10.3436, + "step": 15500 + }, + { + "epoch": 0.07745511748108568, + "grad_norm": 0.0898410752415657, + "learning_rate": 2.7745625672732736e-05, + "loss": 10.3419, + "step": 15510 + }, + { + "epoch": 0.07750505630602512, + "grad_norm": 0.09169359505176544, + "learning_rate": 2.7744123757791183e-05, + "loss": 10.3459, + "step": 15520 + }, + { + "epoch": 0.07755499513096457, + "grad_norm": 0.09277211874723434, + "learning_rate": 2.7742621842849633e-05, + "loss": 10.3462, + "step": 15530 + }, + { + "epoch": 0.07760493395590402, + "grad_norm": 0.09616000950336456, + "learning_rate": 2.7741119927908083e-05, + "loss": 10.3427, + "step": 15540 + }, + { + "epoch": 0.07765487278084347, + "grad_norm": 0.09537960588932037, + "learning_rate": 2.7739618012966533e-05, + "loss": 10.3464, + "step": 15550 + }, + { + "epoch": 0.07770481160578292, + "grad_norm": 0.09606116265058517, + "learning_rate": 2.7738116098024983e-05, + "loss": 10.348, + "step": 15560 + }, + { + "epoch": 0.07775475043072237, + "grad_norm": 0.10531776398420334, + "learning_rate": 2.773661418308343e-05, + "loss": 10.344, + "step": 15570 + }, + { + "epoch": 0.07780468925566181, + "grad_norm": 0.09302783012390137, + "learning_rate": 2.773511226814188e-05, + "loss": 10.3427, + "step": 15580 + }, + { + "epoch": 0.07785462808060126, + "grad_norm": 0.09965275973081589, + "learning_rate": 2.773361035320033e-05, + "loss": 10.3477, + "step": 15590 + }, + { + "epoch": 0.0779045669055407, + "grad_norm": 0.0914713442325592, + "learning_rate": 2.773210843825878e-05, + "loss": 10.343, + "step": 15600 + }, + { + "epoch": 0.07795450573048016, + "grad_norm": 0.09361792355775833, + "learning_rate": 2.773060652331723e-05, + "loss": 10.3476, + "step": 15610 + }, + { + "epoch": 0.0780044445554196, + "grad_norm": 0.09312635660171509, + "learning_rate": 2.7729104608375678e-05, + "loss": 10.3434, + "step": 15620 + }, + { + "epoch": 0.07805438338035905, + "grad_norm": 0.09331481158733368, + "learning_rate": 2.7727602693434128e-05, + "loss": 10.3433, + "step": 15630 + }, + { + "epoch": 0.0781043222052985, + "grad_norm": 0.0952400341629982, + "learning_rate": 2.7726100778492578e-05, + "loss": 10.343, + "step": 15640 + }, + { + "epoch": 0.07815426103023795, + "grad_norm": 0.09251481294631958, + "learning_rate": 2.7724598863551028e-05, + "loss": 10.3406, + "step": 15650 + }, + { + "epoch": 0.0782041998551774, + "grad_norm": 0.09364163130521774, + "learning_rate": 2.772309694860948e-05, + "loss": 10.3429, + "step": 15660 + }, + { + "epoch": 0.07825413868011685, + "grad_norm": 0.09640027582645416, + "learning_rate": 2.772159503366793e-05, + "loss": 10.3399, + "step": 15670 + }, + { + "epoch": 0.0783040775050563, + "grad_norm": 0.09361618012189865, + "learning_rate": 2.772009311872638e-05, + "loss": 10.3391, + "step": 15680 + }, + { + "epoch": 0.07835401632999575, + "grad_norm": 0.09318644553422928, + "learning_rate": 2.7718591203784826e-05, + "loss": 10.3398, + "step": 15690 + }, + { + "epoch": 0.0784039551549352, + "grad_norm": 0.09669560939073563, + "learning_rate": 2.7717089288843276e-05, + "loss": 10.3404, + "step": 15700 + }, + { + "epoch": 0.07845389397987465, + "grad_norm": 0.09504924714565277, + "learning_rate": 2.7715587373901726e-05, + "loss": 10.3368, + "step": 15710 + }, + { + "epoch": 0.0785038328048141, + "grad_norm": 0.09268130362033844, + "learning_rate": 2.7714085458960176e-05, + "loss": 10.3395, + "step": 15720 + }, + { + "epoch": 0.07855377162975355, + "grad_norm": 0.09593498706817627, + "learning_rate": 2.7712583544018626e-05, + "loss": 10.3378, + "step": 15730 + }, + { + "epoch": 0.078603710454693, + "grad_norm": 0.09858926385641098, + "learning_rate": 2.7711081629077073e-05, + "loss": 10.3395, + "step": 15740 + }, + { + "epoch": 0.07865364927963245, + "grad_norm": 0.09996471554040909, + "learning_rate": 2.7709579714135523e-05, + "loss": 10.3401, + "step": 15750 + }, + { + "epoch": 0.0787035881045719, + "grad_norm": 0.09353151172399521, + "learning_rate": 2.7708077799193973e-05, + "loss": 10.3392, + "step": 15760 + }, + { + "epoch": 0.07875352692951135, + "grad_norm": 0.0970112755894661, + "learning_rate": 2.7706575884252424e-05, + "loss": 10.337, + "step": 15770 + }, + { + "epoch": 0.0788034657544508, + "grad_norm": 0.09143032133579254, + "learning_rate": 2.7705073969310874e-05, + "loss": 10.3367, + "step": 15780 + }, + { + "epoch": 0.07885340457939025, + "grad_norm": 0.09837393462657928, + "learning_rate": 2.770357205436932e-05, + "loss": 10.3368, + "step": 15790 + }, + { + "epoch": 0.0789033434043297, + "grad_norm": 0.0949447974562645, + "learning_rate": 2.770207013942777e-05, + "loss": 10.3381, + "step": 15800 + }, + { + "epoch": 0.07895328222926914, + "grad_norm": 0.09616595506668091, + "learning_rate": 2.770056822448622e-05, + "loss": 10.3356, + "step": 15810 + }, + { + "epoch": 0.0790032210542086, + "grad_norm": 0.0987720862030983, + "learning_rate": 2.769906630954467e-05, + "loss": 10.3361, + "step": 15820 + }, + { + "epoch": 0.07905315987914804, + "grad_norm": 0.09198544174432755, + "learning_rate": 2.769756439460312e-05, + "loss": 10.3322, + "step": 15830 + }, + { + "epoch": 0.0791030987040875, + "grad_norm": 0.09431766718626022, + "learning_rate": 2.7696062479661568e-05, + "loss": 10.3391, + "step": 15840 + }, + { + "epoch": 0.07915303752902694, + "grad_norm": 0.08981557935476303, + "learning_rate": 2.7694560564720018e-05, + "loss": 10.3416, + "step": 15850 + }, + { + "epoch": 0.07920297635396639, + "grad_norm": 0.09577486664056778, + "learning_rate": 2.769305864977847e-05, + "loss": 10.3368, + "step": 15860 + }, + { + "epoch": 0.07925291517890584, + "grad_norm": 0.09910629689693451, + "learning_rate": 2.769155673483692e-05, + "loss": 10.3372, + "step": 15870 + }, + { + "epoch": 0.07930285400384529, + "grad_norm": 0.0959116742014885, + "learning_rate": 2.769005481989537e-05, + "loss": 10.332, + "step": 15880 + }, + { + "epoch": 0.07935279282878474, + "grad_norm": 0.09615825861692429, + "learning_rate": 2.7688552904953816e-05, + "loss": 10.3353, + "step": 15890 + }, + { + "epoch": 0.07940273165372419, + "grad_norm": 0.09139242023229599, + "learning_rate": 2.7687050990012266e-05, + "loss": 10.3368, + "step": 15900 + }, + { + "epoch": 0.07945267047866364, + "grad_norm": 0.09158849716186523, + "learning_rate": 2.7685549075070716e-05, + "loss": 10.3365, + "step": 15910 + }, + { + "epoch": 0.07950260930360309, + "grad_norm": 0.09999959915876389, + "learning_rate": 2.7684047160129166e-05, + "loss": 10.3314, + "step": 15920 + }, + { + "epoch": 0.07955254812854254, + "grad_norm": 0.09477412700653076, + "learning_rate": 2.7682545245187616e-05, + "loss": 10.3342, + "step": 15930 + }, + { + "epoch": 0.07960248695348199, + "grad_norm": 0.09604287892580032, + "learning_rate": 2.7681043330246063e-05, + "loss": 10.3322, + "step": 15940 + }, + { + "epoch": 0.07965242577842144, + "grad_norm": 0.09351278841495514, + "learning_rate": 2.7679541415304513e-05, + "loss": 10.3344, + "step": 15950 + }, + { + "epoch": 0.07970236460336089, + "grad_norm": 0.09625541418790817, + "learning_rate": 2.7678039500362963e-05, + "loss": 10.3328, + "step": 15960 + }, + { + "epoch": 0.07975230342830034, + "grad_norm": 0.09256501495838165, + "learning_rate": 2.7676537585421414e-05, + "loss": 10.3316, + "step": 15970 + }, + { + "epoch": 0.07980224225323979, + "grad_norm": 0.09308816492557526, + "learning_rate": 2.7675035670479864e-05, + "loss": 10.3311, + "step": 15980 + }, + { + "epoch": 0.07985218107817924, + "grad_norm": 0.09923449158668518, + "learning_rate": 2.767353375553831e-05, + "loss": 10.3336, + "step": 15990 + }, + { + "epoch": 0.07990211990311868, + "grad_norm": 0.09855549037456512, + "learning_rate": 2.7672031840596764e-05, + "loss": 10.3271, + "step": 16000 + }, + { + "epoch": 0.07995205872805813, + "grad_norm": 0.09425193816423416, + "learning_rate": 2.767052992565521e-05, + "loss": 10.3321, + "step": 16010 + }, + { + "epoch": 0.08000199755299758, + "grad_norm": 0.09234164655208588, + "learning_rate": 2.766902801071366e-05, + "loss": 10.334, + "step": 16020 + }, + { + "epoch": 0.08005193637793703, + "grad_norm": 0.09213145077228546, + "learning_rate": 2.766752609577211e-05, + "loss": 10.3343, + "step": 16030 + }, + { + "epoch": 0.08010187520287648, + "grad_norm": 0.09849076718091965, + "learning_rate": 2.7666024180830558e-05, + "loss": 10.3308, + "step": 16040 + }, + { + "epoch": 0.08015181402781593, + "grad_norm": 0.09698436409235, + "learning_rate": 2.766452226588901e-05, + "loss": 10.3301, + "step": 16050 + }, + { + "epoch": 0.08020175285275538, + "grad_norm": 0.10218020528554916, + "learning_rate": 2.766302035094746e-05, + "loss": 10.3282, + "step": 16060 + }, + { + "epoch": 0.08025169167769482, + "grad_norm": 0.0932408943772316, + "learning_rate": 2.766151843600591e-05, + "loss": 10.3309, + "step": 16070 + }, + { + "epoch": 0.08030163050263427, + "grad_norm": 0.09290464967489243, + "learning_rate": 2.766001652106436e-05, + "loss": 10.3298, + "step": 16080 + }, + { + "epoch": 0.08035156932757372, + "grad_norm": 0.09502555429935455, + "learning_rate": 2.7658514606122806e-05, + "loss": 10.3309, + "step": 16090 + }, + { + "epoch": 0.08040150815251317, + "grad_norm": 0.09941313415765762, + "learning_rate": 2.765701269118126e-05, + "loss": 10.3306, + "step": 16100 + }, + { + "epoch": 0.08045144697745261, + "grad_norm": 0.09274566918611526, + "learning_rate": 2.7655510776239706e-05, + "loss": 10.3321, + "step": 16110 + }, + { + "epoch": 0.08050138580239206, + "grad_norm": 0.094278983771801, + "learning_rate": 2.7654008861298156e-05, + "loss": 10.3285, + "step": 16120 + }, + { + "epoch": 0.08055132462733151, + "grad_norm": 0.09539202600717545, + "learning_rate": 2.7652506946356606e-05, + "loss": 10.3332, + "step": 16130 + }, + { + "epoch": 0.08060126345227096, + "grad_norm": 0.09607014805078506, + "learning_rate": 2.7651005031415053e-05, + "loss": 10.3289, + "step": 16140 + }, + { + "epoch": 0.08065120227721041, + "grad_norm": 0.09501274675130844, + "learning_rate": 2.7649503116473507e-05, + "loss": 10.3272, + "step": 16150 + }, + { + "epoch": 0.08070114110214986, + "grad_norm": 0.10182977467775345, + "learning_rate": 2.7648001201531953e-05, + "loss": 10.3256, + "step": 16160 + }, + { + "epoch": 0.08075107992708931, + "grad_norm": 0.10245219618082047, + "learning_rate": 2.7646499286590404e-05, + "loss": 10.3296, + "step": 16170 + }, + { + "epoch": 0.08080101875202876, + "grad_norm": 0.09179327636957169, + "learning_rate": 2.7644997371648854e-05, + "loss": 10.3269, + "step": 16180 + }, + { + "epoch": 0.08085095757696821, + "grad_norm": 0.08983848243951797, + "learning_rate": 2.76434954567073e-05, + "loss": 10.3286, + "step": 16190 + }, + { + "epoch": 0.08090089640190766, + "grad_norm": 0.09037093073129654, + "learning_rate": 2.7641993541765754e-05, + "loss": 10.3275, + "step": 16200 + }, + { + "epoch": 0.08095083522684711, + "grad_norm": 0.09235038608312607, + "learning_rate": 2.76404916268242e-05, + "loss": 10.3267, + "step": 16210 + }, + { + "epoch": 0.08100077405178656, + "grad_norm": 0.09570086747407913, + "learning_rate": 2.763898971188265e-05, + "loss": 10.3259, + "step": 16220 + }, + { + "epoch": 0.08105071287672601, + "grad_norm": 0.10826196521520615, + "learning_rate": 2.76374877969411e-05, + "loss": 10.3268, + "step": 16230 + }, + { + "epoch": 0.08110065170166546, + "grad_norm": 0.09768252819776535, + "learning_rate": 2.7635985881999548e-05, + "loss": 10.3236, + "step": 16240 + }, + { + "epoch": 0.08115059052660491, + "grad_norm": 0.09747245907783508, + "learning_rate": 2.7634483967058e-05, + "loss": 10.3286, + "step": 16250 + }, + { + "epoch": 0.08120052935154436, + "grad_norm": 0.0909043624997139, + "learning_rate": 2.763298205211645e-05, + "loss": 10.3277, + "step": 16260 + }, + { + "epoch": 0.0812504681764838, + "grad_norm": 0.09629534184932709, + "learning_rate": 2.76314801371749e-05, + "loss": 10.3245, + "step": 16270 + }, + { + "epoch": 0.08130040700142326, + "grad_norm": 0.08753444999456406, + "learning_rate": 2.762997822223335e-05, + "loss": 10.3248, + "step": 16280 + }, + { + "epoch": 0.0813503458263627, + "grad_norm": 0.09728357940912247, + "learning_rate": 2.7628476307291796e-05, + "loss": 10.3241, + "step": 16290 + }, + { + "epoch": 0.08140028465130215, + "grad_norm": 0.09314234554767609, + "learning_rate": 2.762697439235025e-05, + "loss": 10.3236, + "step": 16300 + }, + { + "epoch": 0.0814502234762416, + "grad_norm": 0.10189732909202576, + "learning_rate": 2.7625472477408696e-05, + "loss": 10.323, + "step": 16310 + }, + { + "epoch": 0.08150016230118105, + "grad_norm": 0.09725593775510788, + "learning_rate": 2.762397056246715e-05, + "loss": 10.3217, + "step": 16320 + }, + { + "epoch": 0.0815501011261205, + "grad_norm": 0.09180109947919846, + "learning_rate": 2.7622468647525596e-05, + "loss": 10.3236, + "step": 16330 + }, + { + "epoch": 0.08160003995105995, + "grad_norm": 0.09535349905490875, + "learning_rate": 2.7620966732584043e-05, + "loss": 10.3231, + "step": 16340 + }, + { + "epoch": 0.0816499787759994, + "grad_norm": 0.09873170405626297, + "learning_rate": 2.7619464817642497e-05, + "loss": 10.3229, + "step": 16350 + }, + { + "epoch": 0.08169991760093885, + "grad_norm": 0.09569983929395676, + "learning_rate": 2.7617962902700943e-05, + "loss": 10.3197, + "step": 16360 + }, + { + "epoch": 0.0817498564258783, + "grad_norm": 0.0878266841173172, + "learning_rate": 2.7616460987759397e-05, + "loss": 10.3213, + "step": 16370 + }, + { + "epoch": 0.08179979525081775, + "grad_norm": 0.09356684237718582, + "learning_rate": 2.7614959072817844e-05, + "loss": 10.3215, + "step": 16380 + }, + { + "epoch": 0.0818497340757572, + "grad_norm": 0.09417729079723358, + "learning_rate": 2.761345715787629e-05, + "loss": 10.3196, + "step": 16390 + }, + { + "epoch": 0.08189967290069665, + "grad_norm": 0.09582322090864182, + "learning_rate": 2.7611955242934744e-05, + "loss": 10.3188, + "step": 16400 + }, + { + "epoch": 0.0819496117256361, + "grad_norm": 0.09269662201404572, + "learning_rate": 2.761045332799319e-05, + "loss": 10.3197, + "step": 16410 + }, + { + "epoch": 0.08199955055057555, + "grad_norm": 0.09405113756656647, + "learning_rate": 2.7608951413051645e-05, + "loss": 10.3219, + "step": 16420 + }, + { + "epoch": 0.082049489375515, + "grad_norm": 0.09437499940395355, + "learning_rate": 2.760744949811009e-05, + "loss": 10.3209, + "step": 16430 + }, + { + "epoch": 0.08209942820045445, + "grad_norm": 0.09764499962329865, + "learning_rate": 2.7605947583168538e-05, + "loss": 10.3186, + "step": 16440 + }, + { + "epoch": 0.0821493670253939, + "grad_norm": 0.09104657918214798, + "learning_rate": 2.7604445668226992e-05, + "loss": 10.3198, + "step": 16450 + }, + { + "epoch": 0.08219930585033335, + "grad_norm": 0.09402047097682953, + "learning_rate": 2.760294375328544e-05, + "loss": 10.3226, + "step": 16460 + }, + { + "epoch": 0.0822492446752728, + "grad_norm": 0.09690111875534058, + "learning_rate": 2.7601441838343892e-05, + "loss": 10.3231, + "step": 16470 + }, + { + "epoch": 0.08229918350021224, + "grad_norm": 0.09647475183010101, + "learning_rate": 2.759993992340234e-05, + "loss": 10.318, + "step": 16480 + }, + { + "epoch": 0.0823491223251517, + "grad_norm": 0.09171656519174576, + "learning_rate": 2.7598438008460786e-05, + "loss": 10.3201, + "step": 16490 + }, + { + "epoch": 0.08239906115009114, + "grad_norm": 0.09819825738668442, + "learning_rate": 2.759693609351924e-05, + "loss": 10.3202, + "step": 16500 + }, + { + "epoch": 0.08244899997503059, + "grad_norm": 0.09369684010744095, + "learning_rate": 2.7595434178577686e-05, + "loss": 10.32, + "step": 16510 + }, + { + "epoch": 0.08249893879997004, + "grad_norm": 0.0983380377292633, + "learning_rate": 2.759393226363614e-05, + "loss": 10.3198, + "step": 16520 + }, + { + "epoch": 0.08254887762490949, + "grad_norm": 0.0950574278831482, + "learning_rate": 2.7592430348694586e-05, + "loss": 10.3151, + "step": 16530 + }, + { + "epoch": 0.08259881644984894, + "grad_norm": 0.09450704604387283, + "learning_rate": 2.7590928433753033e-05, + "loss": 10.3157, + "step": 16540 + }, + { + "epoch": 0.08264875527478839, + "grad_norm": 0.09773395955562592, + "learning_rate": 2.7589426518811487e-05, + "loss": 10.3158, + "step": 16550 + }, + { + "epoch": 0.08269869409972784, + "grad_norm": 0.09340257197618484, + "learning_rate": 2.7587924603869933e-05, + "loss": 10.318, + "step": 16560 + }, + { + "epoch": 0.08274863292466728, + "grad_norm": 0.09730247408151627, + "learning_rate": 2.7586422688928387e-05, + "loss": 10.3164, + "step": 16570 + }, + { + "epoch": 0.08279857174960673, + "grad_norm": 0.09569399803876877, + "learning_rate": 2.7584920773986834e-05, + "loss": 10.3131, + "step": 16580 + }, + { + "epoch": 0.08284851057454617, + "grad_norm": 0.09237007796764374, + "learning_rate": 2.758341885904528e-05, + "loss": 10.3144, + "step": 16590 + }, + { + "epoch": 0.08289844939948562, + "grad_norm": 0.09663589298725128, + "learning_rate": 2.7581916944103734e-05, + "loss": 10.3138, + "step": 16600 + }, + { + "epoch": 0.08294838822442507, + "grad_norm": 0.09514245390892029, + "learning_rate": 2.758041502916218e-05, + "loss": 10.3155, + "step": 16610 + }, + { + "epoch": 0.08299832704936452, + "grad_norm": 0.09728768467903137, + "learning_rate": 2.7578913114220635e-05, + "loss": 10.3166, + "step": 16620 + }, + { + "epoch": 0.08304826587430397, + "grad_norm": 0.10066792368888855, + "learning_rate": 2.757741119927908e-05, + "loss": 10.3149, + "step": 16630 + }, + { + "epoch": 0.08309820469924342, + "grad_norm": 0.09562237560749054, + "learning_rate": 2.757590928433753e-05, + "loss": 10.3161, + "step": 16640 + }, + { + "epoch": 0.08314814352418287, + "grad_norm": 0.09799202531576157, + "learning_rate": 2.7574407369395982e-05, + "loss": 10.3133, + "step": 16650 + }, + { + "epoch": 0.08319808234912232, + "grad_norm": 0.09569067507982254, + "learning_rate": 2.757290545445443e-05, + "loss": 10.314, + "step": 16660 + }, + { + "epoch": 0.08324802117406177, + "grad_norm": 0.09926281124353409, + "learning_rate": 2.7571403539512882e-05, + "loss": 10.3098, + "step": 16670 + }, + { + "epoch": 0.08329795999900122, + "grad_norm": 0.09658607095479965, + "learning_rate": 2.756990162457133e-05, + "loss": 10.3153, + "step": 16680 + }, + { + "epoch": 0.08334789882394067, + "grad_norm": 0.09640868008136749, + "learning_rate": 2.756839970962978e-05, + "loss": 10.3109, + "step": 16690 + }, + { + "epoch": 0.08339783764888012, + "grad_norm": 0.0960702896118164, + "learning_rate": 2.756689779468823e-05, + "loss": 10.3135, + "step": 16700 + }, + { + "epoch": 0.08344777647381957, + "grad_norm": 0.10049708187580109, + "learning_rate": 2.7565395879746676e-05, + "loss": 10.3111, + "step": 16710 + }, + { + "epoch": 0.08349771529875902, + "grad_norm": 0.1001792773604393, + "learning_rate": 2.756389396480513e-05, + "loss": 10.3115, + "step": 16720 + }, + { + "epoch": 0.08354765412369847, + "grad_norm": 0.09567177295684814, + "learning_rate": 2.7562392049863576e-05, + "loss": 10.3118, + "step": 16730 + }, + { + "epoch": 0.08359759294863792, + "grad_norm": 0.09801898151636124, + "learning_rate": 2.7560890134922027e-05, + "loss": 43.1471, + "step": 16740 + }, + { + "epoch": 0.08364753177357737, + "grad_norm": 0.09383946657180786, + "learning_rate": 2.7559388219980477e-05, + "loss": 14.7706, + "step": 16750 + }, + { + "epoch": 0.08369747059851682, + "grad_norm": 0.10004527121782303, + "learning_rate": 2.7557886305038924e-05, + "loss": 10.3106, + "step": 16760 + }, + { + "epoch": 0.08374740942345626, + "grad_norm": 0.10205840319395065, + "learning_rate": 2.7556384390097377e-05, + "loss": 10.7248, + "step": 16770 + }, + { + "epoch": 0.08379734824839571, + "grad_norm": 0.10261974483728409, + "learning_rate": 2.7554882475155824e-05, + "loss": 10.3106, + "step": 16780 + }, + { + "epoch": 0.08384728707333516, + "grad_norm": 0.09215449541807175, + "learning_rate": 2.7553380560214274e-05, + "loss": 10.3098, + "step": 16790 + }, + { + "epoch": 0.08389722589827461, + "grad_norm": 0.09583956003189087, + "learning_rate": 2.7551878645272724e-05, + "loss": 10.3098, + "step": 16800 + }, + { + "epoch": 0.08394716472321406, + "grad_norm": 0.09968181699514389, + "learning_rate": 2.755037673033117e-05, + "loss": 10.3098, + "step": 16810 + }, + { + "epoch": 0.08399710354815351, + "grad_norm": 0.09686349332332611, + "learning_rate": 2.7548874815389625e-05, + "loss": 10.3073, + "step": 16820 + }, + { + "epoch": 0.08404704237309296, + "grad_norm": 0.09120567888021469, + "learning_rate": 2.754737290044807e-05, + "loss": 10.3103, + "step": 16830 + }, + { + "epoch": 0.08409698119803241, + "grad_norm": 0.0937332808971405, + "learning_rate": 2.754587098550652e-05, + "loss": 10.3085, + "step": 16840 + }, + { + "epoch": 0.08414692002297186, + "grad_norm": 0.091478131711483, + "learning_rate": 2.7544369070564972e-05, + "loss": 10.3071, + "step": 16850 + }, + { + "epoch": 0.08419685884791131, + "grad_norm": 0.09270336478948593, + "learning_rate": 2.754286715562342e-05, + "loss": 10.3107, + "step": 16860 + }, + { + "epoch": 0.08424679767285076, + "grad_norm": 0.09209531545639038, + "learning_rate": 2.7541365240681872e-05, + "loss": 10.3128, + "step": 16870 + }, + { + "epoch": 0.08429673649779021, + "grad_norm": 0.09706257283687592, + "learning_rate": 2.753986332574032e-05, + "loss": 10.3076, + "step": 16880 + }, + { + "epoch": 0.08434667532272966, + "grad_norm": 0.09445540606975555, + "learning_rate": 2.753836141079877e-05, + "loss": 10.3094, + "step": 16890 + }, + { + "epoch": 0.08439661414766911, + "grad_norm": 0.09534412622451782, + "learning_rate": 2.753685949585722e-05, + "loss": 10.311, + "step": 16900 + }, + { + "epoch": 0.08444655297260856, + "grad_norm": 0.09859678149223328, + "learning_rate": 2.7535357580915666e-05, + "loss": 10.3073, + "step": 16910 + }, + { + "epoch": 0.084496491797548, + "grad_norm": 0.09540943056344986, + "learning_rate": 2.753385566597412e-05, + "loss": 10.3114, + "step": 16920 + }, + { + "epoch": 0.08454643062248746, + "grad_norm": 0.09608754515647888, + "learning_rate": 2.7532353751032566e-05, + "loss": 10.3052, + "step": 16930 + }, + { + "epoch": 0.0845963694474269, + "grad_norm": 0.09397275745868683, + "learning_rate": 2.7530851836091017e-05, + "loss": 10.3078, + "step": 16940 + }, + { + "epoch": 0.08464630827236636, + "grad_norm": 0.09713399410247803, + "learning_rate": 2.7529349921149467e-05, + "loss": 10.3083, + "step": 16950 + }, + { + "epoch": 0.0846962470973058, + "grad_norm": 0.09662938863039017, + "learning_rate": 2.7527848006207917e-05, + "loss": 10.304, + "step": 16960 + }, + { + "epoch": 0.08474618592224525, + "grad_norm": 0.09605155885219574, + "learning_rate": 2.7526346091266367e-05, + "loss": 10.3048, + "step": 16970 + }, + { + "epoch": 0.0847961247471847, + "grad_norm": 0.09501200169324875, + "learning_rate": 2.7524844176324814e-05, + "loss": 10.3018, + "step": 16980 + }, + { + "epoch": 0.08484606357212415, + "grad_norm": 0.0933113843202591, + "learning_rate": 2.7523342261383264e-05, + "loss": 10.3059, + "step": 16990 + }, + { + "epoch": 0.0848960023970636, + "grad_norm": 0.09310649335384369, + "learning_rate": 2.7521840346441714e-05, + "loss": 10.3055, + "step": 17000 + }, + { + "epoch": 0.08494594122200305, + "grad_norm": 0.09754558652639389, + "learning_rate": 2.7520338431500164e-05, + "loss": 10.3086, + "step": 17010 + }, + { + "epoch": 0.0849958800469425, + "grad_norm": 0.08995413780212402, + "learning_rate": 2.7518836516558615e-05, + "loss": 10.3065, + "step": 17020 + }, + { + "epoch": 0.08504581887188195, + "grad_norm": 0.09209508448839188, + "learning_rate": 2.751733460161706e-05, + "loss": 10.3058, + "step": 17030 + }, + { + "epoch": 0.0850957576968214, + "grad_norm": 0.0900413915514946, + "learning_rate": 2.751583268667551e-05, + "loss": 10.3033, + "step": 17040 + }, + { + "epoch": 0.08514569652176085, + "grad_norm": 0.10113733261823654, + "learning_rate": 2.7514330771733962e-05, + "loss": 10.3027, + "step": 17050 + }, + { + "epoch": 0.08519563534670029, + "grad_norm": 0.10684780031442642, + "learning_rate": 2.7512828856792412e-05, + "loss": 10.3006, + "step": 17060 + }, + { + "epoch": 0.08524557417163973, + "grad_norm": 0.09294813871383667, + "learning_rate": 2.7511326941850862e-05, + "loss": 10.3065, + "step": 17070 + }, + { + "epoch": 0.08529551299657918, + "grad_norm": 0.09303073585033417, + "learning_rate": 2.750982502690931e-05, + "loss": 10.3031, + "step": 17080 + }, + { + "epoch": 0.08534545182151863, + "grad_norm": 0.09494984894990921, + "learning_rate": 2.750832311196776e-05, + "loss": 10.3067, + "step": 17090 + }, + { + "epoch": 0.08539539064645808, + "grad_norm": 0.09534880518913269, + "learning_rate": 2.750682119702621e-05, + "loss": 10.302, + "step": 17100 + }, + { + "epoch": 0.08544532947139753, + "grad_norm": 0.09925422072410583, + "learning_rate": 2.750531928208466e-05, + "loss": 10.3026, + "step": 17110 + }, + { + "epoch": 0.08549526829633698, + "grad_norm": 0.09157177805900574, + "learning_rate": 2.750381736714311e-05, + "loss": 10.3023, + "step": 17120 + }, + { + "epoch": 0.08554520712127643, + "grad_norm": 0.09446504712104797, + "learning_rate": 2.7502315452201556e-05, + "loss": 10.3023, + "step": 17130 + }, + { + "epoch": 0.08559514594621588, + "grad_norm": 0.08893059939146042, + "learning_rate": 2.7500813537260007e-05, + "loss": 10.3017, + "step": 17140 + }, + { + "epoch": 0.08564508477115533, + "grad_norm": 0.09873572736978531, + "learning_rate": 2.7499311622318457e-05, + "loss": 10.3041, + "step": 17150 + }, + { + "epoch": 0.08569502359609478, + "grad_norm": 0.10007481276988983, + "learning_rate": 2.7497809707376907e-05, + "loss": 10.2991, + "step": 17160 + }, + { + "epoch": 0.08574496242103423, + "grad_norm": 0.0909745916724205, + "learning_rate": 2.7496307792435357e-05, + "loss": 12.4831, + "step": 17170 + }, + { + "epoch": 0.08579490124597368, + "grad_norm": 0.09608662128448486, + "learning_rate": 2.7494805877493804e-05, + "loss": 10.3047, + "step": 17180 + }, + { + "epoch": 0.08584484007091313, + "grad_norm": 0.0924767255783081, + "learning_rate": 2.7493303962552254e-05, + "loss": 10.2995, + "step": 17190 + }, + { + "epoch": 0.08589477889585258, + "grad_norm": 0.09744284301996231, + "learning_rate": 2.7491802047610704e-05, + "loss": 10.3003, + "step": 17200 + }, + { + "epoch": 0.08594471772079203, + "grad_norm": 0.09493273496627808, + "learning_rate": 2.7490300132669154e-05, + "loss": 10.3025, + "step": 17210 + }, + { + "epoch": 0.08599465654573148, + "grad_norm": 0.10000015050172806, + "learning_rate": 2.7488798217727605e-05, + "loss": 10.3011, + "step": 17220 + }, + { + "epoch": 0.08604459537067093, + "grad_norm": 0.09295312315225601, + "learning_rate": 2.748729630278605e-05, + "loss": 10.3004, + "step": 17230 + }, + { + "epoch": 0.08609453419561038, + "grad_norm": 0.10081910341978073, + "learning_rate": 2.74857943878445e-05, + "loss": 10.2935, + "step": 17240 + }, + { + "epoch": 0.08614447302054982, + "grad_norm": 0.09449261426925659, + "learning_rate": 2.7484292472902952e-05, + "loss": 10.2981, + "step": 17250 + }, + { + "epoch": 0.08619441184548927, + "grad_norm": 0.09793805330991745, + "learning_rate": 2.7482790557961402e-05, + "loss": 10.2947, + "step": 17260 + }, + { + "epoch": 0.08624435067042872, + "grad_norm": 0.10022538155317307, + "learning_rate": 2.7481288643019852e-05, + "loss": 10.2971, + "step": 17270 + }, + { + "epoch": 0.08629428949536817, + "grad_norm": 0.0923459604382515, + "learning_rate": 2.7479786728078302e-05, + "loss": 10.2982, + "step": 17280 + }, + { + "epoch": 0.08634422832030762, + "grad_norm": 0.09417003393173218, + "learning_rate": 2.747828481313675e-05, + "loss": 10.2989, + "step": 17290 + }, + { + "epoch": 0.08639416714524707, + "grad_norm": 0.09343262761831284, + "learning_rate": 2.74767828981952e-05, + "loss": 10.2968, + "step": 17300 + }, + { + "epoch": 0.08644410597018652, + "grad_norm": 0.09520164877176285, + "learning_rate": 2.747528098325365e-05, + "loss": 10.2959, + "step": 17310 + }, + { + "epoch": 0.08649404479512597, + "grad_norm": 0.09455534815788269, + "learning_rate": 2.74737790683121e-05, + "loss": 10.2957, + "step": 17320 + }, + { + "epoch": 0.08654398362006542, + "grad_norm": 0.09446435421705246, + "learning_rate": 2.747227715337055e-05, + "loss": 10.2937, + "step": 17330 + }, + { + "epoch": 0.08659392244500487, + "grad_norm": 0.09533445537090302, + "learning_rate": 2.7470775238428997e-05, + "loss": 10.2936, + "step": 17340 + }, + { + "epoch": 0.08664386126994432, + "grad_norm": 0.09248590469360352, + "learning_rate": 2.7469273323487447e-05, + "loss": 10.2965, + "step": 17350 + }, + { + "epoch": 0.08669380009488377, + "grad_norm": 0.0972449779510498, + "learning_rate": 2.7467771408545897e-05, + "loss": 10.2934, + "step": 17360 + }, + { + "epoch": 0.08674373891982322, + "grad_norm": 0.08786053210496902, + "learning_rate": 2.7466269493604347e-05, + "loss": 10.2956, + "step": 17370 + }, + { + "epoch": 0.08679367774476267, + "grad_norm": 0.10034439712762833, + "learning_rate": 2.7464767578662797e-05, + "loss": 10.2939, + "step": 17380 + }, + { + "epoch": 0.08684361656970212, + "grad_norm": 0.10132072865962982, + "learning_rate": 2.7463265663721244e-05, + "loss": 10.2934, + "step": 17390 + }, + { + "epoch": 0.08689355539464157, + "grad_norm": 0.093051977455616, + "learning_rate": 2.7461763748779694e-05, + "loss": 10.2921, + "step": 17400 + }, + { + "epoch": 0.08694349421958102, + "grad_norm": 0.09505248069763184, + "learning_rate": 2.7460261833838144e-05, + "loss": 10.2927, + "step": 17410 + }, + { + "epoch": 0.08699343304452047, + "grad_norm": 0.09846062958240509, + "learning_rate": 2.7458759918896595e-05, + "loss": 10.2926, + "step": 17420 + }, + { + "epoch": 0.08704337186945992, + "grad_norm": 0.10331861674785614, + "learning_rate": 2.7457258003955045e-05, + "loss": 10.2922, + "step": 17430 + }, + { + "epoch": 0.08709331069439936, + "grad_norm": 0.0902588963508606, + "learning_rate": 2.745575608901349e-05, + "loss": 10.2916, + "step": 17440 + }, + { + "epoch": 0.08714324951933881, + "grad_norm": 0.09760275483131409, + "learning_rate": 2.7454254174071942e-05, + "loss": 10.294, + "step": 17450 + }, + { + "epoch": 0.08719318834427826, + "grad_norm": 0.09371180832386017, + "learning_rate": 2.7452752259130392e-05, + "loss": 10.2919, + "step": 17460 + }, + { + "epoch": 0.08724312716921771, + "grad_norm": 0.09257213771343231, + "learning_rate": 2.7451250344188842e-05, + "loss": 10.2953, + "step": 17470 + }, + { + "epoch": 0.08729306599415716, + "grad_norm": 0.09264030307531357, + "learning_rate": 2.7449748429247292e-05, + "loss": 10.2946, + "step": 17480 + }, + { + "epoch": 0.08734300481909661, + "grad_norm": 0.09427089989185333, + "learning_rate": 2.744824651430574e-05, + "loss": 10.2918, + "step": 17490 + }, + { + "epoch": 0.08739294364403606, + "grad_norm": 0.09720293432474136, + "learning_rate": 2.744674459936419e-05, + "loss": 10.2937, + "step": 17500 + }, + { + "epoch": 0.08744288246897551, + "grad_norm": 0.09387167543172836, + "learning_rate": 2.744524268442264e-05, + "loss": 10.2891, + "step": 17510 + }, + { + "epoch": 0.08749282129391496, + "grad_norm": 0.09527067095041275, + "learning_rate": 2.744374076948109e-05, + "loss": 10.2929, + "step": 17520 + }, + { + "epoch": 0.08754276011885441, + "grad_norm": 0.09935829043388367, + "learning_rate": 2.744223885453954e-05, + "loss": 10.2936, + "step": 17530 + }, + { + "epoch": 0.08759269894379386, + "grad_norm": 0.0906045064330101, + "learning_rate": 2.7440736939597987e-05, + "loss": 10.2928, + "step": 17540 + }, + { + "epoch": 0.08764263776873331, + "grad_norm": 0.09333600848913193, + "learning_rate": 2.7439235024656437e-05, + "loss": 10.2918, + "step": 17550 + }, + { + "epoch": 0.08769257659367274, + "grad_norm": 0.09050074964761734, + "learning_rate": 2.7437733109714887e-05, + "loss": 10.2893, + "step": 17560 + }, + { + "epoch": 0.0877425154186122, + "grad_norm": 0.0935349315404892, + "learning_rate": 2.7436231194773337e-05, + "loss": 10.2924, + "step": 17570 + }, + { + "epoch": 0.08779245424355164, + "grad_norm": 0.09864573180675507, + "learning_rate": 2.7434729279831787e-05, + "loss": 10.2915, + "step": 17580 + }, + { + "epoch": 0.08784239306849109, + "grad_norm": 0.09615197032690048, + "learning_rate": 2.7433227364890234e-05, + "loss": 10.2898, + "step": 17590 + }, + { + "epoch": 0.08789233189343054, + "grad_norm": 0.09623583406209946, + "learning_rate": 2.7431725449948688e-05, + "loss": 10.2904, + "step": 17600 + }, + { + "epoch": 0.08794227071836999, + "grad_norm": 0.0986003652215004, + "learning_rate": 2.7430223535007134e-05, + "loss": 10.2876, + "step": 17610 + }, + { + "epoch": 0.08799220954330944, + "grad_norm": 0.09806670993566513, + "learning_rate": 2.7428721620065585e-05, + "loss": 10.2888, + "step": 17620 + }, + { + "epoch": 0.08804214836824889, + "grad_norm": 0.09443091601133347, + "learning_rate": 2.7427219705124035e-05, + "loss": 10.2883, + "step": 17630 + }, + { + "epoch": 0.08809208719318834, + "grad_norm": 0.0967443585395813, + "learning_rate": 2.742571779018248e-05, + "loss": 10.2869, + "step": 17640 + }, + { + "epoch": 0.08814202601812779, + "grad_norm": 0.09863567352294922, + "learning_rate": 2.7424215875240935e-05, + "loss": 10.2899, + "step": 17650 + }, + { + "epoch": 0.08819196484306724, + "grad_norm": 0.09256859123706818, + "learning_rate": 2.7422713960299382e-05, + "loss": 10.2843, + "step": 17660 + }, + { + "epoch": 0.08824190366800669, + "grad_norm": 0.09759136289358139, + "learning_rate": 2.7421212045357832e-05, + "loss": 10.2869, + "step": 17670 + }, + { + "epoch": 0.08829184249294614, + "grad_norm": 0.09880513697862625, + "learning_rate": 2.7419710130416282e-05, + "loss": 10.2891, + "step": 17680 + }, + { + "epoch": 0.08834178131788559, + "grad_norm": 0.09688255190849304, + "learning_rate": 2.741820821547473e-05, + "loss": 10.2857, + "step": 17690 + }, + { + "epoch": 0.08839172014282504, + "grad_norm": 0.0934658944606781, + "learning_rate": 2.7416706300533183e-05, + "loss": 10.2866, + "step": 17700 + }, + { + "epoch": 0.08844165896776449, + "grad_norm": 0.09990676492452621, + "learning_rate": 2.741520438559163e-05, + "loss": 10.2842, + "step": 17710 + }, + { + "epoch": 0.08849159779270394, + "grad_norm": 0.09611719101667404, + "learning_rate": 2.741370247065008e-05, + "loss": 10.2883, + "step": 17720 + }, + { + "epoch": 0.08854153661764338, + "grad_norm": 0.09569796919822693, + "learning_rate": 2.741220055570853e-05, + "loss": 10.2871, + "step": 17730 + }, + { + "epoch": 0.08859147544258283, + "grad_norm": 0.09596303105354309, + "learning_rate": 2.7410698640766977e-05, + "loss": 10.2823, + "step": 17740 + }, + { + "epoch": 0.08864141426752228, + "grad_norm": 0.0938568115234375, + "learning_rate": 2.740919672582543e-05, + "loss": 10.2833, + "step": 17750 + }, + { + "epoch": 0.08869135309246173, + "grad_norm": 0.09523828327655792, + "learning_rate": 2.7407694810883877e-05, + "loss": 10.2872, + "step": 17760 + }, + { + "epoch": 0.08874129191740118, + "grad_norm": 0.09327805787324905, + "learning_rate": 2.7406192895942327e-05, + "loss": 10.2872, + "step": 17770 + }, + { + "epoch": 0.08879123074234063, + "grad_norm": 0.09197764843702316, + "learning_rate": 2.7404690981000777e-05, + "loss": 10.2836, + "step": 17780 + }, + { + "epoch": 0.08884116956728008, + "grad_norm": 0.08873019367456436, + "learning_rate": 2.7403189066059224e-05, + "loss": 10.2816, + "step": 17790 + }, + { + "epoch": 0.08889110839221953, + "grad_norm": 0.0956963300704956, + "learning_rate": 2.7401687151117678e-05, + "loss": 10.2801, + "step": 17800 + }, + { + "epoch": 0.08894104721715898, + "grad_norm": 0.09462413936853409, + "learning_rate": 2.7400185236176124e-05, + "loss": 10.2806, + "step": 17810 + }, + { + "epoch": 0.08899098604209843, + "grad_norm": 0.09391899406909943, + "learning_rate": 2.7398683321234575e-05, + "loss": 10.2877, + "step": 17820 + }, + { + "epoch": 0.08904092486703788, + "grad_norm": 0.09864463657140732, + "learning_rate": 2.7397181406293025e-05, + "loss": 10.2851, + "step": 17830 + }, + { + "epoch": 0.08909086369197733, + "grad_norm": 0.09195532649755478, + "learning_rate": 2.739567949135147e-05, + "loss": 10.2851, + "step": 17840 + }, + { + "epoch": 0.08914080251691678, + "grad_norm": 0.09261298924684525, + "learning_rate": 2.7394177576409925e-05, + "loss": 10.2853, + "step": 17850 + }, + { + "epoch": 0.08919074134185623, + "grad_norm": 0.09226420521736145, + "learning_rate": 2.7392675661468372e-05, + "loss": 10.2833, + "step": 17860 + }, + { + "epoch": 0.08924068016679568, + "grad_norm": 0.09687785059213638, + "learning_rate": 2.7391173746526822e-05, + "loss": 10.2819, + "step": 17870 + }, + { + "epoch": 0.08929061899173513, + "grad_norm": 0.09745992720127106, + "learning_rate": 2.7389671831585272e-05, + "loss": 10.285, + "step": 17880 + }, + { + "epoch": 0.08934055781667458, + "grad_norm": 0.10478182882070541, + "learning_rate": 2.738816991664372e-05, + "loss": 10.2759, + "step": 17890 + }, + { + "epoch": 0.08939049664161403, + "grad_norm": 0.0948200598359108, + "learning_rate": 2.7386668001702173e-05, + "loss": 10.2803, + "step": 17900 + }, + { + "epoch": 0.08944043546655348, + "grad_norm": 0.09617318212985992, + "learning_rate": 2.738516608676062e-05, + "loss": 10.2789, + "step": 17910 + }, + { + "epoch": 0.08949037429149292, + "grad_norm": 0.09210513532161713, + "learning_rate": 2.7383664171819073e-05, + "loss": 10.2788, + "step": 17920 + }, + { + "epoch": 0.08954031311643237, + "grad_norm": 0.09981706738471985, + "learning_rate": 2.738216225687752e-05, + "loss": 10.2767, + "step": 17930 + }, + { + "epoch": 0.08959025194137182, + "grad_norm": 0.1067042201757431, + "learning_rate": 2.7380660341935967e-05, + "loss": 10.2798, + "step": 17940 + }, + { + "epoch": 0.08964019076631127, + "grad_norm": 0.0890965685248375, + "learning_rate": 2.737915842699442e-05, + "loss": 10.2807, + "step": 17950 + }, + { + "epoch": 0.08969012959125072, + "grad_norm": 0.09566470980644226, + "learning_rate": 2.7377656512052867e-05, + "loss": 10.2792, + "step": 17960 + }, + { + "epoch": 0.08974006841619017, + "grad_norm": 0.09969697147607803, + "learning_rate": 2.737615459711132e-05, + "loss": 10.2778, + "step": 17970 + }, + { + "epoch": 0.08979000724112962, + "grad_norm": 0.09558671712875366, + "learning_rate": 2.7374652682169767e-05, + "loss": 10.2777, + "step": 17980 + }, + { + "epoch": 0.08983994606606907, + "grad_norm": 0.09214569628238678, + "learning_rate": 2.7373150767228214e-05, + "loss": 10.2794, + "step": 17990 + }, + { + "epoch": 0.08988988489100852, + "grad_norm": 0.09558131545782089, + "learning_rate": 2.7371648852286668e-05, + "loss": 10.279, + "step": 18000 + }, + { + "epoch": 0.08993982371594797, + "grad_norm": 0.09858100861310959, + "learning_rate": 2.7370146937345114e-05, + "loss": 10.2783, + "step": 18010 + }, + { + "epoch": 0.08998976254088742, + "grad_norm": 0.09311861544847488, + "learning_rate": 2.7368645022403568e-05, + "loss": 10.28, + "step": 18020 + }, + { + "epoch": 0.09003970136582687, + "grad_norm": 0.09338545799255371, + "learning_rate": 2.7367143107462015e-05, + "loss": 10.2755, + "step": 18030 + }, + { + "epoch": 0.09008964019076632, + "grad_norm": 0.09420605003833771, + "learning_rate": 2.736564119252046e-05, + "loss": 10.2778, + "step": 18040 + }, + { + "epoch": 0.09013957901570575, + "grad_norm": 0.09855175018310547, + "learning_rate": 2.7364139277578915e-05, + "loss": 10.2763, + "step": 18050 + }, + { + "epoch": 0.0901895178406452, + "grad_norm": 0.09705475717782974, + "learning_rate": 2.7362637362637362e-05, + "loss": 10.2781, + "step": 18060 + }, + { + "epoch": 0.09023945666558465, + "grad_norm": 0.09603998064994812, + "learning_rate": 2.7361135447695816e-05, + "loss": 10.277, + "step": 18070 + }, + { + "epoch": 0.0902893954905241, + "grad_norm": 0.09418918937444687, + "learning_rate": 2.7359633532754262e-05, + "loss": 10.274, + "step": 18080 + }, + { + "epoch": 0.09033933431546355, + "grad_norm": 0.09442311525344849, + "learning_rate": 2.735813161781271e-05, + "loss": 10.2778, + "step": 18090 + }, + { + "epoch": 0.090389273140403, + "grad_norm": 0.0943770557641983, + "learning_rate": 2.7356629702871163e-05, + "loss": 10.273, + "step": 18100 + }, + { + "epoch": 0.09043921196534245, + "grad_norm": 0.09986016154289246, + "learning_rate": 2.735512778792961e-05, + "loss": 10.2782, + "step": 18110 + }, + { + "epoch": 0.0904891507902819, + "grad_norm": 0.09035969525575638, + "learning_rate": 2.7353625872988063e-05, + "loss": 10.2772, + "step": 18120 + }, + { + "epoch": 0.09053908961522135, + "grad_norm": 0.09936581552028656, + "learning_rate": 2.735212395804651e-05, + "loss": 10.2784, + "step": 18130 + }, + { + "epoch": 0.0905890284401608, + "grad_norm": 0.09473800659179688, + "learning_rate": 2.7350622043104957e-05, + "loss": 10.2698, + "step": 18140 + }, + { + "epoch": 0.09063896726510025, + "grad_norm": 0.09298662096261978, + "learning_rate": 2.734912012816341e-05, + "loss": 10.2763, + "step": 18150 + }, + { + "epoch": 0.0906889060900397, + "grad_norm": 0.09476146847009659, + "learning_rate": 2.7347618213221857e-05, + "loss": 10.2771, + "step": 18160 + }, + { + "epoch": 0.09073884491497915, + "grad_norm": 0.09148777276277542, + "learning_rate": 2.734611629828031e-05, + "loss": 10.2752, + "step": 18170 + }, + { + "epoch": 0.0907887837399186, + "grad_norm": 0.09469624608755112, + "learning_rate": 2.7344614383338757e-05, + "loss": 10.27, + "step": 18180 + }, + { + "epoch": 0.09083872256485805, + "grad_norm": 0.09769047796726227, + "learning_rate": 2.7343112468397204e-05, + "loss": 10.2733, + "step": 18190 + }, + { + "epoch": 0.0908886613897975, + "grad_norm": 0.09332720935344696, + "learning_rate": 2.7341610553455658e-05, + "loss": 10.2736, + "step": 18200 + }, + { + "epoch": 0.09093860021473694, + "grad_norm": 0.09613288938999176, + "learning_rate": 2.7340108638514105e-05, + "loss": 10.2693, + "step": 18210 + }, + { + "epoch": 0.0909885390396764, + "grad_norm": 0.09733350574970245, + "learning_rate": 2.7338606723572558e-05, + "loss": 10.2727, + "step": 18220 + }, + { + "epoch": 0.09103847786461584, + "grad_norm": 0.09478358179330826, + "learning_rate": 2.7337104808631005e-05, + "loss": 10.2741, + "step": 18230 + }, + { + "epoch": 0.0910884166895553, + "grad_norm": 0.0969361811876297, + "learning_rate": 2.733560289368945e-05, + "loss": 10.2716, + "step": 18240 + }, + { + "epoch": 0.09113835551449474, + "grad_norm": 0.09295036643743515, + "learning_rate": 2.7334100978747905e-05, + "loss": 10.2748, + "step": 18250 + }, + { + "epoch": 0.09118829433943419, + "grad_norm": 0.09057419002056122, + "learning_rate": 2.7332599063806352e-05, + "loss": 10.2733, + "step": 18260 + }, + { + "epoch": 0.09123823316437364, + "grad_norm": 0.09219224005937576, + "learning_rate": 2.7331097148864806e-05, + "loss": 10.2736, + "step": 18270 + }, + { + "epoch": 0.09128817198931309, + "grad_norm": 0.09382915496826172, + "learning_rate": 2.7329595233923252e-05, + "loss": 10.2732, + "step": 18280 + }, + { + "epoch": 0.09133811081425254, + "grad_norm": 0.09011480212211609, + "learning_rate": 2.7328093318981703e-05, + "loss": 10.2728, + "step": 18290 + }, + { + "epoch": 0.09138804963919199, + "grad_norm": 0.09689683467149734, + "learning_rate": 2.7326591404040153e-05, + "loss": 10.2674, + "step": 18300 + }, + { + "epoch": 0.09143798846413144, + "grad_norm": 0.09588810056447983, + "learning_rate": 2.73250894890986e-05, + "loss": 10.2684, + "step": 18310 + }, + { + "epoch": 0.09148792728907089, + "grad_norm": 0.09385774284601212, + "learning_rate": 2.7323587574157053e-05, + "loss": 10.2706, + "step": 18320 + }, + { + "epoch": 0.09153786611401034, + "grad_norm": 0.09056130051612854, + "learning_rate": 2.73220856592155e-05, + "loss": 10.27, + "step": 18330 + }, + { + "epoch": 0.09158780493894979, + "grad_norm": 0.09509393572807312, + "learning_rate": 2.732058374427395e-05, + "loss": 10.2709, + "step": 18340 + }, + { + "epoch": 0.09163774376388924, + "grad_norm": 0.09376955777406693, + "learning_rate": 2.73190818293324e-05, + "loss": 10.2682, + "step": 18350 + }, + { + "epoch": 0.09168768258882869, + "grad_norm": 0.10004284977912903, + "learning_rate": 2.7317579914390847e-05, + "loss": 10.2671, + "step": 18360 + }, + { + "epoch": 0.09173762141376814, + "grad_norm": 0.09222781658172607, + "learning_rate": 2.73160779994493e-05, + "loss": 10.2699, + "step": 18370 + }, + { + "epoch": 0.09178756023870759, + "grad_norm": 0.09528329968452454, + "learning_rate": 2.7314576084507747e-05, + "loss": 10.2711, + "step": 18380 + }, + { + "epoch": 0.09183749906364704, + "grad_norm": 0.09617341309785843, + "learning_rate": 2.7313074169566198e-05, + "loss": 10.2668, + "step": 18390 + }, + { + "epoch": 0.09188743788858648, + "grad_norm": 0.09537350386381149, + "learning_rate": 2.7311572254624648e-05, + "loss": 10.2692, + "step": 18400 + }, + { + "epoch": 0.09193737671352593, + "grad_norm": 0.09477705508470535, + "learning_rate": 2.7310070339683095e-05, + "loss": 10.2695, + "step": 18410 + }, + { + "epoch": 0.09198731553846538, + "grad_norm": 0.0950855240225792, + "learning_rate": 2.7308568424741548e-05, + "loss": 10.2652, + "step": 18420 + }, + { + "epoch": 0.09203725436340483, + "grad_norm": 0.09190298616886139, + "learning_rate": 2.7307066509799995e-05, + "loss": 10.2666, + "step": 18430 + }, + { + "epoch": 0.09208719318834428, + "grad_norm": 0.09800129383802414, + "learning_rate": 2.7305564594858445e-05, + "loss": 10.2699, + "step": 18440 + }, + { + "epoch": 0.09213713201328373, + "grad_norm": 0.09911572933197021, + "learning_rate": 2.7304062679916895e-05, + "loss": 10.2665, + "step": 18450 + }, + { + "epoch": 0.09218707083822318, + "grad_norm": 0.09081263840198517, + "learning_rate": 2.7302560764975342e-05, + "loss": 10.2648, + "step": 18460 + }, + { + "epoch": 0.09223700966316263, + "grad_norm": 0.09170430898666382, + "learning_rate": 2.7301058850033796e-05, + "loss": 10.2636, + "step": 18470 + }, + { + "epoch": 0.09228694848810208, + "grad_norm": 0.09938862174749374, + "learning_rate": 2.7299556935092242e-05, + "loss": 10.2678, + "step": 18480 + }, + { + "epoch": 0.09233688731304153, + "grad_norm": 0.09364289045333862, + "learning_rate": 2.7298055020150693e-05, + "loss": 10.2656, + "step": 18490 + }, + { + "epoch": 0.09238682613798098, + "grad_norm": 0.0970052108168602, + "learning_rate": 2.7296553105209143e-05, + "loss": 10.2641, + "step": 18500 + }, + { + "epoch": 0.09243676496292043, + "grad_norm": 0.09379066526889801, + "learning_rate": 2.729505119026759e-05, + "loss": 10.2681, + "step": 18510 + }, + { + "epoch": 0.09248670378785988, + "grad_norm": 0.0943591296672821, + "learning_rate": 2.7293549275326043e-05, + "loss": 10.2699, + "step": 18520 + }, + { + "epoch": 0.09253664261279933, + "grad_norm": 0.09379053860902786, + "learning_rate": 2.729204736038449e-05, + "loss": 10.2647, + "step": 18530 + }, + { + "epoch": 0.09258658143773878, + "grad_norm": 0.10091900825500488, + "learning_rate": 2.729054544544294e-05, + "loss": 10.2604, + "step": 18540 + }, + { + "epoch": 0.09263652026267821, + "grad_norm": 0.09774335473775864, + "learning_rate": 2.728904353050139e-05, + "loss": 10.263, + "step": 18550 + }, + { + "epoch": 0.09268645908761766, + "grad_norm": 0.0980936661362648, + "learning_rate": 2.7287541615559837e-05, + "loss": 10.2615, + "step": 18560 + }, + { + "epoch": 0.09273639791255711, + "grad_norm": 0.09376488626003265, + "learning_rate": 2.728603970061829e-05, + "loss": 10.2634, + "step": 18570 + }, + { + "epoch": 0.09278633673749656, + "grad_norm": 0.10236544907093048, + "learning_rate": 2.7284537785676737e-05, + "loss": 10.2677, + "step": 18580 + }, + { + "epoch": 0.09283627556243601, + "grad_norm": 0.09201527386903763, + "learning_rate": 2.7283035870735188e-05, + "loss": 10.2606, + "step": 18590 + }, + { + "epoch": 0.09288621438737546, + "grad_norm": 0.09707942605018616, + "learning_rate": 2.7281533955793638e-05, + "loss": 10.2669, + "step": 18600 + }, + { + "epoch": 0.09293615321231491, + "grad_norm": 0.09611304849386215, + "learning_rate": 2.7280032040852088e-05, + "loss": 10.2644, + "step": 18610 + }, + { + "epoch": 0.09298609203725436, + "grad_norm": 0.09938688576221466, + "learning_rate": 2.7278530125910538e-05, + "loss": 10.262, + "step": 18620 + }, + { + "epoch": 0.09303603086219381, + "grad_norm": 0.10115701705217361, + "learning_rate": 2.7277028210968985e-05, + "loss": 10.26, + "step": 18630 + }, + { + "epoch": 0.09308596968713326, + "grad_norm": 0.0930614024400711, + "learning_rate": 2.7275526296027435e-05, + "loss": 10.2623, + "step": 18640 + }, + { + "epoch": 0.09313590851207271, + "grad_norm": 0.09945568442344666, + "learning_rate": 2.7274024381085885e-05, + "loss": 10.2623, + "step": 18650 + }, + { + "epoch": 0.09318584733701216, + "grad_norm": 0.09213350713253021, + "learning_rate": 2.7272522466144335e-05, + "loss": 10.2614, + "step": 18660 + }, + { + "epoch": 0.0932357861619516, + "grad_norm": 0.09411916881799698, + "learning_rate": 2.7271020551202786e-05, + "loss": 10.2594, + "step": 18670 + }, + { + "epoch": 0.09328572498689106, + "grad_norm": 0.09325142949819565, + "learning_rate": 2.7269518636261232e-05, + "loss": 10.2569, + "step": 18680 + }, + { + "epoch": 0.0933356638118305, + "grad_norm": 0.09338999539613724, + "learning_rate": 2.7268016721319683e-05, + "loss": 10.2605, + "step": 18690 + }, + { + "epoch": 0.09338560263676995, + "grad_norm": 0.09095557034015656, + "learning_rate": 2.7266514806378133e-05, + "loss": 10.2629, + "step": 18700 + }, + { + "epoch": 0.0934355414617094, + "grad_norm": 0.09274104982614517, + "learning_rate": 2.7265012891436583e-05, + "loss": 10.2582, + "step": 18710 + }, + { + "epoch": 0.09348548028664885, + "grad_norm": 0.09546738117933273, + "learning_rate": 2.7263510976495033e-05, + "loss": 10.2595, + "step": 18720 + }, + { + "epoch": 0.0935354191115883, + "grad_norm": 0.0984828770160675, + "learning_rate": 2.726200906155348e-05, + "loss": 10.2569, + "step": 18730 + }, + { + "epoch": 0.09358535793652775, + "grad_norm": 0.09252218902111053, + "learning_rate": 2.726050714661193e-05, + "loss": 10.2585, + "step": 18740 + }, + { + "epoch": 0.0936352967614672, + "grad_norm": 0.09975884854793549, + "learning_rate": 2.725900523167038e-05, + "loss": 10.2581, + "step": 18750 + }, + { + "epoch": 0.09368523558640665, + "grad_norm": 0.08888278901576996, + "learning_rate": 2.725750331672883e-05, + "loss": 10.257, + "step": 18760 + }, + { + "epoch": 0.0937351744113461, + "grad_norm": 0.09609197080135345, + "learning_rate": 2.725600140178728e-05, + "loss": 10.2589, + "step": 18770 + }, + { + "epoch": 0.09378511323628555, + "grad_norm": 0.09457485377788544, + "learning_rate": 2.7254499486845727e-05, + "loss": 10.2558, + "step": 18780 + }, + { + "epoch": 0.093835052061225, + "grad_norm": 0.09543312340974808, + "learning_rate": 2.7252997571904178e-05, + "loss": 10.2583, + "step": 18790 + }, + { + "epoch": 0.09388499088616445, + "grad_norm": 0.08900411427021027, + "learning_rate": 2.7251495656962628e-05, + "loss": 10.2572, + "step": 18800 + }, + { + "epoch": 0.0939349297111039, + "grad_norm": 0.09842579811811447, + "learning_rate": 2.7249993742021078e-05, + "loss": 10.2532, + "step": 18810 + }, + { + "epoch": 0.09398486853604335, + "grad_norm": 0.0965215340256691, + "learning_rate": 2.7248491827079528e-05, + "loss": 10.2548, + "step": 18820 + }, + { + "epoch": 0.0940348073609828, + "grad_norm": 0.09778942912817001, + "learning_rate": 2.7246989912137975e-05, + "loss": 10.2543, + "step": 18830 + }, + { + "epoch": 0.09408474618592225, + "grad_norm": 0.09559378027915955, + "learning_rate": 2.7245487997196425e-05, + "loss": 10.2552, + "step": 18840 + }, + { + "epoch": 0.0941346850108617, + "grad_norm": 0.09030424058437347, + "learning_rate": 2.7243986082254875e-05, + "loss": 10.2562, + "step": 18850 + }, + { + "epoch": 0.09418462383580115, + "grad_norm": 0.09515605866909027, + "learning_rate": 2.7242484167313325e-05, + "loss": 10.2521, + "step": 18860 + }, + { + "epoch": 0.0942345626607406, + "grad_norm": 0.09284040331840515, + "learning_rate": 2.7240982252371776e-05, + "loss": 10.2552, + "step": 18870 + }, + { + "epoch": 0.09428450148568004, + "grad_norm": 0.09655296802520752, + "learning_rate": 2.7239480337430222e-05, + "loss": 10.2526, + "step": 18880 + }, + { + "epoch": 0.0943344403106195, + "grad_norm": 0.09709782898426056, + "learning_rate": 2.7237978422488673e-05, + "loss": 10.2554, + "step": 18890 + }, + { + "epoch": 0.09438437913555894, + "grad_norm": 0.09283418953418732, + "learning_rate": 2.7236476507547123e-05, + "loss": 10.2538, + "step": 18900 + }, + { + "epoch": 0.09443431796049839, + "grad_norm": 0.09396416693925858, + "learning_rate": 2.7234974592605573e-05, + "loss": 10.2539, + "step": 18910 + }, + { + "epoch": 0.09448425678543784, + "grad_norm": 0.1044858917593956, + "learning_rate": 2.7233472677664023e-05, + "loss": 10.2573, + "step": 18920 + }, + { + "epoch": 0.09453419561037729, + "grad_norm": 0.09172073006629944, + "learning_rate": 2.7231970762722473e-05, + "loss": 10.2548, + "step": 18930 + }, + { + "epoch": 0.09458413443531674, + "grad_norm": 0.09052781015634537, + "learning_rate": 2.723046884778092e-05, + "loss": 10.2535, + "step": 18940 + }, + { + "epoch": 0.09463407326025619, + "grad_norm": 0.09258178621530533, + "learning_rate": 2.722896693283937e-05, + "loss": 10.2505, + "step": 18950 + }, + { + "epoch": 0.09468401208519564, + "grad_norm": 0.09820058196783066, + "learning_rate": 2.722746501789782e-05, + "loss": 10.2543, + "step": 18960 + }, + { + "epoch": 0.09473395091013509, + "grad_norm": 0.09973189234733582, + "learning_rate": 2.722596310295627e-05, + "loss": 10.2533, + "step": 18970 + }, + { + "epoch": 0.09478388973507454, + "grad_norm": 0.09172434359788895, + "learning_rate": 2.722446118801472e-05, + "loss": 10.2502, + "step": 18980 + }, + { + "epoch": 0.09483382856001399, + "grad_norm": 0.0971081554889679, + "learning_rate": 2.7222959273073168e-05, + "loss": 10.2485, + "step": 18990 + }, + { + "epoch": 0.09488376738495344, + "grad_norm": 0.09259314090013504, + "learning_rate": 2.7221457358131618e-05, + "loss": 10.2516, + "step": 19000 + }, + { + "epoch": 0.09493370620989289, + "grad_norm": 0.10186239331960678, + "learning_rate": 2.7219955443190068e-05, + "loss": 10.2502, + "step": 19010 + }, + { + "epoch": 0.09498364503483234, + "grad_norm": 0.09782648831605911, + "learning_rate": 2.7218453528248518e-05, + "loss": 10.254, + "step": 19020 + }, + { + "epoch": 0.09503358385977179, + "grad_norm": 0.09522926807403564, + "learning_rate": 2.7216951613306968e-05, + "loss": 10.2542, + "step": 19030 + }, + { + "epoch": 0.09508352268471122, + "grad_norm": 0.09580672532320023, + "learning_rate": 2.7215449698365415e-05, + "loss": 10.2509, + "step": 19040 + }, + { + "epoch": 0.09513346150965067, + "grad_norm": 0.10172213613986969, + "learning_rate": 2.7213947783423865e-05, + "loss": 10.2519, + "step": 19050 + }, + { + "epoch": 0.09518340033459012, + "grad_norm": 0.09953866899013519, + "learning_rate": 2.7212445868482315e-05, + "loss": 10.2514, + "step": 19060 + }, + { + "epoch": 0.09523333915952957, + "grad_norm": 0.09473235160112381, + "learning_rate": 2.7210943953540766e-05, + "loss": 10.2493, + "step": 19070 + }, + { + "epoch": 0.09528327798446902, + "grad_norm": 0.09612436592578888, + "learning_rate": 2.7209442038599216e-05, + "loss": 10.2495, + "step": 19080 + }, + { + "epoch": 0.09533321680940847, + "grad_norm": 0.09395024180412292, + "learning_rate": 2.7207940123657663e-05, + "loss": 10.2494, + "step": 19090 + }, + { + "epoch": 0.09538315563434792, + "grad_norm": 0.0924721285700798, + "learning_rate": 2.7206438208716113e-05, + "loss": 10.2493, + "step": 19100 + }, + { + "epoch": 0.09543309445928737, + "grad_norm": 0.09010235965251923, + "learning_rate": 2.7204936293774563e-05, + "loss": 10.2508, + "step": 19110 + }, + { + "epoch": 0.09548303328422682, + "grad_norm": 0.09913980215787888, + "learning_rate": 2.7203434378833013e-05, + "loss": 10.2485, + "step": 19120 + }, + { + "epoch": 0.09553297210916627, + "grad_norm": 0.09284950792789459, + "learning_rate": 2.7201932463891463e-05, + "loss": 10.2497, + "step": 19130 + }, + { + "epoch": 0.09558291093410572, + "grad_norm": 0.09394965320825577, + "learning_rate": 2.720043054894991e-05, + "loss": 10.2504, + "step": 19140 + }, + { + "epoch": 0.09563284975904517, + "grad_norm": 0.09422177821397781, + "learning_rate": 2.719892863400836e-05, + "loss": 10.2443, + "step": 19150 + }, + { + "epoch": 0.09568278858398462, + "grad_norm": 0.09094090759754181, + "learning_rate": 2.719742671906681e-05, + "loss": 10.2472, + "step": 19160 + }, + { + "epoch": 0.09573272740892406, + "grad_norm": 0.09635195881128311, + "learning_rate": 2.719592480412526e-05, + "loss": 10.2485, + "step": 19170 + }, + { + "epoch": 0.09578266623386351, + "grad_norm": 0.09993939101696014, + "learning_rate": 2.719442288918371e-05, + "loss": 10.2454, + "step": 19180 + }, + { + "epoch": 0.09583260505880296, + "grad_norm": 0.09371164441108704, + "learning_rate": 2.7192920974242158e-05, + "loss": 10.2504, + "step": 19190 + }, + { + "epoch": 0.09588254388374241, + "grad_norm": 0.09679362177848816, + "learning_rate": 2.7191419059300608e-05, + "loss": 10.2465, + "step": 19200 + }, + { + "epoch": 0.09593248270868186, + "grad_norm": 0.08799337595701218, + "learning_rate": 2.7189917144359058e-05, + "loss": 10.2454, + "step": 19210 + }, + { + "epoch": 0.09598242153362131, + "grad_norm": 0.09332183748483658, + "learning_rate": 2.7188415229417508e-05, + "loss": 10.2492, + "step": 19220 + }, + { + "epoch": 0.09603236035856076, + "grad_norm": 0.09035744518041611, + "learning_rate": 2.718691331447596e-05, + "loss": 10.2473, + "step": 19230 + }, + { + "epoch": 0.09608229918350021, + "grad_norm": 0.09255316108465195, + "learning_rate": 2.7185411399534405e-05, + "loss": 10.2487, + "step": 19240 + }, + { + "epoch": 0.09613223800843966, + "grad_norm": 0.0899682343006134, + "learning_rate": 2.718390948459286e-05, + "loss": 10.2411, + "step": 19250 + }, + { + "epoch": 0.09618217683337911, + "grad_norm": 0.09768253564834595, + "learning_rate": 2.7182407569651305e-05, + "loss": 10.2458, + "step": 19260 + }, + { + "epoch": 0.09623211565831856, + "grad_norm": 0.09567224979400635, + "learning_rate": 2.7180905654709756e-05, + "loss": 10.2468, + "step": 19270 + }, + { + "epoch": 0.09628205448325801, + "grad_norm": 0.09296666830778122, + "learning_rate": 2.7179403739768206e-05, + "loss": 10.2441, + "step": 19280 + }, + { + "epoch": 0.09633199330819746, + "grad_norm": 0.09625297039747238, + "learning_rate": 2.7177901824826653e-05, + "loss": 10.2453, + "step": 19290 + }, + { + "epoch": 0.09638193213313691, + "grad_norm": 0.09271930158138275, + "learning_rate": 2.7176399909885106e-05, + "loss": 10.2413, + "step": 19300 + }, + { + "epoch": 0.09643187095807636, + "grad_norm": 0.09876267611980438, + "learning_rate": 2.7174897994943553e-05, + "loss": 10.2448, + "step": 19310 + }, + { + "epoch": 0.0964818097830158, + "grad_norm": 0.09215805679559708, + "learning_rate": 2.7173396080002003e-05, + "loss": 10.2435, + "step": 19320 + }, + { + "epoch": 0.09653174860795526, + "grad_norm": 0.09160521626472473, + "learning_rate": 2.7171894165060453e-05, + "loss": 10.2456, + "step": 19330 + }, + { + "epoch": 0.0965816874328947, + "grad_norm": 0.09767121821641922, + "learning_rate": 2.71703922501189e-05, + "loss": 10.2404, + "step": 19340 + }, + { + "epoch": 0.09663162625783416, + "grad_norm": 0.09696942567825317, + "learning_rate": 2.7168890335177354e-05, + "loss": 10.2442, + "step": 19350 + }, + { + "epoch": 0.0966815650827736, + "grad_norm": 0.09589537233114243, + "learning_rate": 2.71673884202358e-05, + "loss": 10.2434, + "step": 19360 + }, + { + "epoch": 0.09673150390771305, + "grad_norm": 0.09620683640241623, + "learning_rate": 2.716588650529425e-05, + "loss": 10.2424, + "step": 19370 + }, + { + "epoch": 0.0967814427326525, + "grad_norm": 0.08876433968544006, + "learning_rate": 2.71643845903527e-05, + "loss": 10.2431, + "step": 19380 + }, + { + "epoch": 0.09683138155759195, + "grad_norm": 0.094940684735775, + "learning_rate": 2.7162882675411148e-05, + "loss": 10.2415, + "step": 19390 + }, + { + "epoch": 0.0968813203825314, + "grad_norm": 0.09419713914394379, + "learning_rate": 2.71613807604696e-05, + "loss": 10.2437, + "step": 19400 + }, + { + "epoch": 0.09693125920747085, + "grad_norm": 0.09718811511993408, + "learning_rate": 2.7159878845528048e-05, + "loss": 10.2393, + "step": 19410 + }, + { + "epoch": 0.0969811980324103, + "grad_norm": 0.09436991810798645, + "learning_rate": 2.7158376930586498e-05, + "loss": 10.2408, + "step": 19420 + }, + { + "epoch": 0.09703113685734975, + "grad_norm": 0.09804237633943558, + "learning_rate": 2.715687501564495e-05, + "loss": 10.24, + "step": 19430 + }, + { + "epoch": 0.0970810756822892, + "grad_norm": 0.09041360020637512, + "learning_rate": 2.7155373100703395e-05, + "loss": 10.2429, + "step": 19440 + }, + { + "epoch": 0.09713101450722865, + "grad_norm": 0.09413240849971771, + "learning_rate": 2.715387118576185e-05, + "loss": 10.2405, + "step": 19450 + }, + { + "epoch": 0.0971809533321681, + "grad_norm": 0.09756959229707718, + "learning_rate": 2.7152369270820295e-05, + "loss": 10.4918, + "step": 19460 + }, + { + "epoch": 0.09723089215710755, + "grad_norm": 0.08898182958364487, + "learning_rate": 2.7150867355878746e-05, + "loss": 10.2411, + "step": 19470 + }, + { + "epoch": 0.097280830982047, + "grad_norm": 0.09335238486528397, + "learning_rate": 2.7149365440937196e-05, + "loss": 10.2348, + "step": 19480 + }, + { + "epoch": 0.09733076980698645, + "grad_norm": 0.09306208789348602, + "learning_rate": 2.7147863525995643e-05, + "loss": 10.2427, + "step": 19490 + }, + { + "epoch": 0.0973807086319259, + "grad_norm": 0.09693284332752228, + "learning_rate": 2.7146361611054096e-05, + "loss": 10.2377, + "step": 19500 + }, + { + "epoch": 0.09743064745686535, + "grad_norm": 0.10119107365608215, + "learning_rate": 2.7144859696112543e-05, + "loss": 10.2366, + "step": 19510 + }, + { + "epoch": 0.0974805862818048, + "grad_norm": 0.10089292377233505, + "learning_rate": 2.7143357781170993e-05, + "loss": 10.2385, + "step": 19520 + }, + { + "epoch": 0.09753052510674425, + "grad_norm": 0.09562947601079941, + "learning_rate": 2.7141855866229443e-05, + "loss": 10.2378, + "step": 19530 + }, + { + "epoch": 0.09758046393168368, + "grad_norm": 0.09517926722764969, + "learning_rate": 2.714035395128789e-05, + "loss": 10.237, + "step": 19540 + }, + { + "epoch": 0.09763040275662313, + "grad_norm": 0.0980682447552681, + "learning_rate": 2.7138852036346344e-05, + "loss": 10.2385, + "step": 19550 + }, + { + "epoch": 0.09768034158156258, + "grad_norm": 0.09545796364545822, + "learning_rate": 2.713735012140479e-05, + "loss": 10.2375, + "step": 19560 + }, + { + "epoch": 0.09773028040650203, + "grad_norm": 0.09550634771585464, + "learning_rate": 2.7135848206463244e-05, + "loss": 10.2384, + "step": 19570 + }, + { + "epoch": 0.09778021923144148, + "grad_norm": 0.09427410364151001, + "learning_rate": 2.713434629152169e-05, + "loss": 10.2344, + "step": 19580 + }, + { + "epoch": 0.09783015805638093, + "grad_norm": 0.09462705999612808, + "learning_rate": 2.7132844376580138e-05, + "loss": 10.2342, + "step": 19590 + }, + { + "epoch": 0.09788009688132038, + "grad_norm": 0.1018369272351265, + "learning_rate": 2.713134246163859e-05, + "loss": 10.2329, + "step": 19600 + }, + { + "epoch": 0.09793003570625983, + "grad_norm": 0.09698480367660522, + "learning_rate": 2.7129840546697038e-05, + "loss": 10.2358, + "step": 19610 + }, + { + "epoch": 0.09797997453119928, + "grad_norm": 0.09769893437623978, + "learning_rate": 2.712833863175549e-05, + "loss": 10.2382, + "step": 19620 + }, + { + "epoch": 0.09802991335613873, + "grad_norm": 0.09582178294658661, + "learning_rate": 2.712683671681394e-05, + "loss": 10.2342, + "step": 19630 + }, + { + "epoch": 0.09807985218107818, + "grad_norm": 0.09619859606027603, + "learning_rate": 2.7125334801872385e-05, + "loss": 10.2356, + "step": 19640 + }, + { + "epoch": 0.09812979100601762, + "grad_norm": 0.09638599306344986, + "learning_rate": 2.712383288693084e-05, + "loss": 10.2338, + "step": 19650 + }, + { + "epoch": 0.09817972983095707, + "grad_norm": 0.10089550167322159, + "learning_rate": 2.7122330971989285e-05, + "loss": 10.2301, + "step": 19660 + }, + { + "epoch": 0.09822966865589652, + "grad_norm": 0.09721601009368896, + "learning_rate": 2.712082905704774e-05, + "loss": 10.2364, + "step": 19670 + }, + { + "epoch": 0.09827960748083597, + "grad_norm": 0.09357859939336777, + "learning_rate": 2.7119327142106186e-05, + "loss": 10.236, + "step": 19680 + }, + { + "epoch": 0.09832954630577542, + "grad_norm": 0.09338711202144623, + "learning_rate": 2.7117825227164633e-05, + "loss": 10.2298, + "step": 19690 + }, + { + "epoch": 0.09837948513071487, + "grad_norm": 0.09414970874786377, + "learning_rate": 2.7116323312223086e-05, + "loss": 10.2383, + "step": 19700 + }, + { + "epoch": 0.09842942395565432, + "grad_norm": 0.10141325742006302, + "learning_rate": 2.7114821397281533e-05, + "loss": 10.2353, + "step": 19710 + }, + { + "epoch": 0.09847936278059377, + "grad_norm": 0.09696213901042938, + "learning_rate": 2.7113319482339987e-05, + "loss": 10.2326, + "step": 19720 + }, + { + "epoch": 0.09852930160553322, + "grad_norm": 0.09888730943202972, + "learning_rate": 2.7111817567398433e-05, + "loss": 10.2354, + "step": 19730 + }, + { + "epoch": 0.09857924043047267, + "grad_norm": 0.089784175157547, + "learning_rate": 2.711031565245688e-05, + "loss": 10.2333, + "step": 19740 + }, + { + "epoch": 0.09862917925541212, + "grad_norm": 0.09449610859155655, + "learning_rate": 2.7108813737515334e-05, + "loss": 10.233, + "step": 19750 + }, + { + "epoch": 0.09867911808035157, + "grad_norm": 0.09268537908792496, + "learning_rate": 2.710731182257378e-05, + "loss": 10.2273, + "step": 19760 + }, + { + "epoch": 0.09872905690529102, + "grad_norm": 0.09909249097108841, + "learning_rate": 2.7105809907632234e-05, + "loss": 10.2293, + "step": 19770 + }, + { + "epoch": 0.09877899573023047, + "grad_norm": 0.10036230832338333, + "learning_rate": 2.710430799269068e-05, + "loss": 10.2342, + "step": 19780 + }, + { + "epoch": 0.09882893455516992, + "grad_norm": 0.10188131779432297, + "learning_rate": 2.7102806077749128e-05, + "loss": 10.2328, + "step": 19790 + }, + { + "epoch": 0.09887887338010937, + "grad_norm": 0.09562569111585617, + "learning_rate": 2.710130416280758e-05, + "loss": 10.2278, + "step": 19800 + }, + { + "epoch": 0.09892881220504882, + "grad_norm": 0.09543158859014511, + "learning_rate": 2.7099802247866028e-05, + "loss": 10.2257, + "step": 19810 + }, + { + "epoch": 0.09897875102998827, + "grad_norm": 0.09362302720546722, + "learning_rate": 2.709830033292448e-05, + "loss": 10.2302, + "step": 19820 + }, + { + "epoch": 0.09902868985492772, + "grad_norm": 0.09583684056997299, + "learning_rate": 2.709679841798293e-05, + "loss": 10.2322, + "step": 19830 + }, + { + "epoch": 0.09907862867986716, + "grad_norm": 0.09406079351902008, + "learning_rate": 2.7095296503041375e-05, + "loss": 10.2291, + "step": 19840 + }, + { + "epoch": 0.09912856750480661, + "grad_norm": 0.09514211863279343, + "learning_rate": 2.709379458809983e-05, + "loss": 10.2303, + "step": 19850 + }, + { + "epoch": 0.09917850632974606, + "grad_norm": 0.09583709388971329, + "learning_rate": 2.7092292673158276e-05, + "loss": 10.2299, + "step": 19860 + }, + { + "epoch": 0.09922844515468551, + "grad_norm": 0.09366525709629059, + "learning_rate": 2.709079075821673e-05, + "loss": 10.2287, + "step": 19870 + }, + { + "epoch": 0.09927838397962496, + "grad_norm": 0.09371297061443329, + "learning_rate": 2.7089288843275176e-05, + "loss": 10.2276, + "step": 19880 + }, + { + "epoch": 0.09932832280456441, + "grad_norm": 0.09627794474363327, + "learning_rate": 2.7087786928333626e-05, + "loss": 10.2293, + "step": 19890 + }, + { + "epoch": 0.09937826162950386, + "grad_norm": 0.09279222786426544, + "learning_rate": 2.7086285013392076e-05, + "loss": 10.2283, + "step": 19900 + }, + { + "epoch": 0.09942820045444331, + "grad_norm": 0.0960431769490242, + "learning_rate": 2.7084783098450523e-05, + "loss": 10.2295, + "step": 19910 + }, + { + "epoch": 0.09947813927938276, + "grad_norm": 0.0974978655576706, + "learning_rate": 2.7083281183508977e-05, + "loss": 10.2283, + "step": 19920 + }, + { + "epoch": 0.09952807810432221, + "grad_norm": 0.09452254325151443, + "learning_rate": 2.7081779268567423e-05, + "loss": 10.2259, + "step": 19930 + }, + { + "epoch": 0.09957801692926166, + "grad_norm": 0.09121208637952805, + "learning_rate": 2.7080277353625874e-05, + "loss": 10.2277, + "step": 19940 + }, + { + "epoch": 0.09962795575420111, + "grad_norm": 0.09757377207279205, + "learning_rate": 2.7078775438684324e-05, + "loss": 10.2261, + "step": 19950 + }, + { + "epoch": 0.09967789457914056, + "grad_norm": 0.09297657012939453, + "learning_rate": 2.707727352374277e-05, + "loss": 10.2242, + "step": 19960 + }, + { + "epoch": 0.09972783340408001, + "grad_norm": 0.09815360605716705, + "learning_rate": 2.7075771608801224e-05, + "loss": 10.2245, + "step": 19970 + }, + { + "epoch": 0.09977777222901946, + "grad_norm": 0.09438446164131165, + "learning_rate": 2.707426969385967e-05, + "loss": 10.229, + "step": 19980 + }, + { + "epoch": 0.0998277110539589, + "grad_norm": 0.09827311336994171, + "learning_rate": 2.707276777891812e-05, + "loss": 10.2242, + "step": 19990 + }, + { + "epoch": 0.09987764987889836, + "grad_norm": 0.09929145127534866, + "learning_rate": 2.707126586397657e-05, + "loss": 10.2251, + "step": 20000 + }, + { + "epoch": 0.0999275887038378, + "grad_norm": 0.09331025183200836, + "learning_rate": 2.7069763949035018e-05, + "loss": 10.2282, + "step": 20010 + }, + { + "epoch": 0.09997752752877725, + "grad_norm": 0.09668101370334625, + "learning_rate": 2.706826203409347e-05, + "loss": 10.2285, + "step": 20020 + }, + { + "epoch": 0.10002746635371669, + "grad_norm": 0.09241931140422821, + "learning_rate": 2.706676011915192e-05, + "loss": 10.2261, + "step": 20030 + }, + { + "epoch": 0.10007740517865614, + "grad_norm": 0.09374601393938065, + "learning_rate": 2.706525820421037e-05, + "loss": 10.2251, + "step": 20040 + }, + { + "epoch": 0.10012734400359559, + "grad_norm": 0.09970963001251221, + "learning_rate": 2.706375628926882e-05, + "loss": 10.2242, + "step": 20050 + }, + { + "epoch": 0.10017728282853504, + "grad_norm": 0.10114763677120209, + "learning_rate": 2.7062254374327266e-05, + "loss": 10.2234, + "step": 20060 + }, + { + "epoch": 0.10022722165347449, + "grad_norm": 0.09584357589483261, + "learning_rate": 2.706075245938572e-05, + "loss": 10.223, + "step": 20070 + }, + { + "epoch": 0.10027716047841394, + "grad_norm": 0.09605442732572556, + "learning_rate": 2.7059250544444166e-05, + "loss": 10.2236, + "step": 20080 + }, + { + "epoch": 0.10032709930335339, + "grad_norm": 0.09641440957784653, + "learning_rate": 2.7057748629502616e-05, + "loss": 10.2236, + "step": 20090 + }, + { + "epoch": 0.10037703812829284, + "grad_norm": 0.09594506770372391, + "learning_rate": 2.7056246714561066e-05, + "loss": 10.2208, + "step": 20100 + }, + { + "epoch": 0.10042697695323229, + "grad_norm": 0.09294579178094864, + "learning_rate": 2.7054744799619513e-05, + "loss": 10.2232, + "step": 20110 + }, + { + "epoch": 0.10047691577817174, + "grad_norm": 0.09371384978294373, + "learning_rate": 2.7053242884677967e-05, + "loss": 10.2246, + "step": 20120 + }, + { + "epoch": 0.10052685460311118, + "grad_norm": 0.0971645787358284, + "learning_rate": 2.7051740969736413e-05, + "loss": 10.2199, + "step": 20130 + }, + { + "epoch": 0.10057679342805063, + "grad_norm": 0.09300613403320312, + "learning_rate": 2.7050239054794864e-05, + "loss": 10.2229, + "step": 20140 + }, + { + "epoch": 0.10062673225299008, + "grad_norm": 0.09003175050020218, + "learning_rate": 2.7048737139853314e-05, + "loss": 10.2235, + "step": 20150 + }, + { + "epoch": 0.10067667107792953, + "grad_norm": 0.09512855857610703, + "learning_rate": 2.704723522491176e-05, + "loss": 10.2204, + "step": 20160 + }, + { + "epoch": 0.10072660990286898, + "grad_norm": 0.10006555169820786, + "learning_rate": 2.7045733309970214e-05, + "loss": 10.2225, + "step": 20170 + }, + { + "epoch": 0.10077654872780843, + "grad_norm": 0.09965218603610992, + "learning_rate": 2.704423139502866e-05, + "loss": 10.2212, + "step": 20180 + }, + { + "epoch": 0.10082648755274788, + "grad_norm": 0.0935189425945282, + "learning_rate": 2.7042729480087114e-05, + "loss": 10.2217, + "step": 20190 + }, + { + "epoch": 0.10087642637768733, + "grad_norm": 0.09258928894996643, + "learning_rate": 2.704122756514556e-05, + "loss": 10.2222, + "step": 20200 + }, + { + "epoch": 0.10092636520262678, + "grad_norm": 0.0927526131272316, + "learning_rate": 2.703972565020401e-05, + "loss": 10.2206, + "step": 20210 + }, + { + "epoch": 0.10097630402756623, + "grad_norm": 0.09727808088064194, + "learning_rate": 2.703822373526246e-05, + "loss": 10.2186, + "step": 20220 + }, + { + "epoch": 0.10102624285250568, + "grad_norm": 0.09847782552242279, + "learning_rate": 2.703672182032091e-05, + "loss": 10.2159, + "step": 20230 + }, + { + "epoch": 0.10107618167744513, + "grad_norm": 0.10164419561624527, + "learning_rate": 2.7035219905379362e-05, + "loss": 10.2183, + "step": 20240 + }, + { + "epoch": 0.10112612050238458, + "grad_norm": 0.0917501449584961, + "learning_rate": 2.703371799043781e-05, + "loss": 10.2188, + "step": 20250 + }, + { + "epoch": 0.10117605932732403, + "grad_norm": 0.09016897529363632, + "learning_rate": 2.703221607549626e-05, + "loss": 10.2207, + "step": 20260 + }, + { + "epoch": 0.10122599815226348, + "grad_norm": 0.09659720957279205, + "learning_rate": 2.703071416055471e-05, + "loss": 10.2198, + "step": 20270 + }, + { + "epoch": 0.10127593697720293, + "grad_norm": 0.09625518321990967, + "learning_rate": 2.7029212245613156e-05, + "loss": 10.2209, + "step": 20280 + }, + { + "epoch": 0.10132587580214238, + "grad_norm": 0.09196675568819046, + "learning_rate": 2.702771033067161e-05, + "loss": 10.2194, + "step": 20290 + }, + { + "epoch": 0.10137581462708183, + "grad_norm": 0.09483517706394196, + "learning_rate": 2.7026208415730056e-05, + "loss": 10.217, + "step": 20300 + }, + { + "epoch": 0.10142575345202128, + "grad_norm": 0.09146818518638611, + "learning_rate": 2.7024706500788506e-05, + "loss": 10.2151, + "step": 20310 + }, + { + "epoch": 0.10147569227696072, + "grad_norm": 0.09970217198133469, + "learning_rate": 2.7023204585846957e-05, + "loss": 10.2176, + "step": 20320 + }, + { + "epoch": 0.10152563110190017, + "grad_norm": 0.10319959372282028, + "learning_rate": 2.7021702670905403e-05, + "loss": 10.2165, + "step": 20330 + }, + { + "epoch": 0.10157556992683962, + "grad_norm": 0.09527314454317093, + "learning_rate": 2.7020200755963857e-05, + "loss": 10.2176, + "step": 20340 + }, + { + "epoch": 0.10162550875177907, + "grad_norm": 0.09798305481672287, + "learning_rate": 2.7018698841022304e-05, + "loss": 10.2151, + "step": 20350 + }, + { + "epoch": 0.10167544757671852, + "grad_norm": 0.10353653877973557, + "learning_rate": 2.7017196926080754e-05, + "loss": 10.2197, + "step": 20360 + }, + { + "epoch": 0.10172538640165797, + "grad_norm": 0.09512107819318771, + "learning_rate": 2.7015695011139204e-05, + "loss": 10.2186, + "step": 20370 + }, + { + "epoch": 0.10177532522659742, + "grad_norm": 0.09513498097658157, + "learning_rate": 2.701419309619765e-05, + "loss": 10.2179, + "step": 20380 + }, + { + "epoch": 0.10182526405153687, + "grad_norm": 0.09943837672472, + "learning_rate": 2.7012691181256104e-05, + "loss": 10.2123, + "step": 20390 + }, + { + "epoch": 0.10187520287647632, + "grad_norm": 0.09323469549417496, + "learning_rate": 2.701118926631455e-05, + "loss": 10.2183, + "step": 20400 + }, + { + "epoch": 0.10192514170141577, + "grad_norm": 0.09345997124910355, + "learning_rate": 2.7009687351373e-05, + "loss": 10.2135, + "step": 20410 + }, + { + "epoch": 0.10197508052635522, + "grad_norm": 0.10201733559370041, + "learning_rate": 2.700818543643145e-05, + "loss": 10.2155, + "step": 20420 + }, + { + "epoch": 0.10202501935129467, + "grad_norm": 0.08883191645145416, + "learning_rate": 2.70066835214899e-05, + "loss": 10.2125, + "step": 20430 + }, + { + "epoch": 0.10207495817623412, + "grad_norm": 0.09999528527259827, + "learning_rate": 2.7005181606548352e-05, + "loss": 10.2138, + "step": 20440 + }, + { + "epoch": 0.10212489700117357, + "grad_norm": 0.09363902360200882, + "learning_rate": 2.70036796916068e-05, + "loss": 10.2105, + "step": 20450 + }, + { + "epoch": 0.10217483582611302, + "grad_norm": 0.1012590229511261, + "learning_rate": 2.700217777666525e-05, + "loss": 10.2133, + "step": 20460 + }, + { + "epoch": 0.10222477465105247, + "grad_norm": 0.09568557888269424, + "learning_rate": 2.70006758617237e-05, + "loss": 10.2142, + "step": 20470 + }, + { + "epoch": 0.10227471347599192, + "grad_norm": 0.10000815987586975, + "learning_rate": 2.6999173946782146e-05, + "loss": 10.2139, + "step": 20480 + }, + { + "epoch": 0.10232465230093137, + "grad_norm": 0.09673640131950378, + "learning_rate": 2.69976720318406e-05, + "loss": 10.2101, + "step": 20490 + }, + { + "epoch": 0.10237459112587081, + "grad_norm": 0.09184068441390991, + "learning_rate": 2.6996170116899046e-05, + "loss": 10.2125, + "step": 20500 + }, + { + "epoch": 0.10242452995081026, + "grad_norm": 0.09533202648162842, + "learning_rate": 2.6994668201957496e-05, + "loss": 10.2116, + "step": 20510 + }, + { + "epoch": 0.1024744687757497, + "grad_norm": 0.09686296433210373, + "learning_rate": 2.6993166287015947e-05, + "loss": 10.213, + "step": 20520 + }, + { + "epoch": 0.10252440760068915, + "grad_norm": 0.09219254553318024, + "learning_rate": 2.6991664372074397e-05, + "loss": 10.2101, + "step": 20530 + }, + { + "epoch": 0.1025743464256286, + "grad_norm": 0.0953301191329956, + "learning_rate": 2.6990162457132847e-05, + "loss": 10.2123, + "step": 20540 + }, + { + "epoch": 0.10262428525056805, + "grad_norm": 0.09723500162363052, + "learning_rate": 2.6988660542191294e-05, + "loss": 10.2132, + "step": 20550 + }, + { + "epoch": 0.1026742240755075, + "grad_norm": 0.09278090298175812, + "learning_rate": 2.6987158627249744e-05, + "loss": 10.2107, + "step": 20560 + }, + { + "epoch": 0.10272416290044695, + "grad_norm": 0.09172572195529938, + "learning_rate": 2.6985656712308194e-05, + "loss": 10.2093, + "step": 20570 + }, + { + "epoch": 0.1027741017253864, + "grad_norm": 0.09108912199735641, + "learning_rate": 2.6984154797366644e-05, + "loss": 10.2067, + "step": 20580 + }, + { + "epoch": 0.10282404055032585, + "grad_norm": 0.09847711771726608, + "learning_rate": 2.6982652882425094e-05, + "loss": 10.211, + "step": 20590 + }, + { + "epoch": 0.1028739793752653, + "grad_norm": 0.08993741869926453, + "learning_rate": 2.698115096748354e-05, + "loss": 10.2101, + "step": 20600 + }, + { + "epoch": 0.10292391820020474, + "grad_norm": 0.0974576473236084, + "learning_rate": 2.697964905254199e-05, + "loss": 10.2092, + "step": 20610 + }, + { + "epoch": 0.1029738570251442, + "grad_norm": 0.09782136976718903, + "learning_rate": 2.697814713760044e-05, + "loss": 10.206, + "step": 20620 + }, + { + "epoch": 0.10302379585008364, + "grad_norm": 0.09769613295793533, + "learning_rate": 2.6976645222658892e-05, + "loss": 10.2108, + "step": 20630 + }, + { + "epoch": 0.1030737346750231, + "grad_norm": 0.09461203962564468, + "learning_rate": 2.6975143307717342e-05, + "loss": 10.2053, + "step": 20640 + }, + { + "epoch": 0.10312367349996254, + "grad_norm": 0.09983788430690765, + "learning_rate": 2.697364139277579e-05, + "loss": 10.2081, + "step": 20650 + }, + { + "epoch": 0.10317361232490199, + "grad_norm": 0.09139873832464218, + "learning_rate": 2.697213947783424e-05, + "loss": 10.2104, + "step": 20660 + }, + { + "epoch": 0.10322355114984144, + "grad_norm": 0.10040886700153351, + "learning_rate": 2.697063756289269e-05, + "loss": 10.2067, + "step": 20670 + }, + { + "epoch": 0.10327348997478089, + "grad_norm": 0.0953102856874466, + "learning_rate": 2.696913564795114e-05, + "loss": 10.2053, + "step": 20680 + }, + { + "epoch": 0.10332342879972034, + "grad_norm": 0.0996142104268074, + "learning_rate": 2.696763373300959e-05, + "loss": 10.2083, + "step": 20690 + }, + { + "epoch": 0.10337336762465979, + "grad_norm": 0.09398601204156876, + "learning_rate": 2.6966131818068036e-05, + "loss": 10.2055, + "step": 20700 + }, + { + "epoch": 0.10342330644959924, + "grad_norm": 0.09323547780513763, + "learning_rate": 2.6964629903126486e-05, + "loss": 10.2101, + "step": 20710 + }, + { + "epoch": 0.10347324527453869, + "grad_norm": 0.09476854652166367, + "learning_rate": 2.6963127988184937e-05, + "loss": 10.2063, + "step": 20720 + }, + { + "epoch": 0.10352318409947814, + "grad_norm": 0.08883500099182129, + "learning_rate": 2.6961626073243387e-05, + "loss": 10.2072, + "step": 20730 + }, + { + "epoch": 0.10357312292441759, + "grad_norm": 0.09644431620836258, + "learning_rate": 2.6960124158301837e-05, + "loss": 10.2066, + "step": 20740 + }, + { + "epoch": 0.10362306174935704, + "grad_norm": 0.09586118906736374, + "learning_rate": 2.6958622243360284e-05, + "loss": 10.2093, + "step": 20750 + }, + { + "epoch": 0.10367300057429649, + "grad_norm": 0.09478766471147537, + "learning_rate": 2.6957120328418734e-05, + "loss": 10.2054, + "step": 20760 + }, + { + "epoch": 0.10372293939923594, + "grad_norm": 0.09579043835401535, + "learning_rate": 2.6955618413477184e-05, + "loss": 10.207, + "step": 20770 + }, + { + "epoch": 0.10377287822417539, + "grad_norm": 0.08978202193975449, + "learning_rate": 2.6954116498535634e-05, + "loss": 10.2019, + "step": 20780 + }, + { + "epoch": 0.10382281704911483, + "grad_norm": 0.09112431108951569, + "learning_rate": 2.6952614583594084e-05, + "loss": 10.2072, + "step": 20790 + }, + { + "epoch": 0.10387275587405428, + "grad_norm": 0.09767849743366241, + "learning_rate": 2.695111266865253e-05, + "loss": 10.2056, + "step": 20800 + }, + { + "epoch": 0.10392269469899373, + "grad_norm": 0.09825193881988525, + "learning_rate": 2.694961075371098e-05, + "loss": 10.2068, + "step": 20810 + }, + { + "epoch": 0.10397263352393318, + "grad_norm": 0.09760141372680664, + "learning_rate": 2.694810883876943e-05, + "loss": 10.2078, + "step": 20820 + }, + { + "epoch": 0.10402257234887263, + "grad_norm": 0.09558884054422379, + "learning_rate": 2.6946606923827882e-05, + "loss": 10.2056, + "step": 20830 + }, + { + "epoch": 0.10407251117381208, + "grad_norm": 0.09403236955404282, + "learning_rate": 2.6945105008886332e-05, + "loss": 10.2005, + "step": 20840 + }, + { + "epoch": 0.10412244999875153, + "grad_norm": 0.09680522978305817, + "learning_rate": 2.6943603093944782e-05, + "loss": 10.2052, + "step": 20850 + }, + { + "epoch": 0.10417238882369098, + "grad_norm": 0.09517577290534973, + "learning_rate": 2.694210117900323e-05, + "loss": 10.2047, + "step": 20860 + }, + { + "epoch": 0.10422232764863043, + "grad_norm": 0.09840694069862366, + "learning_rate": 2.694059926406168e-05, + "loss": 10.2048, + "step": 20870 + }, + { + "epoch": 0.10427226647356988, + "grad_norm": 0.09037858247756958, + "learning_rate": 2.693909734912013e-05, + "loss": 10.1986, + "step": 20880 + }, + { + "epoch": 0.10432220529850933, + "grad_norm": 0.09096881747245789, + "learning_rate": 2.693759543417858e-05, + "loss": 10.204, + "step": 20890 + }, + { + "epoch": 0.10437214412344878, + "grad_norm": 0.0983474999666214, + "learning_rate": 2.693609351923703e-05, + "loss": 10.2116, + "step": 20900 + }, + { + "epoch": 0.10442208294838823, + "grad_norm": 0.08912882208824158, + "learning_rate": 2.6934591604295476e-05, + "loss": 10.2036, + "step": 20910 + }, + { + "epoch": 0.10447202177332768, + "grad_norm": 122905.046875, + "learning_rate": 2.6933089689353927e-05, + "loss": 11.4131, + "step": 20920 + }, + { + "epoch": 0.10452196059826713, + "grad_norm": 0.09772419929504395, + "learning_rate": 2.6931587774412377e-05, + "loss": 38.3405, + "step": 20930 + }, + { + "epoch": 0.10457189942320658, + "grad_norm": 0.09270507097244263, + "learning_rate": 2.6930085859470827e-05, + "loss": 10.579, + "step": 20940 + }, + { + "epoch": 0.10462183824814603, + "grad_norm": 0.10307878255844116, + "learning_rate": 2.6928583944529277e-05, + "loss": 10.1982, + "step": 20950 + }, + { + "epoch": 0.10467177707308548, + "grad_norm": 0.10254894942045212, + "learning_rate": 2.6927082029587724e-05, + "loss": 10.2039, + "step": 20960 + }, + { + "epoch": 0.10472171589802493, + "grad_norm": 0.0985196977853775, + "learning_rate": 2.6925580114646174e-05, + "loss": 10.2041, + "step": 20970 + }, + { + "epoch": 0.10477165472296437, + "grad_norm": 0.0948171392083168, + "learning_rate": 2.6924078199704624e-05, + "loss": 10.2012, + "step": 20980 + }, + { + "epoch": 0.10482159354790382, + "grad_norm": 0.09431871771812439, + "learning_rate": 2.6922576284763075e-05, + "loss": 10.2013, + "step": 20990 + }, + { + "epoch": 0.10487153237284327, + "grad_norm": 0.09847448766231537, + "learning_rate": 2.6921074369821525e-05, + "loss": 10.1986, + "step": 21000 + }, + { + "epoch": 0.10492147119778272, + "grad_norm": 0.09803617000579834, + "learning_rate": 2.691957245487997e-05, + "loss": 10.1999, + "step": 21010 + }, + { + "epoch": 0.10497141002272216, + "grad_norm": 0.09164676815271378, + "learning_rate": 2.691807053993842e-05, + "loss": 10.1964, + "step": 21020 + }, + { + "epoch": 0.10502134884766161, + "grad_norm": 0.09696748852729797, + "learning_rate": 2.6916568624996872e-05, + "loss": 10.1992, + "step": 21030 + }, + { + "epoch": 0.10507128767260106, + "grad_norm": 0.090817391872406, + "learning_rate": 2.6915066710055322e-05, + "loss": 10.199, + "step": 21040 + }, + { + "epoch": 0.10512122649754051, + "grad_norm": 0.09760018438100815, + "learning_rate": 2.6913564795113772e-05, + "loss": 10.2002, + "step": 21050 + }, + { + "epoch": 0.10517116532247996, + "grad_norm": 0.09421108663082123, + "learning_rate": 2.691206288017222e-05, + "loss": 10.1982, + "step": 21060 + }, + { + "epoch": 0.1052211041474194, + "grad_norm": 0.09282322973012924, + "learning_rate": 2.691056096523067e-05, + "loss": 10.1998, + "step": 21070 + }, + { + "epoch": 0.10527104297235886, + "grad_norm": 0.09494070708751678, + "learning_rate": 2.690905905028912e-05, + "loss": 10.1958, + "step": 21080 + }, + { + "epoch": 0.1053209817972983, + "grad_norm": 0.0971512645483017, + "learning_rate": 2.690755713534757e-05, + "loss": 10.2041, + "step": 21090 + }, + { + "epoch": 0.10537092062223775, + "grad_norm": 0.09018143266439438, + "learning_rate": 2.690605522040602e-05, + "loss": 10.1996, + "step": 21100 + }, + { + "epoch": 0.1054208594471772, + "grad_norm": 0.09023455530405045, + "learning_rate": 2.6904553305464466e-05, + "loss": 10.2006, + "step": 21110 + }, + { + "epoch": 0.10547079827211665, + "grad_norm": 0.09394627809524536, + "learning_rate": 2.6903051390522917e-05, + "loss": 10.1966, + "step": 21120 + }, + { + "epoch": 0.1055207370970561, + "grad_norm": 0.09698754549026489, + "learning_rate": 2.6901549475581367e-05, + "loss": 10.1971, + "step": 21130 + }, + { + "epoch": 0.10557067592199555, + "grad_norm": 0.09606551378965378, + "learning_rate": 2.6900047560639817e-05, + "loss": 10.1975, + "step": 21140 + }, + { + "epoch": 0.105620614746935, + "grad_norm": 0.0962950810790062, + "learning_rate": 2.6898545645698267e-05, + "loss": 10.194, + "step": 21150 + }, + { + "epoch": 0.10567055357187445, + "grad_norm": 0.09697403013706207, + "learning_rate": 2.6897043730756714e-05, + "loss": 10.1934, + "step": 21160 + }, + { + "epoch": 0.1057204923968139, + "grad_norm": 0.09771595895290375, + "learning_rate": 2.6895541815815164e-05, + "loss": 10.197, + "step": 21170 + }, + { + "epoch": 0.10577043122175335, + "grad_norm": 0.09692160040140152, + "learning_rate": 2.6894039900873614e-05, + "loss": 10.196, + "step": 21180 + }, + { + "epoch": 0.1058203700466928, + "grad_norm": 0.09422864764928818, + "learning_rate": 2.6892537985932065e-05, + "loss": 10.1967, + "step": 21190 + }, + { + "epoch": 0.10587030887163225, + "grad_norm": 0.09830430895090103, + "learning_rate": 2.6891036070990515e-05, + "loss": 10.1954, + "step": 21200 + }, + { + "epoch": 0.1059202476965717, + "grad_norm": 0.09837440401315689, + "learning_rate": 2.688953415604896e-05, + "loss": 10.1955, + "step": 21210 + }, + { + "epoch": 0.10597018652151115, + "grad_norm": 0.09582128375768661, + "learning_rate": 2.6888032241107415e-05, + "loss": 10.1957, + "step": 21220 + }, + { + "epoch": 0.1060201253464506, + "grad_norm": 0.09249364584684372, + "learning_rate": 2.6886530326165862e-05, + "loss": 10.1941, + "step": 21230 + }, + { + "epoch": 0.10607006417139005, + "grad_norm": 0.09359084814786911, + "learning_rate": 2.6885028411224312e-05, + "loss": 10.1938, + "step": 21240 + }, + { + "epoch": 0.1061200029963295, + "grad_norm": 0.09493474662303925, + "learning_rate": 2.6883526496282762e-05, + "loss": 10.1944, + "step": 21250 + }, + { + "epoch": 0.10616994182126895, + "grad_norm": 0.09470515698194504, + "learning_rate": 2.688202458134121e-05, + "loss": 10.1947, + "step": 21260 + }, + { + "epoch": 0.1062198806462084, + "grad_norm": 0.09477045387029648, + "learning_rate": 2.6880522666399663e-05, + "loss": 10.1968, + "step": 21270 + }, + { + "epoch": 0.10626981947114784, + "grad_norm": 0.0963263064622879, + "learning_rate": 2.687902075145811e-05, + "loss": 10.1905, + "step": 21280 + }, + { + "epoch": 0.1063197582960873, + "grad_norm": 0.08664928376674652, + "learning_rate": 2.687751883651656e-05, + "loss": 10.1931, + "step": 21290 + }, + { + "epoch": 0.10636969712102674, + "grad_norm": 0.09358591586351395, + "learning_rate": 2.687601692157501e-05, + "loss": 10.1923, + "step": 21300 + }, + { + "epoch": 0.10641963594596619, + "grad_norm": 0.09352956712245941, + "learning_rate": 2.6874515006633457e-05, + "loss": 10.1936, + "step": 21310 + }, + { + "epoch": 0.10646957477090564, + "grad_norm": 0.09512390941381454, + "learning_rate": 2.687301309169191e-05, + "loss": 10.1936, + "step": 21320 + }, + { + "epoch": 0.10651951359584509, + "grad_norm": 0.09855396300554276, + "learning_rate": 2.6871511176750357e-05, + "loss": 10.1925, + "step": 21330 + }, + { + "epoch": 0.10656945242078454, + "grad_norm": 0.09649109095335007, + "learning_rate": 2.6870009261808807e-05, + "loss": 10.189, + "step": 21340 + }, + { + "epoch": 0.10661939124572399, + "grad_norm": 0.09623111039400101, + "learning_rate": 2.6868507346867257e-05, + "loss": 10.1903, + "step": 21350 + }, + { + "epoch": 0.10666933007066344, + "grad_norm": 0.1012498065829277, + "learning_rate": 2.6867005431925704e-05, + "loss": 10.1914, + "step": 21360 + }, + { + "epoch": 0.10671926889560289, + "grad_norm": 0.10435005277395248, + "learning_rate": 2.6865503516984158e-05, + "loss": 10.1913, + "step": 21370 + }, + { + "epoch": 0.10676920772054234, + "grad_norm": 0.09784939885139465, + "learning_rate": 2.6864001602042604e-05, + "loss": 10.1911, + "step": 21380 + }, + { + "epoch": 0.10681914654548179, + "grad_norm": 0.09625697135925293, + "learning_rate": 2.6862499687101055e-05, + "loss": 10.1886, + "step": 21390 + }, + { + "epoch": 0.10686908537042124, + "grad_norm": 0.09613115340471268, + "learning_rate": 2.6860997772159505e-05, + "loss": 10.1896, + "step": 21400 + }, + { + "epoch": 0.10691902419536069, + "grad_norm": 0.0873713418841362, + "learning_rate": 2.685949585721795e-05, + "loss": 10.188, + "step": 21410 + }, + { + "epoch": 0.10696896302030014, + "grad_norm": 0.09823717176914215, + "learning_rate": 2.6857993942276405e-05, + "loss": 10.1895, + "step": 21420 + }, + { + "epoch": 0.10701890184523959, + "grad_norm": 0.09435474872589111, + "learning_rate": 2.6856492027334852e-05, + "loss": 10.1872, + "step": 21430 + }, + { + "epoch": 0.10706884067017904, + "grad_norm": 0.09932689368724823, + "learning_rate": 2.6854990112393302e-05, + "loss": 10.189, + "step": 21440 + }, + { + "epoch": 0.10711877949511849, + "grad_norm": 0.1061321571469307, + "learning_rate": 2.6853488197451752e-05, + "loss": 10.1871, + "step": 21450 + }, + { + "epoch": 0.10716871832005793, + "grad_norm": 0.08677263557910919, + "learning_rate": 2.68519862825102e-05, + "loss": 10.1935, + "step": 21460 + }, + { + "epoch": 0.10721865714499738, + "grad_norm": 0.09986065328121185, + "learning_rate": 2.6850484367568653e-05, + "loss": 10.1886, + "step": 21470 + }, + { + "epoch": 0.10726859596993683, + "grad_norm": 0.09638167917728424, + "learning_rate": 2.68489824526271e-05, + "loss": 10.1887, + "step": 21480 + }, + { + "epoch": 0.10731853479487628, + "grad_norm": 0.09618506580591202, + "learning_rate": 2.684748053768555e-05, + "loss": 10.1871, + "step": 21490 + }, + { + "epoch": 0.10736847361981573, + "grad_norm": 0.09868462383747101, + "learning_rate": 2.6845978622744e-05, + "loss": 10.1872, + "step": 21500 + }, + { + "epoch": 0.10741841244475517, + "grad_norm": 0.08971121162176132, + "learning_rate": 2.6844476707802447e-05, + "loss": 10.1863, + "step": 21510 + }, + { + "epoch": 0.10746835126969462, + "grad_norm": 0.10220789909362793, + "learning_rate": 2.68429747928609e-05, + "loss": 10.1839, + "step": 21520 + }, + { + "epoch": 0.10751829009463407, + "grad_norm": 0.09101521223783493, + "learning_rate": 2.6841472877919347e-05, + "loss": 10.1825, + "step": 21530 + }, + { + "epoch": 0.10756822891957352, + "grad_norm": 0.09986945986747742, + "learning_rate": 2.68399709629778e-05, + "loss": 10.1873, + "step": 21540 + }, + { + "epoch": 0.10761816774451297, + "grad_norm": 0.09732682257890701, + "learning_rate": 2.6838469048036247e-05, + "loss": 10.1841, + "step": 21550 + }, + { + "epoch": 0.10766810656945242, + "grad_norm": 0.09686817973852158, + "learning_rate": 2.6836967133094694e-05, + "loss": 10.1836, + "step": 21560 + }, + { + "epoch": 0.10771804539439186, + "grad_norm": 0.09880684316158295, + "learning_rate": 2.6835465218153148e-05, + "loss": 10.1874, + "step": 21570 + }, + { + "epoch": 0.10776798421933131, + "grad_norm": 0.09467620402574539, + "learning_rate": 2.6833963303211594e-05, + "loss": 10.1826, + "step": 21580 + }, + { + "epoch": 0.10781792304427076, + "grad_norm": 0.0975777879357338, + "learning_rate": 2.6832461388270048e-05, + "loss": 10.1839, + "step": 21590 + }, + { + "epoch": 0.10786786186921021, + "grad_norm": 0.09700115025043488, + "learning_rate": 2.6830959473328495e-05, + "loss": 10.1848, + "step": 21600 + }, + { + "epoch": 0.10791780069414966, + "grad_norm": 0.0938863530755043, + "learning_rate": 2.682945755838694e-05, + "loss": 10.1856, + "step": 21610 + }, + { + "epoch": 0.10796773951908911, + "grad_norm": 0.09892918914556503, + "learning_rate": 2.6827955643445395e-05, + "loss": 10.1825, + "step": 21620 + }, + { + "epoch": 0.10801767834402856, + "grad_norm": 0.08988924324512482, + "learning_rate": 2.6826453728503842e-05, + "loss": 10.1828, + "step": 21630 + }, + { + "epoch": 0.10806761716896801, + "grad_norm": 0.09243562072515488, + "learning_rate": 2.6824951813562295e-05, + "loss": 10.1822, + "step": 21640 + }, + { + "epoch": 0.10811755599390746, + "grad_norm": 0.09508119523525238, + "learning_rate": 2.6823449898620742e-05, + "loss": 10.1855, + "step": 21650 + }, + { + "epoch": 0.10816749481884691, + "grad_norm": 0.09622988849878311, + "learning_rate": 2.682194798367919e-05, + "loss": 10.1833, + "step": 21660 + }, + { + "epoch": 0.10821743364378636, + "grad_norm": 0.09975172579288483, + "learning_rate": 2.6820446068737643e-05, + "loss": 10.1856, + "step": 21670 + }, + { + "epoch": 0.10826737246872581, + "grad_norm": 0.10020721703767776, + "learning_rate": 2.681894415379609e-05, + "loss": 10.1846, + "step": 21680 + }, + { + "epoch": 0.10831731129366526, + "grad_norm": 0.091154083609581, + "learning_rate": 2.6817442238854543e-05, + "loss": 10.1915, + "step": 21690 + }, + { + "epoch": 0.10836725011860471, + "grad_norm": 0.09663428366184235, + "learning_rate": 2.681594032391299e-05, + "loss": 10.1844, + "step": 21700 + }, + { + "epoch": 0.10841718894354416, + "grad_norm": 0.09310304373502731, + "learning_rate": 2.6814438408971437e-05, + "loss": 10.1833, + "step": 21710 + }, + { + "epoch": 0.1084671277684836, + "grad_norm": 0.09270327538251877, + "learning_rate": 2.681293649402989e-05, + "loss": 10.1821, + "step": 21720 + }, + { + "epoch": 0.10851706659342306, + "grad_norm": 0.09398692846298218, + "learning_rate": 2.6811434579088337e-05, + "loss": 10.1829, + "step": 21730 + }, + { + "epoch": 0.1085670054183625, + "grad_norm": 0.10346375405788422, + "learning_rate": 2.680993266414679e-05, + "loss": 10.1817, + "step": 21740 + }, + { + "epoch": 0.10861694424330195, + "grad_norm": 0.09369218349456787, + "learning_rate": 2.6808430749205237e-05, + "loss": 10.1809, + "step": 21750 + }, + { + "epoch": 0.1086668830682414, + "grad_norm": 0.09752266108989716, + "learning_rate": 2.6806928834263684e-05, + "loss": 10.1832, + "step": 21760 + }, + { + "epoch": 0.10871682189318085, + "grad_norm": 0.09794414043426514, + "learning_rate": 2.6805426919322138e-05, + "loss": 10.1814, + "step": 21770 + }, + { + "epoch": 0.1087667607181203, + "grad_norm": 0.09713537245988846, + "learning_rate": 2.6803925004380584e-05, + "loss": 10.1832, + "step": 21780 + }, + { + "epoch": 0.10881669954305975, + "grad_norm": 0.09311962127685547, + "learning_rate": 2.6802423089439038e-05, + "loss": 10.1797, + "step": 21790 + }, + { + "epoch": 0.1088666383679992, + "grad_norm": 0.095693439245224, + "learning_rate": 2.6800921174497485e-05, + "loss": 10.1804, + "step": 21800 + }, + { + "epoch": 0.10891657719293865, + "grad_norm": 0.0987323671579361, + "learning_rate": 2.679941925955593e-05, + "loss": 10.1776, + "step": 21810 + }, + { + "epoch": 0.1089665160178781, + "grad_norm": 0.09942522644996643, + "learning_rate": 2.6797917344614385e-05, + "loss": 10.1785, + "step": 21820 + }, + { + "epoch": 0.10901645484281755, + "grad_norm": 0.09082325547933578, + "learning_rate": 2.6796415429672832e-05, + "loss": 10.1772, + "step": 21830 + }, + { + "epoch": 0.109066393667757, + "grad_norm": 0.08826711773872375, + "learning_rate": 2.6794913514731285e-05, + "loss": 10.1822, + "step": 21840 + }, + { + "epoch": 0.10911633249269645, + "grad_norm": 0.09767315536737442, + "learning_rate": 2.6793411599789732e-05, + "loss": 10.1764, + "step": 21850 + }, + { + "epoch": 0.1091662713176359, + "grad_norm": 0.0958879366517067, + "learning_rate": 2.6791909684848182e-05, + "loss": 10.1805, + "step": 21860 + }, + { + "epoch": 0.10921621014257535, + "grad_norm": 0.09533664584159851, + "learning_rate": 2.6790407769906633e-05, + "loss": 10.1824, + "step": 21870 + }, + { + "epoch": 0.1092661489675148, + "grad_norm": 0.09033321589231491, + "learning_rate": 2.678890585496508e-05, + "loss": 10.1783, + "step": 21880 + }, + { + "epoch": 0.10931608779245425, + "grad_norm": 0.09859579056501389, + "learning_rate": 2.6787403940023533e-05, + "loss": 10.1763, + "step": 21890 + }, + { + "epoch": 0.1093660266173937, + "grad_norm": 0.09806695580482483, + "learning_rate": 2.678590202508198e-05, + "loss": 10.1808, + "step": 21900 + }, + { + "epoch": 0.10941596544233315, + "grad_norm": 0.09141387790441513, + "learning_rate": 2.678440011014043e-05, + "loss": 10.1787, + "step": 21910 + }, + { + "epoch": 0.1094659042672726, + "grad_norm": 0.09609120339155197, + "learning_rate": 2.678289819519888e-05, + "loss": 10.1731, + "step": 21920 + }, + { + "epoch": 0.10951584309221205, + "grad_norm": 0.09060762822628021, + "learning_rate": 2.6781396280257327e-05, + "loss": 10.1776, + "step": 21930 + }, + { + "epoch": 0.1095657819171515, + "grad_norm": 0.09392700344324112, + "learning_rate": 2.677989436531578e-05, + "loss": 10.1774, + "step": 21940 + }, + { + "epoch": 0.10961572074209094, + "grad_norm": 0.09288621693849564, + "learning_rate": 2.6778392450374227e-05, + "loss": 10.1799, + "step": 21950 + }, + { + "epoch": 0.1096656595670304, + "grad_norm": 0.09960731863975525, + "learning_rate": 2.6776890535432677e-05, + "loss": 10.1738, + "step": 21960 + }, + { + "epoch": 0.10971559839196984, + "grad_norm": 0.09052469581365585, + "learning_rate": 2.6775388620491128e-05, + "loss": 10.1785, + "step": 21970 + }, + { + "epoch": 0.10976553721690929, + "grad_norm": 0.09761349111795425, + "learning_rate": 2.6773886705549574e-05, + "loss": 10.1752, + "step": 21980 + }, + { + "epoch": 0.10981547604184874, + "grad_norm": 0.09489648789167404, + "learning_rate": 2.6772384790608028e-05, + "loss": 10.1732, + "step": 21990 + }, + { + "epoch": 0.10986541486678819, + "grad_norm": 0.0943918228149414, + "learning_rate": 2.6770882875666475e-05, + "loss": 10.1755, + "step": 22000 + }, + { + "epoch": 0.10991535369172763, + "grad_norm": 0.09196553379297256, + "learning_rate": 2.6769380960724925e-05, + "loss": 10.1725, + "step": 22010 + }, + { + "epoch": 0.10996529251666708, + "grad_norm": 0.09334488213062286, + "learning_rate": 2.6767879045783375e-05, + "loss": 10.1756, + "step": 22020 + }, + { + "epoch": 0.11001523134160653, + "grad_norm": 0.10055167973041534, + "learning_rate": 2.6766377130841822e-05, + "loss": 10.1705, + "step": 22030 + }, + { + "epoch": 0.11006517016654598, + "grad_norm": 0.09944339841604233, + "learning_rate": 2.6764875215900275e-05, + "loss": 10.1801, + "step": 22040 + }, + { + "epoch": 0.11011510899148542, + "grad_norm": 0.10031431168317795, + "learning_rate": 2.6763373300958722e-05, + "loss": 10.1704, + "step": 22050 + }, + { + "epoch": 0.11016504781642487, + "grad_norm": 0.09707958251237869, + "learning_rate": 2.6761871386017172e-05, + "loss": 10.1752, + "step": 22060 + }, + { + "epoch": 0.11021498664136432, + "grad_norm": 0.09501238167285919, + "learning_rate": 2.6760369471075623e-05, + "loss": 10.1698, + "step": 22070 + }, + { + "epoch": 0.11026492546630377, + "grad_norm": 0.09578476846218109, + "learning_rate": 2.675886755613407e-05, + "loss": 10.175, + "step": 22080 + }, + { + "epoch": 0.11031486429124322, + "grad_norm": 0.09270567446947098, + "learning_rate": 2.6757365641192523e-05, + "loss": 10.1732, + "step": 22090 + }, + { + "epoch": 0.11036480311618267, + "grad_norm": 0.09588302671909332, + "learning_rate": 2.675586372625097e-05, + "loss": 10.1726, + "step": 22100 + }, + { + "epoch": 0.11041474194112212, + "grad_norm": 0.09637803584337234, + "learning_rate": 2.675436181130942e-05, + "loss": 10.1763, + "step": 22110 + }, + { + "epoch": 0.11046468076606157, + "grad_norm": 0.09648361802101135, + "learning_rate": 2.675285989636787e-05, + "loss": 10.1731, + "step": 22120 + }, + { + "epoch": 0.11051461959100102, + "grad_norm": 0.0957055613398552, + "learning_rate": 2.6751357981426317e-05, + "loss": 10.1696, + "step": 22130 + }, + { + "epoch": 0.11056455841594047, + "grad_norm": 0.09542543441057205, + "learning_rate": 2.674985606648477e-05, + "loss": 10.1725, + "step": 22140 + }, + { + "epoch": 0.11061449724087992, + "grad_norm": 0.09192683547735214, + "learning_rate": 2.6748354151543217e-05, + "loss": 10.1725, + "step": 22150 + }, + { + "epoch": 0.11066443606581937, + "grad_norm": 0.09353506565093994, + "learning_rate": 2.6746852236601667e-05, + "loss": 10.1663, + "step": 22160 + }, + { + "epoch": 0.11071437489075882, + "grad_norm": 0.09622788429260254, + "learning_rate": 2.6745350321660118e-05, + "loss": 10.1658, + "step": 22170 + }, + { + "epoch": 0.11076431371569827, + "grad_norm": 0.0987735167145729, + "learning_rate": 2.6743848406718568e-05, + "loss": 10.1678, + "step": 22180 + }, + { + "epoch": 0.11081425254063772, + "grad_norm": 0.09520336985588074, + "learning_rate": 2.6742346491777018e-05, + "loss": 10.1679, + "step": 22190 + }, + { + "epoch": 0.11086419136557717, + "grad_norm": 0.0966307744383812, + "learning_rate": 2.6740844576835465e-05, + "loss": 10.1683, + "step": 22200 + }, + { + "epoch": 0.11091413019051662, + "grad_norm": 0.10376681387424469, + "learning_rate": 2.6739342661893915e-05, + "loss": 10.1714, + "step": 22210 + }, + { + "epoch": 0.11096406901545607, + "grad_norm": 0.09316543489694595, + "learning_rate": 2.6737840746952365e-05, + "loss": 10.169, + "step": 22220 + }, + { + "epoch": 0.11101400784039551, + "grad_norm": 0.09309463202953339, + "learning_rate": 2.6736338832010815e-05, + "loss": 10.1702, + "step": 22230 + }, + { + "epoch": 0.11106394666533496, + "grad_norm": 0.09572753310203552, + "learning_rate": 2.6734836917069265e-05, + "loss": 10.1718, + "step": 22240 + }, + { + "epoch": 0.11111388549027441, + "grad_norm": 0.0948120579123497, + "learning_rate": 2.6733335002127712e-05, + "loss": 10.1707, + "step": 22250 + }, + { + "epoch": 0.11116382431521386, + "grad_norm": 0.09530732035636902, + "learning_rate": 2.6731833087186162e-05, + "loss": 10.1678, + "step": 22260 + }, + { + "epoch": 0.11121376314015331, + "grad_norm": 0.10655371099710464, + "learning_rate": 2.6730331172244613e-05, + "loss": 10.1669, + "step": 22270 + }, + { + "epoch": 0.11126370196509276, + "grad_norm": 0.09528721123933792, + "learning_rate": 2.6728829257303063e-05, + "loss": 10.1656, + "step": 22280 + }, + { + "epoch": 0.11131364079003221, + "grad_norm": 0.09272367507219315, + "learning_rate": 2.6727327342361513e-05, + "loss": 10.1669, + "step": 22290 + }, + { + "epoch": 0.11136357961497166, + "grad_norm": 0.0940340906381607, + "learning_rate": 2.672582542741996e-05, + "loss": 10.1666, + "step": 22300 + }, + { + "epoch": 0.11141351843991111, + "grad_norm": 0.09455035626888275, + "learning_rate": 2.672432351247841e-05, + "loss": 10.1686, + "step": 22310 + }, + { + "epoch": 0.11146345726485056, + "grad_norm": 0.09895509481430054, + "learning_rate": 2.672282159753686e-05, + "loss": 10.1666, + "step": 22320 + }, + { + "epoch": 0.11151339608979001, + "grad_norm": 0.09388074278831482, + "learning_rate": 2.672131968259531e-05, + "loss": 10.1655, + "step": 22330 + }, + { + "epoch": 0.11156333491472946, + "grad_norm": 0.09425579011440277, + "learning_rate": 2.671981776765376e-05, + "loss": 10.1626, + "step": 22340 + }, + { + "epoch": 0.11161327373966891, + "grad_norm": 0.09784049540758133, + "learning_rate": 2.6718315852712207e-05, + "loss": 10.1689, + "step": 22350 + }, + { + "epoch": 0.11166321256460836, + "grad_norm": 0.09686186909675598, + "learning_rate": 2.6716813937770657e-05, + "loss": 10.1677, + "step": 22360 + }, + { + "epoch": 0.11171315138954781, + "grad_norm": 0.09419489651918411, + "learning_rate": 2.6715312022829108e-05, + "loss": 10.1638, + "step": 22370 + }, + { + "epoch": 0.11176309021448726, + "grad_norm": 0.09300322085618973, + "learning_rate": 2.6713810107887558e-05, + "loss": 10.1697, + "step": 22380 + }, + { + "epoch": 0.1118130290394267, + "grad_norm": 0.09139790385961533, + "learning_rate": 2.6712308192946008e-05, + "loss": 10.165, + "step": 22390 + }, + { + "epoch": 0.11186296786436616, + "grad_norm": 0.09501411765813828, + "learning_rate": 2.6710806278004455e-05, + "loss": 10.1646, + "step": 22400 + }, + { + "epoch": 0.1119129066893056, + "grad_norm": 0.09894577413797379, + "learning_rate": 2.6709304363062905e-05, + "loss": 10.1673, + "step": 22410 + }, + { + "epoch": 0.11196284551424505, + "grad_norm": 0.09686759859323502, + "learning_rate": 2.6707802448121355e-05, + "loss": 10.1675, + "step": 22420 + }, + { + "epoch": 0.1120127843391845, + "grad_norm": 0.09430637210607529, + "learning_rate": 2.6706300533179805e-05, + "loss": 10.165, + "step": 22430 + }, + { + "epoch": 0.11206272316412395, + "grad_norm": 0.09339676797389984, + "learning_rate": 2.6704798618238256e-05, + "loss": 10.1635, + "step": 22440 + }, + { + "epoch": 0.1121126619890634, + "grad_norm": 0.09374126046895981, + "learning_rate": 2.6703296703296702e-05, + "loss": 10.1578, + "step": 22450 + }, + { + "epoch": 0.11216260081400285, + "grad_norm": 0.09082947671413422, + "learning_rate": 2.6701794788355152e-05, + "loss": 10.1673, + "step": 22460 + }, + { + "epoch": 0.1122125396389423, + "grad_norm": 0.0888797864317894, + "learning_rate": 2.6700292873413603e-05, + "loss": 10.1637, + "step": 22470 + }, + { + "epoch": 0.11226247846388175, + "grad_norm": 0.09704219549894333, + "learning_rate": 2.6698790958472053e-05, + "loss": 10.1671, + "step": 22480 + }, + { + "epoch": 0.1123124172888212, + "grad_norm": 0.09523064643144608, + "learning_rate": 2.6697289043530503e-05, + "loss": 10.1643, + "step": 22490 + }, + { + "epoch": 0.11236235611376064, + "grad_norm": 0.09382279962301254, + "learning_rate": 2.6695787128588953e-05, + "loss": 10.1594, + "step": 22500 + }, + { + "epoch": 0.11241229493870009, + "grad_norm": 0.09759511053562164, + "learning_rate": 2.66942852136474e-05, + "loss": 10.1612, + "step": 22510 + }, + { + "epoch": 0.11246223376363954, + "grad_norm": 0.09149253368377686, + "learning_rate": 2.669278329870585e-05, + "loss": 10.1641, + "step": 22520 + }, + { + "epoch": 0.11251217258857898, + "grad_norm": 0.09213311225175858, + "learning_rate": 2.66912813837643e-05, + "loss": 10.1583, + "step": 22530 + }, + { + "epoch": 0.11256211141351843, + "grad_norm": 0.09670817106962204, + "learning_rate": 2.668977946882275e-05, + "loss": 10.1616, + "step": 22540 + }, + { + "epoch": 0.11261205023845788, + "grad_norm": 0.09670945256948471, + "learning_rate": 2.66882775538812e-05, + "loss": 10.1624, + "step": 22550 + }, + { + "epoch": 0.11266198906339733, + "grad_norm": 0.09598565846681595, + "learning_rate": 2.6686775638939647e-05, + "loss": 10.1624, + "step": 22560 + }, + { + "epoch": 0.11271192788833678, + "grad_norm": 0.0956323966383934, + "learning_rate": 2.6685273723998098e-05, + "loss": 10.1591, + "step": 22570 + }, + { + "epoch": 0.11276186671327623, + "grad_norm": 0.09476076811552048, + "learning_rate": 2.6683771809056548e-05, + "loss": 10.1595, + "step": 22580 + }, + { + "epoch": 0.11281180553821568, + "grad_norm": 0.09837251901626587, + "learning_rate": 2.6682269894114998e-05, + "loss": 10.1638, + "step": 22590 + }, + { + "epoch": 0.11286174436315513, + "grad_norm": 0.09544532746076584, + "learning_rate": 2.6680767979173448e-05, + "loss": 10.1615, + "step": 22600 + }, + { + "epoch": 0.11291168318809458, + "grad_norm": 0.09193170815706253, + "learning_rate": 2.6679266064231895e-05, + "loss": 10.162, + "step": 22610 + }, + { + "epoch": 0.11296162201303403, + "grad_norm": 0.09794174134731293, + "learning_rate": 2.6677764149290345e-05, + "loss": 10.1608, + "step": 22620 + }, + { + "epoch": 0.11301156083797348, + "grad_norm": 0.08994992822408676, + "learning_rate": 2.6676262234348795e-05, + "loss": 10.1568, + "step": 22630 + }, + { + "epoch": 0.11306149966291293, + "grad_norm": 0.09660772234201431, + "learning_rate": 2.6674760319407246e-05, + "loss": 10.166, + "step": 22640 + }, + { + "epoch": 0.11311143848785238, + "grad_norm": 0.09649979323148727, + "learning_rate": 2.6673258404465696e-05, + "loss": 10.1625, + "step": 22650 + }, + { + "epoch": 0.11316137731279183, + "grad_norm": 0.0917321965098381, + "learning_rate": 2.6671756489524142e-05, + "loss": 10.1596, + "step": 22660 + }, + { + "epoch": 0.11321131613773128, + "grad_norm": 0.0942411795258522, + "learning_rate": 2.6670254574582593e-05, + "loss": 10.1607, + "step": 22670 + }, + { + "epoch": 0.11326125496267073, + "grad_norm": 0.0937599241733551, + "learning_rate": 2.6668752659641043e-05, + "loss": 10.1558, + "step": 22680 + }, + { + "epoch": 0.11331119378761018, + "grad_norm": 0.09375976026058197, + "learning_rate": 2.6667250744699493e-05, + "loss": 10.1595, + "step": 22690 + }, + { + "epoch": 0.11336113261254963, + "grad_norm": 0.09384457021951675, + "learning_rate": 2.6665748829757943e-05, + "loss": 10.1543, + "step": 22700 + }, + { + "epoch": 0.11341107143748907, + "grad_norm": 0.09696808457374573, + "learning_rate": 2.666424691481639e-05, + "loss": 10.1586, + "step": 22710 + }, + { + "epoch": 0.11346101026242852, + "grad_norm": 0.09339691698551178, + "learning_rate": 2.666274499987484e-05, + "loss": 10.1598, + "step": 22720 + }, + { + "epoch": 0.11351094908736797, + "grad_norm": 0.09268683195114136, + "learning_rate": 2.666124308493329e-05, + "loss": 10.1516, + "step": 22730 + }, + { + "epoch": 0.11356088791230742, + "grad_norm": 0.10044291615486145, + "learning_rate": 2.665974116999174e-05, + "loss": 10.1582, + "step": 22740 + }, + { + "epoch": 0.11361082673724687, + "grad_norm": 0.0959634929895401, + "learning_rate": 2.665823925505019e-05, + "loss": 10.1546, + "step": 22750 + }, + { + "epoch": 0.11366076556218632, + "grad_norm": 0.09456854313611984, + "learning_rate": 2.6656737340108637e-05, + "loss": 10.154, + "step": 22760 + }, + { + "epoch": 0.11371070438712577, + "grad_norm": 0.10183733701705933, + "learning_rate": 2.6655235425167088e-05, + "loss": 10.1546, + "step": 22770 + }, + { + "epoch": 0.11376064321206522, + "grad_norm": 0.09504352509975433, + "learning_rate": 2.6653733510225538e-05, + "loss": 10.1579, + "step": 22780 + }, + { + "epoch": 0.11381058203700467, + "grad_norm": 0.09749595075845718, + "learning_rate": 2.6652231595283988e-05, + "loss": 10.1589, + "step": 22790 + }, + { + "epoch": 0.11386052086194412, + "grad_norm": 0.09672117978334427, + "learning_rate": 2.6650729680342438e-05, + "loss": 10.156, + "step": 22800 + }, + { + "epoch": 0.11391045968688357, + "grad_norm": 0.10440251231193542, + "learning_rate": 2.6649227765400885e-05, + "loss": 10.1534, + "step": 22810 + }, + { + "epoch": 0.11396039851182302, + "grad_norm": 0.09660029411315918, + "learning_rate": 2.664772585045934e-05, + "loss": 10.1507, + "step": 22820 + }, + { + "epoch": 0.11401033733676247, + "grad_norm": 0.09805593639612198, + "learning_rate": 2.6646223935517785e-05, + "loss": 10.1509, + "step": 22830 + }, + { + "epoch": 0.11406027616170192, + "grad_norm": 0.09688452631235123, + "learning_rate": 2.6644722020576236e-05, + "loss": 10.1586, + "step": 22840 + }, + { + "epoch": 0.11411021498664137, + "grad_norm": 0.09908785670995712, + "learning_rate": 2.6643220105634686e-05, + "loss": 10.1529, + "step": 22850 + }, + { + "epoch": 0.11416015381158082, + "grad_norm": 0.0970318540930748, + "learning_rate": 2.6641718190693133e-05, + "loss": 10.15, + "step": 22860 + }, + { + "epoch": 0.11421009263652027, + "grad_norm": 0.0938095897436142, + "learning_rate": 2.6640216275751586e-05, + "loss": 10.1534, + "step": 22870 + }, + { + "epoch": 0.11426003146145972, + "grad_norm": 0.09250219166278839, + "learning_rate": 2.6638714360810033e-05, + "loss": 10.1529, + "step": 22880 + }, + { + "epoch": 0.11430997028639917, + "grad_norm": 0.09546291828155518, + "learning_rate": 2.6637212445868483e-05, + "loss": 10.1523, + "step": 22890 + }, + { + "epoch": 0.11435990911133861, + "grad_norm": 0.09982664883136749, + "learning_rate": 2.6635710530926933e-05, + "loss": 10.1498, + "step": 22900 + }, + { + "epoch": 0.11440984793627806, + "grad_norm": 0.0915086641907692, + "learning_rate": 2.663420861598538e-05, + "loss": 10.1501, + "step": 22910 + }, + { + "epoch": 0.11445978676121751, + "grad_norm": 0.09142784029245377, + "learning_rate": 2.6632706701043834e-05, + "loss": 10.1543, + "step": 22920 + }, + { + "epoch": 0.11450972558615696, + "grad_norm": 0.09327565878629684, + "learning_rate": 2.663120478610228e-05, + "loss": 10.1495, + "step": 22930 + }, + { + "epoch": 0.11455966441109641, + "grad_norm": 0.10108178108930588, + "learning_rate": 2.662970287116073e-05, + "loss": 10.15, + "step": 22940 + }, + { + "epoch": 0.11460960323603586, + "grad_norm": 0.10374581068754196, + "learning_rate": 2.662820095621918e-05, + "loss": 10.1521, + "step": 22950 + }, + { + "epoch": 0.11465954206097531, + "grad_norm": 0.09310846030712128, + "learning_rate": 2.6626699041277628e-05, + "loss": 10.1512, + "step": 22960 + }, + { + "epoch": 0.11470948088591476, + "grad_norm": 0.093265101313591, + "learning_rate": 2.662519712633608e-05, + "loss": 10.1533, + "step": 22970 + }, + { + "epoch": 0.11475941971085421, + "grad_norm": 0.09261230379343033, + "learning_rate": 2.6623695211394528e-05, + "loss": 10.1498, + "step": 22980 + }, + { + "epoch": 0.11480935853579366, + "grad_norm": 0.09824974089860916, + "learning_rate": 2.6622193296452978e-05, + "loss": 10.1488, + "step": 22990 + }, + { + "epoch": 0.1148592973607331, + "grad_norm": 0.09499943256378174, + "learning_rate": 2.6620691381511428e-05, + "loss": 10.1487, + "step": 23000 + }, + { + "epoch": 0.11490923618567254, + "grad_norm": 0.09166286885738373, + "learning_rate": 2.6619189466569875e-05, + "loss": 10.1469, + "step": 23010 + }, + { + "epoch": 0.114959175010612, + "grad_norm": 0.09660668671131134, + "learning_rate": 2.661768755162833e-05, + "loss": 10.1496, + "step": 23020 + }, + { + "epoch": 0.11500911383555144, + "grad_norm": 0.09428876638412476, + "learning_rate": 2.6616185636686775e-05, + "loss": 10.1469, + "step": 23030 + }, + { + "epoch": 0.1150590526604909, + "grad_norm": 0.09304223209619522, + "learning_rate": 2.6614683721745226e-05, + "loss": 10.1458, + "step": 23040 + }, + { + "epoch": 0.11510899148543034, + "grad_norm": 0.09473152458667755, + "learning_rate": 2.6613181806803676e-05, + "loss": 10.1446, + "step": 23050 + }, + { + "epoch": 0.11515893031036979, + "grad_norm": 0.09486373513936996, + "learning_rate": 2.6611679891862123e-05, + "loss": 10.1459, + "step": 23060 + }, + { + "epoch": 0.11520886913530924, + "grad_norm": 0.0942765548825264, + "learning_rate": 2.6610177976920576e-05, + "loss": 10.1487, + "step": 23070 + }, + { + "epoch": 0.11525880796024869, + "grad_norm": 0.09221851825714111, + "learning_rate": 2.6608676061979023e-05, + "loss": 10.1516, + "step": 23080 + }, + { + "epoch": 0.11530874678518814, + "grad_norm": 0.09266392886638641, + "learning_rate": 2.6607174147037473e-05, + "loss": 10.1478, + "step": 23090 + }, + { + "epoch": 0.11535868561012759, + "grad_norm": 0.09701654314994812, + "learning_rate": 2.6605672232095923e-05, + "loss": 10.146, + "step": 23100 + }, + { + "epoch": 0.11540862443506704, + "grad_norm": 0.09978927671909332, + "learning_rate": 2.660417031715437e-05, + "loss": 10.1477, + "step": 23110 + }, + { + "epoch": 0.11545856326000649, + "grad_norm": 0.09633700549602509, + "learning_rate": 2.6602668402212824e-05, + "loss": 10.1477, + "step": 23120 + }, + { + "epoch": 0.11550850208494594, + "grad_norm": 0.09246396273374557, + "learning_rate": 2.660116648727127e-05, + "loss": 10.1476, + "step": 23130 + }, + { + "epoch": 0.11555844090988539, + "grad_norm": 0.09979529678821564, + "learning_rate": 2.6599664572329724e-05, + "loss": 10.1439, + "step": 23140 + }, + { + "epoch": 0.11560837973482484, + "grad_norm": 0.09765896201133728, + "learning_rate": 2.659816265738817e-05, + "loss": 10.1442, + "step": 23150 + }, + { + "epoch": 0.11565831855976429, + "grad_norm": 0.09836973249912262, + "learning_rate": 2.6596660742446618e-05, + "loss": 10.1468, + "step": 23160 + }, + { + "epoch": 0.11570825738470374, + "grad_norm": 0.09357340633869171, + "learning_rate": 2.659515882750507e-05, + "loss": 10.1475, + "step": 23170 + }, + { + "epoch": 0.11575819620964319, + "grad_norm": 0.09507403522729874, + "learning_rate": 2.6593656912563518e-05, + "loss": 10.1419, + "step": 23180 + }, + { + "epoch": 0.11580813503458263, + "grad_norm": 0.09896954894065857, + "learning_rate": 2.659215499762197e-05, + "loss": 10.1431, + "step": 23190 + }, + { + "epoch": 0.11585807385952208, + "grad_norm": 0.09166592359542847, + "learning_rate": 2.6590653082680418e-05, + "loss": 10.144, + "step": 23200 + }, + { + "epoch": 0.11590801268446153, + "grad_norm": 0.09727290272712708, + "learning_rate": 2.6589151167738865e-05, + "loss": 10.1475, + "step": 23210 + }, + { + "epoch": 0.11595795150940098, + "grad_norm": 0.09719046205282211, + "learning_rate": 2.658764925279732e-05, + "loss": 10.1424, + "step": 23220 + }, + { + "epoch": 0.11600789033434043, + "grad_norm": 0.09605339914560318, + "learning_rate": 2.6586147337855765e-05, + "loss": 10.1377, + "step": 23230 + }, + { + "epoch": 0.11605782915927988, + "grad_norm": 0.10000360757112503, + "learning_rate": 2.658464542291422e-05, + "loss": 10.1372, + "step": 23240 + }, + { + "epoch": 0.11610776798421933, + "grad_norm": 0.09418469667434692, + "learning_rate": 2.6583143507972666e-05, + "loss": 10.1447, + "step": 23250 + }, + { + "epoch": 0.11615770680915878, + "grad_norm": 0.09615140408277512, + "learning_rate": 2.6581641593031113e-05, + "loss": 10.1425, + "step": 23260 + }, + { + "epoch": 0.11620764563409823, + "grad_norm": 0.0987091064453125, + "learning_rate": 2.6580139678089566e-05, + "loss": 10.1446, + "step": 23270 + }, + { + "epoch": 0.11625758445903768, + "grad_norm": 0.09312444925308228, + "learning_rate": 2.6578637763148013e-05, + "loss": 10.1424, + "step": 23280 + }, + { + "epoch": 0.11630752328397713, + "grad_norm": 0.09860526770353317, + "learning_rate": 2.6577135848206466e-05, + "loss": 10.143, + "step": 23290 + }, + { + "epoch": 0.11635746210891658, + "grad_norm": 0.0991264283657074, + "learning_rate": 2.6575633933264913e-05, + "loss": 10.1415, + "step": 23300 + }, + { + "epoch": 0.11640740093385603, + "grad_norm": 0.0935107171535492, + "learning_rate": 2.657413201832336e-05, + "loss": 10.1431, + "step": 23310 + }, + { + "epoch": 0.11645733975879548, + "grad_norm": 0.09420657157897949, + "learning_rate": 2.6572630103381814e-05, + "loss": 10.1419, + "step": 23320 + }, + { + "epoch": 0.11650727858373493, + "grad_norm": 0.09580549597740173, + "learning_rate": 2.657112818844026e-05, + "loss": 10.1444, + "step": 23330 + }, + { + "epoch": 0.11655721740867438, + "grad_norm": 0.09820415079593658, + "learning_rate": 2.6569626273498714e-05, + "loss": 10.1408, + "step": 23340 + }, + { + "epoch": 0.11660715623361383, + "grad_norm": 0.0958128347992897, + "learning_rate": 2.656812435855716e-05, + "loss": 10.143, + "step": 23350 + }, + { + "epoch": 0.11665709505855328, + "grad_norm": 0.09436112642288208, + "learning_rate": 2.6566622443615608e-05, + "loss": 10.1437, + "step": 23360 + }, + { + "epoch": 0.11670703388349273, + "grad_norm": 0.09518658369779587, + "learning_rate": 2.656512052867406e-05, + "loss": 10.1386, + "step": 23370 + }, + { + "epoch": 0.11675697270843217, + "grad_norm": 0.08935555815696716, + "learning_rate": 2.6563618613732508e-05, + "loss": 10.1418, + "step": 23380 + }, + { + "epoch": 0.11680691153337162, + "grad_norm": 0.09481310844421387, + "learning_rate": 2.656211669879096e-05, + "loss": 10.1424, + "step": 23390 + }, + { + "epoch": 0.11685685035831107, + "grad_norm": 0.09546245634555817, + "learning_rate": 2.6560614783849408e-05, + "loss": 10.1411, + "step": 23400 + }, + { + "epoch": 0.11690678918325052, + "grad_norm": 0.09654063731431961, + "learning_rate": 2.6559112868907855e-05, + "loss": 10.1385, + "step": 23410 + }, + { + "epoch": 0.11695672800818997, + "grad_norm": 0.09841468185186386, + "learning_rate": 2.655761095396631e-05, + "loss": 10.1373, + "step": 23420 + }, + { + "epoch": 0.11700666683312942, + "grad_norm": 0.09300010651350021, + "learning_rate": 2.6556109039024755e-05, + "loss": 10.1345, + "step": 23430 + }, + { + "epoch": 0.11705660565806887, + "grad_norm": 0.09334631264209747, + "learning_rate": 2.655460712408321e-05, + "loss": 10.1359, + "step": 23440 + }, + { + "epoch": 0.11710654448300832, + "grad_norm": 0.09312853217124939, + "learning_rate": 2.6553105209141656e-05, + "loss": 10.1425, + "step": 23450 + }, + { + "epoch": 0.11715648330794777, + "grad_norm": 0.0956452414393425, + "learning_rate": 2.6551603294200106e-05, + "loss": 10.1425, + "step": 23460 + }, + { + "epoch": 0.11720642213288722, + "grad_norm": 0.0932190865278244, + "learning_rate": 2.6550101379258556e-05, + "loss": 10.1389, + "step": 23470 + }, + { + "epoch": 0.11725636095782667, + "grad_norm": 0.09214978665113449, + "learning_rate": 2.6548599464317003e-05, + "loss": 10.1381, + "step": 23480 + }, + { + "epoch": 0.1173062997827661, + "grad_norm": 0.09422241896390915, + "learning_rate": 2.6547097549375456e-05, + "loss": 10.137, + "step": 23490 + }, + { + "epoch": 0.11735623860770555, + "grad_norm": 0.09438539296388626, + "learning_rate": 2.6545595634433903e-05, + "loss": 10.1369, + "step": 23500 + }, + { + "epoch": 0.117406177432645, + "grad_norm": 0.09662014991044998, + "learning_rate": 2.6544093719492353e-05, + "loss": 10.1367, + "step": 23510 + }, + { + "epoch": 0.11745611625758445, + "grad_norm": 0.08834582567214966, + "learning_rate": 2.6542591804550804e-05, + "loss": 10.1387, + "step": 23520 + }, + { + "epoch": 0.1175060550825239, + "grad_norm": 0.09564156085252762, + "learning_rate": 2.654108988960925e-05, + "loss": 10.1417, + "step": 23530 + }, + { + "epoch": 0.11755599390746335, + "grad_norm": 0.09563954174518585, + "learning_rate": 2.6539587974667704e-05, + "loss": 10.1339, + "step": 23540 + }, + { + "epoch": 0.1176059327324028, + "grad_norm": 0.0935792475938797, + "learning_rate": 2.653808605972615e-05, + "loss": 10.1307, + "step": 23550 + }, + { + "epoch": 0.11765587155734225, + "grad_norm": 0.10603775829076767, + "learning_rate": 2.65365841447846e-05, + "loss": 10.1355, + "step": 23560 + }, + { + "epoch": 0.1177058103822817, + "grad_norm": 0.09258319437503815, + "learning_rate": 2.653508222984305e-05, + "loss": 10.1355, + "step": 23570 + }, + { + "epoch": 0.11775574920722115, + "grad_norm": 0.09184292703866959, + "learning_rate": 2.6533580314901498e-05, + "loss": 10.1333, + "step": 23580 + }, + { + "epoch": 0.1178056880321606, + "grad_norm": 0.09775898605585098, + "learning_rate": 2.653207839995995e-05, + "loss": 10.137, + "step": 23590 + }, + { + "epoch": 0.11785562685710005, + "grad_norm": 0.0923045426607132, + "learning_rate": 2.6530576485018398e-05, + "loss": 10.1308, + "step": 23600 + }, + { + "epoch": 0.1179055656820395, + "grad_norm": 0.09056799858808517, + "learning_rate": 2.652907457007685e-05, + "loss": 10.1359, + "step": 23610 + }, + { + "epoch": 0.11795550450697895, + "grad_norm": 0.09372033178806305, + "learning_rate": 2.65275726551353e-05, + "loss": 10.1317, + "step": 23620 + }, + { + "epoch": 0.1180054433319184, + "grad_norm": 0.09503508359193802, + "learning_rate": 2.6526070740193745e-05, + "loss": 10.1331, + "step": 23630 + }, + { + "epoch": 0.11805538215685785, + "grad_norm": 0.09287771582603455, + "learning_rate": 2.65245688252522e-05, + "loss": 10.1362, + "step": 23640 + }, + { + "epoch": 0.1181053209817973, + "grad_norm": 0.09275928884744644, + "learning_rate": 2.6523066910310646e-05, + "loss": 10.1329, + "step": 23650 + }, + { + "epoch": 0.11815525980673675, + "grad_norm": 0.09057994186878204, + "learning_rate": 2.6521564995369096e-05, + "loss": 10.1317, + "step": 23660 + }, + { + "epoch": 0.1182051986316762, + "grad_norm": 0.09248855710029602, + "learning_rate": 2.6520063080427546e-05, + "loss": 10.1321, + "step": 23670 + }, + { + "epoch": 0.11825513745661564, + "grad_norm": 0.09153629094362259, + "learning_rate": 2.6518561165485993e-05, + "loss": 10.1315, + "step": 23680 + }, + { + "epoch": 0.1183050762815551, + "grad_norm": 0.08953340351581573, + "learning_rate": 2.6517059250544446e-05, + "loss": 10.1348, + "step": 23690 + }, + { + "epoch": 0.11835501510649454, + "grad_norm": 0.09959051012992859, + "learning_rate": 2.6515557335602893e-05, + "loss": 10.1346, + "step": 23700 + }, + { + "epoch": 0.11840495393143399, + "grad_norm": 0.09226525574922562, + "learning_rate": 2.6514055420661343e-05, + "loss": 10.1304, + "step": 23710 + }, + { + "epoch": 0.11845489275637344, + "grad_norm": 0.09568504989147186, + "learning_rate": 2.6512553505719794e-05, + "loss": 10.1275, + "step": 23720 + }, + { + "epoch": 0.11850483158131289, + "grad_norm": 0.0921492800116539, + "learning_rate": 2.651105159077824e-05, + "loss": 10.1283, + "step": 23730 + }, + { + "epoch": 0.11855477040625234, + "grad_norm": 0.09444059431552887, + "learning_rate": 2.6509549675836694e-05, + "loss": 10.1336, + "step": 23740 + }, + { + "epoch": 0.11860470923119179, + "grad_norm": 0.10168434679508209, + "learning_rate": 2.650804776089514e-05, + "loss": 10.1297, + "step": 23750 + }, + { + "epoch": 0.11865464805613124, + "grad_norm": 0.09380350261926651, + "learning_rate": 2.650654584595359e-05, + "loss": 10.1278, + "step": 23760 + }, + { + "epoch": 0.11870458688107069, + "grad_norm": 0.10053762793540955, + "learning_rate": 2.650504393101204e-05, + "loss": 10.1349, + "step": 23770 + }, + { + "epoch": 0.11875452570601014, + "grad_norm": 0.09257914870977402, + "learning_rate": 2.650354201607049e-05, + "loss": 10.1311, + "step": 23780 + }, + { + "epoch": 0.11880446453094959, + "grad_norm": 0.09091336280107498, + "learning_rate": 2.650204010112894e-05, + "loss": 10.1314, + "step": 23790 + }, + { + "epoch": 0.11885440335588904, + "grad_norm": 0.09331763535737991, + "learning_rate": 2.6500538186187388e-05, + "loss": 10.1299, + "step": 23800 + }, + { + "epoch": 0.11890434218082849, + "grad_norm": 0.09806916862726212, + "learning_rate": 2.649903627124584e-05, + "loss": 10.1285, + "step": 23810 + }, + { + "epoch": 0.11895428100576794, + "grad_norm": 0.09568050503730774, + "learning_rate": 2.649753435630429e-05, + "loss": 10.1295, + "step": 23820 + }, + { + "epoch": 0.11900421983070739, + "grad_norm": 0.09739750623703003, + "learning_rate": 2.649603244136274e-05, + "loss": 10.1288, + "step": 23830 + }, + { + "epoch": 0.11905415865564684, + "grad_norm": 0.0919250100851059, + "learning_rate": 2.649453052642119e-05, + "loss": 10.1282, + "step": 23840 + }, + { + "epoch": 0.11910409748058629, + "grad_norm": 0.09634441137313843, + "learning_rate": 2.6493028611479636e-05, + "loss": 10.1263, + "step": 23850 + }, + { + "epoch": 0.11915403630552573, + "grad_norm": 0.09239911288022995, + "learning_rate": 2.6491526696538086e-05, + "loss": 10.1264, + "step": 23860 + }, + { + "epoch": 0.11920397513046518, + "grad_norm": 0.09922140091657639, + "learning_rate": 2.6490024781596536e-05, + "loss": 10.1257, + "step": 23870 + }, + { + "epoch": 0.11925391395540463, + "grad_norm": 0.09263952821493149, + "learning_rate": 2.6488522866654986e-05, + "loss": 10.127, + "step": 23880 + }, + { + "epoch": 0.11930385278034408, + "grad_norm": 0.09615729004144669, + "learning_rate": 2.6487020951713437e-05, + "loss": 10.125, + "step": 23890 + }, + { + "epoch": 0.11935379160528353, + "grad_norm": 0.09644229710102081, + "learning_rate": 2.6485519036771883e-05, + "loss": 10.1238, + "step": 23900 + }, + { + "epoch": 0.11940373043022298, + "grad_norm": 0.09358090162277222, + "learning_rate": 2.6484017121830333e-05, + "loss": 10.1288, + "step": 23910 + }, + { + "epoch": 0.11945366925516243, + "grad_norm": 0.0884949341416359, + "learning_rate": 2.6482515206888784e-05, + "loss": 10.1262, + "step": 23920 + }, + { + "epoch": 0.11950360808010188, + "grad_norm": 0.09220154583454132, + "learning_rate": 2.6481013291947234e-05, + "loss": 10.1266, + "step": 23930 + }, + { + "epoch": 0.11955354690504133, + "grad_norm": 0.09416556358337402, + "learning_rate": 2.6479511377005684e-05, + "loss": 10.1293, + "step": 23940 + }, + { + "epoch": 0.11960348572998078, + "grad_norm": 0.09228892624378204, + "learning_rate": 2.647800946206413e-05, + "loss": 10.1261, + "step": 23950 + }, + { + "epoch": 0.11965342455492023, + "grad_norm": 0.09278880804777145, + "learning_rate": 2.647650754712258e-05, + "loss": 10.1252, + "step": 23960 + }, + { + "epoch": 0.11970336337985968, + "grad_norm": 0.09540235996246338, + "learning_rate": 2.647500563218103e-05, + "loss": 10.1248, + "step": 23970 + }, + { + "epoch": 0.11975330220479911, + "grad_norm": 0.09458044171333313, + "learning_rate": 2.647350371723948e-05, + "loss": 10.1272, + "step": 23980 + }, + { + "epoch": 0.11980324102973856, + "grad_norm": 0.09298811852931976, + "learning_rate": 2.647200180229793e-05, + "loss": 10.1196, + "step": 23990 + }, + { + "epoch": 0.11985317985467801, + "grad_norm": 0.09707672894001007, + "learning_rate": 2.6470499887356378e-05, + "loss": 10.1218, + "step": 24000 + }, + { + "epoch": 0.11990311867961746, + "grad_norm": 0.09375189244747162, + "learning_rate": 2.646899797241483e-05, + "loss": 10.1282, + "step": 24010 + }, + { + "epoch": 0.11995305750455691, + "grad_norm": 0.1018557995557785, + "learning_rate": 2.646749605747328e-05, + "loss": 10.1204, + "step": 24020 + }, + { + "epoch": 0.12000299632949636, + "grad_norm": 0.09262654185295105, + "learning_rate": 2.646599414253173e-05, + "loss": 10.1235, + "step": 24030 + }, + { + "epoch": 0.12005293515443581, + "grad_norm": 0.09324292093515396, + "learning_rate": 2.646449222759018e-05, + "loss": 10.1253, + "step": 24040 + }, + { + "epoch": 0.12010287397937526, + "grad_norm": 0.09518759697675705, + "learning_rate": 2.6462990312648626e-05, + "loss": 10.1231, + "step": 24050 + }, + { + "epoch": 0.12015281280431471, + "grad_norm": 0.09306249022483826, + "learning_rate": 2.6461488397707076e-05, + "loss": 10.126, + "step": 24060 + }, + { + "epoch": 0.12020275162925416, + "grad_norm": 0.09253344684839249, + "learning_rate": 2.6459986482765526e-05, + "loss": 10.1173, + "step": 24070 + }, + { + "epoch": 0.12025269045419361, + "grad_norm": 0.09460706263780594, + "learning_rate": 2.6458484567823976e-05, + "loss": 10.1269, + "step": 24080 + }, + { + "epoch": 0.12030262927913306, + "grad_norm": 0.08938644081354141, + "learning_rate": 2.6456982652882427e-05, + "loss": 10.1225, + "step": 24090 + }, + { + "epoch": 0.12035256810407251, + "grad_norm": 0.09312314540147781, + "learning_rate": 2.6455480737940873e-05, + "loss": 10.122, + "step": 24100 + }, + { + "epoch": 0.12040250692901196, + "grad_norm": 0.09598222374916077, + "learning_rate": 2.6453978822999323e-05, + "loss": 10.1195, + "step": 24110 + }, + { + "epoch": 0.1204524457539514, + "grad_norm": 0.09335832297801971, + "learning_rate": 2.6452476908057774e-05, + "loss": 10.1204, + "step": 24120 + }, + { + "epoch": 0.12050238457889086, + "grad_norm": 0.09654422104358673, + "learning_rate": 2.6450974993116224e-05, + "loss": 10.1206, + "step": 24130 + }, + { + "epoch": 0.1205523234038303, + "grad_norm": 0.0975603386759758, + "learning_rate": 2.6449473078174674e-05, + "loss": 10.1211, + "step": 24140 + }, + { + "epoch": 0.12060226222876975, + "grad_norm": 0.09276950359344482, + "learning_rate": 2.6447971163233124e-05, + "loss": 10.1188, + "step": 24150 + }, + { + "epoch": 0.1206522010537092, + "grad_norm": 0.0958276093006134, + "learning_rate": 2.644646924829157e-05, + "loss": 10.1241, + "step": 24160 + }, + { + "epoch": 0.12070213987864865, + "grad_norm": 0.09583685547113419, + "learning_rate": 2.644496733335002e-05, + "loss": 10.1209, + "step": 24170 + }, + { + "epoch": 0.1207520787035881, + "grad_norm": 0.09288566559553146, + "learning_rate": 2.644346541840847e-05, + "loss": 10.1184, + "step": 24180 + }, + { + "epoch": 0.12080201752852755, + "grad_norm": 0.0899089053273201, + "learning_rate": 2.644196350346692e-05, + "loss": 10.1188, + "step": 24190 + }, + { + "epoch": 0.120851956353467, + "grad_norm": 0.09436280280351639, + "learning_rate": 2.6440461588525372e-05, + "loss": 10.12, + "step": 24200 + }, + { + "epoch": 0.12090189517840645, + "grad_norm": 0.09277939051389694, + "learning_rate": 2.643895967358382e-05, + "loss": 10.1183, + "step": 24210 + }, + { + "epoch": 0.1209518340033459, + "grad_norm": 0.08972795307636261, + "learning_rate": 2.643745775864227e-05, + "loss": 10.1201, + "step": 24220 + }, + { + "epoch": 0.12100177282828535, + "grad_norm": 0.09541310369968414, + "learning_rate": 2.643595584370072e-05, + "loss": 10.1192, + "step": 24230 + }, + { + "epoch": 0.1210517116532248, + "grad_norm": 0.08959456533193588, + "learning_rate": 2.643445392875917e-05, + "loss": 10.1224, + "step": 24240 + }, + { + "epoch": 0.12110165047816425, + "grad_norm": 0.09786181151866913, + "learning_rate": 2.643295201381762e-05, + "loss": 10.117, + "step": 24250 + }, + { + "epoch": 0.1211515893031037, + "grad_norm": 0.09291178733110428, + "learning_rate": 2.6431450098876066e-05, + "loss": 10.1203, + "step": 24260 + }, + { + "epoch": 0.12120152812804315, + "grad_norm": 0.10062135756015778, + "learning_rate": 2.6429948183934516e-05, + "loss": 10.1179, + "step": 24270 + }, + { + "epoch": 0.1212514669529826, + "grad_norm": 0.09641305357217789, + "learning_rate": 2.6428446268992966e-05, + "loss": 10.1198, + "step": 24280 + }, + { + "epoch": 0.12130140577792205, + "grad_norm": 0.09644697606563568, + "learning_rate": 2.6426944354051417e-05, + "loss": 10.113, + "step": 24290 + }, + { + "epoch": 0.1213513446028615, + "grad_norm": 0.0892978310585022, + "learning_rate": 2.6425442439109867e-05, + "loss": 10.118, + "step": 24300 + }, + { + "epoch": 0.12140128342780095, + "grad_norm": 0.0910135954618454, + "learning_rate": 2.6423940524168313e-05, + "loss": 10.1194, + "step": 24310 + }, + { + "epoch": 0.1214512222527404, + "grad_norm": 0.0961785763502121, + "learning_rate": 2.6422438609226764e-05, + "loss": 10.116, + "step": 24320 + }, + { + "epoch": 0.12150116107767985, + "grad_norm": 0.09364509582519531, + "learning_rate": 2.6420936694285214e-05, + "loss": 10.111, + "step": 24330 + }, + { + "epoch": 0.1215510999026193, + "grad_norm": 0.09308196604251862, + "learning_rate": 2.6419434779343664e-05, + "loss": 10.1159, + "step": 24340 + }, + { + "epoch": 0.12160103872755874, + "grad_norm": 0.09322807192802429, + "learning_rate": 2.6417932864402114e-05, + "loss": 10.1159, + "step": 24350 + }, + { + "epoch": 0.1216509775524982, + "grad_norm": 0.09460312128067017, + "learning_rate": 2.641643094946056e-05, + "loss": 10.1212, + "step": 24360 + }, + { + "epoch": 0.12170091637743764, + "grad_norm": 0.0933726355433464, + "learning_rate": 2.641492903451901e-05, + "loss": 10.1137, + "step": 24370 + }, + { + "epoch": 0.12175085520237709, + "grad_norm": 0.09838582575321198, + "learning_rate": 2.641342711957746e-05, + "loss": 10.1168, + "step": 24380 + }, + { + "epoch": 0.12180079402731654, + "grad_norm": 0.09491819143295288, + "learning_rate": 2.641192520463591e-05, + "loss": 10.1143, + "step": 24390 + }, + { + "epoch": 0.12185073285225599, + "grad_norm": 0.0954071581363678, + "learning_rate": 2.6410423289694362e-05, + "loss": 10.1135, + "step": 24400 + }, + { + "epoch": 0.12190067167719544, + "grad_norm": 0.09948980063199997, + "learning_rate": 2.6408921374752812e-05, + "loss": 10.1176, + "step": 24410 + }, + { + "epoch": 0.12195061050213489, + "grad_norm": 0.09514214843511581, + "learning_rate": 2.640741945981126e-05, + "loss": 10.1187, + "step": 24420 + }, + { + "epoch": 0.12200054932707434, + "grad_norm": 0.09480323642492294, + "learning_rate": 2.640591754486971e-05, + "loss": 10.1122, + "step": 24430 + }, + { + "epoch": 0.12205048815201379, + "grad_norm": 0.09891213476657867, + "learning_rate": 2.640441562992816e-05, + "loss": 10.1107, + "step": 24440 + }, + { + "epoch": 0.12210042697695324, + "grad_norm": 0.09468024224042892, + "learning_rate": 2.640291371498661e-05, + "loss": 10.1156, + "step": 24450 + }, + { + "epoch": 0.12215036580189269, + "grad_norm": 0.08860956132411957, + "learning_rate": 2.640141180004506e-05, + "loss": 10.1124, + "step": 24460 + }, + { + "epoch": 0.12220030462683214, + "grad_norm": 0.09128830581903458, + "learning_rate": 2.639990988510351e-05, + "loss": 10.1173, + "step": 24470 + }, + { + "epoch": 0.12225024345177157, + "grad_norm": 0.09434867650270462, + "learning_rate": 2.6398407970161956e-05, + "loss": 10.1109, + "step": 24480 + }, + { + "epoch": 0.12230018227671102, + "grad_norm": 0.08857893943786621, + "learning_rate": 2.6396906055220407e-05, + "loss": 10.1099, + "step": 24490 + }, + { + "epoch": 0.12235012110165047, + "grad_norm": 0.09494607895612717, + "learning_rate": 2.6395404140278857e-05, + "loss": 10.1118, + "step": 24500 + }, + { + "epoch": 0.12240005992658992, + "grad_norm": 0.09442712366580963, + "learning_rate": 2.6393902225337307e-05, + "loss": 10.1108, + "step": 24510 + }, + { + "epoch": 0.12244999875152937, + "grad_norm": 0.0935530737042427, + "learning_rate": 2.6392400310395757e-05, + "loss": 10.1098, + "step": 24520 + }, + { + "epoch": 0.12249993757646882, + "grad_norm": 0.09096985310316086, + "learning_rate": 2.6390898395454204e-05, + "loss": 10.1132, + "step": 24530 + }, + { + "epoch": 0.12254987640140827, + "grad_norm": 0.10358789563179016, + "learning_rate": 2.6389396480512654e-05, + "loss": 10.1113, + "step": 24540 + }, + { + "epoch": 0.12259981522634772, + "grad_norm": 0.10106457769870758, + "learning_rate": 2.6387894565571104e-05, + "loss": 10.1086, + "step": 24550 + }, + { + "epoch": 0.12264975405128717, + "grad_norm": 0.10155883431434631, + "learning_rate": 2.6386392650629554e-05, + "loss": 10.1106, + "step": 24560 + }, + { + "epoch": 0.12269969287622662, + "grad_norm": 0.09161602705717087, + "learning_rate": 2.6384890735688005e-05, + "loss": 10.1158, + "step": 24570 + }, + { + "epoch": 0.12274963170116607, + "grad_norm": 0.09335344284772873, + "learning_rate": 2.638338882074645e-05, + "loss": 10.1075, + "step": 24580 + }, + { + "epoch": 0.12279957052610552, + "grad_norm": 0.0948810800909996, + "learning_rate": 2.63818869058049e-05, + "loss": 10.1117, + "step": 24590 + }, + { + "epoch": 0.12284950935104497, + "grad_norm": 0.09355373680591583, + "learning_rate": 2.6380384990863352e-05, + "loss": 10.1119, + "step": 24600 + }, + { + "epoch": 0.12289944817598442, + "grad_norm": 0.09632193297147751, + "learning_rate": 2.6378883075921802e-05, + "loss": 10.109, + "step": 24610 + }, + { + "epoch": 0.12294938700092387, + "grad_norm": 0.09129033237695694, + "learning_rate": 2.6377381160980252e-05, + "loss": 10.1067, + "step": 24620 + }, + { + "epoch": 0.12299932582586331, + "grad_norm": 0.09343235939741135, + "learning_rate": 2.63758792460387e-05, + "loss": 10.11, + "step": 24630 + }, + { + "epoch": 0.12304926465080276, + "grad_norm": 0.0997580885887146, + "learning_rate": 2.637437733109715e-05, + "loss": 10.1113, + "step": 24640 + }, + { + "epoch": 0.12309920347574221, + "grad_norm": 0.09580541402101517, + "learning_rate": 2.63728754161556e-05, + "loss": 10.1108, + "step": 24650 + }, + { + "epoch": 0.12314914230068166, + "grad_norm": 0.0917908325791359, + "learning_rate": 2.637137350121405e-05, + "loss": 10.1129, + "step": 24660 + }, + { + "epoch": 0.12319908112562111, + "grad_norm": 0.09625443816184998, + "learning_rate": 2.63698715862725e-05, + "loss": 10.1065, + "step": 24670 + }, + { + "epoch": 0.12324901995056056, + "grad_norm": 0.0961778461933136, + "learning_rate": 2.6368369671330946e-05, + "loss": 10.1107, + "step": 24680 + }, + { + "epoch": 0.12329895877550001, + "grad_norm": 0.09377621114253998, + "learning_rate": 2.6366867756389397e-05, + "loss": 10.1132, + "step": 24690 + }, + { + "epoch": 0.12334889760043946, + "grad_norm": 0.09123267978429794, + "learning_rate": 2.6365365841447847e-05, + "loss": 10.1092, + "step": 24700 + }, + { + "epoch": 0.12339883642537891, + "grad_norm": 0.09666462987661362, + "learning_rate": 2.6363863926506297e-05, + "loss": 10.106, + "step": 24710 + }, + { + "epoch": 0.12344877525031836, + "grad_norm": 0.09382550418376923, + "learning_rate": 2.6362362011564747e-05, + "loss": 10.1071, + "step": 24720 + }, + { + "epoch": 0.12349871407525781, + "grad_norm": 0.10108333826065063, + "learning_rate": 2.6360860096623194e-05, + "loss": 10.1022, + "step": 24730 + }, + { + "epoch": 0.12354865290019726, + "grad_norm": 0.089703269302845, + "learning_rate": 2.6359358181681644e-05, + "loss": 10.1107, + "step": 24740 + }, + { + "epoch": 0.12359859172513671, + "grad_norm": 0.09390883892774582, + "learning_rate": 2.6357856266740094e-05, + "loss": 10.1051, + "step": 24750 + }, + { + "epoch": 0.12364853055007616, + "grad_norm": 0.0946129560470581, + "learning_rate": 2.6356354351798544e-05, + "loss": 10.1041, + "step": 24760 + }, + { + "epoch": 0.12369846937501561, + "grad_norm": 0.09596185386180878, + "learning_rate": 2.6354852436856995e-05, + "loss": 10.1073, + "step": 24770 + }, + { + "epoch": 0.12374840819995506, + "grad_norm": 0.09378567337989807, + "learning_rate": 2.635335052191544e-05, + "loss": 10.1047, + "step": 24780 + }, + { + "epoch": 0.1237983470248945, + "grad_norm": 0.09474289417266846, + "learning_rate": 2.6351848606973895e-05, + "loss": 10.1043, + "step": 24790 + }, + { + "epoch": 0.12384828584983396, + "grad_norm": 0.09176385402679443, + "learning_rate": 2.6350346692032342e-05, + "loss": 10.1048, + "step": 24800 + }, + { + "epoch": 0.1238982246747734, + "grad_norm": 0.0884096696972847, + "learning_rate": 2.6348844777090792e-05, + "loss": 10.1021, + "step": 24810 + }, + { + "epoch": 0.12394816349971285, + "grad_norm": 0.09617920964956284, + "learning_rate": 2.6347342862149242e-05, + "loss": 10.1041, + "step": 24820 + }, + { + "epoch": 0.1239981023246523, + "grad_norm": 0.09283927828073502, + "learning_rate": 2.634584094720769e-05, + "loss": 10.1026, + "step": 24830 + }, + { + "epoch": 0.12404804114959175, + "grad_norm": 0.09213951230049133, + "learning_rate": 2.6344339032266142e-05, + "loss": 10.1019, + "step": 24840 + }, + { + "epoch": 0.1240979799745312, + "grad_norm": 0.09610681235790253, + "learning_rate": 2.634283711732459e-05, + "loss": 10.1075, + "step": 24850 + }, + { + "epoch": 0.12414791879947065, + "grad_norm": 0.10006497800350189, + "learning_rate": 2.634133520238304e-05, + "loss": 10.103, + "step": 24860 + }, + { + "epoch": 0.1241978576244101, + "grad_norm": 0.09113096445798874, + "learning_rate": 2.633983328744149e-05, + "loss": 10.1074, + "step": 24870 + }, + { + "epoch": 0.12424779644934955, + "grad_norm": 0.10478292405605316, + "learning_rate": 2.6338331372499936e-05, + "loss": 10.0992, + "step": 24880 + }, + { + "epoch": 0.124297735274289, + "grad_norm": 0.09672685712575912, + "learning_rate": 2.633682945755839e-05, + "loss": 10.1028, + "step": 24890 + }, + { + "epoch": 0.12434767409922845, + "grad_norm": 0.0986834466457367, + "learning_rate": 2.6335327542616837e-05, + "loss": 10.0989, + "step": 24900 + }, + { + "epoch": 0.1243976129241679, + "grad_norm": 0.10003640502691269, + "learning_rate": 2.6333825627675287e-05, + "loss": 10.1044, + "step": 24910 + }, + { + "epoch": 0.12444755174910735, + "grad_norm": 0.09739897400140762, + "learning_rate": 2.6332323712733737e-05, + "loss": 10.1055, + "step": 24920 + }, + { + "epoch": 0.1244974905740468, + "grad_norm": 0.09403092414140701, + "learning_rate": 2.6330821797792184e-05, + "loss": 10.1052, + "step": 24930 + }, + { + "epoch": 0.12454742939898625, + "grad_norm": 0.0988364890217781, + "learning_rate": 2.6329319882850637e-05, + "loss": 10.1025, + "step": 24940 + }, + { + "epoch": 0.1245973682239257, + "grad_norm": 0.09991788864135742, + "learning_rate": 2.6327817967909084e-05, + "loss": 10.1009, + "step": 24950 + }, + { + "epoch": 0.12464730704886515, + "grad_norm": 0.09546318650245667, + "learning_rate": 2.6326316052967534e-05, + "loss": 10.0996, + "step": 24960 + }, + { + "epoch": 0.12469724587380458, + "grad_norm": 0.09446793794631958, + "learning_rate": 2.6324814138025985e-05, + "loss": 10.1004, + "step": 24970 + }, + { + "epoch": 0.12474718469874403, + "grad_norm": 0.09391462057828903, + "learning_rate": 2.632331222308443e-05, + "loss": 10.1005, + "step": 24980 + }, + { + "epoch": 0.12479712352368348, + "grad_norm": 0.08938247710466385, + "learning_rate": 2.6321810308142885e-05, + "loss": 10.0976, + "step": 24990 + }, + { + "epoch": 0.12484706234862293, + "grad_norm": 0.0972147136926651, + "learning_rate": 2.6320308393201332e-05, + "loss": 10.0994, + "step": 25000 + }, + { + "epoch": 0.12489700117356238, + "grad_norm": 0.09196930378675461, + "learning_rate": 2.6318806478259782e-05, + "loss": 10.0997, + "step": 25010 + }, + { + "epoch": 0.12494693999850183, + "grad_norm": 0.10306332260370255, + "learning_rate": 2.6317304563318232e-05, + "loss": 10.0984, + "step": 25020 + }, + { + "epoch": 0.12499687882344128, + "grad_norm": 0.09357580542564392, + "learning_rate": 2.631580264837668e-05, + "loss": 10.1022, + "step": 25030 + }, + { + "epoch": 0.12504681764838074, + "grad_norm": 0.09815985709428787, + "learning_rate": 2.6314300733435132e-05, + "loss": 10.0966, + "step": 25040 + }, + { + "epoch": 0.1250967564733202, + "grad_norm": 0.09139269590377808, + "learning_rate": 2.631279881849358e-05, + "loss": 10.0957, + "step": 25050 + }, + { + "epoch": 0.12514669529825964, + "grad_norm": 0.10160529613494873, + "learning_rate": 2.631129690355203e-05, + "loss": 10.098, + "step": 25060 + }, + { + "epoch": 0.1251966341231991, + "grad_norm": 0.0978497862815857, + "learning_rate": 2.630979498861048e-05, + "loss": 10.1, + "step": 25070 + }, + { + "epoch": 0.12524657294813854, + "grad_norm": 0.09045752882957458, + "learning_rate": 2.6308293073668926e-05, + "loss": 10.1027, + "step": 25080 + }, + { + "epoch": 0.125296511773078, + "grad_norm": 0.08881735056638718, + "learning_rate": 2.630679115872738e-05, + "loss": 10.0955, + "step": 25090 + }, + { + "epoch": 0.12534645059801744, + "grad_norm": 0.0939692035317421, + "learning_rate": 2.6305289243785827e-05, + "loss": 10.101, + "step": 25100 + }, + { + "epoch": 0.1253963894229569, + "grad_norm": 0.09511993080377579, + "learning_rate": 2.630378732884428e-05, + "loss": 10.0969, + "step": 25110 + }, + { + "epoch": 0.12544632824789634, + "grad_norm": 0.1041262075304985, + "learning_rate": 2.6302285413902727e-05, + "loss": 10.0912, + "step": 25120 + }, + { + "epoch": 0.1254962670728358, + "grad_norm": 0.09648961573839188, + "learning_rate": 2.6300783498961174e-05, + "loss": 10.0957, + "step": 25130 + }, + { + "epoch": 0.12554620589777524, + "grad_norm": 0.0973757728934288, + "learning_rate": 2.6299281584019627e-05, + "loss": 10.1005, + "step": 25140 + }, + { + "epoch": 0.1255961447227147, + "grad_norm": 0.08688199520111084, + "learning_rate": 2.6297779669078074e-05, + "loss": 10.0966, + "step": 25150 + }, + { + "epoch": 0.12564608354765414, + "grad_norm": 0.09321999549865723, + "learning_rate": 2.6296277754136528e-05, + "loss": 10.0924, + "step": 25160 + }, + { + "epoch": 0.12569602237259359, + "grad_norm": 0.09925235062837601, + "learning_rate": 2.6294775839194975e-05, + "loss": 10.0918, + "step": 25170 + }, + { + "epoch": 0.12574596119753303, + "grad_norm": 0.096058689057827, + "learning_rate": 2.629327392425342e-05, + "loss": 10.0918, + "step": 25180 + }, + { + "epoch": 0.12579590002247248, + "grad_norm": 0.09322872012853622, + "learning_rate": 2.6291772009311875e-05, + "loss": 10.0941, + "step": 25190 + }, + { + "epoch": 0.12584583884741193, + "grad_norm": 0.09035428613424301, + "learning_rate": 2.6290270094370322e-05, + "loss": 10.09, + "step": 25200 + }, + { + "epoch": 0.12589577767235138, + "grad_norm": 0.09396304190158844, + "learning_rate": 2.6288768179428775e-05, + "loss": 10.0977, + "step": 25210 + }, + { + "epoch": 0.1259457164972908, + "grad_norm": 0.09849023818969727, + "learning_rate": 2.6287266264487222e-05, + "loss": 10.0954, + "step": 25220 + }, + { + "epoch": 0.12599565532223025, + "grad_norm": 0.09524643421173096, + "learning_rate": 2.628576434954567e-05, + "loss": 10.0929, + "step": 25230 + }, + { + "epoch": 0.1260455941471697, + "grad_norm": 0.09641117602586746, + "learning_rate": 2.6284262434604122e-05, + "loss": 10.093, + "step": 25240 + }, + { + "epoch": 0.12609553297210915, + "grad_norm": 0.0981585755944252, + "learning_rate": 2.628276051966257e-05, + "loss": 10.0965, + "step": 25250 + }, + { + "epoch": 0.1261454717970486, + "grad_norm": 0.09640777111053467, + "learning_rate": 2.6281258604721023e-05, + "loss": 10.0929, + "step": 25260 + }, + { + "epoch": 0.12619541062198805, + "grad_norm": 0.0920519009232521, + "learning_rate": 2.627975668977947e-05, + "loss": 10.0947, + "step": 25270 + }, + { + "epoch": 0.1262453494469275, + "grad_norm": 0.09729231894016266, + "learning_rate": 2.6278254774837916e-05, + "loss": 10.0888, + "step": 25280 + }, + { + "epoch": 0.12629528827186695, + "grad_norm": 0.09746874123811722, + "learning_rate": 2.627675285989637e-05, + "loss": 10.0952, + "step": 25290 + }, + { + "epoch": 0.1263452270968064, + "grad_norm": 0.09574176371097565, + "learning_rate": 2.6275250944954817e-05, + "loss": 10.088, + "step": 25300 + }, + { + "epoch": 0.12639516592174585, + "grad_norm": 0.09351847320795059, + "learning_rate": 2.627374903001327e-05, + "loss": 10.0934, + "step": 25310 + }, + { + "epoch": 0.1264451047466853, + "grad_norm": 0.09245570003986359, + "learning_rate": 2.6272247115071717e-05, + "loss": 10.0963, + "step": 25320 + }, + { + "epoch": 0.12649504357162475, + "grad_norm": 0.0946178138256073, + "learning_rate": 2.6270745200130164e-05, + "loss": 10.0888, + "step": 25330 + }, + { + "epoch": 0.1265449823965642, + "grad_norm": 0.09379423409700394, + "learning_rate": 2.6269243285188617e-05, + "loss": 10.088, + "step": 25340 + }, + { + "epoch": 0.12659492122150365, + "grad_norm": 0.09336696565151215, + "learning_rate": 2.6267741370247064e-05, + "loss": 10.0913, + "step": 25350 + }, + { + "epoch": 0.1266448600464431, + "grad_norm": 0.09480159729719162, + "learning_rate": 2.6266239455305518e-05, + "loss": 10.0915, + "step": 25360 + }, + { + "epoch": 0.12669479887138255, + "grad_norm": 0.09639475494623184, + "learning_rate": 2.6264737540363965e-05, + "loss": 10.0904, + "step": 25370 + }, + { + "epoch": 0.126744737696322, + "grad_norm": 0.09714771062135696, + "learning_rate": 2.626323562542241e-05, + "loss": 10.0899, + "step": 25380 + }, + { + "epoch": 0.12679467652126145, + "grad_norm": 0.09559177607297897, + "learning_rate": 2.6261733710480865e-05, + "loss": 10.0885, + "step": 25390 + }, + { + "epoch": 0.1268446153462009, + "grad_norm": 0.09046468883752823, + "learning_rate": 2.6260231795539312e-05, + "loss": 10.0975, + "step": 25400 + }, + { + "epoch": 0.12689455417114034, + "grad_norm": 0.0968378558754921, + "learning_rate": 2.6258729880597765e-05, + "loss": 10.0879, + "step": 25410 + }, + { + "epoch": 0.1269444929960798, + "grad_norm": 0.09533467143774033, + "learning_rate": 2.6257227965656212e-05, + "loss": 10.0857, + "step": 25420 + }, + { + "epoch": 0.12699443182101924, + "grad_norm": 0.0917276069521904, + "learning_rate": 2.6255726050714662e-05, + "loss": 10.0927, + "step": 25430 + }, + { + "epoch": 0.1270443706459587, + "grad_norm": 0.096129409968853, + "learning_rate": 2.6254224135773113e-05, + "loss": 10.0886, + "step": 25440 + }, + { + "epoch": 0.12709430947089814, + "grad_norm": 0.09592961519956589, + "learning_rate": 2.625272222083156e-05, + "loss": 10.0869, + "step": 25450 + }, + { + "epoch": 0.1271442482958376, + "grad_norm": 0.1041722372174263, + "learning_rate": 2.6251220305890013e-05, + "loss": 10.0925, + "step": 25460 + }, + { + "epoch": 0.12719418712077704, + "grad_norm": 0.09002278000116348, + "learning_rate": 2.624971839094846e-05, + "loss": 10.0886, + "step": 25470 + }, + { + "epoch": 0.1272441259457165, + "grad_norm": 0.09098079055547714, + "learning_rate": 2.624821647600691e-05, + "loss": 10.0871, + "step": 25480 + }, + { + "epoch": 0.12729406477065594, + "grad_norm": 0.09563304483890533, + "learning_rate": 2.624671456106536e-05, + "loss": 10.0872, + "step": 25490 + }, + { + "epoch": 0.1273440035955954, + "grad_norm": 0.09505288302898407, + "learning_rate": 2.6245212646123807e-05, + "loss": 10.0871, + "step": 25500 + }, + { + "epoch": 0.12739394242053484, + "grad_norm": 0.09651105850934982, + "learning_rate": 2.624371073118226e-05, + "loss": 10.0902, + "step": 25510 + }, + { + "epoch": 0.1274438812454743, + "grad_norm": 0.09501788765192032, + "learning_rate": 2.6242208816240707e-05, + "loss": 10.0867, + "step": 25520 + }, + { + "epoch": 0.12749382007041374, + "grad_norm": 0.09667658805847168, + "learning_rate": 2.6240706901299157e-05, + "loss": 10.0871, + "step": 25530 + }, + { + "epoch": 0.1275437588953532, + "grad_norm": 0.09683676809072495, + "learning_rate": 2.6239204986357608e-05, + "loss": 10.0856, + "step": 25540 + }, + { + "epoch": 0.12759369772029264, + "grad_norm": 0.09272299706935883, + "learning_rate": 2.6237703071416054e-05, + "loss": 10.0899, + "step": 25550 + }, + { + "epoch": 0.1276436365452321, + "grad_norm": 0.08883216977119446, + "learning_rate": 2.6236201156474508e-05, + "loss": 10.0848, + "step": 25560 + }, + { + "epoch": 0.12769357537017154, + "grad_norm": 0.0918259397149086, + "learning_rate": 2.6234699241532955e-05, + "loss": 10.0828, + "step": 25570 + }, + { + "epoch": 0.12774351419511099, + "grad_norm": 0.0947398915886879, + "learning_rate": 2.6233197326591405e-05, + "loss": 10.0866, + "step": 25580 + }, + { + "epoch": 0.12779345302005043, + "grad_norm": 0.09740963578224182, + "learning_rate": 2.6231695411649855e-05, + "loss": 10.0829, + "step": 25590 + }, + { + "epoch": 0.12784339184498988, + "grad_norm": 0.09066878259181976, + "learning_rate": 2.6230193496708302e-05, + "loss": 10.0876, + "step": 25600 + }, + { + "epoch": 0.12789333066992933, + "grad_norm": 0.09469142556190491, + "learning_rate": 2.6228691581766755e-05, + "loss": 10.0785, + "step": 25610 + }, + { + "epoch": 0.12794326949486878, + "grad_norm": 0.09159637987613678, + "learning_rate": 2.6227189666825202e-05, + "loss": 10.0811, + "step": 25620 + }, + { + "epoch": 0.12799320831980823, + "grad_norm": 0.09090546518564224, + "learning_rate": 2.6225687751883652e-05, + "loss": 10.0842, + "step": 25630 + }, + { + "epoch": 0.12804314714474768, + "grad_norm": 0.0922180786728859, + "learning_rate": 2.6224185836942103e-05, + "loss": 10.0828, + "step": 25640 + }, + { + "epoch": 0.12809308596968713, + "grad_norm": 0.09305445849895477, + "learning_rate": 2.622268392200055e-05, + "loss": 10.0872, + "step": 25650 + }, + { + "epoch": 0.12814302479462658, + "grad_norm": 0.09471817314624786, + "learning_rate": 2.6221182007059003e-05, + "loss": 10.0857, + "step": 25660 + }, + { + "epoch": 0.12819296361956603, + "grad_norm": 0.09348314255475998, + "learning_rate": 2.621968009211745e-05, + "loss": 10.0884, + "step": 25670 + }, + { + "epoch": 0.12824290244450548, + "grad_norm": 0.09191960841417313, + "learning_rate": 2.62181781771759e-05, + "loss": 10.0895, + "step": 25680 + }, + { + "epoch": 0.12829284126944493, + "grad_norm": 0.09789571166038513, + "learning_rate": 2.621667626223435e-05, + "loss": 10.0818, + "step": 25690 + }, + { + "epoch": 0.12834278009438438, + "grad_norm": 0.09516225755214691, + "learning_rate": 2.6215174347292797e-05, + "loss": 10.0831, + "step": 25700 + }, + { + "epoch": 0.12839271891932383, + "grad_norm": 0.09300491213798523, + "learning_rate": 2.621367243235125e-05, + "loss": 10.0844, + "step": 25710 + }, + { + "epoch": 0.12844265774426328, + "grad_norm": 0.09599104523658752, + "learning_rate": 2.6212170517409697e-05, + "loss": 10.0853, + "step": 25720 + }, + { + "epoch": 0.12849259656920273, + "grad_norm": 0.09136704355478287, + "learning_rate": 2.6210668602468147e-05, + "loss": 10.0873, + "step": 25730 + }, + { + "epoch": 0.12854253539414218, + "grad_norm": 0.09239959716796875, + "learning_rate": 2.6209166687526598e-05, + "loss": 10.0794, + "step": 25740 + }, + { + "epoch": 0.12859247421908163, + "grad_norm": 0.09590020775794983, + "learning_rate": 2.6207664772585048e-05, + "loss": 10.0835, + "step": 25750 + }, + { + "epoch": 0.12864241304402108, + "grad_norm": 0.09230869263410568, + "learning_rate": 2.6206162857643498e-05, + "loss": 10.0833, + "step": 25760 + }, + { + "epoch": 0.12869235186896053, + "grad_norm": 0.0960649698972702, + "learning_rate": 2.6204660942701945e-05, + "loss": 10.0822, + "step": 25770 + }, + { + "epoch": 0.12874229069389997, + "grad_norm": 0.09332367777824402, + "learning_rate": 2.6203159027760395e-05, + "loss": 10.0848, + "step": 25780 + }, + { + "epoch": 0.12879222951883942, + "grad_norm": 0.09813748300075531, + "learning_rate": 2.6201657112818845e-05, + "loss": 10.0807, + "step": 25790 + }, + { + "epoch": 0.12884216834377887, + "grad_norm": 0.09281601011753082, + "learning_rate": 2.6200155197877295e-05, + "loss": 10.0862, + "step": 25800 + }, + { + "epoch": 0.12889210716871832, + "grad_norm": 0.09574627131223679, + "learning_rate": 2.6198653282935745e-05, + "loss": 10.0784, + "step": 25810 + }, + { + "epoch": 0.12894204599365777, + "grad_norm": 0.0939335897564888, + "learning_rate": 2.6197151367994192e-05, + "loss": 10.0865, + "step": 25820 + }, + { + "epoch": 0.12899198481859722, + "grad_norm": 0.0975639671087265, + "learning_rate": 2.6195649453052642e-05, + "loss": 10.0783, + "step": 25830 + }, + { + "epoch": 0.12904192364353667, + "grad_norm": 0.0939013734459877, + "learning_rate": 2.6194147538111093e-05, + "loss": 10.0837, + "step": 25840 + }, + { + "epoch": 0.12909186246847612, + "grad_norm": 0.09316052496433258, + "learning_rate": 2.6192645623169543e-05, + "loss": 10.0781, + "step": 25850 + }, + { + "epoch": 0.12914180129341557, + "grad_norm": 0.09889581054449081, + "learning_rate": 2.6191143708227993e-05, + "loss": 10.0742, + "step": 25860 + }, + { + "epoch": 0.12919174011835502, + "grad_norm": 0.09303954988718033, + "learning_rate": 2.618964179328644e-05, + "loss": 10.0812, + "step": 25870 + }, + { + "epoch": 0.12924167894329447, + "grad_norm": 0.09554919600486755, + "learning_rate": 2.618813987834489e-05, + "loss": 10.0706, + "step": 25880 + }, + { + "epoch": 0.12929161776823392, + "grad_norm": 0.09778593480587006, + "learning_rate": 2.618663796340334e-05, + "loss": 10.0771, + "step": 25890 + }, + { + "epoch": 0.12934155659317337, + "grad_norm": 0.09445203095674515, + "learning_rate": 2.618513604846179e-05, + "loss": 10.0793, + "step": 25900 + }, + { + "epoch": 0.12939149541811282, + "grad_norm": 0.09079139679670334, + "learning_rate": 2.618363413352024e-05, + "loss": 10.0755, + "step": 25910 + }, + { + "epoch": 0.12944143424305227, + "grad_norm": 5018.11767578125, + "learning_rate": 2.6182132218578687e-05, + "loss": 10.1051, + "step": 25920 + }, + { + "epoch": 0.12949137306799172, + "grad_norm": 0.09410411864519119, + "learning_rate": 2.6180630303637137e-05, + "loss": 10.0779, + "step": 25930 + }, + { + "epoch": 0.12954131189293117, + "grad_norm": 0.09051043540239334, + "learning_rate": 2.6179128388695588e-05, + "loss": 10.0751, + "step": 25940 + }, + { + "epoch": 0.12959125071787062, + "grad_norm": 0.09015491604804993, + "learning_rate": 2.6177626473754038e-05, + "loss": 10.0803, + "step": 25950 + }, + { + "epoch": 0.12964118954281006, + "grad_norm": 0.09758006781339645, + "learning_rate": 2.6176124558812488e-05, + "loss": 10.0734, + "step": 25960 + }, + { + "epoch": 0.12969112836774951, + "grad_norm": 0.0914853885769844, + "learning_rate": 2.6174622643870935e-05, + "loss": 10.0797, + "step": 25970 + }, + { + "epoch": 0.12974106719268896, + "grad_norm": 0.09737791121006012, + "learning_rate": 2.6173120728929385e-05, + "loss": 10.0749, + "step": 25980 + }, + { + "epoch": 0.1297910060176284, + "grad_norm": 0.09865185618400574, + "learning_rate": 2.6171618813987835e-05, + "loss": 10.0764, + "step": 25990 + }, + { + "epoch": 0.12984094484256786, + "grad_norm": 0.0985475480556488, + "learning_rate": 2.6170116899046285e-05, + "loss": 10.075, + "step": 26000 + }, + { + "epoch": 0.1298908836675073, + "grad_norm": 0.09121818840503693, + "learning_rate": 2.6168614984104735e-05, + "loss": 10.0797, + "step": 26010 + }, + { + "epoch": 0.12994082249244676, + "grad_norm": 0.10556277632713318, + "learning_rate": 2.6167113069163182e-05, + "loss": 10.0783, + "step": 26020 + }, + { + "epoch": 0.1299907613173862, + "grad_norm": 0.0959169790148735, + "learning_rate": 2.6165611154221632e-05, + "loss": 10.0753, + "step": 26030 + }, + { + "epoch": 0.13004070014232566, + "grad_norm": 0.09328239411115646, + "learning_rate": 2.6164109239280083e-05, + "loss": 10.0748, + "step": 26040 + }, + { + "epoch": 0.1300906389672651, + "grad_norm": 0.08964414894580841, + "learning_rate": 2.6162607324338533e-05, + "loss": 10.075, + "step": 26050 + }, + { + "epoch": 0.13014057779220456, + "grad_norm": 0.1068107932806015, + "learning_rate": 2.6161105409396983e-05, + "loss": 10.0722, + "step": 26060 + }, + { + "epoch": 0.130190516617144, + "grad_norm": 0.09689830243587494, + "learning_rate": 2.6159603494455433e-05, + "loss": 10.0727, + "step": 26070 + }, + { + "epoch": 0.13024045544208346, + "grad_norm": 0.09589334577322006, + "learning_rate": 2.615810157951388e-05, + "loss": 10.0663, + "step": 26080 + }, + { + "epoch": 0.1302903942670229, + "grad_norm": 0.09473288059234619, + "learning_rate": 2.615659966457233e-05, + "loss": 10.0675, + "step": 26090 + }, + { + "epoch": 0.13034033309196236, + "grad_norm": 0.09076739102602005, + "learning_rate": 2.615509774963078e-05, + "loss": 10.0772, + "step": 26100 + }, + { + "epoch": 0.1303902719169018, + "grad_norm": 0.09733912348747253, + "learning_rate": 2.615359583468923e-05, + "loss": 10.0746, + "step": 26110 + }, + { + "epoch": 0.13044021074184126, + "grad_norm": 0.10011307895183563, + "learning_rate": 2.615209391974768e-05, + "loss": 10.0657, + "step": 26120 + }, + { + "epoch": 0.1304901495667807, + "grad_norm": 0.09807174652814865, + "learning_rate": 2.6150592004806127e-05, + "loss": 10.0688, + "step": 26130 + }, + { + "epoch": 0.13054008839172015, + "grad_norm": 0.092112235724926, + "learning_rate": 2.6149090089864578e-05, + "loss": 10.0723, + "step": 26140 + }, + { + "epoch": 0.1305900272166596, + "grad_norm": 0.09255208820104599, + "learning_rate": 2.6147588174923028e-05, + "loss": 10.0682, + "step": 26150 + }, + { + "epoch": 0.13063996604159905, + "grad_norm": 0.09178891032934189, + "learning_rate": 2.6146086259981478e-05, + "loss": 10.0694, + "step": 26160 + }, + { + "epoch": 0.1306899048665385, + "grad_norm": 0.09472060203552246, + "learning_rate": 2.6144584345039928e-05, + "loss": 10.0669, + "step": 26170 + }, + { + "epoch": 0.13073984369147795, + "grad_norm": 0.08864433318376541, + "learning_rate": 2.6143082430098375e-05, + "loss": 10.0716, + "step": 26180 + }, + { + "epoch": 0.1307897825164174, + "grad_norm": 0.09347887337207794, + "learning_rate": 2.6141580515156825e-05, + "loss": 10.0755, + "step": 26190 + }, + { + "epoch": 0.13083972134135685, + "grad_norm": 0.09428052604198456, + "learning_rate": 2.6140078600215275e-05, + "loss": 10.0727, + "step": 26200 + }, + { + "epoch": 0.13088966016629627, + "grad_norm": 0.09582565724849701, + "learning_rate": 2.6138576685273725e-05, + "loss": 10.0665, + "step": 26210 + }, + { + "epoch": 0.13093959899123572, + "grad_norm": 0.09516657888889313, + "learning_rate": 2.6137074770332176e-05, + "loss": 10.0645, + "step": 26220 + }, + { + "epoch": 0.13098953781617517, + "grad_norm": 0.10089933127164841, + "learning_rate": 2.6135572855390622e-05, + "loss": 10.0687, + "step": 26230 + }, + { + "epoch": 0.13103947664111462, + "grad_norm": 0.09348998963832855, + "learning_rate": 2.6134070940449073e-05, + "loss": 10.0749, + "step": 26240 + }, + { + "epoch": 0.13108941546605407, + "grad_norm": 0.09638164937496185, + "learning_rate": 2.6132569025507523e-05, + "loss": 10.0666, + "step": 26250 + }, + { + "epoch": 0.13113935429099352, + "grad_norm": 0.08938256651163101, + "learning_rate": 2.6131067110565973e-05, + "loss": 10.0736, + "step": 26260 + }, + { + "epoch": 0.13118929311593297, + "grad_norm": 0.09375530481338501, + "learning_rate": 2.6129565195624423e-05, + "loss": 10.0723, + "step": 26270 + }, + { + "epoch": 0.13123923194087242, + "grad_norm": 0.09841980785131454, + "learning_rate": 2.612806328068287e-05, + "loss": 10.0654, + "step": 26280 + }, + { + "epoch": 0.13128917076581187, + "grad_norm": 0.0966048538684845, + "learning_rate": 2.612656136574132e-05, + "loss": 10.0719, + "step": 26290 + }, + { + "epoch": 0.13133910959075132, + "grad_norm": 0.09200683981180191, + "learning_rate": 2.612505945079977e-05, + "loss": 10.072, + "step": 26300 + }, + { + "epoch": 0.13138904841569077, + "grad_norm": 0.09424161165952682, + "learning_rate": 2.612355753585822e-05, + "loss": 10.067, + "step": 26310 + }, + { + "epoch": 0.13143898724063022, + "grad_norm": 0.09710439294576645, + "learning_rate": 2.612205562091667e-05, + "loss": 10.0681, + "step": 26320 + }, + { + "epoch": 0.13148892606556967, + "grad_norm": 0.09168367087841034, + "learning_rate": 2.6120553705975117e-05, + "loss": 10.0653, + "step": 26330 + }, + { + "epoch": 0.13153886489050912, + "grad_norm": 0.09984847903251648, + "learning_rate": 2.6119051791033568e-05, + "loss": 10.0658, + "step": 26340 + }, + { + "epoch": 0.13158880371544857, + "grad_norm": 0.09909839183092117, + "learning_rate": 2.6117549876092018e-05, + "loss": 10.0639, + "step": 26350 + }, + { + "epoch": 0.13163874254038802, + "grad_norm": 0.09866788238286972, + "learning_rate": 2.6116047961150468e-05, + "loss": 10.0642, + "step": 26360 + }, + { + "epoch": 0.13168868136532746, + "grad_norm": 0.0965615063905716, + "learning_rate": 2.6114546046208918e-05, + "loss": 10.0713, + "step": 26370 + }, + { + "epoch": 0.13173862019026691, + "grad_norm": 0.09519165009260178, + "learning_rate": 2.6113044131267365e-05, + "loss": 10.0636, + "step": 26380 + }, + { + "epoch": 0.13178855901520636, + "grad_norm": 0.10046073794364929, + "learning_rate": 2.611154221632582e-05, + "loss": 10.0641, + "step": 26390 + }, + { + "epoch": 0.1318384978401458, + "grad_norm": 0.09428393840789795, + "learning_rate": 2.6110040301384265e-05, + "loss": 10.0645, + "step": 26400 + }, + { + "epoch": 0.13188843666508526, + "grad_norm": 0.09596385806798935, + "learning_rate": 2.6108538386442715e-05, + "loss": 10.0841, + "step": 26410 + }, + { + "epoch": 0.1319383754900247, + "grad_norm": 0.0943203791975975, + "learning_rate": 2.6107036471501166e-05, + "loss": 10.0695, + "step": 26420 + }, + { + "epoch": 0.13198831431496416, + "grad_norm": 0.0989902913570404, + "learning_rate": 2.6105534556559612e-05, + "loss": 10.0657, + "step": 26430 + }, + { + "epoch": 0.1320382531399036, + "grad_norm": 0.0922727957367897, + "learning_rate": 2.6104032641618066e-05, + "loss": 10.5972, + "step": 26440 + }, + { + "epoch": 0.13208819196484306, + "grad_norm": 0.09232750535011292, + "learning_rate": 2.6102530726676513e-05, + "loss": 10.063, + "step": 26450 + }, + { + "epoch": 0.1321381307897825, + "grad_norm": 0.09481442719697952, + "learning_rate": 2.6101028811734963e-05, + "loss": 10.0732, + "step": 26460 + }, + { + "epoch": 0.13218806961472196, + "grad_norm": 0.09335809201002121, + "learning_rate": 2.6099526896793413e-05, + "loss": 10.0669, + "step": 26470 + }, + { + "epoch": 0.1322380084396614, + "grad_norm": 0.10007120668888092, + "learning_rate": 2.609802498185186e-05, + "loss": 10.0633, + "step": 26480 + }, + { + "epoch": 0.13228794726460086, + "grad_norm": 0.09182542562484741, + "learning_rate": 2.6096523066910313e-05, + "loss": 10.0626, + "step": 26490 + }, + { + "epoch": 0.1323378860895403, + "grad_norm": 0.08880601823329926, + "learning_rate": 2.609502115196876e-05, + "loss": 10.0639, + "step": 26500 + }, + { + "epoch": 0.13238782491447976, + "grad_norm": 0.09500660002231598, + "learning_rate": 2.609351923702721e-05, + "loss": 10.0594, + "step": 26510 + }, + { + "epoch": 0.1324377637394192, + "grad_norm": 0.09459840506315231, + "learning_rate": 2.609201732208566e-05, + "loss": 10.0689, + "step": 26520 + }, + { + "epoch": 0.13248770256435866, + "grad_norm": 0.09772436320781708, + "learning_rate": 2.6090515407144107e-05, + "loss": 10.0646, + "step": 26530 + }, + { + "epoch": 0.1325376413892981, + "grad_norm": 0.09241725504398346, + "learning_rate": 2.608901349220256e-05, + "loss": 10.0626, + "step": 26540 + }, + { + "epoch": 0.13258758021423755, + "grad_norm": 0.09605549275875092, + "learning_rate": 2.6087511577261008e-05, + "loss": 10.062, + "step": 26550 + }, + { + "epoch": 0.132637519039177, + "grad_norm": 0.08990667015314102, + "learning_rate": 2.6086009662319458e-05, + "loss": 10.0607, + "step": 26560 + }, + { + "epoch": 0.13268745786411645, + "grad_norm": 0.09539429098367691, + "learning_rate": 2.6084507747377908e-05, + "loss": 10.0607, + "step": 26570 + }, + { + "epoch": 0.1327373966890559, + "grad_norm": 0.09879159927368164, + "learning_rate": 2.6083005832436355e-05, + "loss": 10.059, + "step": 26580 + }, + { + "epoch": 0.13278733551399535, + "grad_norm": 0.09552617371082306, + "learning_rate": 2.608150391749481e-05, + "loss": 10.0612, + "step": 26590 + }, + { + "epoch": 0.1328372743389348, + "grad_norm": 0.09348921477794647, + "learning_rate": 2.6080002002553255e-05, + "loss": 10.062, + "step": 26600 + }, + { + "epoch": 0.13288721316387425, + "grad_norm": 0.0956648588180542, + "learning_rate": 2.6078500087611705e-05, + "loss": 10.0591, + "step": 26610 + }, + { + "epoch": 0.1329371519888137, + "grad_norm": 0.09512264281511307, + "learning_rate": 2.6076998172670156e-05, + "loss": 10.0588, + "step": 26620 + }, + { + "epoch": 0.13298709081375315, + "grad_norm": 0.09354131668806076, + "learning_rate": 2.6075496257728602e-05, + "loss": 10.0635, + "step": 26630 + }, + { + "epoch": 0.1330370296386926, + "grad_norm": 0.09954062849283218, + "learning_rate": 2.6073994342787056e-05, + "loss": 10.0639, + "step": 26640 + }, + { + "epoch": 0.13308696846363205, + "grad_norm": 0.09436924755573273, + "learning_rate": 2.6072492427845503e-05, + "loss": 10.0608, + "step": 26650 + }, + { + "epoch": 0.1331369072885715, + "grad_norm": 0.09259260445833206, + "learning_rate": 2.6070990512903953e-05, + "loss": 10.0609, + "step": 26660 + }, + { + "epoch": 0.13318684611351095, + "grad_norm": 0.09337363392114639, + "learning_rate": 2.6069488597962403e-05, + "loss": 10.0629, + "step": 26670 + }, + { + "epoch": 0.1332367849384504, + "grad_norm": 0.09187287837266922, + "learning_rate": 2.606798668302085e-05, + "loss": 10.0571, + "step": 26680 + }, + { + "epoch": 0.13328672376338985, + "grad_norm": 0.09519325196743011, + "learning_rate": 2.6066484768079303e-05, + "loss": 10.063, + "step": 26690 + }, + { + "epoch": 0.1333366625883293, + "grad_norm": 0.09201279282569885, + "learning_rate": 2.606498285313775e-05, + "loss": 10.0609, + "step": 26700 + }, + { + "epoch": 0.13338660141326875, + "grad_norm": 0.09440702199935913, + "learning_rate": 2.6063480938196204e-05, + "loss": 10.0543, + "step": 26710 + }, + { + "epoch": 0.1334365402382082, + "grad_norm": 0.09421040117740631, + "learning_rate": 2.606197902325465e-05, + "loss": 10.0605, + "step": 26720 + }, + { + "epoch": 0.13348647906314764, + "grad_norm": 0.09064962714910507, + "learning_rate": 2.6060477108313097e-05, + "loss": 10.0651, + "step": 26730 + }, + { + "epoch": 0.1335364178880871, + "grad_norm": 0.10387849062681198, + "learning_rate": 2.605897519337155e-05, + "loss": 10.0539, + "step": 26740 + }, + { + "epoch": 0.13358635671302654, + "grad_norm": 0.09637302160263062, + "learning_rate": 2.6057473278429998e-05, + "loss": 10.0546, + "step": 26750 + }, + { + "epoch": 0.133636295537966, + "grad_norm": 0.09184058010578156, + "learning_rate": 2.605597136348845e-05, + "loss": 10.0554, + "step": 26760 + }, + { + "epoch": 0.13368623436290544, + "grad_norm": 0.09594525396823883, + "learning_rate": 2.6054469448546898e-05, + "loss": 10.0525, + "step": 26770 + }, + { + "epoch": 0.1337361731878449, + "grad_norm": 0.09645157307386398, + "learning_rate": 2.6052967533605345e-05, + "loss": 10.0595, + "step": 26780 + }, + { + "epoch": 0.13378611201278434, + "grad_norm": 0.09152582287788391, + "learning_rate": 2.60514656186638e-05, + "loss": 10.0652, + "step": 26790 + }, + { + "epoch": 0.1338360508377238, + "grad_norm": 0.09593851864337921, + "learning_rate": 2.6049963703722245e-05, + "loss": 10.0514, + "step": 26800 + }, + { + "epoch": 0.13388598966266324, + "grad_norm": 0.09910731017589569, + "learning_rate": 2.60484617887807e-05, + "loss": 10.0577, + "step": 26810 + }, + { + "epoch": 0.1339359284876027, + "grad_norm": 0.09356366842985153, + "learning_rate": 2.6046959873839146e-05, + "loss": 10.0535, + "step": 26820 + }, + { + "epoch": 0.13398586731254214, + "grad_norm": 0.09344334155321121, + "learning_rate": 2.6045457958897592e-05, + "loss": 10.0619, + "step": 26830 + }, + { + "epoch": 0.1340358061374816, + "grad_norm": 0.09502086043357849, + "learning_rate": 2.6043956043956046e-05, + "loss": 10.0514, + "step": 26840 + }, + { + "epoch": 0.13408574496242104, + "grad_norm": 0.0968647301197052, + "learning_rate": 2.6042454129014493e-05, + "loss": 10.0599, + "step": 26850 + }, + { + "epoch": 0.1341356837873605, + "grad_norm": 0.09345351159572601, + "learning_rate": 2.6040952214072946e-05, + "loss": 10.0544, + "step": 26860 + }, + { + "epoch": 0.13418562261229994, + "grad_norm": 0.09822539240121841, + "learning_rate": 2.6039450299131393e-05, + "loss": 10.0519, + "step": 26870 + }, + { + "epoch": 0.1342355614372394, + "grad_norm": 0.09218066930770874, + "learning_rate": 2.603794838418984e-05, + "loss": 10.0573, + "step": 26880 + }, + { + "epoch": 0.13428550026217884, + "grad_norm": 0.09610606729984283, + "learning_rate": 2.6036446469248293e-05, + "loss": 10.0547, + "step": 26890 + }, + { + "epoch": 0.13433543908711829, + "grad_norm": 0.09326432645320892, + "learning_rate": 2.603494455430674e-05, + "loss": 10.0527, + "step": 26900 + }, + { + "epoch": 0.13438537791205774, + "grad_norm": 0.0933103859424591, + "learning_rate": 2.6033442639365194e-05, + "loss": 10.0503, + "step": 26910 + }, + { + "epoch": 0.13443531673699718, + "grad_norm": 0.09903916716575623, + "learning_rate": 2.603194072442364e-05, + "loss": 10.0514, + "step": 26920 + }, + { + "epoch": 0.13448525556193663, + "grad_norm": 0.10167834907770157, + "learning_rate": 2.6030438809482087e-05, + "loss": 10.0525, + "step": 26930 + }, + { + "epoch": 0.13453519438687608, + "grad_norm": 0.09627428650856018, + "learning_rate": 2.602893689454054e-05, + "loss": 10.0544, + "step": 26940 + }, + { + "epoch": 0.13458513321181553, + "grad_norm": 0.09547838568687439, + "learning_rate": 2.6027434979598988e-05, + "loss": 10.0523, + "step": 26950 + }, + { + "epoch": 0.13463507203675498, + "grad_norm": 0.0934177041053772, + "learning_rate": 2.602593306465744e-05, + "loss": 10.0577, + "step": 26960 + }, + { + "epoch": 0.13468501086169443, + "grad_norm": 0.09201879799365997, + "learning_rate": 2.6024431149715888e-05, + "loss": 10.0561, + "step": 26970 + }, + { + "epoch": 0.13473494968663388, + "grad_norm": 0.10229359567165375, + "learning_rate": 2.6022929234774335e-05, + "loss": 10.0537, + "step": 26980 + }, + { + "epoch": 0.13478488851157333, + "grad_norm": 0.0951157957315445, + "learning_rate": 2.602142731983279e-05, + "loss": 10.0489, + "step": 26990 + }, + { + "epoch": 0.13483482733651278, + "grad_norm": 0.09461617469787598, + "learning_rate": 2.6019925404891235e-05, + "loss": 10.0504, + "step": 27000 + }, + { + "epoch": 0.13488476616145223, + "grad_norm": 0.09404211491346359, + "learning_rate": 2.601842348994969e-05, + "loss": 10.05, + "step": 27010 + }, + { + "epoch": 0.13493470498639168, + "grad_norm": 0.09388327598571777, + "learning_rate": 2.6016921575008136e-05, + "loss": 10.0507, + "step": 27020 + }, + { + "epoch": 0.13498464381133113, + "grad_norm": 0.08963081240653992, + "learning_rate": 2.6015419660066582e-05, + "loss": 10.0509, + "step": 27030 + }, + { + "epoch": 0.13503458263627058, + "grad_norm": 0.09284106642007828, + "learning_rate": 2.6013917745125036e-05, + "loss": 10.0477, + "step": 27040 + }, + { + "epoch": 0.13508452146121003, + "grad_norm": 0.09641976654529572, + "learning_rate": 2.6012415830183483e-05, + "loss": 10.0484, + "step": 27050 + }, + { + "epoch": 0.13513446028614948, + "grad_norm": 0.09596869349479675, + "learning_rate": 2.6010913915241936e-05, + "loss": 10.0532, + "step": 27060 + }, + { + "epoch": 0.13518439911108893, + "grad_norm": 0.09437578171491623, + "learning_rate": 2.6009412000300383e-05, + "loss": 10.0535, + "step": 27070 + }, + { + "epoch": 0.13523433793602838, + "grad_norm": 0.09599731862545013, + "learning_rate": 2.6007910085358833e-05, + "loss": 10.0469, + "step": 27080 + }, + { + "epoch": 0.13528427676096783, + "grad_norm": 0.09609924256801605, + "learning_rate": 2.6006408170417284e-05, + "loss": 10.0484, + "step": 27090 + }, + { + "epoch": 0.13533421558590727, + "grad_norm": 0.09731438755989075, + "learning_rate": 2.600490625547573e-05, + "loss": 10.0502, + "step": 27100 + }, + { + "epoch": 0.13538415441084672, + "grad_norm": 0.09791937470436096, + "learning_rate": 2.6003404340534184e-05, + "loss": 10.0518, + "step": 27110 + }, + { + "epoch": 0.13543409323578617, + "grad_norm": 0.10084173083305359, + "learning_rate": 2.600190242559263e-05, + "loss": 10.0464, + "step": 27120 + }, + { + "epoch": 0.13548403206072562, + "grad_norm": 0.09352555871009827, + "learning_rate": 2.600040051065108e-05, + "loss": 10.0546, + "step": 27130 + }, + { + "epoch": 0.13553397088566507, + "grad_norm": 0.09164499491453171, + "learning_rate": 2.599889859570953e-05, + "loss": 10.05, + "step": 27140 + }, + { + "epoch": 0.13558390971060452, + "grad_norm": 0.09976156800985336, + "learning_rate": 2.5997396680767978e-05, + "loss": 10.0509, + "step": 27150 + }, + { + "epoch": 0.13563384853554397, + "grad_norm": 0.09886104613542557, + "learning_rate": 2.599589476582643e-05, + "loss": 10.0528, + "step": 27160 + }, + { + "epoch": 0.13568378736048342, + "grad_norm": 0.09629525989294052, + "learning_rate": 2.5994392850884878e-05, + "loss": 10.0424, + "step": 27170 + }, + { + "epoch": 0.13573372618542287, + "grad_norm": 0.09974990785121918, + "learning_rate": 2.599289093594333e-05, + "loss": 10.0463, + "step": 27180 + }, + { + "epoch": 0.13578366501036232, + "grad_norm": 0.09218470752239227, + "learning_rate": 2.599138902100178e-05, + "loss": 10.0519, + "step": 27190 + }, + { + "epoch": 0.13583360383530174, + "grad_norm": 0.09512577950954437, + "learning_rate": 2.5989887106060225e-05, + "loss": 10.0465, + "step": 27200 + }, + { + "epoch": 0.1358835426602412, + "grad_norm": 0.09620299190282822, + "learning_rate": 2.598838519111868e-05, + "loss": 10.0529, + "step": 27210 + }, + { + "epoch": 0.13593348148518064, + "grad_norm": 0.09048976749181747, + "learning_rate": 2.5986883276177126e-05, + "loss": 10.0434, + "step": 27220 + }, + { + "epoch": 0.1359834203101201, + "grad_norm": 0.09220554679632187, + "learning_rate": 2.5985381361235576e-05, + "loss": 10.049, + "step": 27230 + }, + { + "epoch": 0.13603335913505954, + "grad_norm": 0.0914304107427597, + "learning_rate": 2.5983879446294026e-05, + "loss": 10.0438, + "step": 27240 + }, + { + "epoch": 0.136083297959999, + "grad_norm": 0.09436330199241638, + "learning_rate": 2.5982377531352473e-05, + "loss": 10.047, + "step": 27250 + }, + { + "epoch": 0.13613323678493844, + "grad_norm": 0.09602109342813492, + "learning_rate": 2.5980875616410926e-05, + "loss": 10.0436, + "step": 27260 + }, + { + "epoch": 0.1361831756098779, + "grad_norm": 0.09520605206489563, + "learning_rate": 2.5979373701469373e-05, + "loss": 10.0481, + "step": 27270 + }, + { + "epoch": 0.13623311443481734, + "grad_norm": 0.09378485381603241, + "learning_rate": 2.5977871786527823e-05, + "loss": 10.049, + "step": 27280 + }, + { + "epoch": 0.1362830532597568, + "grad_norm": 0.08965323120355606, + "learning_rate": 2.5976369871586274e-05, + "loss": 10.0505, + "step": 27290 + }, + { + "epoch": 0.13633299208469624, + "grad_norm": 0.09475590288639069, + "learning_rate": 2.597486795664472e-05, + "loss": 10.0487, + "step": 27300 + }, + { + "epoch": 0.13638293090963569, + "grad_norm": 0.09472818672657013, + "learning_rate": 2.5973366041703174e-05, + "loss": 10.0429, + "step": 27310 + }, + { + "epoch": 0.13643286973457514, + "grad_norm": 0.09863143414258957, + "learning_rate": 2.597186412676162e-05, + "loss": 10.0474, + "step": 27320 + }, + { + "epoch": 0.13648280855951458, + "grad_norm": 0.0966014415025711, + "learning_rate": 2.597036221182007e-05, + "loss": 10.0459, + "step": 27330 + }, + { + "epoch": 0.13653274738445403, + "grad_norm": 0.09497446566820145, + "learning_rate": 2.596886029687852e-05, + "loss": 10.0439, + "step": 27340 + }, + { + "epoch": 0.13658268620939348, + "grad_norm": 0.09334766864776611, + "learning_rate": 2.5967358381936968e-05, + "loss": 10.0404, + "step": 27350 + }, + { + "epoch": 0.13663262503433293, + "grad_norm": 0.09661349654197693, + "learning_rate": 2.596585646699542e-05, + "loss": 10.0469, + "step": 27360 + }, + { + "epoch": 0.13668256385927238, + "grad_norm": 0.0978384017944336, + "learning_rate": 2.5964354552053868e-05, + "loss": 10.0472, + "step": 27370 + }, + { + "epoch": 0.13673250268421183, + "grad_norm": 0.09664426743984222, + "learning_rate": 2.596285263711232e-05, + "loss": 10.0426, + "step": 27380 + }, + { + "epoch": 0.13678244150915128, + "grad_norm": 0.09428177773952484, + "learning_rate": 2.596135072217077e-05, + "loss": 10.0432, + "step": 27390 + }, + { + "epoch": 0.13683238033409073, + "grad_norm": 0.09939315170049667, + "learning_rate": 2.595984880722922e-05, + "loss": 10.0388, + "step": 27400 + }, + { + "epoch": 0.13688231915903018, + "grad_norm": 0.09356003999710083, + "learning_rate": 2.595834689228767e-05, + "loss": 10.0395, + "step": 27410 + }, + { + "epoch": 0.13693225798396963, + "grad_norm": 0.09405787289142609, + "learning_rate": 2.5956844977346116e-05, + "loss": 10.0424, + "step": 27420 + }, + { + "epoch": 0.13698219680890908, + "grad_norm": 0.09802021831274033, + "learning_rate": 2.5955343062404566e-05, + "loss": 10.0428, + "step": 27430 + }, + { + "epoch": 0.13703213563384853, + "grad_norm": 0.10091257095336914, + "learning_rate": 2.5953841147463016e-05, + "loss": 10.0415, + "step": 27440 + }, + { + "epoch": 0.13708207445878798, + "grad_norm": 0.09429014474153519, + "learning_rate": 2.5952339232521466e-05, + "loss": 10.0421, + "step": 27450 + }, + { + "epoch": 0.13713201328372743, + "grad_norm": 0.09676757454872131, + "learning_rate": 2.5950837317579916e-05, + "loss": 10.0448, + "step": 27460 + }, + { + "epoch": 0.13718195210866688, + "grad_norm": 0.09487222135066986, + "learning_rate": 2.5949335402638363e-05, + "loss": 10.0386, + "step": 27470 + }, + { + "epoch": 0.13723189093360633, + "grad_norm": 0.09765653312206268, + "learning_rate": 2.5947833487696813e-05, + "loss": 10.0413, + "step": 27480 + }, + { + "epoch": 0.13728182975854578, + "grad_norm": 0.09836971014738083, + "learning_rate": 2.5946331572755264e-05, + "loss": 10.0376, + "step": 27490 + }, + { + "epoch": 0.13733176858348523, + "grad_norm": 0.09549671411514282, + "learning_rate": 2.5944829657813714e-05, + "loss": 10.0385, + "step": 27500 + }, + { + "epoch": 0.13738170740842467, + "grad_norm": 0.08956001698970795, + "learning_rate": 2.5943327742872164e-05, + "loss": 10.0366, + "step": 27510 + }, + { + "epoch": 0.13743164623336412, + "grad_norm": 0.09554048627614975, + "learning_rate": 2.594182582793061e-05, + "loss": 10.0428, + "step": 27520 + }, + { + "epoch": 0.13748158505830357, + "grad_norm": 0.10129525512456894, + "learning_rate": 2.594032391298906e-05, + "loss": 10.0432, + "step": 27530 + }, + { + "epoch": 0.13753152388324302, + "grad_norm": 0.09291373938322067, + "learning_rate": 2.593882199804751e-05, + "loss": 10.0404, + "step": 27540 + }, + { + "epoch": 0.13758146270818247, + "grad_norm": 0.09440364688634872, + "learning_rate": 2.593732008310596e-05, + "loss": 10.0399, + "step": 27550 + }, + { + "epoch": 0.13763140153312192, + "grad_norm": 0.09612912684679031, + "learning_rate": 2.593581816816441e-05, + "loss": 10.0394, + "step": 27560 + }, + { + "epoch": 0.13768134035806137, + "grad_norm": 0.09454838931560516, + "learning_rate": 2.5934316253222858e-05, + "loss": 10.0397, + "step": 27570 + }, + { + "epoch": 0.13773127918300082, + "grad_norm": 0.10840394347906113, + "learning_rate": 2.593281433828131e-05, + "loss": 10.0346, + "step": 27580 + }, + { + "epoch": 0.13778121800794027, + "grad_norm": 0.09229467809200287, + "learning_rate": 2.593131242333976e-05, + "loss": 10.0374, + "step": 27590 + }, + { + "epoch": 0.13783115683287972, + "grad_norm": 0.09308604896068573, + "learning_rate": 2.592981050839821e-05, + "loss": 10.0424, + "step": 27600 + }, + { + "epoch": 0.13788109565781917, + "grad_norm": 0.09472574293613434, + "learning_rate": 2.592830859345666e-05, + "loss": 10.0377, + "step": 27610 + }, + { + "epoch": 0.13793103448275862, + "grad_norm": 0.09195975959300995, + "learning_rate": 2.5926806678515106e-05, + "loss": 10.0403, + "step": 27620 + }, + { + "epoch": 0.13798097330769807, + "grad_norm": 0.09835057705640793, + "learning_rate": 2.5925304763573556e-05, + "loss": 10.0352, + "step": 27630 + }, + { + "epoch": 0.13803091213263752, + "grad_norm": 0.10062667727470398, + "learning_rate": 2.5923802848632006e-05, + "loss": 10.0336, + "step": 27640 + }, + { + "epoch": 0.13808085095757697, + "grad_norm": 0.08686701953411102, + "learning_rate": 2.5922300933690456e-05, + "loss": 10.0383, + "step": 27650 + }, + { + "epoch": 0.13813078978251642, + "grad_norm": 0.09707959741353989, + "learning_rate": 2.5920799018748906e-05, + "loss": 10.0345, + "step": 27660 + }, + { + "epoch": 0.13818072860745587, + "grad_norm": 0.09171800315380096, + "learning_rate": 2.5919297103807353e-05, + "loss": 10.0336, + "step": 27670 + }, + { + "epoch": 0.13823066743239532, + "grad_norm": 0.0923093855381012, + "learning_rate": 2.5917795188865803e-05, + "loss": 10.0352, + "step": 27680 + }, + { + "epoch": 0.13828060625733476, + "grad_norm": 0.0912717804312706, + "learning_rate": 2.5916293273924254e-05, + "loss": 10.0339, + "step": 27690 + }, + { + "epoch": 0.13833054508227421, + "grad_norm": 0.09480732679367065, + "learning_rate": 2.5914791358982704e-05, + "loss": 10.0389, + "step": 27700 + }, + { + "epoch": 0.13838048390721366, + "grad_norm": 0.09554393589496613, + "learning_rate": 2.5913289444041154e-05, + "loss": 10.0352, + "step": 27710 + }, + { + "epoch": 0.1384304227321531, + "grad_norm": 0.09049583226442337, + "learning_rate": 2.5911787529099604e-05, + "loss": 10.032, + "step": 27720 + }, + { + "epoch": 0.13848036155709256, + "grad_norm": 0.09413400292396545, + "learning_rate": 2.591028561415805e-05, + "loss": 10.0393, + "step": 27730 + }, + { + "epoch": 0.138530300382032, + "grad_norm": 0.09317301958799362, + "learning_rate": 2.59087836992165e-05, + "loss": 10.0328, + "step": 27740 + }, + { + "epoch": 0.13858023920697146, + "grad_norm": 0.09751195460557938, + "learning_rate": 2.590728178427495e-05, + "loss": 10.0299, + "step": 27750 + }, + { + "epoch": 0.1386301780319109, + "grad_norm": 0.09604477882385254, + "learning_rate": 2.59057798693334e-05, + "loss": 10.0348, + "step": 27760 + }, + { + "epoch": 0.13868011685685036, + "grad_norm": 0.09635339677333832, + "learning_rate": 2.590427795439185e-05, + "loss": 10.0346, + "step": 27770 + }, + { + "epoch": 0.1387300556817898, + "grad_norm": 0.10057932138442993, + "learning_rate": 2.59027760394503e-05, + "loss": 10.0319, + "step": 27780 + }, + { + "epoch": 0.13877999450672926, + "grad_norm": 0.09397371113300323, + "learning_rate": 2.590127412450875e-05, + "loss": 10.0331, + "step": 27790 + }, + { + "epoch": 0.1388299333316687, + "grad_norm": 0.09505651146173477, + "learning_rate": 2.58997722095672e-05, + "loss": 10.0293, + "step": 27800 + }, + { + "epoch": 0.13887987215660816, + "grad_norm": 0.09163686633110046, + "learning_rate": 2.589827029462565e-05, + "loss": 10.0265, + "step": 27810 + }, + { + "epoch": 0.1389298109815476, + "grad_norm": 0.09803500026464462, + "learning_rate": 2.58967683796841e-05, + "loss": 10.0362, + "step": 27820 + }, + { + "epoch": 0.13897974980648706, + "grad_norm": 0.1022925078868866, + "learning_rate": 2.5895266464742546e-05, + "loss": 10.033, + "step": 27830 + }, + { + "epoch": 0.1390296886314265, + "grad_norm": 0.09815297275781631, + "learning_rate": 2.5893764549800996e-05, + "loss": 10.0308, + "step": 27840 + }, + { + "epoch": 0.13907962745636596, + "grad_norm": 0.09278080612421036, + "learning_rate": 2.5892262634859446e-05, + "loss": 10.0297, + "step": 27850 + }, + { + "epoch": 0.1391295662813054, + "grad_norm": 0.09531033784151077, + "learning_rate": 2.5890760719917896e-05, + "loss": 10.0336, + "step": 27860 + }, + { + "epoch": 0.13917950510624486, + "grad_norm": 0.1017637774348259, + "learning_rate": 2.5889258804976347e-05, + "loss": 10.0323, + "step": 27870 + }, + { + "epoch": 0.1392294439311843, + "grad_norm": 0.08980844169855118, + "learning_rate": 2.5887756890034793e-05, + "loss": 10.0351, + "step": 27880 + }, + { + "epoch": 0.13927938275612375, + "grad_norm": 0.09667633473873138, + "learning_rate": 2.5886254975093244e-05, + "loss": 10.0344, + "step": 27890 + }, + { + "epoch": 0.1393293215810632, + "grad_norm": 0.09221838414669037, + "learning_rate": 2.5884753060151694e-05, + "loss": 10.0287, + "step": 27900 + }, + { + "epoch": 0.13937926040600265, + "grad_norm": 0.09094412624835968, + "learning_rate": 2.5883251145210144e-05, + "loss": 10.0363, + "step": 27910 + }, + { + "epoch": 0.1394291992309421, + "grad_norm": 0.09816347807645798, + "learning_rate": 2.5881749230268594e-05, + "loss": 10.0317, + "step": 27920 + }, + { + "epoch": 0.13947913805588155, + "grad_norm": 0.09702466428279877, + "learning_rate": 2.588024731532704e-05, + "loss": 10.0289, + "step": 27930 + }, + { + "epoch": 0.139529076880821, + "grad_norm": 0.09658083319664001, + "learning_rate": 2.587874540038549e-05, + "loss": 10.0304, + "step": 27940 + }, + { + "epoch": 0.13957901570576045, + "grad_norm": 0.09620265662670135, + "learning_rate": 2.587724348544394e-05, + "loss": 10.0229, + "step": 27950 + }, + { + "epoch": 0.1396289545306999, + "grad_norm": 0.09803494811058044, + "learning_rate": 2.587574157050239e-05, + "loss": 10.0264, + "step": 27960 + }, + { + "epoch": 0.13967889335563935, + "grad_norm": 0.09247355163097382, + "learning_rate": 2.587423965556084e-05, + "loss": 10.0331, + "step": 27970 + }, + { + "epoch": 0.1397288321805788, + "grad_norm": 0.09678816050291061, + "learning_rate": 2.587273774061929e-05, + "loss": 10.0242, + "step": 27980 + }, + { + "epoch": 0.13977877100551825, + "grad_norm": 0.0913391187787056, + "learning_rate": 2.587123582567774e-05, + "loss": 10.0266, + "step": 27990 + }, + { + "epoch": 0.1398287098304577, + "grad_norm": 0.0930318757891655, + "learning_rate": 2.586973391073619e-05, + "loss": 10.0247, + "step": 28000 + }, + { + "epoch": 0.13987864865539715, + "grad_norm": 0.09507840126752853, + "learning_rate": 2.586823199579464e-05, + "loss": 10.0282, + "step": 28010 + }, + { + "epoch": 0.1399285874803366, + "grad_norm": 0.09485821425914764, + "learning_rate": 2.586673008085309e-05, + "loss": 10.0286, + "step": 28020 + }, + { + "epoch": 0.13997852630527605, + "grad_norm": 0.09455297142267227, + "learning_rate": 2.5865228165911536e-05, + "loss": 10.0309, + "step": 28030 + }, + { + "epoch": 0.1400284651302155, + "grad_norm": 0.0947793647646904, + "learning_rate": 2.586372625096999e-05, + "loss": 10.027, + "step": 28040 + }, + { + "epoch": 0.14007840395515495, + "grad_norm": 0.09246467053890228, + "learning_rate": 2.5862224336028436e-05, + "loss": 10.0264, + "step": 28050 + }, + { + "epoch": 0.1401283427800944, + "grad_norm": 0.09456957876682281, + "learning_rate": 2.5860722421086886e-05, + "loss": 10.0264, + "step": 28060 + }, + { + "epoch": 0.14017828160503384, + "grad_norm": 0.09399653971195221, + "learning_rate": 2.5859220506145337e-05, + "loss": 10.0288, + "step": 28070 + }, + { + "epoch": 0.1402282204299733, + "grad_norm": 0.09784771502017975, + "learning_rate": 2.5857718591203783e-05, + "loss": 10.0218, + "step": 28080 + }, + { + "epoch": 0.14027815925491274, + "grad_norm": 0.09228380024433136, + "learning_rate": 2.5856216676262237e-05, + "loss": 10.0276, + "step": 28090 + }, + { + "epoch": 0.1403280980798522, + "grad_norm": 0.09873951226472855, + "learning_rate": 2.5854714761320684e-05, + "loss": 10.0257, + "step": 28100 + }, + { + "epoch": 0.14037803690479164, + "grad_norm": 0.09930938482284546, + "learning_rate": 2.5853212846379134e-05, + "loss": 10.0304, + "step": 28110 + }, + { + "epoch": 0.1404279757297311, + "grad_norm": 0.09772654622793198, + "learning_rate": 2.5851710931437584e-05, + "loss": 10.0231, + "step": 28120 + }, + { + "epoch": 0.14047791455467054, + "grad_norm": 0.08995553106069565, + "learning_rate": 2.585020901649603e-05, + "loss": 10.0225, + "step": 28130 + }, + { + "epoch": 0.14052785337961, + "grad_norm": 0.09744606912136078, + "learning_rate": 2.5848707101554484e-05, + "loss": 10.0267, + "step": 28140 + }, + { + "epoch": 0.14057779220454944, + "grad_norm": 0.09355470538139343, + "learning_rate": 2.584720518661293e-05, + "loss": 10.0261, + "step": 28150 + }, + { + "epoch": 0.1406277310294889, + "grad_norm": 0.097645103931427, + "learning_rate": 2.584570327167138e-05, + "loss": 10.0217, + "step": 28160 + }, + { + "epoch": 0.14067766985442834, + "grad_norm": 0.09685888141393661, + "learning_rate": 2.584420135672983e-05, + "loss": 10.0217, + "step": 28170 + }, + { + "epoch": 0.1407276086793678, + "grad_norm": 0.09667977690696716, + "learning_rate": 2.584269944178828e-05, + "loss": 10.0277, + "step": 28180 + }, + { + "epoch": 0.1407775475043072, + "grad_norm": 0.09020134806632996, + "learning_rate": 2.5841197526846732e-05, + "loss": 10.0203, + "step": 28190 + }, + { + "epoch": 0.14082748632924666, + "grad_norm": 0.09370194375514984, + "learning_rate": 2.583969561190518e-05, + "loss": 10.0225, + "step": 28200 + }, + { + "epoch": 0.1408774251541861, + "grad_norm": 0.09775953739881516, + "learning_rate": 2.583819369696363e-05, + "loss": 10.0206, + "step": 28210 + }, + { + "epoch": 0.14092736397912556, + "grad_norm": 0.09635622054338455, + "learning_rate": 2.583669178202208e-05, + "loss": 10.0231, + "step": 28220 + }, + { + "epoch": 0.140977302804065, + "grad_norm": 0.09073793143033981, + "learning_rate": 2.5835189867080526e-05, + "loss": 10.0269, + "step": 28230 + }, + { + "epoch": 0.14102724162900446, + "grad_norm": 0.10052619129419327, + "learning_rate": 2.583368795213898e-05, + "loss": 10.0258, + "step": 28240 + }, + { + "epoch": 0.1410771804539439, + "grad_norm": 0.09381050616502762, + "learning_rate": 2.5832186037197426e-05, + "loss": 10.0208, + "step": 28250 + }, + { + "epoch": 0.14112711927888336, + "grad_norm": 0.10019423812627792, + "learning_rate": 2.5830684122255876e-05, + "loss": 10.0195, + "step": 28260 + }, + { + "epoch": 0.1411770581038228, + "grad_norm": 0.09306546300649643, + "learning_rate": 2.5829182207314327e-05, + "loss": 10.0191, + "step": 28270 + }, + { + "epoch": 0.14122699692876226, + "grad_norm": 0.09635606408119202, + "learning_rate": 2.5827680292372773e-05, + "loss": 10.0268, + "step": 28280 + }, + { + "epoch": 0.1412769357537017, + "grad_norm": 0.10159686952829361, + "learning_rate": 2.5826178377431227e-05, + "loss": 10.0214, + "step": 28290 + }, + { + "epoch": 0.14132687457864115, + "grad_norm": 0.09453240036964417, + "learning_rate": 2.5824676462489674e-05, + "loss": 10.0215, + "step": 28300 + }, + { + "epoch": 0.1413768134035806, + "grad_norm": 0.09013237059116364, + "learning_rate": 2.5823174547548124e-05, + "loss": 10.024, + "step": 28310 + }, + { + "epoch": 0.14142675222852005, + "grad_norm": 0.0938793197274208, + "learning_rate": 2.5821672632606574e-05, + "loss": 10.0176, + "step": 28320 + }, + { + "epoch": 0.1414766910534595, + "grad_norm": 0.09485707432031631, + "learning_rate": 2.582017071766502e-05, + "loss": 10.0171, + "step": 28330 + }, + { + "epoch": 0.14152662987839895, + "grad_norm": 0.10150688141584396, + "learning_rate": 2.5818668802723474e-05, + "loss": 10.019, + "step": 28340 + }, + { + "epoch": 0.1415765687033384, + "grad_norm": 0.09411424398422241, + "learning_rate": 2.581716688778192e-05, + "loss": 10.0231, + "step": 28350 + }, + { + "epoch": 0.14162650752827785, + "grad_norm": 0.09432416409254074, + "learning_rate": 2.5815664972840375e-05, + "loss": 10.0187, + "step": 28360 + }, + { + "epoch": 0.1416764463532173, + "grad_norm": 0.09197516739368439, + "learning_rate": 2.581416305789882e-05, + "loss": 10.0196, + "step": 28370 + }, + { + "epoch": 0.14172638517815675, + "grad_norm": 0.09398947656154633, + "learning_rate": 2.581266114295727e-05, + "loss": 10.0159, + "step": 28380 + }, + { + "epoch": 0.1417763240030962, + "grad_norm": 0.10007443279027939, + "learning_rate": 2.5811159228015722e-05, + "loss": 10.0165, + "step": 28390 + }, + { + "epoch": 0.14182626282803565, + "grad_norm": 0.09710991382598877, + "learning_rate": 2.580965731307417e-05, + "loss": 10.0245, + "step": 28400 + }, + { + "epoch": 0.1418762016529751, + "grad_norm": 0.09509746730327606, + "learning_rate": 2.5808155398132622e-05, + "loss": 10.0178, + "step": 28410 + }, + { + "epoch": 0.14192614047791455, + "grad_norm": 0.09114133566617966, + "learning_rate": 2.580665348319107e-05, + "loss": 10.0192, + "step": 28420 + }, + { + "epoch": 0.141976079302854, + "grad_norm": 0.0989239513874054, + "learning_rate": 2.5805151568249516e-05, + "loss": 10.017, + "step": 28430 + }, + { + "epoch": 0.14202601812779345, + "grad_norm": 0.0925961434841156, + "learning_rate": 2.580364965330797e-05, + "loss": 10.0186, + "step": 28440 + }, + { + "epoch": 0.1420759569527329, + "grad_norm": 0.09893997758626938, + "learning_rate": 2.5802147738366416e-05, + "loss": 10.0155, + "step": 28450 + }, + { + "epoch": 0.14212589577767235, + "grad_norm": 0.0943673700094223, + "learning_rate": 2.580064582342487e-05, + "loss": 10.0159, + "step": 28460 + }, + { + "epoch": 0.1421758346026118, + "grad_norm": 0.09693938493728638, + "learning_rate": 2.5799143908483317e-05, + "loss": 10.0115, + "step": 28470 + }, + { + "epoch": 0.14222577342755124, + "grad_norm": 0.09356331825256348, + "learning_rate": 2.5797641993541763e-05, + "loss": 10.014, + "step": 28480 + }, + { + "epoch": 0.1422757122524907, + "grad_norm": 0.09694939106702805, + "learning_rate": 2.5796140078600217e-05, + "loss": 10.0138, + "step": 28490 + }, + { + "epoch": 0.14232565107743014, + "grad_norm": 0.09721967577934265, + "learning_rate": 2.5794638163658664e-05, + "loss": 10.0084, + "step": 28500 + }, + { + "epoch": 0.1423755899023696, + "grad_norm": 0.09602759033441544, + "learning_rate": 2.5793136248717117e-05, + "loss": 10.015, + "step": 28510 + }, + { + "epoch": 0.14242552872730904, + "grad_norm": 0.09390842169523239, + "learning_rate": 2.5791634333775564e-05, + "loss": 10.0172, + "step": 28520 + }, + { + "epoch": 0.1424754675522485, + "grad_norm": 0.09624374657869339, + "learning_rate": 2.579013241883401e-05, + "loss": 10.0161, + "step": 28530 + }, + { + "epoch": 0.14252540637718794, + "grad_norm": 0.09962889552116394, + "learning_rate": 2.5788630503892465e-05, + "loss": 10.0149, + "step": 28540 + }, + { + "epoch": 0.1425753452021274, + "grad_norm": 0.09729303419589996, + "learning_rate": 2.578712858895091e-05, + "loss": 10.0157, + "step": 28550 + }, + { + "epoch": 0.14262528402706684, + "grad_norm": 0.09288518875837326, + "learning_rate": 2.5785626674009365e-05, + "loss": 10.0173, + "step": 28560 + }, + { + "epoch": 0.1426752228520063, + "grad_norm": 0.09366150200366974, + "learning_rate": 2.578412475906781e-05, + "loss": 10.0122, + "step": 28570 + }, + { + "epoch": 0.14272516167694574, + "grad_norm": 0.0944962128996849, + "learning_rate": 2.5782622844126262e-05, + "loss": 10.0112, + "step": 28580 + }, + { + "epoch": 0.1427751005018852, + "grad_norm": 0.09178702533245087, + "learning_rate": 2.5781120929184712e-05, + "loss": 10.0179, + "step": 28590 + }, + { + "epoch": 0.14282503932682464, + "grad_norm": 0.0905400812625885, + "learning_rate": 2.577961901424316e-05, + "loss": 10.0134, + "step": 28600 + }, + { + "epoch": 0.1428749781517641, + "grad_norm": 0.09418443590402603, + "learning_rate": 2.5778117099301612e-05, + "loss": 10.0168, + "step": 28610 + }, + { + "epoch": 0.14292491697670354, + "grad_norm": 0.0970693901181221, + "learning_rate": 2.577661518436006e-05, + "loss": 10.0162, + "step": 28620 + }, + { + "epoch": 0.14297485580164299, + "grad_norm": 0.09255459159612656, + "learning_rate": 2.577511326941851e-05, + "loss": 10.0129, + "step": 28630 + }, + { + "epoch": 0.14302479462658244, + "grad_norm": 0.10080841928720474, + "learning_rate": 2.577361135447696e-05, + "loss": 10.0144, + "step": 28640 + }, + { + "epoch": 0.14307473345152188, + "grad_norm": 0.09627335518598557, + "learning_rate": 2.5772109439535406e-05, + "loss": 10.0095, + "step": 28650 + }, + { + "epoch": 0.14312467227646133, + "grad_norm": 0.09631509333848953, + "learning_rate": 2.577060752459386e-05, + "loss": 10.0167, + "step": 28660 + }, + { + "epoch": 0.14317461110140078, + "grad_norm": 0.09696370363235474, + "learning_rate": 2.5769105609652307e-05, + "loss": 10.0145, + "step": 28670 + }, + { + "epoch": 0.14322454992634023, + "grad_norm": 0.08783876895904541, + "learning_rate": 2.5767603694710757e-05, + "loss": 10.0139, + "step": 28680 + }, + { + "epoch": 0.14327448875127968, + "grad_norm": 0.09430178999900818, + "learning_rate": 2.5766101779769207e-05, + "loss": 10.0151, + "step": 28690 + }, + { + "epoch": 0.14332442757621913, + "grad_norm": 0.09581022709608078, + "learning_rate": 2.5764599864827654e-05, + "loss": 10.0074, + "step": 28700 + }, + { + "epoch": 0.14337436640115858, + "grad_norm": 0.10260827839374542, + "learning_rate": 2.5763097949886107e-05, + "loss": 10.0112, + "step": 28710 + }, + { + "epoch": 0.14342430522609803, + "grad_norm": 0.09252434223890305, + "learning_rate": 2.5761596034944554e-05, + "loss": 10.0146, + "step": 28720 + }, + { + "epoch": 0.14347424405103748, + "grad_norm": 0.08901256322860718, + "learning_rate": 2.5760094120003004e-05, + "loss": 10.0066, + "step": 28730 + }, + { + "epoch": 0.14352418287597693, + "grad_norm": 0.09571991860866547, + "learning_rate": 2.5758592205061455e-05, + "loss": 10.0124, + "step": 28740 + }, + { + "epoch": 0.14357412170091638, + "grad_norm": 0.09751693159341812, + "learning_rate": 2.57570902901199e-05, + "loss": 10.0108, + "step": 28750 + }, + { + "epoch": 0.14362406052585583, + "grad_norm": 0.09803242981433868, + "learning_rate": 2.5755588375178355e-05, + "loss": 10.0063, + "step": 28760 + }, + { + "epoch": 0.14367399935079528, + "grad_norm": 0.0984642431139946, + "learning_rate": 2.57540864602368e-05, + "loss": 10.0058, + "step": 28770 + }, + { + "epoch": 0.14372393817573473, + "grad_norm": 0.0973120778799057, + "learning_rate": 2.5752584545295252e-05, + "loss": 10.0062, + "step": 28780 + }, + { + "epoch": 0.14377387700067418, + "grad_norm": 0.09399261325597763, + "learning_rate": 2.5751082630353702e-05, + "loss": 10.0063, + "step": 28790 + }, + { + "epoch": 0.14382381582561363, + "grad_norm": 0.09200508892536163, + "learning_rate": 2.574958071541215e-05, + "loss": 10.0079, + "step": 28800 + }, + { + "epoch": 0.14387375465055308, + "grad_norm": 0.09684068709611893, + "learning_rate": 2.5748078800470602e-05, + "loss": 10.004, + "step": 28810 + }, + { + "epoch": 0.14392369347549253, + "grad_norm": 0.09978806972503662, + "learning_rate": 2.574657688552905e-05, + "loss": 10.0058, + "step": 28820 + }, + { + "epoch": 0.14397363230043198, + "grad_norm": 0.09788274019956589, + "learning_rate": 2.57450749705875e-05, + "loss": 10.0059, + "step": 28830 + }, + { + "epoch": 0.14402357112537142, + "grad_norm": 0.09136934578418732, + "learning_rate": 2.574357305564595e-05, + "loss": 10.0068, + "step": 28840 + }, + { + "epoch": 0.14407350995031087, + "grad_norm": 0.09080319106578827, + "learning_rate": 2.5742071140704396e-05, + "loss": 10.0057, + "step": 28850 + }, + { + "epoch": 0.14412344877525032, + "grad_norm": 0.09714391082525253, + "learning_rate": 2.574056922576285e-05, + "loss": 10.0084, + "step": 28860 + }, + { + "epoch": 0.14417338760018977, + "grad_norm": 0.1014237254858017, + "learning_rate": 2.5739067310821297e-05, + "loss": 10.0073, + "step": 28870 + }, + { + "epoch": 0.14422332642512922, + "grad_norm": 0.09329380840063095, + "learning_rate": 2.573756539587975e-05, + "loss": 10.0075, + "step": 28880 + }, + { + "epoch": 0.14427326525006867, + "grad_norm": 0.09168153256177902, + "learning_rate": 2.5736063480938197e-05, + "loss": 10.0039, + "step": 28890 + }, + { + "epoch": 0.14432320407500812, + "grad_norm": 0.09341901540756226, + "learning_rate": 2.5734561565996644e-05, + "loss": 10.0078, + "step": 28900 + }, + { + "epoch": 0.14437314289994757, + "grad_norm": 0.08910293877124786, + "learning_rate": 2.5733059651055097e-05, + "loss": 10.0063, + "step": 28910 + }, + { + "epoch": 0.14442308172488702, + "grad_norm": 0.09554349631071091, + "learning_rate": 2.5731557736113544e-05, + "loss": 10.0101, + "step": 28920 + }, + { + "epoch": 0.14447302054982647, + "grad_norm": 0.09408532083034515, + "learning_rate": 2.5730055821171998e-05, + "loss": 10.0043, + "step": 28930 + }, + { + "epoch": 0.14452295937476592, + "grad_norm": 0.09786105901002884, + "learning_rate": 2.5728553906230445e-05, + "loss": 10.0024, + "step": 28940 + }, + { + "epoch": 0.14457289819970537, + "grad_norm": 0.09660477936267853, + "learning_rate": 2.572705199128889e-05, + "loss": 10.0064, + "step": 28950 + }, + { + "epoch": 0.14462283702464482, + "grad_norm": 0.088541179895401, + "learning_rate": 2.5725550076347345e-05, + "loss": 10.0054, + "step": 28960 + }, + { + "epoch": 0.14467277584958427, + "grad_norm": 0.09211627393960953, + "learning_rate": 2.572404816140579e-05, + "loss": 10.0047, + "step": 28970 + }, + { + "epoch": 0.14472271467452372, + "grad_norm": 0.10103971511125565, + "learning_rate": 2.5722546246464245e-05, + "loss": 10.0037, + "step": 28980 + }, + { + "epoch": 0.14477265349946317, + "grad_norm": 0.09156627953052521, + "learning_rate": 2.5721044331522692e-05, + "loss": 10.0016, + "step": 28990 + }, + { + "epoch": 0.14482259232440262, + "grad_norm": 0.08952153474092484, + "learning_rate": 2.5719542416581142e-05, + "loss": 10.0043, + "step": 29000 + }, + { + "epoch": 0.14487253114934207, + "grad_norm": 0.09983877092599869, + "learning_rate": 2.5718040501639592e-05, + "loss": 10.0048, + "step": 29010 + }, + { + "epoch": 0.14492246997428151, + "grad_norm": 0.09366819262504578, + "learning_rate": 2.571653858669804e-05, + "loss": 10.0025, + "step": 29020 + }, + { + "epoch": 0.14497240879922096, + "grad_norm": 0.09434466063976288, + "learning_rate": 2.5715036671756493e-05, + "loss": 10.0053, + "step": 29030 + }, + { + "epoch": 0.1450223476241604, + "grad_norm": 0.09528578817844391, + "learning_rate": 2.571353475681494e-05, + "loss": 10.0025, + "step": 29040 + }, + { + "epoch": 0.14507228644909986, + "grad_norm": 0.09328389912843704, + "learning_rate": 2.571203284187339e-05, + "loss": 9.9988, + "step": 29050 + }, + { + "epoch": 0.1451222252740393, + "grad_norm": 0.09483915567398071, + "learning_rate": 2.571053092693184e-05, + "loss": 10.0042, + "step": 29060 + }, + { + "epoch": 0.14517216409897876, + "grad_norm": 0.09249535948038101, + "learning_rate": 2.5709029011990287e-05, + "loss": 10.0043, + "step": 29070 + }, + { + "epoch": 0.1452221029239182, + "grad_norm": 0.09545839577913284, + "learning_rate": 2.570752709704874e-05, + "loss": 10.0014, + "step": 29080 + }, + { + "epoch": 0.14527204174885766, + "grad_norm": 0.0961858257651329, + "learning_rate": 2.5706025182107187e-05, + "loss": 10.0063, + "step": 29090 + }, + { + "epoch": 0.1453219805737971, + "grad_norm": 0.09866221994161606, + "learning_rate": 2.5704523267165637e-05, + "loss": 9.9957, + "step": 29100 + }, + { + "epoch": 0.14537191939873656, + "grad_norm": 0.0952538251876831, + "learning_rate": 2.5703021352224087e-05, + "loss": 9.9998, + "step": 29110 + }, + { + "epoch": 0.145421858223676, + "grad_norm": 0.09659268707036972, + "learning_rate": 2.5701519437282534e-05, + "loss": 10.0082, + "step": 29120 + }, + { + "epoch": 0.14547179704861546, + "grad_norm": 0.09689272195100784, + "learning_rate": 2.5700017522340988e-05, + "loss": 10.0042, + "step": 29130 + }, + { + "epoch": 0.1455217358735549, + "grad_norm": 0.09408452361822128, + "learning_rate": 2.5698515607399435e-05, + "loss": 9.9958, + "step": 29140 + }, + { + "epoch": 0.14557167469849436, + "grad_norm": 0.09306766837835312, + "learning_rate": 2.5697013692457885e-05, + "loss": 9.9998, + "step": 29150 + }, + { + "epoch": 0.1456216135234338, + "grad_norm": 0.10271524637937546, + "learning_rate": 2.5695511777516335e-05, + "loss": 9.9997, + "step": 29160 + }, + { + "epoch": 0.14567155234837326, + "grad_norm": 0.09216585755348206, + "learning_rate": 2.5694009862574782e-05, + "loss": 9.9944, + "step": 29170 + }, + { + "epoch": 0.14572149117331268, + "grad_norm": 0.09553508460521698, + "learning_rate": 2.5692507947633235e-05, + "loss": 10.0022, + "step": 29180 + }, + { + "epoch": 0.14577142999825213, + "grad_norm": 0.09927748143672943, + "learning_rate": 2.5691006032691682e-05, + "loss": 9.9974, + "step": 29190 + }, + { + "epoch": 0.14582136882319158, + "grad_norm": 0.09433567523956299, + "learning_rate": 2.5689504117750132e-05, + "loss": 10.0014, + "step": 29200 + }, + { + "epoch": 0.14587130764813103, + "grad_norm": 0.0930803120136261, + "learning_rate": 2.5688002202808582e-05, + "loss": 10.0037, + "step": 29210 + }, + { + "epoch": 0.14592124647307048, + "grad_norm": 0.09351327270269394, + "learning_rate": 2.568650028786703e-05, + "loss": 9.9983, + "step": 29220 + }, + { + "epoch": 0.14597118529800993, + "grad_norm": 0.09411612898111343, + "learning_rate": 2.5684998372925483e-05, + "loss": 10.002, + "step": 29230 + }, + { + "epoch": 0.14602112412294938, + "grad_norm": 0.09385941177606583, + "learning_rate": 2.568349645798393e-05, + "loss": 10.0001, + "step": 29240 + }, + { + "epoch": 0.14607106294788882, + "grad_norm": 0.09528261423110962, + "learning_rate": 2.568199454304238e-05, + "loss": 9.9901, + "step": 29250 + }, + { + "epoch": 0.14612100177282827, + "grad_norm": 0.09263186156749725, + "learning_rate": 2.568049262810083e-05, + "loss": 10.0004, + "step": 29260 + }, + { + "epoch": 0.14617094059776772, + "grad_norm": 0.09151321649551392, + "learning_rate": 2.5678990713159277e-05, + "loss": 10.0003, + "step": 29270 + }, + { + "epoch": 0.14622087942270717, + "grad_norm": 0.09789709746837616, + "learning_rate": 2.567748879821773e-05, + "loss": 9.9961, + "step": 29280 + }, + { + "epoch": 0.14627081824764662, + "grad_norm": 0.0894283652305603, + "learning_rate": 2.5675986883276177e-05, + "loss": 9.9954, + "step": 29290 + }, + { + "epoch": 0.14632075707258607, + "grad_norm": 0.10029701143503189, + "learning_rate": 2.5674484968334627e-05, + "loss": 9.9962, + "step": 29300 + }, + { + "epoch": 0.14637069589752552, + "grad_norm": 0.09673379361629486, + "learning_rate": 2.5672983053393077e-05, + "loss": 9.9969, + "step": 29310 + }, + { + "epoch": 0.14642063472246497, + "grad_norm": 0.09456515312194824, + "learning_rate": 2.5671481138451528e-05, + "loss": 9.9946, + "step": 29320 + }, + { + "epoch": 0.14647057354740442, + "grad_norm": 0.09132513403892517, + "learning_rate": 2.5669979223509978e-05, + "loss": 9.994, + "step": 29330 + }, + { + "epoch": 0.14652051237234387, + "grad_norm": 0.09338773041963577, + "learning_rate": 2.5668477308568425e-05, + "loss": 9.9982, + "step": 29340 + }, + { + "epoch": 0.14657045119728332, + "grad_norm": 0.0984477698802948, + "learning_rate": 2.5666975393626875e-05, + "loss": 9.9914, + "step": 29350 + }, + { + "epoch": 0.14662039002222277, + "grad_norm": 0.09212279319763184, + "learning_rate": 2.5665473478685325e-05, + "loss": 9.9958, + "step": 29360 + }, + { + "epoch": 0.14667032884716222, + "grad_norm": 0.10177190601825714, + "learning_rate": 2.5663971563743775e-05, + "loss": 9.9991, + "step": 29370 + }, + { + "epoch": 0.14672026767210167, + "grad_norm": 0.09410154819488525, + "learning_rate": 2.5662469648802225e-05, + "loss": 9.9934, + "step": 29380 + }, + { + "epoch": 0.14677020649704112, + "grad_norm": 0.09799119085073471, + "learning_rate": 2.5660967733860672e-05, + "loss": 9.9874, + "step": 29390 + }, + { + "epoch": 0.14682014532198057, + "grad_norm": 0.10114170610904694, + "learning_rate": 2.5659465818919122e-05, + "loss": 9.9944, + "step": 29400 + }, + { + "epoch": 0.14687008414692002, + "grad_norm": 0.09417644143104553, + "learning_rate": 2.5657963903977572e-05, + "loss": 9.9959, + "step": 29410 + }, + { + "epoch": 0.14692002297185947, + "grad_norm": 0.09454021602869034, + "learning_rate": 2.5656461989036023e-05, + "loss": 9.9909, + "step": 29420 + }, + { + "epoch": 0.14696996179679891, + "grad_norm": 0.09609093517065048, + "learning_rate": 2.5654960074094473e-05, + "loss": 9.9893, + "step": 29430 + }, + { + "epoch": 0.14701990062173836, + "grad_norm": 0.09451326727867126, + "learning_rate": 2.565345815915292e-05, + "loss": 9.9925, + "step": 29440 + }, + { + "epoch": 0.1470698394466778, + "grad_norm": 0.09044196456670761, + "learning_rate": 2.565195624421137e-05, + "loss": 9.9939, + "step": 29450 + }, + { + "epoch": 0.14711977827161726, + "grad_norm": 0.10042398422956467, + "learning_rate": 2.565045432926982e-05, + "loss": 9.9976, + "step": 29460 + }, + { + "epoch": 0.1471697170965567, + "grad_norm": 743.2418823242188, + "learning_rate": 2.564895241432827e-05, + "loss": 9.9953, + "step": 29470 + }, + { + "epoch": 0.14721965592149616, + "grad_norm": 0.10025718063116074, + "learning_rate": 2.564745049938672e-05, + "loss": 10.5647, + "step": 29480 + }, + { + "epoch": 0.1472695947464356, + "grad_norm": 0.0930456891655922, + "learning_rate": 2.5645948584445167e-05, + "loss": 9.9927, + "step": 29490 + }, + { + "epoch": 0.14731953357137506, + "grad_norm": 0.09810245782136917, + "learning_rate": 2.5644446669503617e-05, + "loss": 9.9906, + "step": 29500 + }, + { + "epoch": 0.1473694723963145, + "grad_norm": 0.0949150025844574, + "learning_rate": 2.5642944754562067e-05, + "loss": 11.4577, + "step": 29510 + }, + { + "epoch": 0.14741941122125396, + "grad_norm": 0.09857135266065598, + "learning_rate": 2.5641442839620518e-05, + "loss": 9.9955, + "step": 29520 + }, + { + "epoch": 0.1474693500461934, + "grad_norm": 0.09233613312244415, + "learning_rate": 2.5639940924678968e-05, + "loss": 9.9922, + "step": 29530 + }, + { + "epoch": 0.14751928887113286, + "grad_norm": 0.0993812084197998, + "learning_rate": 2.5638439009737415e-05, + "loss": 9.9864, + "step": 29540 + }, + { + "epoch": 0.1475692276960723, + "grad_norm": 0.09710720926523209, + "learning_rate": 2.5636937094795865e-05, + "loss": 9.9892, + "step": 29550 + }, + { + "epoch": 0.14761916652101176, + "grad_norm": 0.09210072457790375, + "learning_rate": 2.5635435179854315e-05, + "loss": 9.9928, + "step": 29560 + }, + { + "epoch": 0.1476691053459512, + "grad_norm": 0.0917966440320015, + "learning_rate": 2.5633933264912765e-05, + "loss": 9.9889, + "step": 29570 + }, + { + "epoch": 0.14771904417089066, + "grad_norm": 0.09141924977302551, + "learning_rate": 2.5632431349971215e-05, + "loss": 9.9931, + "step": 29580 + }, + { + "epoch": 0.1477689829958301, + "grad_norm": 0.09177865833044052, + "learning_rate": 2.5630929435029662e-05, + "loss": 9.9971, + "step": 29590 + }, + { + "epoch": 0.14781892182076956, + "grad_norm": 0.09628382325172424, + "learning_rate": 2.5629427520088112e-05, + "loss": 9.991, + "step": 29600 + }, + { + "epoch": 0.147868860645709, + "grad_norm": 0.10353731364011765, + "learning_rate": 2.5627925605146562e-05, + "loss": 9.9869, + "step": 29610 + }, + { + "epoch": 0.14791879947064845, + "grad_norm": 0.08936607092618942, + "learning_rate": 2.5626423690205013e-05, + "loss": 9.991, + "step": 29620 + }, + { + "epoch": 0.1479687382955879, + "grad_norm": 0.09186360985040665, + "learning_rate": 2.5624921775263463e-05, + "loss": 9.9913, + "step": 29630 + }, + { + "epoch": 0.14801867712052735, + "grad_norm": 0.09984660148620605, + "learning_rate": 2.5623419860321913e-05, + "loss": 9.9862, + "step": 29640 + }, + { + "epoch": 0.1480686159454668, + "grad_norm": 0.0906200036406517, + "learning_rate": 2.562191794538036e-05, + "loss": 9.9897, + "step": 29650 + }, + { + "epoch": 0.14811855477040625, + "grad_norm": 0.1004503145813942, + "learning_rate": 2.562041603043881e-05, + "loss": 9.9894, + "step": 29660 + }, + { + "epoch": 0.1481684935953457, + "grad_norm": 0.09145411103963852, + "learning_rate": 2.561891411549726e-05, + "loss": 9.9877, + "step": 29670 + }, + { + "epoch": 0.14821843242028515, + "grad_norm": 0.09083334356546402, + "learning_rate": 2.561741220055571e-05, + "loss": 9.9825, + "step": 29680 + }, + { + "epoch": 0.1482683712452246, + "grad_norm": 0.1027732640504837, + "learning_rate": 2.561591028561416e-05, + "loss": 9.988, + "step": 29690 + }, + { + "epoch": 0.14831831007016405, + "grad_norm": 0.09426605701446533, + "learning_rate": 2.5614408370672607e-05, + "loss": 9.986, + "step": 29700 + }, + { + "epoch": 0.1483682488951035, + "grad_norm": 0.09159605205059052, + "learning_rate": 2.5612906455731057e-05, + "loss": 9.9925, + "step": 29710 + }, + { + "epoch": 0.14841818772004295, + "grad_norm": 0.09627106785774231, + "learning_rate": 2.5611404540789508e-05, + "loss": 9.9841, + "step": 29720 + }, + { + "epoch": 0.1484681265449824, + "grad_norm": 0.09180369973182678, + "learning_rate": 2.5609902625847958e-05, + "loss": 9.992, + "step": 29730 + }, + { + "epoch": 0.14851806536992185, + "grad_norm": 0.093332439661026, + "learning_rate": 2.5608400710906408e-05, + "loss": 9.9877, + "step": 29740 + }, + { + "epoch": 0.1485680041948613, + "grad_norm": 0.09420640766620636, + "learning_rate": 2.5606898795964855e-05, + "loss": 9.9862, + "step": 29750 + }, + { + "epoch": 0.14861794301980075, + "grad_norm": 0.08834953606128693, + "learning_rate": 2.5605396881023305e-05, + "loss": 9.9859, + "step": 29760 + }, + { + "epoch": 0.1486678818447402, + "grad_norm": 0.09357737004756927, + "learning_rate": 2.5603894966081755e-05, + "loss": 9.9881, + "step": 29770 + }, + { + "epoch": 0.14871782066967965, + "grad_norm": 0.09611498564481735, + "learning_rate": 2.5602393051140205e-05, + "loss": 9.9835, + "step": 29780 + }, + { + "epoch": 0.1487677594946191, + "grad_norm": 0.09604179859161377, + "learning_rate": 2.5600891136198655e-05, + "loss": 9.9854, + "step": 29790 + }, + { + "epoch": 0.14881769831955854, + "grad_norm": 0.0894085168838501, + "learning_rate": 2.5599389221257102e-05, + "loss": 9.9843, + "step": 29800 + }, + { + "epoch": 0.148867637144498, + "grad_norm": 0.0987863540649414, + "learning_rate": 2.5597887306315552e-05, + "loss": 9.9861, + "step": 29810 + }, + { + "epoch": 0.14891757596943744, + "grad_norm": 0.09360215812921524, + "learning_rate": 2.5596385391374003e-05, + "loss": 9.979, + "step": 29820 + }, + { + "epoch": 0.1489675147943769, + "grad_norm": 0.09655702859163284, + "learning_rate": 2.5594883476432453e-05, + "loss": 9.9865, + "step": 29830 + }, + { + "epoch": 0.14901745361931634, + "grad_norm": 0.09509619325399399, + "learning_rate": 2.5593381561490903e-05, + "loss": 9.982, + "step": 29840 + }, + { + "epoch": 0.1490673924442558, + "grad_norm": 0.09575647860765457, + "learning_rate": 2.559187964654935e-05, + "loss": 9.9806, + "step": 29850 + }, + { + "epoch": 0.14911733126919524, + "grad_norm": 0.09219710528850555, + "learning_rate": 2.55903777316078e-05, + "loss": 9.9857, + "step": 29860 + }, + { + "epoch": 0.1491672700941347, + "grad_norm": 0.09096106886863708, + "learning_rate": 2.558887581666625e-05, + "loss": 9.9862, + "step": 29870 + }, + { + "epoch": 0.14921720891907414, + "grad_norm": 0.09332380443811417, + "learning_rate": 2.55873739017247e-05, + "loss": 9.9837, + "step": 29880 + }, + { + "epoch": 0.1492671477440136, + "grad_norm": 0.09098540246486664, + "learning_rate": 2.558587198678315e-05, + "loss": 9.9893, + "step": 29890 + }, + { + "epoch": 0.14931708656895304, + "grad_norm": 0.09403214603662491, + "learning_rate": 2.5584370071841597e-05, + "loss": 9.9855, + "step": 29900 + }, + { + "epoch": 0.1493670253938925, + "grad_norm": 0.10132903605699539, + "learning_rate": 2.5582868156900047e-05, + "loss": 9.9836, + "step": 29910 + }, + { + "epoch": 0.14941696421883194, + "grad_norm": 0.09413734078407288, + "learning_rate": 2.5581366241958498e-05, + "loss": 9.9824, + "step": 29920 + }, + { + "epoch": 0.1494669030437714, + "grad_norm": 0.09860097616910934, + "learning_rate": 2.5579864327016948e-05, + "loss": 9.9844, + "step": 29930 + }, + { + "epoch": 0.14951684186871084, + "grad_norm": 0.09427344053983688, + "learning_rate": 2.5578362412075398e-05, + "loss": 9.9842, + "step": 29940 + }, + { + "epoch": 0.1495667806936503, + "grad_norm": 0.09929951280355453, + "learning_rate": 2.5576860497133845e-05, + "loss": 9.9852, + "step": 29950 + }, + { + "epoch": 0.14961671951858974, + "grad_norm": 0.08913746476173401, + "learning_rate": 2.5575358582192295e-05, + "loss": 9.988, + "step": 29960 + }, + { + "epoch": 0.14966665834352919, + "grad_norm": 0.09487251192331314, + "learning_rate": 2.5573856667250745e-05, + "loss": 9.9882, + "step": 29970 + }, + { + "epoch": 0.14971659716846863, + "grad_norm": 0.0945911556482315, + "learning_rate": 2.5572354752309195e-05, + "loss": 9.9848, + "step": 29980 + }, + { + "epoch": 0.14976653599340808, + "grad_norm": 0.08827967196702957, + "learning_rate": 2.5570852837367645e-05, + "loss": 9.9878, + "step": 29990 + }, + { + "epoch": 0.14981647481834753, + "grad_norm": 0.09094835072755814, + "learning_rate": 2.5569350922426092e-05, + "loss": 9.9809, + "step": 30000 + }, + { + "epoch": 0.14986641364328698, + "grad_norm": 0.0922902449965477, + "learning_rate": 2.5567849007484546e-05, + "loss": 9.9868, + "step": 30010 + }, + { + "epoch": 0.14991635246822643, + "grad_norm": 0.09045281261205673, + "learning_rate": 2.5566347092542993e-05, + "loss": 9.9841, + "step": 30020 + }, + { + "epoch": 0.14996629129316588, + "grad_norm": 0.09216750413179398, + "learning_rate": 2.5564845177601443e-05, + "loss": 9.9797, + "step": 30030 + }, + { + "epoch": 0.15001623011810533, + "grad_norm": 0.09659641981124878, + "learning_rate": 2.5563343262659893e-05, + "loss": 9.9785, + "step": 30040 + }, + { + "epoch": 0.15006616894304478, + "grad_norm": 0.09455203264951706, + "learning_rate": 2.556184134771834e-05, + "loss": 9.9815, + "step": 30050 + }, + { + "epoch": 0.15011610776798423, + "grad_norm": 0.089305080473423, + "learning_rate": 2.5560339432776793e-05, + "loss": 9.9792, + "step": 30060 + }, + { + "epoch": 0.15016604659292368, + "grad_norm": 0.09118703007698059, + "learning_rate": 2.555883751783524e-05, + "loss": 9.9802, + "step": 30070 + }, + { + "epoch": 0.15021598541786313, + "grad_norm": 0.09706766903400421, + "learning_rate": 2.555733560289369e-05, + "loss": 9.9764, + "step": 30080 + }, + { + "epoch": 0.15026592424280258, + "grad_norm": 0.09246896207332611, + "learning_rate": 2.555583368795214e-05, + "loss": 9.981, + "step": 30090 + }, + { + "epoch": 0.15031586306774203, + "grad_norm": 0.09420756250619888, + "learning_rate": 2.5554331773010587e-05, + "loss": 9.9817, + "step": 30100 + }, + { + "epoch": 0.15036580189268148, + "grad_norm": 0.0959128737449646, + "learning_rate": 2.555282985806904e-05, + "loss": 9.9834, + "step": 30110 + }, + { + "epoch": 0.15041574071762093, + "grad_norm": 0.09950920939445496, + "learning_rate": 2.5551327943127488e-05, + "loss": 9.979, + "step": 30120 + }, + { + "epoch": 0.15046567954256038, + "grad_norm": 0.09626981616020203, + "learning_rate": 2.5549826028185938e-05, + "loss": 9.974, + "step": 30130 + }, + { + "epoch": 0.15051561836749983, + "grad_norm": 0.09594815224409103, + "learning_rate": 2.5548324113244388e-05, + "loss": 9.9792, + "step": 30140 + }, + { + "epoch": 0.15056555719243928, + "grad_norm": 0.10035426914691925, + "learning_rate": 2.5546822198302835e-05, + "loss": 9.976, + "step": 30150 + }, + { + "epoch": 0.15061549601737872, + "grad_norm": 0.09873773902654648, + "learning_rate": 2.554532028336129e-05, + "loss": 9.9735, + "step": 30160 + }, + { + "epoch": 0.15066543484231815, + "grad_norm": 0.09737424552440643, + "learning_rate": 2.5543818368419735e-05, + "loss": 9.9808, + "step": 30170 + }, + { + "epoch": 0.1507153736672576, + "grad_norm": 0.09332314133644104, + "learning_rate": 2.5542316453478185e-05, + "loss": 9.9723, + "step": 30180 + }, + { + "epoch": 0.15076531249219705, + "grad_norm": 0.09951841086149216, + "learning_rate": 2.5540814538536636e-05, + "loss": 9.9775, + "step": 30190 + }, + { + "epoch": 0.1508152513171365, + "grad_norm": 0.09631986916065216, + "learning_rate": 2.5539312623595082e-05, + "loss": 9.9765, + "step": 30200 + }, + { + "epoch": 0.15086519014207594, + "grad_norm": 0.08957190811634064, + "learning_rate": 2.5537810708653536e-05, + "loss": 9.985, + "step": 30210 + }, + { + "epoch": 0.1509151289670154, + "grad_norm": 0.09298118948936462, + "learning_rate": 2.5536308793711983e-05, + "loss": 9.9777, + "step": 30220 + }, + { + "epoch": 0.15096506779195484, + "grad_norm": 0.09950188547372818, + "learning_rate": 2.5534806878770433e-05, + "loss": 9.9758, + "step": 30230 + }, + { + "epoch": 0.1510150066168943, + "grad_norm": 0.09786109626293182, + "learning_rate": 2.5533304963828883e-05, + "loss": 9.9709, + "step": 30240 + }, + { + "epoch": 0.15106494544183374, + "grad_norm": 0.09600091725587845, + "learning_rate": 2.553180304888733e-05, + "loss": 9.9772, + "step": 30250 + }, + { + "epoch": 0.1511148842667732, + "grad_norm": 0.0918637216091156, + "learning_rate": 2.5530301133945783e-05, + "loss": 9.9781, + "step": 30260 + }, + { + "epoch": 0.15116482309171264, + "grad_norm": 0.09260197728872299, + "learning_rate": 2.552879921900423e-05, + "loss": 9.9701, + "step": 30270 + }, + { + "epoch": 0.1512147619166521, + "grad_norm": 0.0909905806183815, + "learning_rate": 2.552729730406268e-05, + "loss": 9.9747, + "step": 30280 + }, + { + "epoch": 0.15126470074159154, + "grad_norm": 0.09207629412412643, + "learning_rate": 2.552579538912113e-05, + "loss": 9.9713, + "step": 30290 + }, + { + "epoch": 0.151314639566531, + "grad_norm": 0.09557502716779709, + "learning_rate": 2.5524293474179577e-05, + "loss": 9.9844, + "step": 30300 + }, + { + "epoch": 0.15136457839147044, + "grad_norm": 0.0928947702050209, + "learning_rate": 2.552279155923803e-05, + "loss": 9.9739, + "step": 30310 + }, + { + "epoch": 0.1514145172164099, + "grad_norm": 0.09521695226430893, + "learning_rate": 2.5521289644296478e-05, + "loss": 9.9733, + "step": 30320 + }, + { + "epoch": 0.15146445604134934, + "grad_norm": 0.0963277593255043, + "learning_rate": 2.551978772935493e-05, + "loss": 9.969, + "step": 30330 + }, + { + "epoch": 0.1515143948662888, + "grad_norm": 0.10125837475061417, + "learning_rate": 2.5518285814413378e-05, + "loss": 9.9739, + "step": 30340 + }, + { + "epoch": 0.15156433369122824, + "grad_norm": 0.09548031538724899, + "learning_rate": 2.5516783899471825e-05, + "loss": 9.9702, + "step": 30350 + }, + { + "epoch": 0.1516142725161677, + "grad_norm": 0.09445524960756302, + "learning_rate": 2.551528198453028e-05, + "loss": 9.9767, + "step": 30360 + }, + { + "epoch": 0.15166421134110714, + "grad_norm": 0.09280666708946228, + "learning_rate": 2.5513780069588725e-05, + "loss": 9.9779, + "step": 30370 + }, + { + "epoch": 0.15171415016604659, + "grad_norm": 0.09591789543628693, + "learning_rate": 2.551227815464718e-05, + "loss": 9.9722, + "step": 30380 + }, + { + "epoch": 0.15176408899098603, + "grad_norm": 0.09540422260761261, + "learning_rate": 2.5510776239705626e-05, + "loss": 9.9732, + "step": 30390 + }, + { + "epoch": 0.15181402781592548, + "grad_norm": 0.0966714397072792, + "learning_rate": 2.5509274324764072e-05, + "loss": 9.9744, + "step": 30400 + }, + { + "epoch": 0.15186396664086493, + "grad_norm": 0.09428321570158005, + "learning_rate": 2.5507772409822526e-05, + "loss": 9.9751, + "step": 30410 + }, + { + "epoch": 0.15191390546580438, + "grad_norm": 0.09702390432357788, + "learning_rate": 2.5506270494880973e-05, + "loss": 9.9724, + "step": 30420 + }, + { + "epoch": 0.15196384429074383, + "grad_norm": 0.09882203489542007, + "learning_rate": 2.5504768579939426e-05, + "loss": 9.9698, + "step": 30430 + }, + { + "epoch": 0.15201378311568328, + "grad_norm": 0.09188614040613174, + "learning_rate": 2.5503266664997873e-05, + "loss": 9.9745, + "step": 30440 + }, + { + "epoch": 0.15206372194062273, + "grad_norm": 0.09487085789442062, + "learning_rate": 2.550176475005632e-05, + "loss": 9.9672, + "step": 30450 + }, + { + "epoch": 0.15211366076556218, + "grad_norm": 0.09500377625226974, + "learning_rate": 2.5500262835114773e-05, + "loss": 9.9702, + "step": 30460 + }, + { + "epoch": 0.15216359959050163, + "grad_norm": 0.09745945781469345, + "learning_rate": 2.549876092017322e-05, + "loss": 9.9739, + "step": 30470 + }, + { + "epoch": 0.15221353841544108, + "grad_norm": 0.0949743390083313, + "learning_rate": 2.5497259005231674e-05, + "loss": 9.9646, + "step": 30480 + }, + { + "epoch": 0.15226347724038053, + "grad_norm": 0.09280611574649811, + "learning_rate": 2.549575709029012e-05, + "loss": 9.967, + "step": 30490 + }, + { + "epoch": 0.15231341606531998, + "grad_norm": 0.09759785979986191, + "learning_rate": 2.5494255175348567e-05, + "loss": 9.9691, + "step": 30500 + }, + { + "epoch": 0.15236335489025943, + "grad_norm": 0.10251536220312119, + "learning_rate": 2.549275326040702e-05, + "loss": 9.968, + "step": 30510 + }, + { + "epoch": 0.15241329371519888, + "grad_norm": 0.09339610487222672, + "learning_rate": 2.5491251345465468e-05, + "loss": 9.9709, + "step": 30520 + }, + { + "epoch": 0.15246323254013833, + "grad_norm": 0.09631470590829849, + "learning_rate": 2.548974943052392e-05, + "loss": 9.9649, + "step": 30530 + }, + { + "epoch": 0.15251317136507778, + "grad_norm": 0.09462681412696838, + "learning_rate": 2.5488247515582368e-05, + "loss": 9.9656, + "step": 30540 + }, + { + "epoch": 0.15256311019001723, + "grad_norm": 0.09926269203424454, + "learning_rate": 2.5486745600640815e-05, + "loss": 9.9666, + "step": 30550 + }, + { + "epoch": 0.15261304901495668, + "grad_norm": 0.09245786815881729, + "learning_rate": 2.548524368569927e-05, + "loss": 9.9648, + "step": 30560 + }, + { + "epoch": 0.15266298783989612, + "grad_norm": 0.09163583815097809, + "learning_rate": 2.5483741770757715e-05, + "loss": 9.9667, + "step": 30570 + }, + { + "epoch": 0.15271292666483557, + "grad_norm": 0.0959586575627327, + "learning_rate": 2.548223985581617e-05, + "loss": 9.9713, + "step": 30580 + }, + { + "epoch": 0.15276286548977502, + "grad_norm": 0.09638317674398422, + "learning_rate": 2.5480737940874616e-05, + "loss": 9.9625, + "step": 30590 + }, + { + "epoch": 0.15281280431471447, + "grad_norm": 0.0959702804684639, + "learning_rate": 2.5479236025933062e-05, + "loss": 9.9676, + "step": 30600 + }, + { + "epoch": 0.15286274313965392, + "grad_norm": 0.09670331329107285, + "learning_rate": 2.5477734110991516e-05, + "loss": 9.9673, + "step": 30610 + }, + { + "epoch": 0.15291268196459337, + "grad_norm": 0.09731889516115189, + "learning_rate": 2.5476232196049963e-05, + "loss": 9.9669, + "step": 30620 + }, + { + "epoch": 0.15296262078953282, + "grad_norm": 0.09183140099048615, + "learning_rate": 2.5474730281108416e-05, + "loss": 9.9672, + "step": 30630 + }, + { + "epoch": 0.15301255961447227, + "grad_norm": 0.09476214647293091, + "learning_rate": 2.5473228366166863e-05, + "loss": 9.9662, + "step": 30640 + }, + { + "epoch": 0.15306249843941172, + "grad_norm": 0.09765252470970154, + "learning_rate": 2.5471726451225313e-05, + "loss": 9.9693, + "step": 30650 + }, + { + "epoch": 0.15311243726435117, + "grad_norm": 0.0965784415602684, + "learning_rate": 2.5470224536283763e-05, + "loss": 9.9612, + "step": 30660 + }, + { + "epoch": 0.15316237608929062, + "grad_norm": 0.0938754677772522, + "learning_rate": 2.546872262134221e-05, + "loss": 9.9638, + "step": 30670 + }, + { + "epoch": 0.15321231491423007, + "grad_norm": 0.09392806887626648, + "learning_rate": 2.5467220706400664e-05, + "loss": 9.9641, + "step": 30680 + }, + { + "epoch": 0.15326225373916952, + "grad_norm": 0.09800152480602264, + "learning_rate": 2.546571879145911e-05, + "loss": 9.9662, + "step": 30690 + }, + { + "epoch": 0.15331219256410897, + "grad_norm": 0.09477967023849487, + "learning_rate": 2.546421687651756e-05, + "loss": 9.9667, + "step": 30700 + }, + { + "epoch": 0.15336213138904842, + "grad_norm": 0.09264533221721649, + "learning_rate": 2.546271496157601e-05, + "loss": 9.966, + "step": 30710 + }, + { + "epoch": 0.15341207021398787, + "grad_norm": 0.09792211651802063, + "learning_rate": 2.5461213046634458e-05, + "loss": 9.9652, + "step": 30720 + }, + { + "epoch": 0.15346200903892732, + "grad_norm": 0.09196563065052032, + "learning_rate": 2.545971113169291e-05, + "loss": 9.9634, + "step": 30730 + }, + { + "epoch": 0.15351194786386677, + "grad_norm": 0.09375791996717453, + "learning_rate": 2.5458209216751358e-05, + "loss": 9.9615, + "step": 30740 + }, + { + "epoch": 0.15356188668880622, + "grad_norm": 0.10490723699331284, + "learning_rate": 2.5456707301809808e-05, + "loss": 9.9654, + "step": 30750 + }, + { + "epoch": 0.15361182551374566, + "grad_norm": 0.08919631689786911, + "learning_rate": 2.545520538686826e-05, + "loss": 9.9659, + "step": 30760 + }, + { + "epoch": 0.15366176433868511, + "grad_norm": 0.09268412739038467, + "learning_rate": 2.5453703471926705e-05, + "loss": 9.9595, + "step": 30770 + }, + { + "epoch": 0.15371170316362456, + "grad_norm": 0.09661845862865448, + "learning_rate": 2.545220155698516e-05, + "loss": 9.9571, + "step": 30780 + }, + { + "epoch": 0.153761641988564, + "grad_norm": 0.0946783795952797, + "learning_rate": 2.5450699642043606e-05, + "loss": 9.9624, + "step": 30790 + }, + { + "epoch": 0.15381158081350346, + "grad_norm": 0.08772801607847214, + "learning_rate": 2.5449197727102056e-05, + "loss": 9.9651, + "step": 30800 + }, + { + "epoch": 0.1538615196384429, + "grad_norm": 0.09275208413600922, + "learning_rate": 2.5447695812160506e-05, + "loss": 9.9595, + "step": 30810 + }, + { + "epoch": 0.15391145846338236, + "grad_norm": 0.09553563594818115, + "learning_rate": 2.5446193897218953e-05, + "loss": 9.9569, + "step": 30820 + }, + { + "epoch": 0.1539613972883218, + "grad_norm": 0.09462607651948929, + "learning_rate": 2.5444691982277406e-05, + "loss": 9.9615, + "step": 30830 + }, + { + "epoch": 0.15401133611326126, + "grad_norm": 0.09698895364999771, + "learning_rate": 2.5443190067335853e-05, + "loss": 9.9661, + "step": 30840 + }, + { + "epoch": 0.1540612749382007, + "grad_norm": 0.09504923969507217, + "learning_rate": 2.5441688152394303e-05, + "loss": 9.9681, + "step": 30850 + }, + { + "epoch": 0.15411121376314016, + "grad_norm": 0.10031065344810486, + "learning_rate": 2.5440186237452753e-05, + "loss": 9.9588, + "step": 30860 + }, + { + "epoch": 0.1541611525880796, + "grad_norm": 0.09581837058067322, + "learning_rate": 2.54386843225112e-05, + "loss": 9.959, + "step": 30870 + }, + { + "epoch": 0.15421109141301906, + "grad_norm": 0.09290026128292084, + "learning_rate": 2.5437182407569654e-05, + "loss": 9.9649, + "step": 30880 + }, + { + "epoch": 0.1542610302379585, + "grad_norm": 0.0961495116353035, + "learning_rate": 2.54356804926281e-05, + "loss": 9.9566, + "step": 30890 + }, + { + "epoch": 0.15431096906289796, + "grad_norm": 0.09705748409032822, + "learning_rate": 2.543417857768655e-05, + "loss": 9.9632, + "step": 30900 + }, + { + "epoch": 0.1543609078878374, + "grad_norm": 0.09860498458147049, + "learning_rate": 2.5432676662745e-05, + "loss": 9.957, + "step": 30910 + }, + { + "epoch": 0.15441084671277686, + "grad_norm": 0.09649617969989777, + "learning_rate": 2.5431174747803448e-05, + "loss": 9.9609, + "step": 30920 + }, + { + "epoch": 0.1544607855377163, + "grad_norm": 0.09514536708593369, + "learning_rate": 2.54296728328619e-05, + "loss": 9.9607, + "step": 30930 + }, + { + "epoch": 0.15451072436265575, + "grad_norm": 0.09036850929260254, + "learning_rate": 2.5428170917920348e-05, + "loss": 9.9562, + "step": 30940 + }, + { + "epoch": 0.1545606631875952, + "grad_norm": 0.09239833056926727, + "learning_rate": 2.5426669002978798e-05, + "loss": 9.9597, + "step": 30950 + }, + { + "epoch": 0.15461060201253465, + "grad_norm": 0.09781666100025177, + "learning_rate": 2.542516708803725e-05, + "loss": 9.9674, + "step": 30960 + }, + { + "epoch": 0.1546605408374741, + "grad_norm": 0.09337397664785385, + "learning_rate": 2.54236651730957e-05, + "loss": 9.9614, + "step": 30970 + }, + { + "epoch": 0.15471047966241355, + "grad_norm": 0.09049335867166519, + "learning_rate": 2.542216325815415e-05, + "loss": 9.9614, + "step": 30980 + }, + { + "epoch": 0.154760418487353, + "grad_norm": 0.09918399900197983, + "learning_rate": 2.5420661343212596e-05, + "loss": 9.9559, + "step": 30990 + }, + { + "epoch": 0.15481035731229245, + "grad_norm": 0.09200762212276459, + "learning_rate": 2.5419159428271046e-05, + "loss": 9.9508, + "step": 31000 + }, + { + "epoch": 0.1548602961372319, + "grad_norm": 0.09439615160226822, + "learning_rate": 2.5417657513329496e-05, + "loss": 9.9625, + "step": 31010 + }, + { + "epoch": 0.15491023496217135, + "grad_norm": 0.09249376505613327, + "learning_rate": 2.5416155598387946e-05, + "loss": 9.9598, + "step": 31020 + }, + { + "epoch": 0.1549601737871108, + "grad_norm": 0.09385478496551514, + "learning_rate": 2.5414653683446396e-05, + "loss": 9.9589, + "step": 31030 + }, + { + "epoch": 0.15501011261205025, + "grad_norm": 0.0966297909617424, + "learning_rate": 2.5413151768504843e-05, + "loss": 9.9642, + "step": 31040 + }, + { + "epoch": 0.1550600514369897, + "grad_norm": 0.09643752872943878, + "learning_rate": 2.5411649853563293e-05, + "loss": 9.9559, + "step": 31050 + }, + { + "epoch": 0.15510999026192915, + "grad_norm": 0.09509705007076263, + "learning_rate": 2.5410147938621743e-05, + "loss": 9.9555, + "step": 31060 + }, + { + "epoch": 0.1551599290868686, + "grad_norm": 0.09453869611024857, + "learning_rate": 2.5408646023680194e-05, + "loss": 9.9591, + "step": 31070 + }, + { + "epoch": 0.15520986791180805, + "grad_norm": 0.09096340090036392, + "learning_rate": 2.5407144108738644e-05, + "loss": 9.9628, + "step": 31080 + }, + { + "epoch": 0.1552598067367475, + "grad_norm": 0.09323681145906448, + "learning_rate": 2.540564219379709e-05, + "loss": 9.959, + "step": 31090 + }, + { + "epoch": 0.15530974556168695, + "grad_norm": 0.09170509874820709, + "learning_rate": 2.540414027885554e-05, + "loss": 9.9538, + "step": 31100 + }, + { + "epoch": 0.1553596843866264, + "grad_norm": 0.09655750542879105, + "learning_rate": 2.540263836391399e-05, + "loss": 9.9561, + "step": 31110 + }, + { + "epoch": 0.15540962321156584, + "grad_norm": 0.09387839585542679, + "learning_rate": 2.540113644897244e-05, + "loss": 9.9571, + "step": 31120 + }, + { + "epoch": 0.1554595620365053, + "grad_norm": 0.09941628575325012, + "learning_rate": 2.539963453403089e-05, + "loss": 9.9566, + "step": 31130 + }, + { + "epoch": 0.15550950086144474, + "grad_norm": 0.09646540135145187, + "learning_rate": 2.5398132619089338e-05, + "loss": 9.9543, + "step": 31140 + }, + { + "epoch": 0.15555943968638417, + "grad_norm": 0.10054589062929153, + "learning_rate": 2.5396630704147788e-05, + "loss": 9.9528, + "step": 31150 + }, + { + "epoch": 0.15560937851132361, + "grad_norm": 0.09326030313968658, + "learning_rate": 2.539512878920624e-05, + "loss": 9.962, + "step": 31160 + }, + { + "epoch": 0.15565931733626306, + "grad_norm": 0.08991136401891708, + "learning_rate": 2.539362687426469e-05, + "loss": 9.9522, + "step": 31170 + }, + { + "epoch": 0.15570925616120251, + "grad_norm": 0.09380258619785309, + "learning_rate": 2.539212495932314e-05, + "loss": 9.9584, + "step": 31180 + }, + { + "epoch": 0.15575919498614196, + "grad_norm": 0.09635492414236069, + "learning_rate": 2.5390623044381586e-05, + "loss": 9.9486, + "step": 31190 + }, + { + "epoch": 0.1558091338110814, + "grad_norm": 0.09452281892299652, + "learning_rate": 2.5389121129440036e-05, + "loss": 9.9505, + "step": 31200 + }, + { + "epoch": 0.15585907263602086, + "grad_norm": 0.09984589368104935, + "learning_rate": 2.5387619214498486e-05, + "loss": 9.9507, + "step": 31210 + }, + { + "epoch": 0.1559090114609603, + "grad_norm": 0.09607294946908951, + "learning_rate": 2.5386117299556936e-05, + "loss": 9.9526, + "step": 31220 + }, + { + "epoch": 0.15595895028589976, + "grad_norm": 0.0886487290263176, + "learning_rate": 2.5384615384615386e-05, + "loss": 9.9552, + "step": 31230 + }, + { + "epoch": 0.1560088891108392, + "grad_norm": 0.09027554839849472, + "learning_rate": 2.5383113469673833e-05, + "loss": 9.958, + "step": 31240 + }, + { + "epoch": 0.15605882793577866, + "grad_norm": 0.09098269045352936, + "learning_rate": 2.5381611554732283e-05, + "loss": 9.9465, + "step": 31250 + }, + { + "epoch": 0.1561087667607181, + "grad_norm": 0.10451994836330414, + "learning_rate": 2.5380109639790733e-05, + "loss": 9.9555, + "step": 31260 + }, + { + "epoch": 0.15615870558565756, + "grad_norm": 0.09637783467769623, + "learning_rate": 2.5378607724849184e-05, + "loss": 9.9493, + "step": 31270 + }, + { + "epoch": 0.156208644410597, + "grad_norm": 0.09026754647493362, + "learning_rate": 2.5377105809907634e-05, + "loss": 9.9526, + "step": 31280 + }, + { + "epoch": 0.15625858323553646, + "grad_norm": 0.09922222793102264, + "learning_rate": 2.5375603894966084e-05, + "loss": 9.9455, + "step": 31290 + }, + { + "epoch": 0.1563085220604759, + "grad_norm": 0.09385956078767776, + "learning_rate": 2.537410198002453e-05, + "loss": 9.9496, + "step": 31300 + }, + { + "epoch": 0.15635846088541536, + "grad_norm": 0.09471774101257324, + "learning_rate": 2.537260006508298e-05, + "loss": 9.9536, + "step": 31310 + }, + { + "epoch": 0.1564083997103548, + "grad_norm": 0.09749903529882431, + "learning_rate": 2.537109815014143e-05, + "loss": 9.9501, + "step": 31320 + }, + { + "epoch": 0.15645833853529426, + "grad_norm": 0.09855715185403824, + "learning_rate": 2.536959623519988e-05, + "loss": 9.9486, + "step": 31330 + }, + { + "epoch": 0.1565082773602337, + "grad_norm": 0.0999755784869194, + "learning_rate": 2.536809432025833e-05, + "loss": 9.9482, + "step": 31340 + }, + { + "epoch": 0.15655821618517315, + "grad_norm": 0.10329025983810425, + "learning_rate": 2.5366592405316778e-05, + "loss": 9.9535, + "step": 31350 + }, + { + "epoch": 0.1566081550101126, + "grad_norm": 0.09251412004232407, + "learning_rate": 2.536509049037523e-05, + "loss": 9.9509, + "step": 31360 + }, + { + "epoch": 0.15665809383505205, + "grad_norm": 0.09849464893341064, + "learning_rate": 2.536358857543368e-05, + "loss": 9.9476, + "step": 31370 + }, + { + "epoch": 0.1567080326599915, + "grad_norm": 0.09341175109148026, + "learning_rate": 2.536208666049213e-05, + "loss": 9.9519, + "step": 31380 + }, + { + "epoch": 0.15675797148493095, + "grad_norm": 0.09484104812145233, + "learning_rate": 2.536058474555058e-05, + "loss": 9.9555, + "step": 31390 + }, + { + "epoch": 0.1568079103098704, + "grad_norm": 0.09805377572774887, + "learning_rate": 2.5359082830609026e-05, + "loss": 9.948, + "step": 31400 + }, + { + "epoch": 0.15685784913480985, + "grad_norm": 0.09305404126644135, + "learning_rate": 2.5357580915667476e-05, + "loss": 9.95, + "step": 31410 + }, + { + "epoch": 0.1569077879597493, + "grad_norm": 0.09033924341201782, + "learning_rate": 2.5356079000725926e-05, + "loss": 9.949, + "step": 31420 + }, + { + "epoch": 0.15695772678468875, + "grad_norm": 0.094557024538517, + "learning_rate": 2.5354577085784376e-05, + "loss": 9.9521, + "step": 31430 + }, + { + "epoch": 0.1570076656096282, + "grad_norm": 0.09300687164068222, + "learning_rate": 2.5353075170842826e-05, + "loss": 9.945, + "step": 31440 + }, + { + "epoch": 0.15705760443456765, + "grad_norm": 0.09191596508026123, + "learning_rate": 2.5351573255901273e-05, + "loss": 9.948, + "step": 31450 + }, + { + "epoch": 0.1571075432595071, + "grad_norm": 0.0944577232003212, + "learning_rate": 2.5350071340959723e-05, + "loss": 9.9515, + "step": 31460 + }, + { + "epoch": 0.15715748208444655, + "grad_norm": 0.09393108636140823, + "learning_rate": 2.5348569426018174e-05, + "loss": 9.9459, + "step": 31470 + }, + { + "epoch": 0.157207420909386, + "grad_norm": 0.09408476948738098, + "learning_rate": 2.5347067511076624e-05, + "loss": 9.9547, + "step": 31480 + }, + { + "epoch": 0.15725735973432545, + "grad_norm": 0.09239377081394196, + "learning_rate": 2.5345565596135074e-05, + "loss": 9.9479, + "step": 31490 + }, + { + "epoch": 0.1573072985592649, + "grad_norm": 0.09746348112821579, + "learning_rate": 2.534406368119352e-05, + "loss": 9.9445, + "step": 31500 + }, + { + "epoch": 0.15735723738420435, + "grad_norm": 0.09604940563440323, + "learning_rate": 2.534256176625197e-05, + "loss": 9.9443, + "step": 31510 + }, + { + "epoch": 0.1574071762091438, + "grad_norm": 0.09521540254354477, + "learning_rate": 2.534105985131042e-05, + "loss": 9.9499, + "step": 31520 + }, + { + "epoch": 0.15745711503408324, + "grad_norm": 0.09604847431182861, + "learning_rate": 2.533955793636887e-05, + "loss": 9.9507, + "step": 31530 + }, + { + "epoch": 0.1575070538590227, + "grad_norm": 0.09625346958637238, + "learning_rate": 2.533805602142732e-05, + "loss": 9.9499, + "step": 31540 + }, + { + "epoch": 0.15755699268396214, + "grad_norm": 0.09137043356895447, + "learning_rate": 2.5336554106485768e-05, + "loss": 9.9387, + "step": 31550 + }, + { + "epoch": 0.1576069315089016, + "grad_norm": 0.09646125137805939, + "learning_rate": 2.533505219154422e-05, + "loss": 9.9431, + "step": 31560 + }, + { + "epoch": 0.15765687033384104, + "grad_norm": 0.0926939845085144, + "learning_rate": 2.533355027660267e-05, + "loss": 9.9423, + "step": 31570 + }, + { + "epoch": 0.1577068091587805, + "grad_norm": 0.09652858227491379, + "learning_rate": 2.533204836166112e-05, + "loss": 9.9464, + "step": 31580 + }, + { + "epoch": 0.15775674798371994, + "grad_norm": 0.09520108252763748, + "learning_rate": 2.533054644671957e-05, + "loss": 9.946, + "step": 31590 + }, + { + "epoch": 0.1578066868086594, + "grad_norm": 0.0973607674241066, + "learning_rate": 2.5329044531778016e-05, + "loss": 9.94, + "step": 31600 + }, + { + "epoch": 0.15785662563359884, + "grad_norm": 0.09493687748908997, + "learning_rate": 2.532754261683647e-05, + "loss": 9.944, + "step": 31610 + }, + { + "epoch": 0.1579065644585383, + "grad_norm": 0.09770392626523972, + "learning_rate": 2.5326040701894916e-05, + "loss": 9.9393, + "step": 31620 + }, + { + "epoch": 0.15795650328347774, + "grad_norm": 0.09622366726398468, + "learning_rate": 2.5324538786953366e-05, + "loss": 9.9456, + "step": 31630 + }, + { + "epoch": 0.1580064421084172, + "grad_norm": 0.09520258009433746, + "learning_rate": 2.5323036872011817e-05, + "loss": 9.9408, + "step": 31640 + }, + { + "epoch": 0.15805638093335664, + "grad_norm": 0.09456585347652435, + "learning_rate": 2.5321534957070263e-05, + "loss": 9.9386, + "step": 31650 + }, + { + "epoch": 0.1581063197582961, + "grad_norm": 0.09494321048259735, + "learning_rate": 2.5320033042128717e-05, + "loss": 9.9401, + "step": 31660 + }, + { + "epoch": 0.15815625858323554, + "grad_norm": 0.09259496629238129, + "learning_rate": 2.5318531127187164e-05, + "loss": 9.9453, + "step": 31670 + }, + { + "epoch": 0.158206197408175, + "grad_norm": 0.0940915197134018, + "learning_rate": 2.5317029212245614e-05, + "loss": 9.95, + "step": 31680 + }, + { + "epoch": 0.15825613623311444, + "grad_norm": 0.09100774675607681, + "learning_rate": 2.5315527297304064e-05, + "loss": 9.9416, + "step": 31690 + }, + { + "epoch": 0.15830607505805389, + "grad_norm": 0.09511730074882507, + "learning_rate": 2.531402538236251e-05, + "loss": 9.9399, + "step": 31700 + }, + { + "epoch": 0.15835601388299334, + "grad_norm": 0.0985988974571228, + "learning_rate": 2.5312523467420964e-05, + "loss": 11.4693, + "step": 31710 + }, + { + "epoch": 0.15840595270793278, + "grad_norm": 0.09434106200933456, + "learning_rate": 2.531102155247941e-05, + "loss": 20.0531, + "step": 31720 + }, + { + "epoch": 0.15845589153287223, + "grad_norm": 0.0996764674782753, + "learning_rate": 2.530951963753786e-05, + "loss": 9.9376, + "step": 31730 + }, + { + "epoch": 0.15850583035781168, + "grad_norm": 0.09661601483821869, + "learning_rate": 2.530801772259631e-05, + "loss": 13.1668, + "step": 31740 + }, + { + "epoch": 0.15855576918275113, + "grad_norm": 0.09104971587657928, + "learning_rate": 2.530651580765476e-05, + "loss": 10.18, + "step": 31750 + }, + { + "epoch": 0.15860570800769058, + "grad_norm": 0.09955008327960968, + "learning_rate": 2.5305013892713212e-05, + "loss": 9.9397, + "step": 31760 + }, + { + "epoch": 0.15865564683263003, + "grad_norm": 0.09806697070598602, + "learning_rate": 2.530351197777166e-05, + "loss": 9.9451, + "step": 31770 + }, + { + "epoch": 0.15870558565756948, + "grad_norm": 0.09614180773496628, + "learning_rate": 2.530201006283011e-05, + "loss": 9.9402, + "step": 31780 + }, + { + "epoch": 0.15875552448250893, + "grad_norm": 0.09188991039991379, + "learning_rate": 2.530050814788856e-05, + "loss": 9.9389, + "step": 31790 + }, + { + "epoch": 0.15880546330744838, + "grad_norm": 0.0986422747373581, + "learning_rate": 2.5299006232947006e-05, + "loss": 9.9408, + "step": 31800 + }, + { + "epoch": 0.15885540213238783, + "grad_norm": 0.09337294101715088, + "learning_rate": 2.529750431800546e-05, + "loss": 9.9416, + "step": 31810 + }, + { + "epoch": 0.15890534095732728, + "grad_norm": 0.09738942235708237, + "learning_rate": 2.5296002403063906e-05, + "loss": 9.9414, + "step": 31820 + }, + { + "epoch": 0.15895527978226673, + "grad_norm": 0.09532233327627182, + "learning_rate": 2.5294500488122356e-05, + "loss": 9.9396, + "step": 31830 + }, + { + "epoch": 0.15900521860720618, + "grad_norm": 0.09321427345275879, + "learning_rate": 2.5292998573180807e-05, + "loss": 9.9449, + "step": 31840 + }, + { + "epoch": 0.15905515743214563, + "grad_norm": 0.09735007584095001, + "learning_rate": 2.5291496658239253e-05, + "loss": 9.9388, + "step": 31850 + }, + { + "epoch": 0.15910509625708508, + "grad_norm": 0.09496832638978958, + "learning_rate": 2.5289994743297707e-05, + "loss": 9.9346, + "step": 31860 + }, + { + "epoch": 0.15915503508202453, + "grad_norm": 0.09069550782442093, + "learning_rate": 2.5288492828356154e-05, + "loss": 9.9392, + "step": 31870 + }, + { + "epoch": 0.15920497390696398, + "grad_norm": 0.09822098165750504, + "learning_rate": 2.5286990913414604e-05, + "loss": 9.9366, + "step": 31880 + }, + { + "epoch": 0.15925491273190343, + "grad_norm": 0.09519645571708679, + "learning_rate": 2.5285488998473054e-05, + "loss": 9.9355, + "step": 31890 + }, + { + "epoch": 0.15930485155684287, + "grad_norm": 0.09621410071849823, + "learning_rate": 2.52839870835315e-05, + "loss": 9.9392, + "step": 31900 + }, + { + "epoch": 0.15935479038178232, + "grad_norm": 0.0956474095582962, + "learning_rate": 2.5282485168589954e-05, + "loss": 9.9361, + "step": 31910 + }, + { + "epoch": 0.15940472920672177, + "grad_norm": 0.09148707240819931, + "learning_rate": 2.52809832536484e-05, + "loss": 9.9364, + "step": 31920 + }, + { + "epoch": 0.15945466803166122, + "grad_norm": 0.09263239800930023, + "learning_rate": 2.5279481338706855e-05, + "loss": 9.9372, + "step": 31930 + }, + { + "epoch": 0.15950460685660067, + "grad_norm": 0.09924571961164474, + "learning_rate": 2.52779794237653e-05, + "loss": 9.9365, + "step": 31940 + }, + { + "epoch": 0.15955454568154012, + "grad_norm": 0.09445744752883911, + "learning_rate": 2.527647750882375e-05, + "loss": 9.9395, + "step": 31950 + }, + { + "epoch": 0.15960448450647957, + "grad_norm": 0.09523255378007889, + "learning_rate": 2.5274975593882202e-05, + "loss": 9.9367, + "step": 31960 + }, + { + "epoch": 0.15965442333141902, + "grad_norm": 0.09308159351348877, + "learning_rate": 2.527347367894065e-05, + "loss": 9.9337, + "step": 31970 + }, + { + "epoch": 0.15970436215635847, + "grad_norm": 0.09193790704011917, + "learning_rate": 2.5271971763999102e-05, + "loss": 9.9315, + "step": 31980 + }, + { + "epoch": 0.15975430098129792, + "grad_norm": 0.09079098701477051, + "learning_rate": 2.527046984905755e-05, + "loss": 9.942, + "step": 31990 + }, + { + "epoch": 0.15980423980623737, + "grad_norm": 0.09207965433597565, + "learning_rate": 2.5268967934115996e-05, + "loss": 9.9348, + "step": 32000 + }, + { + "epoch": 0.15985417863117682, + "grad_norm": 0.09450741112232208, + "learning_rate": 2.526746601917445e-05, + "loss": 9.938, + "step": 32010 + }, + { + "epoch": 0.15990411745611627, + "grad_norm": 0.08837747573852539, + "learning_rate": 2.5265964104232896e-05, + "loss": 9.9355, + "step": 32020 + }, + { + "epoch": 0.15995405628105572, + "grad_norm": 0.09545224159955978, + "learning_rate": 2.526446218929135e-05, + "loss": 9.9378, + "step": 32030 + }, + { + "epoch": 0.16000399510599517, + "grad_norm": 0.09526308625936508, + "learning_rate": 2.5262960274349797e-05, + "loss": 9.9396, + "step": 32040 + }, + { + "epoch": 0.16005393393093462, + "grad_norm": 0.09556493163108826, + "learning_rate": 2.5261458359408243e-05, + "loss": 9.929, + "step": 32050 + }, + { + "epoch": 0.16010387275587407, + "grad_norm": 0.09006509184837341, + "learning_rate": 2.5259956444466697e-05, + "loss": 9.9293, + "step": 32060 + }, + { + "epoch": 0.16015381158081352, + "grad_norm": 0.09654060006141663, + "learning_rate": 2.5258454529525144e-05, + "loss": 9.9321, + "step": 32070 + }, + { + "epoch": 0.16020375040575296, + "grad_norm": 0.09235110133886337, + "learning_rate": 2.5256952614583597e-05, + "loss": 9.9315, + "step": 32080 + }, + { + "epoch": 0.16025368923069241, + "grad_norm": 0.09377003461122513, + "learning_rate": 2.5255450699642044e-05, + "loss": 9.9327, + "step": 32090 + }, + { + "epoch": 0.16030362805563186, + "grad_norm": 0.0920942947268486, + "learning_rate": 2.525394878470049e-05, + "loss": 9.93, + "step": 32100 + }, + { + "epoch": 0.1603535668805713, + "grad_norm": 0.09704404324293137, + "learning_rate": 2.5252446869758944e-05, + "loss": 9.9293, + "step": 32110 + }, + { + "epoch": 0.16040350570551076, + "grad_norm": 0.09537354111671448, + "learning_rate": 2.525094495481739e-05, + "loss": 9.9361, + "step": 32120 + }, + { + "epoch": 0.1604534445304502, + "grad_norm": 0.09306449443101883, + "learning_rate": 2.5249443039875845e-05, + "loss": 9.9365, + "step": 32130 + }, + { + "epoch": 0.16050338335538963, + "grad_norm": 0.09331690520048141, + "learning_rate": 2.524794112493429e-05, + "loss": 9.9333, + "step": 32140 + }, + { + "epoch": 0.16055332218032908, + "grad_norm": 0.09822415560483932, + "learning_rate": 2.524643920999274e-05, + "loss": 9.9411, + "step": 32150 + }, + { + "epoch": 0.16060326100526853, + "grad_norm": 0.0978788435459137, + "learning_rate": 2.5244937295051192e-05, + "loss": 9.9302, + "step": 32160 + }, + { + "epoch": 0.16065319983020798, + "grad_norm": 0.09318576008081436, + "learning_rate": 2.524343538010964e-05, + "loss": 9.9322, + "step": 32170 + }, + { + "epoch": 0.16070313865514743, + "grad_norm": 0.09461916238069534, + "learning_rate": 2.5241933465168092e-05, + "loss": 9.9289, + "step": 32180 + }, + { + "epoch": 0.16075307748008688, + "grad_norm": 0.08854075521230698, + "learning_rate": 2.524043155022654e-05, + "loss": 9.9322, + "step": 32190 + }, + { + "epoch": 0.16080301630502633, + "grad_norm": 0.09415885806083679, + "learning_rate": 2.5238929635284986e-05, + "loss": 9.9298, + "step": 32200 + }, + { + "epoch": 0.16085295512996578, + "grad_norm": 0.09000342339277267, + "learning_rate": 2.523742772034344e-05, + "loss": 9.9248, + "step": 32210 + }, + { + "epoch": 0.16090289395490523, + "grad_norm": 0.09261956810951233, + "learning_rate": 2.5235925805401886e-05, + "loss": 9.9282, + "step": 32220 + }, + { + "epoch": 0.16095283277984468, + "grad_norm": 0.09217025339603424, + "learning_rate": 2.523442389046034e-05, + "loss": 9.9289, + "step": 32230 + }, + { + "epoch": 0.16100277160478413, + "grad_norm": 0.09619983285665512, + "learning_rate": 2.5232921975518787e-05, + "loss": 9.9366, + "step": 32240 + }, + { + "epoch": 0.16105271042972358, + "grad_norm": 0.0968618094921112, + "learning_rate": 2.5231420060577237e-05, + "loss": 9.9347, + "step": 32250 + }, + { + "epoch": 0.16110264925466303, + "grad_norm": 0.09648020565509796, + "learning_rate": 2.5229918145635687e-05, + "loss": 9.929, + "step": 32260 + }, + { + "epoch": 0.16115258807960248, + "grad_norm": 0.09538908302783966, + "learning_rate": 2.5228416230694134e-05, + "loss": 9.9289, + "step": 32270 + }, + { + "epoch": 0.16120252690454193, + "grad_norm": 0.09745009243488312, + "learning_rate": 2.5226914315752587e-05, + "loss": 9.9325, + "step": 32280 + }, + { + "epoch": 0.16125246572948138, + "grad_norm": 0.09166844934225082, + "learning_rate": 2.5225412400811034e-05, + "loss": 9.9295, + "step": 32290 + }, + { + "epoch": 0.16130240455442083, + "grad_norm": 0.09489347785711288, + "learning_rate": 2.5223910485869484e-05, + "loss": 9.9295, + "step": 32300 + }, + { + "epoch": 0.16135234337936027, + "grad_norm": 0.09680917114019394, + "learning_rate": 2.5222408570927934e-05, + "loss": 9.926, + "step": 32310 + }, + { + "epoch": 0.16140228220429972, + "grad_norm": 0.08917848020792007, + "learning_rate": 2.522090665598638e-05, + "loss": 9.9332, + "step": 32320 + }, + { + "epoch": 0.16145222102923917, + "grad_norm": 0.0952332615852356, + "learning_rate": 2.5219404741044835e-05, + "loss": 9.922, + "step": 32330 + }, + { + "epoch": 0.16150215985417862, + "grad_norm": 0.095283642411232, + "learning_rate": 2.521790282610328e-05, + "loss": 9.9313, + "step": 32340 + }, + { + "epoch": 0.16155209867911807, + "grad_norm": 0.09335513412952423, + "learning_rate": 2.5216400911161732e-05, + "loss": 9.9285, + "step": 32350 + }, + { + "epoch": 0.16160203750405752, + "grad_norm": 0.09106207638978958, + "learning_rate": 2.5214898996220182e-05, + "loss": 9.9263, + "step": 32360 + }, + { + "epoch": 0.16165197632899697, + "grad_norm": 0.08792679011821747, + "learning_rate": 2.521339708127863e-05, + "loss": 9.9225, + "step": 32370 + }, + { + "epoch": 0.16170191515393642, + "grad_norm": 0.08863825350999832, + "learning_rate": 2.5211895166337082e-05, + "loss": 9.9298, + "step": 32380 + }, + { + "epoch": 0.16175185397887587, + "grad_norm": 0.08740777522325516, + "learning_rate": 2.521039325139553e-05, + "loss": 9.9241, + "step": 32390 + }, + { + "epoch": 0.16180179280381532, + "grad_norm": 0.10181976854801178, + "learning_rate": 2.520889133645398e-05, + "loss": 9.9275, + "step": 32400 + }, + { + "epoch": 0.16185173162875477, + "grad_norm": 0.09686804562807083, + "learning_rate": 2.520738942151243e-05, + "loss": 9.9219, + "step": 32410 + }, + { + "epoch": 0.16190167045369422, + "grad_norm": 0.09239942580461502, + "learning_rate": 2.5205887506570876e-05, + "loss": 9.9238, + "step": 32420 + }, + { + "epoch": 0.16195160927863367, + "grad_norm": 0.09450032562017441, + "learning_rate": 2.520438559162933e-05, + "loss": 9.9273, + "step": 32430 + }, + { + "epoch": 0.16200154810357312, + "grad_norm": 0.09564870595932007, + "learning_rate": 2.5202883676687777e-05, + "loss": 9.919, + "step": 32440 + }, + { + "epoch": 0.16205148692851257, + "grad_norm": 0.09656788408756256, + "learning_rate": 2.5201381761746227e-05, + "loss": 9.9207, + "step": 32450 + }, + { + "epoch": 0.16210142575345202, + "grad_norm": 0.09183277189731598, + "learning_rate": 2.5199879846804677e-05, + "loss": 9.928, + "step": 32460 + }, + { + "epoch": 0.16215136457839147, + "grad_norm": 0.09268153458833694, + "learning_rate": 2.5198377931863124e-05, + "loss": 9.9258, + "step": 32470 + }, + { + "epoch": 0.16220130340333092, + "grad_norm": 0.09812065958976746, + "learning_rate": 2.5196876016921577e-05, + "loss": 9.9289, + "step": 32480 + }, + { + "epoch": 0.16225124222827036, + "grad_norm": 0.09496765583753586, + "learning_rate": 2.5195374101980024e-05, + "loss": 9.9278, + "step": 32490 + }, + { + "epoch": 0.16230118105320981, + "grad_norm": 0.09568601101636887, + "learning_rate": 2.5193872187038474e-05, + "loss": 9.9279, + "step": 32500 + }, + { + "epoch": 0.16235111987814926, + "grad_norm": 0.0902891531586647, + "learning_rate": 2.5192370272096924e-05, + "loss": 9.9261, + "step": 32510 + }, + { + "epoch": 0.1624010587030887, + "grad_norm": 0.09505823254585266, + "learning_rate": 2.519086835715537e-05, + "loss": 9.921, + "step": 32520 + }, + { + "epoch": 0.16245099752802816, + "grad_norm": 0.09195969253778458, + "learning_rate": 2.5189366442213825e-05, + "loss": 9.925, + "step": 32530 + }, + { + "epoch": 0.1625009363529676, + "grad_norm": 0.09071581065654755, + "learning_rate": 2.518786452727227e-05, + "loss": 9.9256, + "step": 32540 + }, + { + "epoch": 0.16255087517790706, + "grad_norm": 0.0933861956000328, + "learning_rate": 2.5186362612330722e-05, + "loss": 9.9248, + "step": 32550 + }, + { + "epoch": 0.1626008140028465, + "grad_norm": 0.09781572967767715, + "learning_rate": 2.5184860697389172e-05, + "loss": 9.9215, + "step": 32560 + }, + { + "epoch": 0.16265075282778596, + "grad_norm": 0.09224456548690796, + "learning_rate": 2.5183358782447622e-05, + "loss": 9.925, + "step": 32570 + }, + { + "epoch": 0.1627006916527254, + "grad_norm": 0.09544172137975693, + "learning_rate": 2.5181856867506072e-05, + "loss": 9.9167, + "step": 32580 + }, + { + "epoch": 0.16275063047766486, + "grad_norm": 0.09155009686946869, + "learning_rate": 2.518035495256452e-05, + "loss": 9.9184, + "step": 32590 + }, + { + "epoch": 0.1628005693026043, + "grad_norm": 0.10186931490898132, + "learning_rate": 2.517885303762297e-05, + "loss": 9.9262, + "step": 32600 + }, + { + "epoch": 0.16285050812754376, + "grad_norm": 0.09243344515562057, + "learning_rate": 2.517735112268142e-05, + "loss": 9.9216, + "step": 32610 + }, + { + "epoch": 0.1629004469524832, + "grad_norm": 0.08900917321443558, + "learning_rate": 2.517584920773987e-05, + "loss": 9.9274, + "step": 32620 + }, + { + "epoch": 0.16295038577742266, + "grad_norm": 0.09651615470647812, + "learning_rate": 2.517434729279832e-05, + "loss": 9.9232, + "step": 32630 + }, + { + "epoch": 0.1630003246023621, + "grad_norm": 0.09579268842935562, + "learning_rate": 2.5172845377856767e-05, + "loss": 9.9149, + "step": 32640 + }, + { + "epoch": 0.16305026342730156, + "grad_norm": 0.09731176495552063, + "learning_rate": 2.5171343462915217e-05, + "loss": 9.9206, + "step": 32650 + }, + { + "epoch": 0.163100202252241, + "grad_norm": 0.10048079490661621, + "learning_rate": 2.5169841547973667e-05, + "loss": 9.9182, + "step": 32660 + }, + { + "epoch": 0.16315014107718045, + "grad_norm": 0.0903536006808281, + "learning_rate": 2.5168339633032117e-05, + "loss": 9.9161, + "step": 32670 + }, + { + "epoch": 0.1632000799021199, + "grad_norm": 0.096148781478405, + "learning_rate": 2.5166837718090567e-05, + "loss": 9.9185, + "step": 32680 + }, + { + "epoch": 0.16325001872705935, + "grad_norm": 0.09729062020778656, + "learning_rate": 2.5165335803149014e-05, + "loss": 9.9234, + "step": 32690 + }, + { + "epoch": 0.1632999575519988, + "grad_norm": 0.09655334800481796, + "learning_rate": 2.5163833888207464e-05, + "loss": 9.9228, + "step": 32700 + }, + { + "epoch": 0.16334989637693825, + "grad_norm": 0.09325189888477325, + "learning_rate": 2.5162331973265914e-05, + "loss": 9.9221, + "step": 32710 + }, + { + "epoch": 0.1633998352018777, + "grad_norm": 0.09188661724328995, + "learning_rate": 2.5160830058324365e-05, + "loss": 9.9184, + "step": 32720 + }, + { + "epoch": 0.16344977402681715, + "grad_norm": 0.08944937586784363, + "learning_rate": 2.5159328143382815e-05, + "loss": 9.9146, + "step": 32730 + }, + { + "epoch": 0.1634997128517566, + "grad_norm": 0.09515637904405594, + "learning_rate": 2.515782622844126e-05, + "loss": 9.9245, + "step": 32740 + }, + { + "epoch": 0.16354965167669605, + "grad_norm": 0.09636104851961136, + "learning_rate": 2.5156324313499712e-05, + "loss": 9.9186, + "step": 32750 + }, + { + "epoch": 0.1635995905016355, + "grad_norm": 0.09423839300870895, + "learning_rate": 2.5154822398558162e-05, + "loss": 9.9193, + "step": 32760 + }, + { + "epoch": 0.16364952932657495, + "grad_norm": 0.09561903774738312, + "learning_rate": 2.5153320483616612e-05, + "loss": 9.9095, + "step": 32770 + }, + { + "epoch": 0.1636994681515144, + "grad_norm": 0.0979316234588623, + "learning_rate": 2.5151818568675062e-05, + "loss": 9.9165, + "step": 32780 + }, + { + "epoch": 0.16374940697645385, + "grad_norm": 0.09158878028392792, + "learning_rate": 2.515031665373351e-05, + "loss": 9.9188, + "step": 32790 + }, + { + "epoch": 0.1637993458013933, + "grad_norm": 0.09471914172172546, + "learning_rate": 2.514881473879196e-05, + "loss": 9.9175, + "step": 32800 + }, + { + "epoch": 0.16384928462633275, + "grad_norm": 0.08805624395608902, + "learning_rate": 2.514731282385041e-05, + "loss": 9.9205, + "step": 32810 + }, + { + "epoch": 0.1638992234512722, + "grad_norm": 0.09481382369995117, + "learning_rate": 2.514581090890886e-05, + "loss": 9.9162, + "step": 32820 + }, + { + "epoch": 0.16394916227621165, + "grad_norm": 0.09198730438947678, + "learning_rate": 2.514430899396731e-05, + "loss": 9.9177, + "step": 32830 + }, + { + "epoch": 0.1639991011011511, + "grad_norm": 0.09711703658103943, + "learning_rate": 2.5142807079025757e-05, + "loss": 9.9141, + "step": 32840 + }, + { + "epoch": 0.16404903992609055, + "grad_norm": 0.09685643017292023, + "learning_rate": 2.5141305164084207e-05, + "loss": 9.9108, + "step": 32850 + }, + { + "epoch": 0.16409897875103, + "grad_norm": 0.09421391785144806, + "learning_rate": 2.5139803249142657e-05, + "loss": 9.9192, + "step": 32860 + }, + { + "epoch": 0.16414891757596944, + "grad_norm": 0.09753596037626266, + "learning_rate": 2.5138301334201107e-05, + "loss": 9.9174, + "step": 32870 + }, + { + "epoch": 0.1641988564009089, + "grad_norm": 0.09626904875040054, + "learning_rate": 2.5136799419259557e-05, + "loss": 9.9172, + "step": 32880 + }, + { + "epoch": 0.16424879522584834, + "grad_norm": 0.0984250158071518, + "learning_rate": 2.5135297504318004e-05, + "loss": 9.9117, + "step": 32890 + }, + { + "epoch": 0.1642987340507878, + "grad_norm": 0.09708181023597717, + "learning_rate": 2.5133795589376454e-05, + "loss": 9.9127, + "step": 32900 + }, + { + "epoch": 0.16434867287572724, + "grad_norm": 0.09856320172548294, + "learning_rate": 2.5132293674434904e-05, + "loss": 9.9167, + "step": 32910 + }, + { + "epoch": 0.1643986117006667, + "grad_norm": 0.09314543008804321, + "learning_rate": 2.5130791759493355e-05, + "loss": 9.9153, + "step": 32920 + }, + { + "epoch": 0.16444855052560614, + "grad_norm": 0.09600864350795746, + "learning_rate": 2.5129289844551805e-05, + "loss": 9.9172, + "step": 32930 + }, + { + "epoch": 0.1644984893505456, + "grad_norm": 0.09382302314043045, + "learning_rate": 2.5127787929610255e-05, + "loss": 9.9149, + "step": 32940 + }, + { + "epoch": 0.16454842817548504, + "grad_norm": 0.09613067656755447, + "learning_rate": 2.5126286014668702e-05, + "loss": 9.9112, + "step": 32950 + }, + { + "epoch": 0.1645983670004245, + "grad_norm": 0.09294107556343079, + "learning_rate": 2.5124784099727152e-05, + "loss": 9.9157, + "step": 32960 + }, + { + "epoch": 0.16464830582536394, + "grad_norm": 0.09056387096643448, + "learning_rate": 2.5123282184785602e-05, + "loss": 9.9145, + "step": 32970 + }, + { + "epoch": 0.1646982446503034, + "grad_norm": 0.09541086107492447, + "learning_rate": 2.5121780269844052e-05, + "loss": 9.9074, + "step": 32980 + }, + { + "epoch": 0.16474818347524284, + "grad_norm": 0.09487113356590271, + "learning_rate": 2.5120278354902502e-05, + "loss": 9.9122, + "step": 32990 + }, + { + "epoch": 0.1647981223001823, + "grad_norm": 0.0954999253153801, + "learning_rate": 2.511877643996095e-05, + "loss": 9.9143, + "step": 33000 + }, + { + "epoch": 0.16484806112512174, + "grad_norm": 0.09315845370292664, + "learning_rate": 2.51172745250194e-05, + "loss": 9.9189, + "step": 33010 + }, + { + "epoch": 0.16489799995006119, + "grad_norm": 0.09187664091587067, + "learning_rate": 2.511577261007785e-05, + "loss": 9.9106, + "step": 33020 + }, + { + "epoch": 0.16494793877500064, + "grad_norm": 0.09694460779428482, + "learning_rate": 2.51142706951363e-05, + "loss": 9.9149, + "step": 33030 + }, + { + "epoch": 0.16499787759994008, + "grad_norm": 0.08832021057605743, + "learning_rate": 2.511276878019475e-05, + "loss": 9.911, + "step": 33040 + }, + { + "epoch": 0.16504781642487953, + "grad_norm": 0.09793966263532639, + "learning_rate": 2.5111266865253197e-05, + "loss": 9.9152, + "step": 33050 + }, + { + "epoch": 0.16509775524981898, + "grad_norm": 0.09660662710666656, + "learning_rate": 2.5109764950311647e-05, + "loss": 9.9142, + "step": 33060 + }, + { + "epoch": 0.16514769407475843, + "grad_norm": 0.09192895144224167, + "learning_rate": 2.5108263035370097e-05, + "loss": 9.9064, + "step": 33070 + }, + { + "epoch": 0.16519763289969788, + "grad_norm": 0.09510926902294159, + "learning_rate": 2.5106761120428547e-05, + "loss": 9.9095, + "step": 33080 + }, + { + "epoch": 0.16524757172463733, + "grad_norm": 0.09554640203714371, + "learning_rate": 2.5105259205486998e-05, + "loss": 9.9071, + "step": 33090 + }, + { + "epoch": 0.16529751054957678, + "grad_norm": 0.0876171737909317, + "learning_rate": 2.5103757290545448e-05, + "loss": 9.9134, + "step": 33100 + }, + { + "epoch": 0.16534744937451623, + "grad_norm": 0.09115521609783173, + "learning_rate": 2.5102255375603894e-05, + "loss": 9.9153, + "step": 33110 + }, + { + "epoch": 0.16539738819945568, + "grad_norm": 0.09953109920024872, + "learning_rate": 2.5100753460662345e-05, + "loss": 9.9073, + "step": 33120 + }, + { + "epoch": 0.1654473270243951, + "grad_norm": 0.09405183792114258, + "learning_rate": 2.5099251545720795e-05, + "loss": 9.906, + "step": 33130 + }, + { + "epoch": 0.16549726584933455, + "grad_norm": 0.09410317987203598, + "learning_rate": 2.5097749630779245e-05, + "loss": 9.9176, + "step": 33140 + }, + { + "epoch": 0.165547204674274, + "grad_norm": 0.09550243616104126, + "learning_rate": 2.5096247715837695e-05, + "loss": 9.9054, + "step": 33150 + }, + { + "epoch": 0.16559714349921345, + "grad_norm": 0.09511136263608932, + "learning_rate": 2.5094745800896142e-05, + "loss": 9.917, + "step": 33160 + }, + { + "epoch": 0.1656470823241529, + "grad_norm": 0.09608213603496552, + "learning_rate": 2.5093243885954592e-05, + "loss": 9.9164, + "step": 33170 + }, + { + "epoch": 0.16569702114909235, + "grad_norm": 0.0949927344918251, + "learning_rate": 2.5091741971013042e-05, + "loss": 9.9107, + "step": 33180 + }, + { + "epoch": 0.1657469599740318, + "grad_norm": 0.09686841070652008, + "learning_rate": 2.5090240056071493e-05, + "loss": 9.9089, + "step": 33190 + }, + { + "epoch": 0.16579689879897125, + "grad_norm": 0.09343890100717545, + "learning_rate": 2.5088738141129943e-05, + "loss": 9.9128, + "step": 33200 + }, + { + "epoch": 0.1658468376239107, + "grad_norm": 0.09721093624830246, + "learning_rate": 2.508723622618839e-05, + "loss": 9.9105, + "step": 33210 + }, + { + "epoch": 0.16589677644885015, + "grad_norm": 0.0927630066871643, + "learning_rate": 2.508573431124684e-05, + "loss": 9.9052, + "step": 33220 + }, + { + "epoch": 0.1659467152737896, + "grad_norm": 0.09124623984098434, + "learning_rate": 2.508423239630529e-05, + "loss": 9.9141, + "step": 33230 + }, + { + "epoch": 0.16599665409872905, + "grad_norm": 0.10382476449012756, + "learning_rate": 2.508273048136374e-05, + "loss": 9.9072, + "step": 33240 + }, + { + "epoch": 0.1660465929236685, + "grad_norm": 0.09035416692495346, + "learning_rate": 2.508122856642219e-05, + "loss": 9.9108, + "step": 33250 + }, + { + "epoch": 0.16609653174860795, + "grad_norm": 0.0920075997710228, + "learning_rate": 2.507972665148064e-05, + "loss": 9.9083, + "step": 33260 + }, + { + "epoch": 0.1661464705735474, + "grad_norm": 0.09175487607717514, + "learning_rate": 2.5078224736539087e-05, + "loss": 9.9091, + "step": 33270 + }, + { + "epoch": 0.16619640939848684, + "grad_norm": 0.09441912174224854, + "learning_rate": 2.5076722821597537e-05, + "loss": 9.9066, + "step": 33280 + }, + { + "epoch": 0.1662463482234263, + "grad_norm": 0.09477470815181732, + "learning_rate": 2.5075220906655988e-05, + "loss": 9.9087, + "step": 33290 + }, + { + "epoch": 0.16629628704836574, + "grad_norm": 0.08985955268144608, + "learning_rate": 2.5073718991714438e-05, + "loss": 9.9046, + "step": 33300 + }, + { + "epoch": 0.1663462258733052, + "grad_norm": 0.0965331420302391, + "learning_rate": 2.5072217076772888e-05, + "loss": 9.9071, + "step": 33310 + }, + { + "epoch": 0.16639616469824464, + "grad_norm": 0.0963759496808052, + "learning_rate": 2.5070715161831335e-05, + "loss": 9.9057, + "step": 33320 + }, + { + "epoch": 0.1664461035231841, + "grad_norm": 0.097702257335186, + "learning_rate": 2.5069213246889785e-05, + "loss": 9.9115, + "step": 33330 + }, + { + "epoch": 0.16649604234812354, + "grad_norm": 0.09388408064842224, + "learning_rate": 2.5067711331948235e-05, + "loss": 9.9045, + "step": 33340 + }, + { + "epoch": 0.166545981173063, + "grad_norm": 0.09655765444040298, + "learning_rate": 2.5066209417006685e-05, + "loss": 9.9053, + "step": 33350 + }, + { + "epoch": 0.16659591999800244, + "grad_norm": 0.09741511940956116, + "learning_rate": 2.5064707502065135e-05, + "loss": 9.9012, + "step": 33360 + }, + { + "epoch": 0.1666458588229419, + "grad_norm": 0.09620445966720581, + "learning_rate": 2.5063205587123582e-05, + "loss": 9.8977, + "step": 33370 + }, + { + "epoch": 0.16669579764788134, + "grad_norm": 0.09696647524833679, + "learning_rate": 2.5061703672182032e-05, + "loss": 9.9067, + "step": 33380 + }, + { + "epoch": 0.1667457364728208, + "grad_norm": 0.09665276110172272, + "learning_rate": 2.5060201757240483e-05, + "loss": 9.8977, + "step": 33390 + }, + { + "epoch": 0.16679567529776024, + "grad_norm": 0.09218195825815201, + "learning_rate": 2.5058699842298933e-05, + "loss": 9.9092, + "step": 33400 + }, + { + "epoch": 0.1668456141226997, + "grad_norm": 0.09188750386238098, + "learning_rate": 2.5057197927357383e-05, + "loss": 9.9054, + "step": 33410 + }, + { + "epoch": 0.16689555294763914, + "grad_norm": 0.09387121349573135, + "learning_rate": 2.505569601241583e-05, + "loss": 9.9054, + "step": 33420 + }, + { + "epoch": 0.16694549177257859, + "grad_norm": 0.09394029527902603, + "learning_rate": 2.505419409747428e-05, + "loss": 9.8994, + "step": 33430 + }, + { + "epoch": 0.16699543059751804, + "grad_norm": 0.09561529755592346, + "learning_rate": 2.505269218253273e-05, + "loss": 9.9005, + "step": 33440 + }, + { + "epoch": 0.16704536942245748, + "grad_norm": 0.09472378343343735, + "learning_rate": 2.505119026759118e-05, + "loss": 9.8989, + "step": 33450 + }, + { + "epoch": 0.16709530824739693, + "grad_norm": 0.09782572835683823, + "learning_rate": 2.504968835264963e-05, + "loss": 9.8989, + "step": 33460 + }, + { + "epoch": 0.16714524707233638, + "grad_norm": 0.0978260487318039, + "learning_rate": 2.5048186437708077e-05, + "loss": 9.9044, + "step": 33470 + }, + { + "epoch": 0.16719518589727583, + "grad_norm": 0.09694740176200867, + "learning_rate": 2.5046684522766527e-05, + "loss": 9.9096, + "step": 33480 + }, + { + "epoch": 0.16724512472221528, + "grad_norm": 0.0943945124745369, + "learning_rate": 2.5045182607824978e-05, + "loss": 9.9032, + "step": 33490 + }, + { + "epoch": 0.16729506354715473, + "grad_norm": 0.09394306689500809, + "learning_rate": 2.5043680692883428e-05, + "loss": 9.8996, + "step": 33500 + }, + { + "epoch": 0.16734500237209418, + "grad_norm": 0.09680566936731339, + "learning_rate": 2.5042178777941878e-05, + "loss": 9.8961, + "step": 33510 + }, + { + "epoch": 0.16739494119703363, + "grad_norm": 0.09609083831310272, + "learning_rate": 2.5040676863000325e-05, + "loss": 9.9013, + "step": 33520 + }, + { + "epoch": 0.16744488002197308, + "grad_norm": 0.09596184641122818, + "learning_rate": 2.5039174948058775e-05, + "loss": 9.9, + "step": 33530 + }, + { + "epoch": 0.16749481884691253, + "grad_norm": 0.09482643753290176, + "learning_rate": 2.5037673033117225e-05, + "loss": 9.8908, + "step": 33540 + }, + { + "epoch": 0.16754475767185198, + "grad_norm": 0.09858851134777069, + "learning_rate": 2.5036171118175675e-05, + "loss": 9.8957, + "step": 33550 + }, + { + "epoch": 0.16759469649679143, + "grad_norm": 0.09529074281454086, + "learning_rate": 2.5034669203234125e-05, + "loss": 9.8975, + "step": 33560 + }, + { + "epoch": 0.16764463532173088, + "grad_norm": 0.09430502355098724, + "learning_rate": 2.5033167288292572e-05, + "loss": 9.9093, + "step": 33570 + }, + { + "epoch": 0.16769457414667033, + "grad_norm": 0.09423274546861649, + "learning_rate": 2.5031665373351026e-05, + "loss": 9.9008, + "step": 33580 + }, + { + "epoch": 0.16774451297160978, + "grad_norm": 0.09213889390230179, + "learning_rate": 2.5030163458409473e-05, + "loss": 9.8997, + "step": 33590 + }, + { + "epoch": 0.16779445179654923, + "grad_norm": 0.10176771134138107, + "learning_rate": 2.5028661543467923e-05, + "loss": 9.8974, + "step": 33600 + }, + { + "epoch": 0.16784439062148868, + "grad_norm": 0.0979059562087059, + "learning_rate": 2.5027159628526373e-05, + "loss": 9.8964, + "step": 33610 + }, + { + "epoch": 0.16789432944642813, + "grad_norm": 0.09319134056568146, + "learning_rate": 2.502565771358482e-05, + "loss": 9.8955, + "step": 33620 + }, + { + "epoch": 0.16794426827136757, + "grad_norm": 0.09434917569160461, + "learning_rate": 2.5024155798643273e-05, + "loss": 9.9026, + "step": 33630 + }, + { + "epoch": 0.16799420709630702, + "grad_norm": 0.09023196995258331, + "learning_rate": 2.502265388370172e-05, + "loss": 9.8935, + "step": 33640 + }, + { + "epoch": 0.16804414592124647, + "grad_norm": 0.08715837448835373, + "learning_rate": 2.502115196876017e-05, + "loss": 9.8996, + "step": 33650 + }, + { + "epoch": 0.16809408474618592, + "grad_norm": 0.09682469815015793, + "learning_rate": 2.501965005381862e-05, + "loss": 9.9018, + "step": 33660 + }, + { + "epoch": 0.16814402357112537, + "grad_norm": 0.0928163006901741, + "learning_rate": 2.5018148138877067e-05, + "loss": 9.8917, + "step": 33670 + }, + { + "epoch": 0.16819396239606482, + "grad_norm": 0.10032060742378235, + "learning_rate": 2.501664622393552e-05, + "loss": 9.8959, + "step": 33680 + }, + { + "epoch": 0.16824390122100427, + "grad_norm": 0.0933447927236557, + "learning_rate": 2.5015144308993968e-05, + "loss": 9.9003, + "step": 33690 + }, + { + "epoch": 0.16829384004594372, + "grad_norm": 0.09343995898962021, + "learning_rate": 2.5013642394052418e-05, + "loss": 9.897, + "step": 33700 + }, + { + "epoch": 0.16834377887088317, + "grad_norm": 0.09390633553266525, + "learning_rate": 2.5012140479110868e-05, + "loss": 9.8975, + "step": 33710 + }, + { + "epoch": 0.16839371769582262, + "grad_norm": 0.10121657699346542, + "learning_rate": 2.5010638564169315e-05, + "loss": 9.9001, + "step": 33720 + }, + { + "epoch": 0.16844365652076207, + "grad_norm": 0.09688078612089157, + "learning_rate": 2.5009136649227768e-05, + "loss": 9.8943, + "step": 33730 + }, + { + "epoch": 0.16849359534570152, + "grad_norm": 0.09601810574531555, + "learning_rate": 2.5007634734286215e-05, + "loss": 9.8948, + "step": 33740 + }, + { + "epoch": 0.16854353417064097, + "grad_norm": 0.09208814799785614, + "learning_rate": 2.5006132819344665e-05, + "loss": 9.8938, + "step": 33750 + }, + { + "epoch": 0.16859347299558042, + "grad_norm": 0.10235219448804855, + "learning_rate": 2.5004630904403115e-05, + "loss": 9.8951, + "step": 33760 + }, + { + "epoch": 0.16864341182051987, + "grad_norm": 0.09691403061151505, + "learning_rate": 2.5003128989461562e-05, + "loss": 9.8948, + "step": 33770 + }, + { + "epoch": 0.16869335064545932, + "grad_norm": 0.0951026976108551, + "learning_rate": 2.5001627074520016e-05, + "loss": 9.9, + "step": 33780 + }, + { + "epoch": 0.16874328947039877, + "grad_norm": 0.08896276354789734, + "learning_rate": 2.5000125159578463e-05, + "loss": 9.8944, + "step": 33790 + }, + { + "epoch": 0.16879322829533822, + "grad_norm": 0.09217982739210129, + "learning_rate": 2.4998623244636913e-05, + "loss": 9.8922, + "step": 33800 + }, + { + "epoch": 0.16884316712027767, + "grad_norm": 0.09872495383024216, + "learning_rate": 2.4997121329695363e-05, + "loss": 9.8956, + "step": 33810 + }, + { + "epoch": 0.16889310594521711, + "grad_norm": 0.09138542413711548, + "learning_rate": 2.499561941475381e-05, + "loss": 9.8934, + "step": 33820 + }, + { + "epoch": 0.16894304477015656, + "grad_norm": 0.09778387099504471, + "learning_rate": 2.4994117499812263e-05, + "loss": 9.8991, + "step": 33830 + }, + { + "epoch": 0.168992983595096, + "grad_norm": 0.0923265740275383, + "learning_rate": 2.499261558487071e-05, + "loss": 9.8911, + "step": 33840 + }, + { + "epoch": 0.16904292242003546, + "grad_norm": 0.09650073945522308, + "learning_rate": 2.499111366992916e-05, + "loss": 9.8927, + "step": 33850 + }, + { + "epoch": 0.1690928612449749, + "grad_norm": 0.09646689891815186, + "learning_rate": 2.498961175498761e-05, + "loss": 9.8933, + "step": 33860 + }, + { + "epoch": 0.16914280006991436, + "grad_norm": 0.09530924260616302, + "learning_rate": 2.4988109840046057e-05, + "loss": 9.893, + "step": 33870 + }, + { + "epoch": 0.1691927388948538, + "grad_norm": 0.09329960495233536, + "learning_rate": 2.498660792510451e-05, + "loss": 9.8912, + "step": 33880 + }, + { + "epoch": 0.16924267771979326, + "grad_norm": 0.09914696216583252, + "learning_rate": 2.4985106010162958e-05, + "loss": 9.8876, + "step": 33890 + }, + { + "epoch": 0.1692926165447327, + "grad_norm": 0.09866105765104294, + "learning_rate": 2.498360409522141e-05, + "loss": 9.9011, + "step": 33900 + }, + { + "epoch": 0.16934255536967216, + "grad_norm": 0.09826882928609848, + "learning_rate": 2.4982102180279858e-05, + "loss": 9.8992, + "step": 33910 + }, + { + "epoch": 0.1693924941946116, + "grad_norm": 0.09396472573280334, + "learning_rate": 2.4980600265338305e-05, + "loss": 9.8908, + "step": 33920 + }, + { + "epoch": 0.16944243301955106, + "grad_norm": 0.08894440531730652, + "learning_rate": 2.4979098350396758e-05, + "loss": 9.8922, + "step": 33930 + }, + { + "epoch": 0.1694923718444905, + "grad_norm": 0.09588942676782608, + "learning_rate": 2.4977596435455205e-05, + "loss": 9.8917, + "step": 33940 + }, + { + "epoch": 0.16954231066942996, + "grad_norm": 0.09317868947982788, + "learning_rate": 2.497609452051366e-05, + "loss": 9.8941, + "step": 33950 + }, + { + "epoch": 0.1695922494943694, + "grad_norm": 0.0976976752281189, + "learning_rate": 2.4974592605572105e-05, + "loss": 9.8955, + "step": 33960 + }, + { + "epoch": 0.16964218831930886, + "grad_norm": 0.10638713836669922, + "learning_rate": 2.4973090690630552e-05, + "loss": 9.8906, + "step": 33970 + }, + { + "epoch": 0.1696921271442483, + "grad_norm": 0.0989893451333046, + "learning_rate": 2.4971588775689006e-05, + "loss": 9.8929, + "step": 33980 + }, + { + "epoch": 0.16974206596918776, + "grad_norm": 0.09523206949234009, + "learning_rate": 2.4970086860747453e-05, + "loss": 9.8959, + "step": 33990 + }, + { + "epoch": 0.1697920047941272, + "grad_norm": 0.08967464417219162, + "learning_rate": 2.4968584945805906e-05, + "loss": 9.8947, + "step": 34000 + }, + { + "epoch": 0.16984194361906665, + "grad_norm": 0.09731745719909668, + "learning_rate": 2.4967083030864353e-05, + "loss": 9.8914, + "step": 34010 + }, + { + "epoch": 0.1698918824440061, + "grad_norm": 0.09333444386720657, + "learning_rate": 2.49655811159228e-05, + "loss": 9.897, + "step": 34020 + }, + { + "epoch": 0.16994182126894555, + "grad_norm": 0.09605353325605392, + "learning_rate": 2.4964079200981253e-05, + "loss": 9.8829, + "step": 34030 + }, + { + "epoch": 0.169991760093885, + "grad_norm": 0.08907973766326904, + "learning_rate": 2.49625772860397e-05, + "loss": 9.8886, + "step": 34040 + }, + { + "epoch": 0.17004169891882445, + "grad_norm": 0.09453407675027847, + "learning_rate": 2.4961075371098154e-05, + "loss": 9.8884, + "step": 34050 + }, + { + "epoch": 0.1700916377437639, + "grad_norm": 0.09984616935253143, + "learning_rate": 2.49595734561566e-05, + "loss": 9.899, + "step": 34060 + }, + { + "epoch": 0.17014157656870335, + "grad_norm": 0.0900527685880661, + "learning_rate": 2.4958071541215047e-05, + "loss": 9.8932, + "step": 34070 + }, + { + "epoch": 0.1701915153936428, + "grad_norm": 0.09352007508277893, + "learning_rate": 2.49565696262735e-05, + "loss": 9.8887, + "step": 34080 + }, + { + "epoch": 0.17024145421858225, + "grad_norm": 0.0973382219672203, + "learning_rate": 2.4955067711331948e-05, + "loss": 9.8908, + "step": 34090 + }, + { + "epoch": 0.1702913930435217, + "grad_norm": 0.09201547503471375, + "learning_rate": 2.49535657963904e-05, + "loss": 9.8875, + "step": 34100 + }, + { + "epoch": 0.17034133186846115, + "grad_norm": 0.09478182345628738, + "learning_rate": 2.4952063881448848e-05, + "loss": 9.8797, + "step": 34110 + }, + { + "epoch": 0.17039127069340057, + "grad_norm": 0.09311734884977341, + "learning_rate": 2.4950561966507295e-05, + "loss": 9.8855, + "step": 34120 + }, + { + "epoch": 0.17044120951834002, + "grad_norm": 0.09643889218568802, + "learning_rate": 2.4949060051565748e-05, + "loss": 9.8851, + "step": 34130 + }, + { + "epoch": 0.17049114834327947, + "grad_norm": 0.09145352989435196, + "learning_rate": 2.4947558136624195e-05, + "loss": 9.8896, + "step": 34140 + }, + { + "epoch": 0.17054108716821892, + "grad_norm": 0.0948040559887886, + "learning_rate": 2.494605622168265e-05, + "loss": 9.8845, + "step": 34150 + }, + { + "epoch": 0.17059102599315837, + "grad_norm": 0.09217320382595062, + "learning_rate": 2.4944554306741095e-05, + "loss": 9.8888, + "step": 34160 + }, + { + "epoch": 0.17064096481809782, + "grad_norm": 0.09132317453622818, + "learning_rate": 2.4943052391799542e-05, + "loss": 9.8871, + "step": 34170 + }, + { + "epoch": 0.17069090364303727, + "grad_norm": 0.0995708778500557, + "learning_rate": 2.4941550476857996e-05, + "loss": 9.8957, + "step": 34180 + }, + { + "epoch": 0.17074084246797672, + "grad_norm": 0.09466378390789032, + "learning_rate": 2.4940048561916443e-05, + "loss": 9.8955, + "step": 34190 + }, + { + "epoch": 0.17079078129291617, + "grad_norm": 0.09082814306020737, + "learning_rate": 2.4938546646974896e-05, + "loss": 9.8832, + "step": 34200 + }, + { + "epoch": 0.17084072011785562, + "grad_norm": 0.09607654809951782, + "learning_rate": 2.4937044732033343e-05, + "loss": 9.887, + "step": 34210 + }, + { + "epoch": 0.17089065894279507, + "grad_norm": 0.09264031052589417, + "learning_rate": 2.4935542817091793e-05, + "loss": 9.8864, + "step": 34220 + }, + { + "epoch": 0.17094059776773451, + "grad_norm": 0.09279338270425797, + "learning_rate": 2.4934040902150243e-05, + "loss": 9.888, + "step": 34230 + }, + { + "epoch": 0.17099053659267396, + "grad_norm": 0.09093698859214783, + "learning_rate": 2.493253898720869e-05, + "loss": 9.8898, + "step": 34240 + }, + { + "epoch": 0.1710404754176134, + "grad_norm": 0.09663654863834381, + "learning_rate": 2.4931037072267144e-05, + "loss": 9.8794, + "step": 34250 + }, + { + "epoch": 0.17109041424255286, + "grad_norm": 0.09918060153722763, + "learning_rate": 2.492953515732559e-05, + "loss": 9.8779, + "step": 34260 + }, + { + "epoch": 0.1711403530674923, + "grad_norm": 0.09901434928178787, + "learning_rate": 2.492803324238404e-05, + "loss": 9.8849, + "step": 34270 + }, + { + "epoch": 0.17119029189243176, + "grad_norm": 0.09233778715133667, + "learning_rate": 2.492653132744249e-05, + "loss": 9.8817, + "step": 34280 + }, + { + "epoch": 0.1712402307173712, + "grad_norm": 0.08926848322153091, + "learning_rate": 2.4925029412500938e-05, + "loss": 9.8866, + "step": 34290 + }, + { + "epoch": 0.17129016954231066, + "grad_norm": 0.0978291779756546, + "learning_rate": 2.492352749755939e-05, + "loss": 9.8874, + "step": 34300 + }, + { + "epoch": 0.1713401083672501, + "grad_norm": 0.09829317033290863, + "learning_rate": 2.4922025582617838e-05, + "loss": 9.8843, + "step": 34310 + }, + { + "epoch": 0.17139004719218956, + "grad_norm": 0.08857450634241104, + "learning_rate": 2.4920523667676288e-05, + "loss": 9.8929, + "step": 34320 + }, + { + "epoch": 0.171439986017129, + "grad_norm": 0.10009986907243729, + "learning_rate": 2.4919021752734738e-05, + "loss": 9.8932, + "step": 34330 + }, + { + "epoch": 0.17148992484206846, + "grad_norm": 0.09351924806833267, + "learning_rate": 2.4917519837793185e-05, + "loss": 9.8878, + "step": 34340 + }, + { + "epoch": 0.1715398636670079, + "grad_norm": 0.09353633224964142, + "learning_rate": 2.491601792285164e-05, + "loss": 9.8878, + "step": 34350 + }, + { + "epoch": 0.17158980249194736, + "grad_norm": 0.09166029840707779, + "learning_rate": 2.4914516007910085e-05, + "loss": 9.8803, + "step": 34360 + }, + { + "epoch": 0.1716397413168868, + "grad_norm": 0.09764494746923447, + "learning_rate": 2.4913014092968536e-05, + "loss": 9.8855, + "step": 34370 + }, + { + "epoch": 0.17168968014182626, + "grad_norm": 0.0947176069021225, + "learning_rate": 2.4911512178026986e-05, + "loss": 9.8796, + "step": 34380 + }, + { + "epoch": 0.1717396189667657, + "grad_norm": 0.09326879680156708, + "learning_rate": 2.4910010263085433e-05, + "loss": 9.8775, + "step": 34390 + }, + { + "epoch": 0.17178955779170516, + "grad_norm": 0.0948229506611824, + "learning_rate": 2.4908508348143886e-05, + "loss": 9.8862, + "step": 34400 + }, + { + "epoch": 0.1718394966166446, + "grad_norm": 0.09219582378864288, + "learning_rate": 2.4907006433202333e-05, + "loss": 9.8888, + "step": 34410 + }, + { + "epoch": 0.17188943544158405, + "grad_norm": 0.0936218723654747, + "learning_rate": 2.4905504518260783e-05, + "loss": 9.8747, + "step": 34420 + }, + { + "epoch": 0.1719393742665235, + "grad_norm": 0.09321941435337067, + "learning_rate": 2.4904002603319233e-05, + "loss": 9.891, + "step": 34430 + }, + { + "epoch": 0.17198931309146295, + "grad_norm": 0.10023363679647446, + "learning_rate": 2.490250068837768e-05, + "loss": 9.8808, + "step": 34440 + }, + { + "epoch": 0.1720392519164024, + "grad_norm": 0.09513888508081436, + "learning_rate": 2.4900998773436134e-05, + "loss": 9.882, + "step": 34450 + }, + { + "epoch": 0.17208919074134185, + "grad_norm": 0.09558258205652237, + "learning_rate": 2.489949685849458e-05, + "loss": 9.8793, + "step": 34460 + }, + { + "epoch": 0.1721391295662813, + "grad_norm": 0.10544897615909576, + "learning_rate": 2.489799494355303e-05, + "loss": 9.8799, + "step": 34470 + }, + { + "epoch": 0.17218906839122075, + "grad_norm": 0.09681084007024765, + "learning_rate": 2.489649302861148e-05, + "loss": 9.879, + "step": 34480 + }, + { + "epoch": 0.1722390072161602, + "grad_norm": 0.10312927514314651, + "learning_rate": 2.4894991113669928e-05, + "loss": 9.8859, + "step": 34490 + }, + { + "epoch": 0.17228894604109965, + "grad_norm": 0.09519031643867493, + "learning_rate": 2.489348919872838e-05, + "loss": 9.8728, + "step": 34500 + }, + { + "epoch": 0.1723388848660391, + "grad_norm": 0.09442928433418274, + "learning_rate": 2.4891987283786828e-05, + "loss": 9.8753, + "step": 34510 + }, + { + "epoch": 0.17238882369097855, + "grad_norm": 0.09661301970481873, + "learning_rate": 2.4890485368845278e-05, + "loss": 9.8788, + "step": 34520 + }, + { + "epoch": 0.172438762515918, + "grad_norm": 0.0956299677491188, + "learning_rate": 2.488898345390373e-05, + "loss": 9.8807, + "step": 34530 + }, + { + "epoch": 0.17248870134085745, + "grad_norm": 0.097622811794281, + "learning_rate": 2.488748153896218e-05, + "loss": 9.8781, + "step": 34540 + }, + { + "epoch": 0.1725386401657969, + "grad_norm": 0.0975886806845665, + "learning_rate": 2.488597962402063e-05, + "loss": 9.8766, + "step": 34550 + }, + { + "epoch": 0.17258857899073635, + "grad_norm": 0.09032001346349716, + "learning_rate": 2.4884477709079075e-05, + "loss": 9.8834, + "step": 34560 + }, + { + "epoch": 0.1726385178156758, + "grad_norm": 0.09692685306072235, + "learning_rate": 2.4882975794137526e-05, + "loss": 9.8768, + "step": 34570 + }, + { + "epoch": 0.17268845664061525, + "grad_norm": 0.09569180756807327, + "learning_rate": 2.4881473879195976e-05, + "loss": 9.8756, + "step": 34580 + }, + { + "epoch": 0.1727383954655547, + "grad_norm": 0.0921887531876564, + "learning_rate": 2.4879971964254426e-05, + "loss": 9.8815, + "step": 34590 + }, + { + "epoch": 0.17278833429049414, + "grad_norm": 0.09386293590068817, + "learning_rate": 2.4878470049312876e-05, + "loss": 9.8763, + "step": 34600 + }, + { + "epoch": 0.1728382731154336, + "grad_norm": 0.09377525746822357, + "learning_rate": 2.4876968134371323e-05, + "loss": 9.8792, + "step": 34610 + }, + { + "epoch": 0.17288821194037304, + "grad_norm": 0.09777195751667023, + "learning_rate": 2.4875466219429773e-05, + "loss": 9.8779, + "step": 34620 + }, + { + "epoch": 0.1729381507653125, + "grad_norm": 0.08953432738780975, + "learning_rate": 2.4873964304488223e-05, + "loss": 9.8801, + "step": 34630 + }, + { + "epoch": 0.17298808959025194, + "grad_norm": 0.09429240971803665, + "learning_rate": 2.4872462389546674e-05, + "loss": 9.8756, + "step": 34640 + }, + { + "epoch": 0.1730380284151914, + "grad_norm": 0.09474718570709229, + "learning_rate": 2.4870960474605124e-05, + "loss": 9.8751, + "step": 34650 + }, + { + "epoch": 0.17308796724013084, + "grad_norm": 0.09288160502910614, + "learning_rate": 2.486945855966357e-05, + "loss": 9.8807, + "step": 34660 + }, + { + "epoch": 0.1731379060650703, + "grad_norm": 0.09404736012220383, + "learning_rate": 2.486795664472202e-05, + "loss": 9.8768, + "step": 34670 + }, + { + "epoch": 0.17318784489000974, + "grad_norm": 0.0975610539317131, + "learning_rate": 2.486645472978047e-05, + "loss": 9.8753, + "step": 34680 + }, + { + "epoch": 0.1732377837149492, + "grad_norm": 0.0982472151517868, + "learning_rate": 2.486495281483892e-05, + "loss": 9.8792, + "step": 34690 + }, + { + "epoch": 0.17328772253988864, + "grad_norm": 0.09330155700445175, + "learning_rate": 2.486345089989737e-05, + "loss": 9.8726, + "step": 34700 + }, + { + "epoch": 0.1733376613648281, + "grad_norm": 0.09651447087526321, + "learning_rate": 2.4861948984955818e-05, + "loss": 9.8716, + "step": 34710 + }, + { + "epoch": 0.17338760018976754, + "grad_norm": 0.09619554132223129, + "learning_rate": 2.4860447070014268e-05, + "loss": 9.8736, + "step": 34720 + }, + { + "epoch": 0.173437539014707, + "grad_norm": 0.09537581354379654, + "learning_rate": 2.485894515507272e-05, + "loss": 9.8743, + "step": 34730 + }, + { + "epoch": 0.17348747783964644, + "grad_norm": 0.09322098642587662, + "learning_rate": 2.485744324013117e-05, + "loss": 9.8832, + "step": 34740 + }, + { + "epoch": 0.1735374166645859, + "grad_norm": 0.09934062510728836, + "learning_rate": 2.485594132518962e-05, + "loss": 9.8735, + "step": 34750 + }, + { + "epoch": 0.17358735548952534, + "grad_norm": 0.09587859362363815, + "learning_rate": 2.4854439410248065e-05, + "loss": 9.8625, + "step": 34760 + }, + { + "epoch": 0.17363729431446479, + "grad_norm": 0.09331879019737244, + "learning_rate": 2.4852937495306516e-05, + "loss": 9.8709, + "step": 34770 + }, + { + "epoch": 0.17368723313940423, + "grad_norm": 0.0957440435886383, + "learning_rate": 2.4851435580364966e-05, + "loss": 9.8665, + "step": 34780 + }, + { + "epoch": 0.17373717196434368, + "grad_norm": 0.10028447955846786, + "learning_rate": 2.4849933665423416e-05, + "loss": 9.8696, + "step": 34790 + }, + { + "epoch": 0.17378711078928313, + "grad_norm": 0.09930728375911713, + "learning_rate": 2.4848431750481866e-05, + "loss": 9.8731, + "step": 34800 + }, + { + "epoch": 0.17383704961422258, + "grad_norm": 0.09186220914125443, + "learning_rate": 2.4846929835540313e-05, + "loss": 9.8791, + "step": 34810 + }, + { + "epoch": 0.17388698843916203, + "grad_norm": 0.09665971249341965, + "learning_rate": 2.4845427920598763e-05, + "loss": 9.8828, + "step": 34820 + }, + { + "epoch": 0.17393692726410148, + "grad_norm": 0.09465493261814117, + "learning_rate": 2.4843926005657213e-05, + "loss": 9.8729, + "step": 34830 + }, + { + "epoch": 0.17398686608904093, + "grad_norm": 0.09141222387552261, + "learning_rate": 2.4842424090715664e-05, + "loss": 9.8718, + "step": 34840 + }, + { + "epoch": 0.17403680491398038, + "grad_norm": 0.09826017171144485, + "learning_rate": 2.4840922175774114e-05, + "loss": 9.8765, + "step": 34850 + }, + { + "epoch": 0.17408674373891983, + "grad_norm": 0.10129930078983307, + "learning_rate": 2.4839420260832564e-05, + "loss": 9.8716, + "step": 34860 + }, + { + "epoch": 0.17413668256385928, + "grad_norm": 0.09397407621145248, + "learning_rate": 2.483791834589101e-05, + "loss": 9.875, + "step": 34870 + }, + { + "epoch": 0.17418662138879873, + "grad_norm": 0.09884011745452881, + "learning_rate": 2.483641643094946e-05, + "loss": 9.8708, + "step": 34880 + }, + { + "epoch": 0.17423656021373818, + "grad_norm": 0.09467148035764694, + "learning_rate": 2.483491451600791e-05, + "loss": 9.8709, + "step": 34890 + }, + { + "epoch": 0.17428649903867763, + "grad_norm": 0.0914902612566948, + "learning_rate": 2.483341260106636e-05, + "loss": 9.8711, + "step": 34900 + }, + { + "epoch": 0.17433643786361708, + "grad_norm": 0.0950956791639328, + "learning_rate": 2.483191068612481e-05, + "loss": 9.8675, + "step": 34910 + }, + { + "epoch": 0.17438637668855653, + "grad_norm": 0.09284093976020813, + "learning_rate": 2.4830408771183258e-05, + "loss": 9.8682, + "step": 34920 + }, + { + "epoch": 0.17443631551349598, + "grad_norm": 0.09593179076910019, + "learning_rate": 2.482890685624171e-05, + "loss": 9.8714, + "step": 34930 + }, + { + "epoch": 0.17448625433843543, + "grad_norm": 0.09419502317905426, + "learning_rate": 2.482740494130016e-05, + "loss": 9.8694, + "step": 34940 + }, + { + "epoch": 0.17453619316337488, + "grad_norm": 0.09543459862470627, + "learning_rate": 2.482590302635861e-05, + "loss": 9.8715, + "step": 34950 + }, + { + "epoch": 0.17458613198831432, + "grad_norm": 0.09066810458898544, + "learning_rate": 2.482440111141706e-05, + "loss": 9.8734, + "step": 34960 + }, + { + "epoch": 0.17463607081325377, + "grad_norm": 0.09701773524284363, + "learning_rate": 2.4822899196475506e-05, + "loss": 9.8711, + "step": 34970 + }, + { + "epoch": 0.17468600963819322, + "grad_norm": 0.09097738564014435, + "learning_rate": 2.4821397281533956e-05, + "loss": 9.875, + "step": 34980 + }, + { + "epoch": 0.17473594846313267, + "grad_norm": 0.0955294743180275, + "learning_rate": 2.4819895366592406e-05, + "loss": 9.8632, + "step": 34990 + }, + { + "epoch": 0.17478588728807212, + "grad_norm": 0.09581121802330017, + "learning_rate": 2.4818393451650856e-05, + "loss": 9.8698, + "step": 35000 + }, + { + "epoch": 0.17483582611301157, + "grad_norm": 0.09599103778600693, + "learning_rate": 2.4816891536709306e-05, + "loss": 9.867, + "step": 35010 + }, + { + "epoch": 0.17488576493795102, + "grad_norm": 0.1030481606721878, + "learning_rate": 2.4815389621767753e-05, + "loss": 9.8662, + "step": 35020 + }, + { + "epoch": 0.17493570376289047, + "grad_norm": 0.09832312166690826, + "learning_rate": 2.4813887706826203e-05, + "loss": 9.8675, + "step": 35030 + }, + { + "epoch": 0.17498564258782992, + "grad_norm": 0.09342101961374283, + "learning_rate": 2.4812385791884654e-05, + "loss": 9.8692, + "step": 35040 + }, + { + "epoch": 0.17503558141276937, + "grad_norm": 0.09640762209892273, + "learning_rate": 2.4810883876943104e-05, + "loss": 9.8705, + "step": 35050 + }, + { + "epoch": 0.17508552023770882, + "grad_norm": 0.09944464266300201, + "learning_rate": 2.4809381962001554e-05, + "loss": 9.8669, + "step": 35060 + }, + { + "epoch": 0.17513545906264827, + "grad_norm": 0.09033916890621185, + "learning_rate": 2.480788004706e-05, + "loss": 9.8649, + "step": 35070 + }, + { + "epoch": 0.17518539788758772, + "grad_norm": 0.09908484667539597, + "learning_rate": 2.480637813211845e-05, + "loss": 9.8719, + "step": 35080 + }, + { + "epoch": 0.17523533671252717, + "grad_norm": 0.09301396459341049, + "learning_rate": 2.48048762171769e-05, + "loss": 9.8616, + "step": 35090 + }, + { + "epoch": 0.17528527553746662, + "grad_norm": 0.09278962016105652, + "learning_rate": 2.480337430223535e-05, + "loss": 9.8608, + "step": 35100 + }, + { + "epoch": 0.17533521436240604, + "grad_norm": 0.09971888363361359, + "learning_rate": 2.48018723872938e-05, + "loss": 9.8672, + "step": 35110 + }, + { + "epoch": 0.1753851531873455, + "grad_norm": 0.09398912638425827, + "learning_rate": 2.4800370472352248e-05, + "loss": 9.8683, + "step": 35120 + }, + { + "epoch": 0.17543509201228494, + "grad_norm": 0.09367619454860687, + "learning_rate": 2.47988685574107e-05, + "loss": 9.8646, + "step": 35130 + }, + { + "epoch": 0.1754850308372244, + "grad_norm": 0.08890117704868317, + "learning_rate": 2.479736664246915e-05, + "loss": 9.8611, + "step": 35140 + }, + { + "epoch": 0.17553496966216384, + "grad_norm": 0.09400789439678192, + "learning_rate": 2.47958647275276e-05, + "loss": 9.8675, + "step": 35150 + }, + { + "epoch": 0.1755849084871033, + "grad_norm": 0.0941472202539444, + "learning_rate": 2.479436281258605e-05, + "loss": 9.8662, + "step": 35160 + }, + { + "epoch": 0.17563484731204274, + "grad_norm": 0.0907822698354721, + "learning_rate": 2.4792860897644496e-05, + "loss": 9.8724, + "step": 35170 + }, + { + "epoch": 0.17568478613698219, + "grad_norm": 0.09430518746376038, + "learning_rate": 2.479135898270295e-05, + "loss": 9.8677, + "step": 35180 + }, + { + "epoch": 0.17573472496192163, + "grad_norm": 0.09625846147537231, + "learning_rate": 2.4789857067761396e-05, + "loss": 9.86, + "step": 35190 + }, + { + "epoch": 0.17578466378686108, + "grad_norm": 0.09888491779565811, + "learning_rate": 2.4788355152819846e-05, + "loss": 9.864, + "step": 35200 + }, + { + "epoch": 0.17583460261180053, + "grad_norm": 0.10357237607240677, + "learning_rate": 2.4786853237878296e-05, + "loss": 9.8568, + "step": 35210 + }, + { + "epoch": 0.17588454143673998, + "grad_norm": 0.09656186401844025, + "learning_rate": 2.4785351322936743e-05, + "loss": 9.8683, + "step": 35220 + }, + { + "epoch": 0.17593448026167943, + "grad_norm": 0.09330909699201584, + "learning_rate": 2.4783849407995197e-05, + "loss": 9.8613, + "step": 35230 + }, + { + "epoch": 0.17598441908661888, + "grad_norm": 0.09761014580726624, + "learning_rate": 2.4782347493053644e-05, + "loss": 9.864, + "step": 35240 + }, + { + "epoch": 0.17603435791155833, + "grad_norm": 0.09538024663925171, + "learning_rate": 2.4780845578112094e-05, + "loss": 9.8589, + "step": 35250 + }, + { + "epoch": 0.17608429673649778, + "grad_norm": 0.09242220222949982, + "learning_rate": 2.4779343663170544e-05, + "loss": 9.8593, + "step": 35260 + }, + { + "epoch": 0.17613423556143723, + "grad_norm": 0.09523601830005646, + "learning_rate": 2.477784174822899e-05, + "loss": 9.8591, + "step": 35270 + }, + { + "epoch": 0.17618417438637668, + "grad_norm": 0.09717884659767151, + "learning_rate": 2.4776339833287444e-05, + "loss": 9.8658, + "step": 35280 + }, + { + "epoch": 0.17623411321131613, + "grad_norm": 0.11009235680103302, + "learning_rate": 2.477483791834589e-05, + "loss": 9.8577, + "step": 35290 + }, + { + "epoch": 0.17628405203625558, + "grad_norm": 0.09665638953447342, + "learning_rate": 2.477333600340434e-05, + "loss": 9.8682, + "step": 35300 + }, + { + "epoch": 0.17633399086119503, + "grad_norm": 0.09212061762809753, + "learning_rate": 2.477183408846279e-05, + "loss": 9.8629, + "step": 35310 + }, + { + "epoch": 0.17638392968613448, + "grad_norm": 0.09347554296255112, + "learning_rate": 2.4770332173521238e-05, + "loss": 9.8615, + "step": 35320 + }, + { + "epoch": 0.17643386851107393, + "grad_norm": 0.09957797080278397, + "learning_rate": 2.4768830258579692e-05, + "loss": 9.8613, + "step": 35330 + }, + { + "epoch": 0.17648380733601338, + "grad_norm": 0.0971374586224556, + "learning_rate": 2.476732834363814e-05, + "loss": 9.861, + "step": 35340 + }, + { + "epoch": 0.17653374616095283, + "grad_norm": 0.09239890426397324, + "learning_rate": 2.476582642869659e-05, + "loss": 9.865, + "step": 35350 + }, + { + "epoch": 0.17658368498589228, + "grad_norm": 0.09785747528076172, + "learning_rate": 2.476432451375504e-05, + "loss": 9.8611, + "step": 35360 + }, + { + "epoch": 0.17663362381083172, + "grad_norm": 0.09622731059789658, + "learning_rate": 2.4762822598813486e-05, + "loss": 9.8552, + "step": 35370 + }, + { + "epoch": 0.17668356263577117, + "grad_norm": 0.09199772030115128, + "learning_rate": 2.476132068387194e-05, + "loss": 9.8599, + "step": 35380 + }, + { + "epoch": 0.17673350146071062, + "grad_norm": 0.09622587263584137, + "learning_rate": 2.4759818768930386e-05, + "loss": 9.8641, + "step": 35390 + }, + { + "epoch": 0.17678344028565007, + "grad_norm": 0.09253069758415222, + "learning_rate": 2.4758316853988836e-05, + "loss": 9.8557, + "step": 35400 + }, + { + "epoch": 0.17683337911058952, + "grad_norm": 0.09692025184631348, + "learning_rate": 2.4756814939047286e-05, + "loss": 9.8591, + "step": 35410 + }, + { + "epoch": 0.17688331793552897, + "grad_norm": 0.0917033851146698, + "learning_rate": 2.4755313024105733e-05, + "loss": 9.8558, + "step": 35420 + }, + { + "epoch": 0.17693325676046842, + "grad_norm": 0.09357017278671265, + "learning_rate": 2.4753811109164187e-05, + "loss": 9.8624, + "step": 35430 + }, + { + "epoch": 0.17698319558540787, + "grad_norm": 0.09529455006122589, + "learning_rate": 2.4752309194222634e-05, + "loss": 9.8557, + "step": 35440 + }, + { + "epoch": 0.17703313441034732, + "grad_norm": 0.0990082174539566, + "learning_rate": 2.4750807279281084e-05, + "loss": 9.8607, + "step": 35450 + }, + { + "epoch": 0.17708307323528677, + "grad_norm": 0.09450554102659225, + "learning_rate": 2.4749305364339534e-05, + "loss": 9.8506, + "step": 35460 + }, + { + "epoch": 0.17713301206022622, + "grad_norm": 0.09906260669231415, + "learning_rate": 2.474780344939798e-05, + "loss": 9.8575, + "step": 35470 + }, + { + "epoch": 0.17718295088516567, + "grad_norm": 0.09663223475217819, + "learning_rate": 2.4746301534456434e-05, + "loss": 9.8683, + "step": 35480 + }, + { + "epoch": 0.17723288971010512, + "grad_norm": 0.0887630507349968, + "learning_rate": 2.474479961951488e-05, + "loss": 9.8639, + "step": 35490 + }, + { + "epoch": 0.17728282853504457, + "grad_norm": 0.09145206212997437, + "learning_rate": 2.474329770457333e-05, + "loss": 9.8622, + "step": 35500 + }, + { + "epoch": 0.17733276735998402, + "grad_norm": 0.09705221652984619, + "learning_rate": 2.474179578963178e-05, + "loss": 9.8569, + "step": 35510 + }, + { + "epoch": 0.17738270618492347, + "grad_norm": 0.09884809702634811, + "learning_rate": 2.4740293874690228e-05, + "loss": 9.8575, + "step": 35520 + }, + { + "epoch": 0.17743264500986292, + "grad_norm": 0.09495537728071213, + "learning_rate": 2.4738791959748682e-05, + "loss": 9.8571, + "step": 35530 + }, + { + "epoch": 0.17748258383480237, + "grad_norm": 0.0902133509516716, + "learning_rate": 2.473729004480713e-05, + "loss": 9.8606, + "step": 35540 + }, + { + "epoch": 0.17753252265974181, + "grad_norm": 0.09612247347831726, + "learning_rate": 2.4735788129865582e-05, + "loss": 9.8519, + "step": 35550 + }, + { + "epoch": 0.17758246148468126, + "grad_norm": 0.09739767014980316, + "learning_rate": 2.473428621492403e-05, + "loss": 9.8538, + "step": 35560 + }, + { + "epoch": 0.1776324003096207, + "grad_norm": 0.10079686343669891, + "learning_rate": 2.4732784299982476e-05, + "loss": 9.8571, + "step": 35570 + }, + { + "epoch": 0.17768233913456016, + "grad_norm": 0.0968751311302185, + "learning_rate": 2.473128238504093e-05, + "loss": 9.8485, + "step": 35580 + }, + { + "epoch": 0.1777322779594996, + "grad_norm": 0.10343873500823975, + "learning_rate": 2.4729780470099376e-05, + "loss": 9.8578, + "step": 35590 + }, + { + "epoch": 0.17778221678443906, + "grad_norm": 0.09628947824239731, + "learning_rate": 2.472827855515783e-05, + "loss": 9.8457, + "step": 35600 + }, + { + "epoch": 0.1778321556093785, + "grad_norm": 0.08937991410493851, + "learning_rate": 2.4726776640216276e-05, + "loss": 9.8513, + "step": 35610 + }, + { + "epoch": 0.17788209443431796, + "grad_norm": 0.09612028300762177, + "learning_rate": 2.4725274725274723e-05, + "loss": 9.8552, + "step": 35620 + }, + { + "epoch": 0.1779320332592574, + "grad_norm": 0.09348214417695999, + "learning_rate": 2.4723772810333177e-05, + "loss": 9.8598, + "step": 35630 + }, + { + "epoch": 0.17798197208419686, + "grad_norm": 0.09429159015417099, + "learning_rate": 2.4722270895391624e-05, + "loss": 9.8548, + "step": 35640 + }, + { + "epoch": 0.1780319109091363, + "grad_norm": 0.09209638088941574, + "learning_rate": 2.4720768980450077e-05, + "loss": 9.8631, + "step": 35650 + }, + { + "epoch": 0.17808184973407576, + "grad_norm": 0.1002785861492157, + "learning_rate": 2.4719267065508524e-05, + "loss": 9.845, + "step": 35660 + }, + { + "epoch": 0.1781317885590152, + "grad_norm": 0.09212984144687653, + "learning_rate": 2.471776515056697e-05, + "loss": 9.8511, + "step": 35670 + }, + { + "epoch": 0.17818172738395466, + "grad_norm": 0.09603580087423325, + "learning_rate": 2.4716263235625424e-05, + "loss": 9.8495, + "step": 35680 + }, + { + "epoch": 0.1782316662088941, + "grad_norm": 0.0989808738231659, + "learning_rate": 2.471476132068387e-05, + "loss": 9.8624, + "step": 35690 + }, + { + "epoch": 0.17828160503383356, + "grad_norm": 0.09753132611513138, + "learning_rate": 2.4713259405742325e-05, + "loss": 9.8588, + "step": 35700 + }, + { + "epoch": 0.178331543858773, + "grad_norm": 0.09459420293569565, + "learning_rate": 2.471175749080077e-05, + "loss": 9.8551, + "step": 35710 + }, + { + "epoch": 0.17838148268371246, + "grad_norm": 0.09288576990365982, + "learning_rate": 2.4710255575859218e-05, + "loss": 9.852, + "step": 35720 + }, + { + "epoch": 0.1784314215086519, + "grad_norm": 0.09371695667505264, + "learning_rate": 2.4708753660917672e-05, + "loss": 9.8552, + "step": 35730 + }, + { + "epoch": 0.17848136033359135, + "grad_norm": 0.09233945608139038, + "learning_rate": 2.470725174597612e-05, + "loss": 9.8561, + "step": 35740 + }, + { + "epoch": 0.1785312991585308, + "grad_norm": 0.09840219467878342, + "learning_rate": 2.4705749831034572e-05, + "loss": 9.8552, + "step": 35750 + }, + { + "epoch": 0.17858123798347025, + "grad_norm": 0.09583665430545807, + "learning_rate": 2.470424791609302e-05, + "loss": 9.8496, + "step": 35760 + }, + { + "epoch": 0.1786311768084097, + "grad_norm": 0.09573546051979065, + "learning_rate": 2.4702746001151466e-05, + "loss": 9.8575, + "step": 35770 + }, + { + "epoch": 0.17868111563334915, + "grad_norm": 0.09594116359949112, + "learning_rate": 2.470124408620992e-05, + "loss": 9.8538, + "step": 35780 + }, + { + "epoch": 0.1787310544582886, + "grad_norm": 0.09792909026145935, + "learning_rate": 2.4699742171268366e-05, + "loss": 9.853, + "step": 35790 + }, + { + "epoch": 0.17878099328322805, + "grad_norm": 0.09887704998254776, + "learning_rate": 2.469824025632682e-05, + "loss": 9.8519, + "step": 35800 + }, + { + "epoch": 0.1788309321081675, + "grad_norm": 0.09814807772636414, + "learning_rate": 2.4696738341385266e-05, + "loss": 9.8464, + "step": 35810 + }, + { + "epoch": 0.17888087093310695, + "grad_norm": 0.09986128658056259, + "learning_rate": 2.4695236426443713e-05, + "loss": 9.8488, + "step": 35820 + }, + { + "epoch": 0.1789308097580464, + "grad_norm": 0.0871347114443779, + "learning_rate": 2.4693734511502167e-05, + "loss": 9.8522, + "step": 35830 + }, + { + "epoch": 0.17898074858298585, + "grad_norm": 0.09611291438341141, + "learning_rate": 2.4692232596560614e-05, + "loss": 9.8551, + "step": 35840 + }, + { + "epoch": 0.1790306874079253, + "grad_norm": 0.1038837879896164, + "learning_rate": 2.4690730681619067e-05, + "loss": 9.8544, + "step": 35850 + }, + { + "epoch": 0.17908062623286475, + "grad_norm": 0.09933873265981674, + "learning_rate": 2.4689228766677514e-05, + "loss": 9.8551, + "step": 35860 + }, + { + "epoch": 0.1791305650578042, + "grad_norm": 0.0895809531211853, + "learning_rate": 2.4687726851735964e-05, + "loss": 9.8486, + "step": 35870 + }, + { + "epoch": 0.17918050388274365, + "grad_norm": 0.09082268923521042, + "learning_rate": 2.4686224936794414e-05, + "loss": 9.8563, + "step": 35880 + }, + { + "epoch": 0.1792304427076831, + "grad_norm": 0.09877155721187592, + "learning_rate": 2.468472302185286e-05, + "loss": 9.8526, + "step": 35890 + }, + { + "epoch": 0.17928038153262255, + "grad_norm": 0.09575866162776947, + "learning_rate": 2.4683221106911315e-05, + "loss": 9.857, + "step": 35900 + }, + { + "epoch": 0.179330320357562, + "grad_norm": 0.0871095061302185, + "learning_rate": 2.468171919196976e-05, + "loss": 9.8527, + "step": 35910 + }, + { + "epoch": 0.17938025918250144, + "grad_norm": 0.09860642999410629, + "learning_rate": 2.468021727702821e-05, + "loss": 9.8475, + "step": 35920 + }, + { + "epoch": 0.1794301980074409, + "grad_norm": 0.09469640254974365, + "learning_rate": 2.4678715362086662e-05, + "loss": 9.8499, + "step": 35930 + }, + { + "epoch": 0.17948013683238034, + "grad_norm": 0.09654894471168518, + "learning_rate": 2.467721344714511e-05, + "loss": 9.8446, + "step": 35940 + }, + { + "epoch": 0.1795300756573198, + "grad_norm": 0.09052994847297668, + "learning_rate": 2.4675711532203562e-05, + "loss": 9.8469, + "step": 35950 + }, + { + "epoch": 0.17958001448225924, + "grad_norm": 0.09217459708452225, + "learning_rate": 2.467420961726201e-05, + "loss": 9.8483, + "step": 35960 + }, + { + "epoch": 0.1796299533071987, + "grad_norm": 0.09349222481250763, + "learning_rate": 2.467270770232046e-05, + "loss": 9.8498, + "step": 35970 + }, + { + "epoch": 0.17967989213213814, + "grad_norm": 0.09795039892196655, + "learning_rate": 2.467120578737891e-05, + "loss": 9.848, + "step": 35980 + }, + { + "epoch": 0.1797298309570776, + "grad_norm": 0.09592344611883163, + "learning_rate": 2.4669703872437356e-05, + "loss": 9.8407, + "step": 35990 + }, + { + "epoch": 0.17977976978201704, + "grad_norm": 0.09404613822698593, + "learning_rate": 2.466820195749581e-05, + "loss": 9.8429, + "step": 36000 + }, + { + "epoch": 0.1798297086069565, + "grad_norm": 0.09326544404029846, + "learning_rate": 2.4666700042554256e-05, + "loss": 9.8567, + "step": 36010 + }, + { + "epoch": 0.17987964743189594, + "grad_norm": 0.09783726930618286, + "learning_rate": 2.4665198127612707e-05, + "loss": 9.8545, + "step": 36020 + }, + { + "epoch": 0.1799295862568354, + "grad_norm": 0.09461402893066406, + "learning_rate": 2.4663696212671157e-05, + "loss": 9.8438, + "step": 36030 + }, + { + "epoch": 0.17997952508177484, + "grad_norm": 0.09616295248270035, + "learning_rate": 2.4662194297729604e-05, + "loss": 9.8417, + "step": 36040 + }, + { + "epoch": 0.1800294639067143, + "grad_norm": 0.09772490710020065, + "learning_rate": 2.4660692382788057e-05, + "loss": 9.8518, + "step": 36050 + }, + { + "epoch": 0.18007940273165374, + "grad_norm": 0.09420907497406006, + "learning_rate": 2.4659190467846504e-05, + "loss": 9.8473, + "step": 36060 + }, + { + "epoch": 0.1801293415565932, + "grad_norm": 0.09083731472492218, + "learning_rate": 2.4657688552904954e-05, + "loss": 9.8436, + "step": 36070 + }, + { + "epoch": 0.18017928038153264, + "grad_norm": 0.09854559600353241, + "learning_rate": 2.4656186637963404e-05, + "loss": 9.8468, + "step": 36080 + }, + { + "epoch": 0.18022921920647209, + "grad_norm": 0.09707348048686981, + "learning_rate": 2.465468472302185e-05, + "loss": 9.8503, + "step": 36090 + }, + { + "epoch": 0.1802791580314115, + "grad_norm": 0.09600484371185303, + "learning_rate": 2.4653182808080305e-05, + "loss": 9.8457, + "step": 36100 + }, + { + "epoch": 0.18032909685635096, + "grad_norm": 0.0931532233953476, + "learning_rate": 2.465168089313875e-05, + "loss": 9.8469, + "step": 36110 + }, + { + "epoch": 0.1803790356812904, + "grad_norm": 0.09178706258535385, + "learning_rate": 2.46501789781972e-05, + "loss": 9.8432, + "step": 36120 + }, + { + "epoch": 0.18042897450622986, + "grad_norm": 0.09376762807369232, + "learning_rate": 2.4648677063255652e-05, + "loss": 9.8451, + "step": 36130 + }, + { + "epoch": 0.1804789133311693, + "grad_norm": 0.09681384265422821, + "learning_rate": 2.46471751483141e-05, + "loss": 9.8427, + "step": 36140 + }, + { + "epoch": 0.18052885215610875, + "grad_norm": 0.09608221054077148, + "learning_rate": 2.4645673233372552e-05, + "loss": 9.8498, + "step": 36150 + }, + { + "epoch": 0.1805787909810482, + "grad_norm": 0.0937625989317894, + "learning_rate": 2.4644171318431e-05, + "loss": 9.8502, + "step": 36160 + }, + { + "epoch": 0.18062872980598765, + "grad_norm": 0.09719646722078323, + "learning_rate": 2.464266940348945e-05, + "loss": 9.849, + "step": 36170 + }, + { + "epoch": 0.1806786686309271, + "grad_norm": 0.09256922453641891, + "learning_rate": 2.46411674885479e-05, + "loss": 9.8471, + "step": 36180 + }, + { + "epoch": 0.18072860745586655, + "grad_norm": 0.09239103645086288, + "learning_rate": 2.463966557360635e-05, + "loss": 9.8389, + "step": 36190 + }, + { + "epoch": 0.180778546280806, + "grad_norm": 0.09770379960536957, + "learning_rate": 2.46381636586648e-05, + "loss": 9.8456, + "step": 36200 + }, + { + "epoch": 0.18082848510574545, + "grad_norm": 0.09287633001804352, + "learning_rate": 2.4636661743723246e-05, + "loss": 9.8502, + "step": 36210 + }, + { + "epoch": 0.1808784239306849, + "grad_norm": 0.0940169245004654, + "learning_rate": 2.4635159828781697e-05, + "loss": 9.848, + "step": 36220 + }, + { + "epoch": 0.18092836275562435, + "grad_norm": 0.0958079993724823, + "learning_rate": 2.4633657913840147e-05, + "loss": 9.8366, + "step": 36230 + }, + { + "epoch": 0.1809783015805638, + "grad_norm": 0.09197930246591568, + "learning_rate": 2.4632155998898597e-05, + "loss": 12.8969, + "step": 36240 + }, + { + "epoch": 0.18102824040550325, + "grad_norm": 0.08773362636566162, + "learning_rate": 2.4630654083957047e-05, + "loss": 14.5353, + "step": 36250 + }, + { + "epoch": 0.1810781792304427, + "grad_norm": 0.10025262087583542, + "learning_rate": 2.4629152169015494e-05, + "loss": 9.8401, + "step": 36260 + }, + { + "epoch": 0.18112811805538215, + "grad_norm": 0.0945044681429863, + "learning_rate": 2.4627650254073944e-05, + "loss": 9.8424, + "step": 36270 + }, + { + "epoch": 0.1811780568803216, + "grad_norm": 0.0924852043390274, + "learning_rate": 2.4626148339132394e-05, + "loss": 9.8448, + "step": 36280 + }, + { + "epoch": 0.18122799570526105, + "grad_norm": 0.09346658736467361, + "learning_rate": 2.4624646424190845e-05, + "loss": 9.8408, + "step": 36290 + }, + { + "epoch": 0.1812779345302005, + "grad_norm": 0.10162097215652466, + "learning_rate": 2.4623144509249295e-05, + "loss": 9.8382, + "step": 36300 + }, + { + "epoch": 0.18132787335513995, + "grad_norm": 0.093153215944767, + "learning_rate": 2.462164259430774e-05, + "loss": 9.8415, + "step": 36310 + }, + { + "epoch": 0.1813778121800794, + "grad_norm": 0.09772991389036179, + "learning_rate": 2.462014067936619e-05, + "loss": 9.8463, + "step": 36320 + }, + { + "epoch": 0.18142775100501884, + "grad_norm": 0.09410078823566437, + "learning_rate": 2.4618638764424642e-05, + "loss": 9.8374, + "step": 36330 + }, + { + "epoch": 0.1814776898299583, + "grad_norm": 0.09498627483844757, + "learning_rate": 2.4617136849483092e-05, + "loss": 9.8382, + "step": 36340 + }, + { + "epoch": 0.18152762865489774, + "grad_norm": 0.09718122333288193, + "learning_rate": 2.4615634934541542e-05, + "loss": 9.839, + "step": 36350 + }, + { + "epoch": 0.1815775674798372, + "grad_norm": 0.09762446582317352, + "learning_rate": 2.461413301959999e-05, + "loss": 9.8365, + "step": 36360 + }, + { + "epoch": 0.18162750630477664, + "grad_norm": 0.09202492982149124, + "learning_rate": 2.461263110465844e-05, + "loss": 9.8351, + "step": 36370 + }, + { + "epoch": 0.1816774451297161, + "grad_norm": 0.09190283715724945, + "learning_rate": 2.461112918971689e-05, + "loss": 9.8375, + "step": 36380 + }, + { + "epoch": 0.18172738395465554, + "grad_norm": 0.09559813141822815, + "learning_rate": 2.460962727477534e-05, + "loss": 9.8424, + "step": 36390 + }, + { + "epoch": 0.181777322779595, + "grad_norm": 0.09660519659519196, + "learning_rate": 2.460812535983379e-05, + "loss": 9.8377, + "step": 36400 + }, + { + "epoch": 0.18182726160453444, + "grad_norm": 0.0976775512099266, + "learning_rate": 2.4606623444892236e-05, + "loss": 9.841, + "step": 36410 + }, + { + "epoch": 0.1818772004294739, + "grad_norm": 0.10077891498804092, + "learning_rate": 2.4605121529950687e-05, + "loss": 9.8422, + "step": 36420 + }, + { + "epoch": 0.18192713925441334, + "grad_norm": 0.09487045556306839, + "learning_rate": 2.4603619615009137e-05, + "loss": 9.8388, + "step": 36430 + }, + { + "epoch": 0.1819770780793528, + "grad_norm": 0.08921613544225693, + "learning_rate": 2.4602117700067587e-05, + "loss": 9.8387, + "step": 36440 + }, + { + "epoch": 0.18202701690429224, + "grad_norm": 0.09209630638360977, + "learning_rate": 2.4600615785126037e-05, + "loss": 9.8364, + "step": 36450 + }, + { + "epoch": 0.1820769557292317, + "grad_norm": 0.09444855153560638, + "learning_rate": 2.4599113870184484e-05, + "loss": 9.8348, + "step": 36460 + }, + { + "epoch": 0.18212689455417114, + "grad_norm": 0.10311925411224365, + "learning_rate": 2.4597611955242934e-05, + "loss": 9.8378, + "step": 36470 + }, + { + "epoch": 0.1821768333791106, + "grad_norm": 0.09053095430135727, + "learning_rate": 2.4596110040301384e-05, + "loss": 9.8406, + "step": 36480 + }, + { + "epoch": 0.18222677220405004, + "grad_norm": 0.09320933371782303, + "learning_rate": 2.4594608125359835e-05, + "loss": 9.8384, + "step": 36490 + }, + { + "epoch": 0.18227671102898949, + "grad_norm": 0.09924657642841339, + "learning_rate": 2.4593106210418285e-05, + "loss": 9.8366, + "step": 36500 + }, + { + "epoch": 0.18232664985392893, + "grad_norm": 0.09330344945192337, + "learning_rate": 2.4591604295476735e-05, + "loss": 9.829, + "step": 36510 + }, + { + "epoch": 0.18237658867886838, + "grad_norm": 0.0905805230140686, + "learning_rate": 2.459010238053518e-05, + "loss": 9.8449, + "step": 36520 + }, + { + "epoch": 0.18242652750380783, + "grad_norm": 0.09339253604412079, + "learning_rate": 2.4588600465593632e-05, + "loss": 9.8361, + "step": 36530 + }, + { + "epoch": 0.18247646632874728, + "grad_norm": 0.09230770170688629, + "learning_rate": 2.4587098550652082e-05, + "loss": 9.8377, + "step": 36540 + }, + { + "epoch": 0.18252640515368673, + "grad_norm": 0.09087874740362167, + "learning_rate": 2.4585596635710532e-05, + "loss": 9.8389, + "step": 36550 + }, + { + "epoch": 0.18257634397862618, + "grad_norm": 0.09459523111581802, + "learning_rate": 2.4584094720768982e-05, + "loss": 9.8339, + "step": 36560 + }, + { + "epoch": 0.18262628280356563, + "grad_norm": 0.09835857897996902, + "learning_rate": 2.458259280582743e-05, + "loss": 9.8379, + "step": 36570 + }, + { + "epoch": 0.18267622162850508, + "grad_norm": 0.09462913870811462, + "learning_rate": 2.458109089088588e-05, + "loss": 9.8366, + "step": 36580 + }, + { + "epoch": 0.18272616045344453, + "grad_norm": 0.09966031461954117, + "learning_rate": 2.457958897594433e-05, + "loss": 9.8301, + "step": 36590 + }, + { + "epoch": 0.18277609927838398, + "grad_norm": 0.09340931475162506, + "learning_rate": 2.457808706100278e-05, + "loss": 9.8389, + "step": 36600 + }, + { + "epoch": 0.18282603810332343, + "grad_norm": 0.09110315144062042, + "learning_rate": 2.457658514606123e-05, + "loss": 9.8342, + "step": 36610 + }, + { + "epoch": 0.18287597692826288, + "grad_norm": 0.09554065018892288, + "learning_rate": 2.4575083231119677e-05, + "loss": 9.8408, + "step": 36620 + }, + { + "epoch": 0.18292591575320233, + "grad_norm": 0.09898494929075241, + "learning_rate": 2.4573581316178127e-05, + "loss": 9.8364, + "step": 36630 + }, + { + "epoch": 0.18297585457814178, + "grad_norm": 0.09261037409305573, + "learning_rate": 2.4572079401236577e-05, + "loss": 9.8335, + "step": 36640 + }, + { + "epoch": 0.18302579340308123, + "grad_norm": 0.09171412140130997, + "learning_rate": 2.4570577486295027e-05, + "loss": 9.8286, + "step": 36650 + }, + { + "epoch": 0.18307573222802068, + "grad_norm": 0.09472862631082535, + "learning_rate": 2.4569075571353477e-05, + "loss": 9.8358, + "step": 36660 + }, + { + "epoch": 0.18312567105296013, + "grad_norm": 0.09369245171546936, + "learning_rate": 2.4567573656411924e-05, + "loss": 9.8337, + "step": 36670 + }, + { + "epoch": 0.18317560987789958, + "grad_norm": 0.09198103100061417, + "learning_rate": 2.4566071741470374e-05, + "loss": 9.8384, + "step": 36680 + }, + { + "epoch": 0.18322554870283903, + "grad_norm": 0.09761819988489151, + "learning_rate": 2.4564569826528825e-05, + "loss": 9.833, + "step": 36690 + }, + { + "epoch": 0.18327548752777847, + "grad_norm": 0.09576039016246796, + "learning_rate": 2.4563067911587275e-05, + "loss": 9.8346, + "step": 36700 + }, + { + "epoch": 0.18332542635271792, + "grad_norm": 0.10029350966215134, + "learning_rate": 2.4561565996645725e-05, + "loss": 9.8383, + "step": 36710 + }, + { + "epoch": 0.18337536517765737, + "grad_norm": 0.09115588665008545, + "learning_rate": 2.456006408170417e-05, + "loss": 9.8311, + "step": 36720 + }, + { + "epoch": 0.18342530400259682, + "grad_norm": 0.09495572000741959, + "learning_rate": 2.4558562166762622e-05, + "loss": 9.8296, + "step": 36730 + }, + { + "epoch": 0.18347524282753627, + "grad_norm": 0.0938972532749176, + "learning_rate": 2.4557060251821072e-05, + "loss": 9.8303, + "step": 36740 + }, + { + "epoch": 0.18352518165247572, + "grad_norm": 0.0916903167963028, + "learning_rate": 2.4555558336879522e-05, + "loss": 9.8256, + "step": 36750 + }, + { + "epoch": 0.18357512047741517, + "grad_norm": 0.09134495258331299, + "learning_rate": 2.4554056421937972e-05, + "loss": 9.8269, + "step": 36760 + }, + { + "epoch": 0.18362505930235462, + "grad_norm": 0.09667996317148209, + "learning_rate": 2.455255450699642e-05, + "loss": 9.8308, + "step": 36770 + }, + { + "epoch": 0.18367499812729407, + "grad_norm": 0.09833179414272308, + "learning_rate": 2.455105259205487e-05, + "loss": 9.8352, + "step": 36780 + }, + { + "epoch": 0.18372493695223352, + "grad_norm": 0.09507912397384644, + "learning_rate": 2.454955067711332e-05, + "loss": 9.8326, + "step": 36790 + }, + { + "epoch": 0.18377487577717297, + "grad_norm": 0.09790566563606262, + "learning_rate": 2.454804876217177e-05, + "loss": 9.831, + "step": 36800 + }, + { + "epoch": 0.18382481460211242, + "grad_norm": 0.1005285233259201, + "learning_rate": 2.454654684723022e-05, + "loss": 9.8316, + "step": 36810 + }, + { + "epoch": 0.18387475342705187, + "grad_norm": 0.09769100695848465, + "learning_rate": 2.4545044932288667e-05, + "loss": 9.8273, + "step": 36820 + }, + { + "epoch": 0.18392469225199132, + "grad_norm": 0.09412973374128342, + "learning_rate": 2.454354301734712e-05, + "loss": 9.826, + "step": 36830 + }, + { + "epoch": 0.18397463107693077, + "grad_norm": 0.09262508153915405, + "learning_rate": 2.4542041102405567e-05, + "loss": 9.8291, + "step": 36840 + }, + { + "epoch": 0.18402456990187022, + "grad_norm": 0.09512876719236374, + "learning_rate": 2.4540539187464017e-05, + "loss": 9.8278, + "step": 36850 + }, + { + "epoch": 0.18407450872680967, + "grad_norm": 0.0943412259221077, + "learning_rate": 2.4539037272522467e-05, + "loss": 9.8363, + "step": 36860 + }, + { + "epoch": 0.18412444755174912, + "grad_norm": 0.09464114904403687, + "learning_rate": 2.4537535357580914e-05, + "loss": 9.8294, + "step": 36870 + }, + { + "epoch": 0.18417438637668856, + "grad_norm": 0.09377477318048477, + "learning_rate": 2.4536033442639368e-05, + "loss": 9.8332, + "step": 36880 + }, + { + "epoch": 0.18422432520162801, + "grad_norm": 0.09622970223426819, + "learning_rate": 2.4534531527697815e-05, + "loss": 9.8243, + "step": 36890 + }, + { + "epoch": 0.18427426402656746, + "grad_norm": 0.09202242642641068, + "learning_rate": 2.4533029612756265e-05, + "loss": 9.8243, + "step": 36900 + }, + { + "epoch": 0.1843242028515069, + "grad_norm": 0.09235205501317978, + "learning_rate": 2.4531527697814715e-05, + "loss": 9.8343, + "step": 36910 + }, + { + "epoch": 0.18437414167644636, + "grad_norm": 0.09455617517232895, + "learning_rate": 2.4530025782873162e-05, + "loss": 9.8255, + "step": 36920 + }, + { + "epoch": 0.1844240805013858, + "grad_norm": 0.09160789847373962, + "learning_rate": 2.4528523867931615e-05, + "loss": 9.8267, + "step": 36930 + }, + { + "epoch": 0.18447401932632526, + "grad_norm": 0.09022436290979385, + "learning_rate": 2.4527021952990062e-05, + "loss": 9.8328, + "step": 36940 + }, + { + "epoch": 0.1845239581512647, + "grad_norm": 0.09537603706121445, + "learning_rate": 2.4525520038048512e-05, + "loss": 9.8258, + "step": 36950 + }, + { + "epoch": 0.18457389697620416, + "grad_norm": 0.09323366731405258, + "learning_rate": 2.4524018123106962e-05, + "loss": 9.8279, + "step": 36960 + }, + { + "epoch": 0.1846238358011436, + "grad_norm": 0.0941099300980568, + "learning_rate": 2.452251620816541e-05, + "loss": 9.8292, + "step": 36970 + }, + { + "epoch": 0.18467377462608306, + "grad_norm": 0.0980052724480629, + "learning_rate": 2.4521014293223863e-05, + "loss": 9.8218, + "step": 36980 + }, + { + "epoch": 0.1847237134510225, + "grad_norm": 0.09246113896369934, + "learning_rate": 2.451951237828231e-05, + "loss": 9.8246, + "step": 36990 + }, + { + "epoch": 0.18477365227596196, + "grad_norm": 0.09564212709665298, + "learning_rate": 2.451801046334076e-05, + "loss": 9.8209, + "step": 37000 + }, + { + "epoch": 0.1848235911009014, + "grad_norm": 0.09970725327730179, + "learning_rate": 2.451650854839921e-05, + "loss": 9.8251, + "step": 37010 + }, + { + "epoch": 0.18487352992584086, + "grad_norm": 0.09713415801525116, + "learning_rate": 2.4515006633457657e-05, + "loss": 9.8269, + "step": 37020 + }, + { + "epoch": 0.1849234687507803, + "grad_norm": 0.09196287393569946, + "learning_rate": 2.451350471851611e-05, + "loss": 9.827, + "step": 37030 + }, + { + "epoch": 0.18497340757571976, + "grad_norm": 0.09455425292253494, + "learning_rate": 2.4512002803574557e-05, + "loss": 9.8193, + "step": 37040 + }, + { + "epoch": 0.1850233464006592, + "grad_norm": 0.09092764556407928, + "learning_rate": 2.4510500888633007e-05, + "loss": 9.8283, + "step": 37050 + }, + { + "epoch": 0.18507328522559865, + "grad_norm": 0.0928642749786377, + "learning_rate": 2.4508998973691457e-05, + "loss": 9.8273, + "step": 37060 + }, + { + "epoch": 0.1851232240505381, + "grad_norm": 0.09685216099023819, + "learning_rate": 2.4507497058749904e-05, + "loss": 9.8225, + "step": 37070 + }, + { + "epoch": 0.18517316287547755, + "grad_norm": 0.0929524376988411, + "learning_rate": 2.4505995143808358e-05, + "loss": 9.8339, + "step": 37080 + }, + { + "epoch": 0.18522310170041698, + "grad_norm": 0.09492181241512299, + "learning_rate": 2.4504493228866805e-05, + "loss": 9.8241, + "step": 37090 + }, + { + "epoch": 0.18527304052535642, + "grad_norm": 0.09493514150381088, + "learning_rate": 2.4502991313925255e-05, + "loss": 9.8202, + "step": 37100 + }, + { + "epoch": 0.18532297935029587, + "grad_norm": 0.0974758043885231, + "learning_rate": 2.4501489398983705e-05, + "loss": 9.8215, + "step": 37110 + }, + { + "epoch": 0.18537291817523532, + "grad_norm": 0.09460314363241196, + "learning_rate": 2.4499987484042152e-05, + "loss": 9.8209, + "step": 37120 + }, + { + "epoch": 0.18542285700017477, + "grad_norm": 0.10088241845369339, + "learning_rate": 2.4498485569100605e-05, + "loss": 9.8161, + "step": 37130 + }, + { + "epoch": 0.18547279582511422, + "grad_norm": 0.09594068676233292, + "learning_rate": 2.4496983654159052e-05, + "loss": 9.8158, + "step": 37140 + }, + { + "epoch": 0.18552273465005367, + "grad_norm": 0.09308291226625443, + "learning_rate": 2.4495481739217506e-05, + "loss": 9.8229, + "step": 37150 + }, + { + "epoch": 0.18557267347499312, + "grad_norm": 0.09384151548147202, + "learning_rate": 2.4493979824275952e-05, + "loss": 9.8258, + "step": 37160 + }, + { + "epoch": 0.18562261229993257, + "grad_norm": 0.09089542925357819, + "learning_rate": 2.44924779093344e-05, + "loss": 9.8214, + "step": 37170 + }, + { + "epoch": 0.18567255112487202, + "grad_norm": 0.10226479917764664, + "learning_rate": 2.4490975994392853e-05, + "loss": 9.8194, + "step": 37180 + }, + { + "epoch": 0.18572248994981147, + "grad_norm": 0.09530473500490189, + "learning_rate": 2.44894740794513e-05, + "loss": 9.8214, + "step": 37190 + }, + { + "epoch": 0.18577242877475092, + "grad_norm": 0.09461791068315506, + "learning_rate": 2.4487972164509753e-05, + "loss": 9.8234, + "step": 37200 + }, + { + "epoch": 0.18582236759969037, + "grad_norm": 0.09858336299657822, + "learning_rate": 2.44864702495682e-05, + "loss": 9.8273, + "step": 37210 + }, + { + "epoch": 0.18587230642462982, + "grad_norm": 0.0900527834892273, + "learning_rate": 2.4484968334626647e-05, + "loss": 9.8225, + "step": 37220 + }, + { + "epoch": 0.18592224524956927, + "grad_norm": 0.09260288625955582, + "learning_rate": 2.44834664196851e-05, + "loss": 9.8215, + "step": 37230 + }, + { + "epoch": 0.18597218407450872, + "grad_norm": 0.0938689112663269, + "learning_rate": 2.4481964504743547e-05, + "loss": 9.8189, + "step": 37240 + }, + { + "epoch": 0.18602212289944817, + "grad_norm": 0.09121273458003998, + "learning_rate": 2.4480462589802e-05, + "loss": 9.819, + "step": 37250 + }, + { + "epoch": 0.18607206172438762, + "grad_norm": 0.09315800666809082, + "learning_rate": 2.4478960674860447e-05, + "loss": 9.8215, + "step": 37260 + }, + { + "epoch": 0.18612200054932707, + "grad_norm": 0.09155386686325073, + "learning_rate": 2.4477458759918898e-05, + "loss": 9.8212, + "step": 37270 + }, + { + "epoch": 0.18617193937426652, + "grad_norm": 0.09750566631555557, + "learning_rate": 2.4475956844977348e-05, + "loss": 9.8185, + "step": 37280 + }, + { + "epoch": 0.18622187819920596, + "grad_norm": 0.09988453984260559, + "learning_rate": 2.4474454930035795e-05, + "loss": 9.8174, + "step": 37290 + }, + { + "epoch": 0.18627181702414541, + "grad_norm": 0.09393157064914703, + "learning_rate": 2.4472953015094248e-05, + "loss": 9.8232, + "step": 37300 + }, + { + "epoch": 0.18632175584908486, + "grad_norm": 0.09441966563463211, + "learning_rate": 2.4471451100152695e-05, + "loss": 9.8228, + "step": 37310 + }, + { + "epoch": 0.1863716946740243, + "grad_norm": 0.091115802526474, + "learning_rate": 2.4469949185211145e-05, + "loss": 9.8182, + "step": 37320 + }, + { + "epoch": 0.18642163349896376, + "grad_norm": 0.09511063247919083, + "learning_rate": 2.4468447270269595e-05, + "loss": 9.8202, + "step": 37330 + }, + { + "epoch": 0.1864715723239032, + "grad_norm": 0.10219678282737732, + "learning_rate": 2.4466945355328042e-05, + "loss": 9.8191, + "step": 37340 + }, + { + "epoch": 0.18652151114884266, + "grad_norm": 0.09869439899921417, + "learning_rate": 2.4465443440386496e-05, + "loss": 9.8132, + "step": 37350 + }, + { + "epoch": 0.1865714499737821, + "grad_norm": 0.09464725852012634, + "learning_rate": 2.4463941525444942e-05, + "loss": 9.8219, + "step": 37360 + }, + { + "epoch": 0.18662138879872156, + "grad_norm": 0.08857977390289307, + "learning_rate": 2.4462439610503393e-05, + "loss": 9.8222, + "step": 37370 + }, + { + "epoch": 0.186671327623661, + "grad_norm": 0.09524288773536682, + "learning_rate": 2.4460937695561843e-05, + "loss": 9.8197, + "step": 37380 + }, + { + "epoch": 0.18672126644860046, + "grad_norm": 0.09553853422403336, + "learning_rate": 2.445943578062029e-05, + "loss": 9.8198, + "step": 37390 + }, + { + "epoch": 0.1867712052735399, + "grad_norm": 0.10078942030668259, + "learning_rate": 2.4457933865678743e-05, + "loss": 9.8121, + "step": 37400 + }, + { + "epoch": 0.18682114409847936, + "grad_norm": 0.09711633622646332, + "learning_rate": 2.445643195073719e-05, + "loss": 9.8176, + "step": 37410 + }, + { + "epoch": 0.1868710829234188, + "grad_norm": 0.0985088124871254, + "learning_rate": 2.445493003579564e-05, + "loss": 9.8125, + "step": 37420 + }, + { + "epoch": 0.18692102174835826, + "grad_norm": 0.10270990431308746, + "learning_rate": 2.445342812085409e-05, + "loss": 9.824, + "step": 37430 + }, + { + "epoch": 0.1869709605732977, + "grad_norm": 0.09112194925546646, + "learning_rate": 2.4451926205912537e-05, + "loss": 9.8115, + "step": 37440 + }, + { + "epoch": 0.18702089939823716, + "grad_norm": 0.09221579879522324, + "learning_rate": 2.445042429097099e-05, + "loss": 9.8166, + "step": 37450 + }, + { + "epoch": 0.1870708382231766, + "grad_norm": 0.09492717683315277, + "learning_rate": 2.4448922376029437e-05, + "loss": 9.816, + "step": 37460 + }, + { + "epoch": 0.18712077704811605, + "grad_norm": 0.09594427794218063, + "learning_rate": 2.4447420461087888e-05, + "loss": 9.8106, + "step": 37470 + }, + { + "epoch": 0.1871707158730555, + "grad_norm": 0.10151471197605133, + "learning_rate": 2.4445918546146338e-05, + "loss": 9.8237, + "step": 37480 + }, + { + "epoch": 0.18722065469799495, + "grad_norm": 0.09668134897947311, + "learning_rate": 2.4444416631204785e-05, + "loss": 9.8231, + "step": 37490 + }, + { + "epoch": 0.1872705935229344, + "grad_norm": 0.09197778254747391, + "learning_rate": 2.4442914716263238e-05, + "loss": 9.817, + "step": 37500 + }, + { + "epoch": 0.18732053234787385, + "grad_norm": 0.09514784067869186, + "learning_rate": 2.4441412801321685e-05, + "loss": 9.817, + "step": 37510 + }, + { + "epoch": 0.1873704711728133, + "grad_norm": 0.09452585130929947, + "learning_rate": 2.4439910886380135e-05, + "loss": 9.8115, + "step": 37520 + }, + { + "epoch": 0.18742040999775275, + "grad_norm": 0.09140285104513168, + "learning_rate": 2.4438408971438585e-05, + "loss": 9.8115, + "step": 37530 + }, + { + "epoch": 0.1874703488226922, + "grad_norm": 0.10165819525718689, + "learning_rate": 2.4436907056497032e-05, + "loss": 9.8183, + "step": 37540 + }, + { + "epoch": 0.18752028764763165, + "grad_norm": 0.09387430548667908, + "learning_rate": 2.4435405141555486e-05, + "loss": 9.8187, + "step": 37550 + }, + { + "epoch": 0.1875702264725711, + "grad_norm": 0.09394615888595581, + "learning_rate": 2.4433903226613932e-05, + "loss": 9.8146, + "step": 37560 + }, + { + "epoch": 0.18762016529751055, + "grad_norm": 0.09397023171186447, + "learning_rate": 2.4432401311672386e-05, + "loss": 9.8182, + "step": 37570 + }, + { + "epoch": 0.18767010412245, + "grad_norm": 0.09779095649719238, + "learning_rate": 2.4430899396730833e-05, + "loss": 9.81, + "step": 37580 + }, + { + "epoch": 0.18772004294738945, + "grad_norm": 0.09826376289129257, + "learning_rate": 2.442939748178928e-05, + "loss": 9.8129, + "step": 37590 + }, + { + "epoch": 0.1877699817723289, + "grad_norm": 0.09404350817203522, + "learning_rate": 2.4427895566847733e-05, + "loss": 9.813, + "step": 37600 + }, + { + "epoch": 0.18781992059726835, + "grad_norm": 0.09514794498682022, + "learning_rate": 2.442639365190618e-05, + "loss": 9.8125, + "step": 37610 + }, + { + "epoch": 0.1878698594222078, + "grad_norm": 0.09750908613204956, + "learning_rate": 2.4424891736964634e-05, + "loss": 9.8146, + "step": 37620 + }, + { + "epoch": 0.18791979824714725, + "grad_norm": 0.09314355999231339, + "learning_rate": 2.442338982202308e-05, + "loss": 9.8149, + "step": 37630 + }, + { + "epoch": 0.1879697370720867, + "grad_norm": 0.08955375105142593, + "learning_rate": 2.4421887907081527e-05, + "loss": 9.8092, + "step": 37640 + }, + { + "epoch": 0.18801967589702615, + "grad_norm": 0.09085706621408463, + "learning_rate": 2.442038599213998e-05, + "loss": 9.8129, + "step": 37650 + }, + { + "epoch": 0.1880696147219656, + "grad_norm": 0.09261947870254517, + "learning_rate": 2.4418884077198427e-05, + "loss": 9.8183, + "step": 37660 + }, + { + "epoch": 0.18811955354690504, + "grad_norm": 0.09396150708198547, + "learning_rate": 2.441738216225688e-05, + "loss": 9.8103, + "step": 37670 + }, + { + "epoch": 0.1881694923718445, + "grad_norm": 0.10040120780467987, + "learning_rate": 2.4415880247315328e-05, + "loss": 9.815, + "step": 37680 + }, + { + "epoch": 0.18821943119678394, + "grad_norm": 0.09295698255300522, + "learning_rate": 2.4414378332373775e-05, + "loss": 9.8175, + "step": 37690 + }, + { + "epoch": 0.1882693700217234, + "grad_norm": 0.09030512720346451, + "learning_rate": 2.4412876417432228e-05, + "loss": 9.8094, + "step": 37700 + }, + { + "epoch": 0.18831930884666284, + "grad_norm": 0.08929255604743958, + "learning_rate": 2.4411374502490675e-05, + "loss": 9.8134, + "step": 37710 + }, + { + "epoch": 0.1883692476716023, + "grad_norm": 0.09024254232645035, + "learning_rate": 2.440987258754913e-05, + "loss": 9.816, + "step": 37720 + }, + { + "epoch": 0.18841918649654174, + "grad_norm": 0.09843594580888748, + "learning_rate": 2.4408370672607575e-05, + "loss": 9.8173, + "step": 37730 + }, + { + "epoch": 0.1884691253214812, + "grad_norm": 0.09375924617052078, + "learning_rate": 2.4406868757666022e-05, + "loss": 9.8139, + "step": 37740 + }, + { + "epoch": 0.18851906414642064, + "grad_norm": 0.09652825444936752, + "learning_rate": 2.4405366842724476e-05, + "loss": 9.8105, + "step": 37750 + }, + { + "epoch": 0.1885690029713601, + "grad_norm": 0.09221908450126648, + "learning_rate": 2.4403864927782922e-05, + "loss": 9.8123, + "step": 37760 + }, + { + "epoch": 0.18861894179629954, + "grad_norm": 0.0924220085144043, + "learning_rate": 2.4402363012841376e-05, + "loss": 9.8106, + "step": 37770 + }, + { + "epoch": 0.188668880621239, + "grad_norm": 0.09092016518115997, + "learning_rate": 2.4400861097899823e-05, + "loss": 9.8147, + "step": 37780 + }, + { + "epoch": 0.18871881944617844, + "grad_norm": 0.09375354647636414, + "learning_rate": 2.4399359182958273e-05, + "loss": 9.8052, + "step": 37790 + }, + { + "epoch": 0.1887687582711179, + "grad_norm": 0.09391508996486664, + "learning_rate": 2.4397857268016723e-05, + "loss": 9.8157, + "step": 37800 + }, + { + "epoch": 0.18881869709605734, + "grad_norm": 0.0947243720293045, + "learning_rate": 2.439635535307517e-05, + "loss": 9.808, + "step": 37810 + }, + { + "epoch": 0.18886863592099679, + "grad_norm": 0.09377534687519073, + "learning_rate": 2.4394853438133624e-05, + "loss": 9.8004, + "step": 37820 + }, + { + "epoch": 0.18891857474593624, + "grad_norm": 0.09614383429288864, + "learning_rate": 2.439335152319207e-05, + "loss": 9.8076, + "step": 37830 + }, + { + "epoch": 0.18896851357087568, + "grad_norm": 0.0933867022395134, + "learning_rate": 2.439184960825052e-05, + "loss": 9.8132, + "step": 37840 + }, + { + "epoch": 0.18901845239581513, + "grad_norm": 0.0957120954990387, + "learning_rate": 2.439034769330897e-05, + "loss": 9.8068, + "step": 37850 + }, + { + "epoch": 0.18906839122075458, + "grad_norm": 0.09493666887283325, + "learning_rate": 2.4388845778367417e-05, + "loss": 9.8015, + "step": 37860 + }, + { + "epoch": 0.18911833004569403, + "grad_norm": 0.09380702674388885, + "learning_rate": 2.438734386342587e-05, + "loss": 9.8049, + "step": 37870 + }, + { + "epoch": 0.18916826887063348, + "grad_norm": 0.09432664513587952, + "learning_rate": 2.4385841948484318e-05, + "loss": 9.8008, + "step": 37880 + }, + { + "epoch": 0.18921820769557293, + "grad_norm": 0.09340386092662811, + "learning_rate": 2.4384340033542768e-05, + "loss": 9.8072, + "step": 37890 + }, + { + "epoch": 0.18926814652051238, + "grad_norm": 0.09343277662992477, + "learning_rate": 2.4382838118601218e-05, + "loss": 9.8092, + "step": 37900 + }, + { + "epoch": 0.18931808534545183, + "grad_norm": 0.09355462342500687, + "learning_rate": 2.4381336203659665e-05, + "loss": 9.8062, + "step": 37910 + }, + { + "epoch": 0.18936802417039128, + "grad_norm": 0.09270920604467392, + "learning_rate": 2.437983428871812e-05, + "loss": 9.8113, + "step": 37920 + }, + { + "epoch": 0.18941796299533073, + "grad_norm": 0.09088815748691559, + "learning_rate": 2.4378332373776565e-05, + "loss": 9.8089, + "step": 37930 + }, + { + "epoch": 0.18946790182027018, + "grad_norm": 0.09358256310224533, + "learning_rate": 2.4376830458835016e-05, + "loss": 9.8005, + "step": 37940 + }, + { + "epoch": 0.18951784064520963, + "grad_norm": 0.09258565306663513, + "learning_rate": 2.4375328543893466e-05, + "loss": 9.8029, + "step": 37950 + }, + { + "epoch": 0.18956777947014908, + "grad_norm": 0.09664854407310486, + "learning_rate": 2.4373826628951912e-05, + "loss": 9.8015, + "step": 37960 + }, + { + "epoch": 0.18961771829508853, + "grad_norm": 0.09192045032978058, + "learning_rate": 2.4372324714010366e-05, + "loss": 9.8067, + "step": 37970 + }, + { + "epoch": 0.18966765712002798, + "grad_norm": 0.09485509991645813, + "learning_rate": 2.4370822799068813e-05, + "loss": 9.8016, + "step": 37980 + }, + { + "epoch": 0.18971759594496743, + "grad_norm": 0.09736743569374084, + "learning_rate": 2.4369320884127263e-05, + "loss": 9.8028, + "step": 37990 + }, + { + "epoch": 0.18976753476990688, + "grad_norm": 0.09561127424240112, + "learning_rate": 2.4367818969185713e-05, + "loss": 9.803, + "step": 38000 + }, + { + "epoch": 0.18981747359484633, + "grad_norm": 0.0922846645116806, + "learning_rate": 2.436631705424416e-05, + "loss": 9.8083, + "step": 38010 + }, + { + "epoch": 0.18986741241978577, + "grad_norm": 0.09271165728569031, + "learning_rate": 2.4364815139302614e-05, + "loss": 9.8141, + "step": 38020 + }, + { + "epoch": 0.18991735124472522, + "grad_norm": 0.09106654673814774, + "learning_rate": 2.436331322436106e-05, + "loss": 9.8048, + "step": 38030 + }, + { + "epoch": 0.18996729006966467, + "grad_norm": 0.09738942235708237, + "learning_rate": 2.436181130941951e-05, + "loss": 9.8077, + "step": 38040 + }, + { + "epoch": 0.19001722889460412, + "grad_norm": 0.09800420701503754, + "learning_rate": 2.436030939447796e-05, + "loss": 9.8041, + "step": 38050 + }, + { + "epoch": 0.19006716771954357, + "grad_norm": 0.09482547640800476, + "learning_rate": 2.4358807479536407e-05, + "loss": 9.804, + "step": 38060 + }, + { + "epoch": 0.19011710654448302, + "grad_norm": 0.09160934388637543, + "learning_rate": 2.435730556459486e-05, + "loss": 9.8012, + "step": 38070 + }, + { + "epoch": 0.19016704536942244, + "grad_norm": 0.09235961735248566, + "learning_rate": 2.4355803649653308e-05, + "loss": 9.8039, + "step": 38080 + }, + { + "epoch": 0.1902169841943619, + "grad_norm": 0.0927954837679863, + "learning_rate": 2.4354301734711758e-05, + "loss": 9.805, + "step": 38090 + }, + { + "epoch": 0.19026692301930134, + "grad_norm": 0.09588232636451721, + "learning_rate": 2.4352799819770208e-05, + "loss": 9.8001, + "step": 38100 + }, + { + "epoch": 0.1903168618442408, + "grad_norm": 0.09209771454334259, + "learning_rate": 2.435129790482866e-05, + "loss": 9.7988, + "step": 38110 + }, + { + "epoch": 0.19036680066918024, + "grad_norm": 0.09557371586561203, + "learning_rate": 2.434979598988711e-05, + "loss": 9.7982, + "step": 38120 + }, + { + "epoch": 0.1904167394941197, + "grad_norm": 0.08873193711042404, + "learning_rate": 2.4348294074945555e-05, + "loss": 9.8029, + "step": 38130 + }, + { + "epoch": 0.19046667831905914, + "grad_norm": 0.09229250252246857, + "learning_rate": 2.4346792160004006e-05, + "loss": 9.8071, + "step": 38140 + }, + { + "epoch": 0.1905166171439986, + "grad_norm": 0.09603758901357651, + "learning_rate": 2.4345290245062456e-05, + "loss": 9.8052, + "step": 38150 + }, + { + "epoch": 0.19056655596893804, + "grad_norm": 0.09332188963890076, + "learning_rate": 2.4343788330120906e-05, + "loss": 9.7968, + "step": 38160 + }, + { + "epoch": 0.1906164947938775, + "grad_norm": 0.09354902803897858, + "learning_rate": 2.4342286415179356e-05, + "loss": 9.7974, + "step": 38170 + }, + { + "epoch": 0.19066643361881694, + "grad_norm": 0.0928342416882515, + "learning_rate": 2.4340784500237803e-05, + "loss": 9.7945, + "step": 38180 + }, + { + "epoch": 0.1907163724437564, + "grad_norm": 0.0948493480682373, + "learning_rate": 2.4339282585296253e-05, + "loss": 9.7977, + "step": 38190 + }, + { + "epoch": 0.19076631126869584, + "grad_norm": 0.09960921853780746, + "learning_rate": 2.4337780670354703e-05, + "loss": 9.8052, + "step": 38200 + }, + { + "epoch": 0.1908162500936353, + "grad_norm": 0.09617515653371811, + "learning_rate": 2.4336278755413153e-05, + "loss": 9.7999, + "step": 38210 + }, + { + "epoch": 0.19086618891857474, + "grad_norm": 0.09649699926376343, + "learning_rate": 2.4334776840471604e-05, + "loss": 9.8018, + "step": 38220 + }, + { + "epoch": 0.19091612774351419, + "grad_norm": 0.09706174582242966, + "learning_rate": 2.433327492553005e-05, + "loss": 9.8035, + "step": 38230 + }, + { + "epoch": 0.19096606656845364, + "grad_norm": 0.09482137858867645, + "learning_rate": 2.43317730105885e-05, + "loss": 9.8009, + "step": 38240 + }, + { + "epoch": 0.19101600539339308, + "grad_norm": 0.0948929712176323, + "learning_rate": 2.433027109564695e-05, + "loss": 9.796, + "step": 38250 + }, + { + "epoch": 0.19106594421833253, + "grad_norm": 0.09649039804935455, + "learning_rate": 2.43287691807054e-05, + "loss": 9.8034, + "step": 38260 + }, + { + "epoch": 0.19111588304327198, + "grad_norm": 0.09573374688625336, + "learning_rate": 2.432726726576385e-05, + "loss": 9.7922, + "step": 38270 + }, + { + "epoch": 0.19116582186821143, + "grad_norm": 0.10029622167348862, + "learning_rate": 2.4325765350822298e-05, + "loss": 9.7982, + "step": 38280 + }, + { + "epoch": 0.19121576069315088, + "grad_norm": 0.09856070578098297, + "learning_rate": 2.4324263435880748e-05, + "loss": 9.8087, + "step": 38290 + }, + { + "epoch": 0.19126569951809033, + "grad_norm": 0.08724341541528702, + "learning_rate": 2.4322761520939198e-05, + "loss": 9.7989, + "step": 38300 + }, + { + "epoch": 0.19131563834302978, + "grad_norm": 0.09495957940816879, + "learning_rate": 2.432125960599765e-05, + "loss": 9.7892, + "step": 38310 + }, + { + "epoch": 0.19136557716796923, + "grad_norm": 0.09710295498371124, + "learning_rate": 2.43197576910561e-05, + "loss": 9.8049, + "step": 38320 + }, + { + "epoch": 0.19141551599290868, + "grad_norm": 0.0903317853808403, + "learning_rate": 2.4318255776114545e-05, + "loss": 9.8063, + "step": 38330 + }, + { + "epoch": 0.19146545481784813, + "grad_norm": 0.09293761849403381, + "learning_rate": 2.4316753861172996e-05, + "loss": 9.7994, + "step": 38340 + }, + { + "epoch": 0.19151539364278758, + "grad_norm": 0.09713525325059891, + "learning_rate": 2.4315251946231446e-05, + "loss": 9.7968, + "step": 38350 + }, + { + "epoch": 0.19156533246772703, + "grad_norm": 0.09488287568092346, + "learning_rate": 2.4313750031289896e-05, + "loss": 9.8005, + "step": 38360 + }, + { + "epoch": 0.19161527129266648, + "grad_norm": 0.0962325856089592, + "learning_rate": 2.4312248116348346e-05, + "loss": 9.7941, + "step": 38370 + }, + { + "epoch": 0.19166521011760593, + "grad_norm": 0.09315495938062668, + "learning_rate": 2.4310746201406793e-05, + "loss": 9.7957, + "step": 38380 + }, + { + "epoch": 0.19171514894254538, + "grad_norm": 0.09810058772563934, + "learning_rate": 2.4309244286465243e-05, + "loss": 9.8038, + "step": 38390 + }, + { + "epoch": 0.19176508776748483, + "grad_norm": 0.09418797492980957, + "learning_rate": 2.4307742371523693e-05, + "loss": 9.7966, + "step": 38400 + }, + { + "epoch": 0.19181502659242428, + "grad_norm": 0.0971032902598381, + "learning_rate": 2.4306240456582143e-05, + "loss": 9.7953, + "step": 38410 + }, + { + "epoch": 0.19186496541736373, + "grad_norm": 0.09385687112808228, + "learning_rate": 2.4304738541640594e-05, + "loss": 9.7957, + "step": 38420 + }, + { + "epoch": 0.19191490424230317, + "grad_norm": 0.09179292619228363, + "learning_rate": 2.430323662669904e-05, + "loss": 9.7907, + "step": 38430 + }, + { + "epoch": 0.19196484306724262, + "grad_norm": 0.09452084451913834, + "learning_rate": 2.430173471175749e-05, + "loss": 9.8014, + "step": 38440 + }, + { + "epoch": 0.19201478189218207, + "grad_norm": 0.09643220156431198, + "learning_rate": 2.430023279681594e-05, + "loss": 9.7915, + "step": 38450 + }, + { + "epoch": 0.19206472071712152, + "grad_norm": 0.0984674021601677, + "learning_rate": 2.429873088187439e-05, + "loss": 9.7976, + "step": 38460 + }, + { + "epoch": 0.19211465954206097, + "grad_norm": 0.09368205070495605, + "learning_rate": 2.429722896693284e-05, + "loss": 9.7938, + "step": 38470 + }, + { + "epoch": 0.19216459836700042, + "grad_norm": 0.09682975709438324, + "learning_rate": 2.429572705199129e-05, + "loss": 9.7942, + "step": 38480 + }, + { + "epoch": 0.19221453719193987, + "grad_norm": 0.09321840852499008, + "learning_rate": 2.4294225137049738e-05, + "loss": 9.7997, + "step": 38490 + }, + { + "epoch": 0.19226447601687932, + "grad_norm": 0.09486759454011917, + "learning_rate": 2.4292723222108188e-05, + "loss": 9.7955, + "step": 38500 + }, + { + "epoch": 0.19231441484181877, + "grad_norm": 0.09923727810382843, + "learning_rate": 2.429122130716664e-05, + "loss": 9.7936, + "step": 38510 + }, + { + "epoch": 0.19236435366675822, + "grad_norm": 0.09178563952445984, + "learning_rate": 2.428971939222509e-05, + "loss": 9.7945, + "step": 38520 + }, + { + "epoch": 0.19241429249169767, + "grad_norm": 0.09080483019351959, + "learning_rate": 2.428821747728354e-05, + "loss": 9.7983, + "step": 38530 + }, + { + "epoch": 0.19246423131663712, + "grad_norm": 0.09831558167934418, + "learning_rate": 2.4286715562341986e-05, + "loss": 9.7918, + "step": 38540 + }, + { + "epoch": 0.19251417014157657, + "grad_norm": 0.095986008644104, + "learning_rate": 2.4285213647400436e-05, + "loss": 9.792, + "step": 38550 + }, + { + "epoch": 0.19256410896651602, + "grad_norm": 0.10043915361166, + "learning_rate": 2.4283711732458886e-05, + "loss": 9.7913, + "step": 38560 + }, + { + "epoch": 0.19261404779145547, + "grad_norm": 0.09741128236055374, + "learning_rate": 2.4282209817517336e-05, + "loss": 9.7836, + "step": 38570 + }, + { + "epoch": 0.19266398661639492, + "grad_norm": 0.09496723860502243, + "learning_rate": 2.4280707902575786e-05, + "loss": 9.7869, + "step": 38580 + }, + { + "epoch": 0.19271392544133437, + "grad_norm": 0.09604073315858841, + "learning_rate": 2.4279205987634233e-05, + "loss": 9.7975, + "step": 38590 + }, + { + "epoch": 0.19276386426627382, + "grad_norm": 0.09631392359733582, + "learning_rate": 2.4277704072692683e-05, + "loss": 9.7917, + "step": 38600 + }, + { + "epoch": 0.19281380309121326, + "grad_norm": 0.09735754877328873, + "learning_rate": 2.4276202157751133e-05, + "loss": 9.7898, + "step": 38610 + }, + { + "epoch": 0.19286374191615271, + "grad_norm": 0.09502192586660385, + "learning_rate": 2.4274700242809584e-05, + "loss": 9.7959, + "step": 38620 + }, + { + "epoch": 0.19291368074109216, + "grad_norm": 0.09539380669593811, + "learning_rate": 2.4273198327868034e-05, + "loss": 9.7939, + "step": 38630 + }, + { + "epoch": 0.1929636195660316, + "grad_norm": 0.09433183073997498, + "learning_rate": 2.427169641292648e-05, + "loss": 9.7888, + "step": 38640 + }, + { + "epoch": 0.19301355839097106, + "grad_norm": 0.09847880899906158, + "learning_rate": 2.427019449798493e-05, + "loss": 9.7895, + "step": 38650 + }, + { + "epoch": 0.1930634972159105, + "grad_norm": 0.09963309019804001, + "learning_rate": 2.426869258304338e-05, + "loss": 9.7924, + "step": 38660 + }, + { + "epoch": 0.19311343604084996, + "grad_norm": 0.09441536664962769, + "learning_rate": 2.426719066810183e-05, + "loss": 9.7913, + "step": 38670 + }, + { + "epoch": 0.1931633748657894, + "grad_norm": 0.09207624942064285, + "learning_rate": 2.426568875316028e-05, + "loss": 9.7925, + "step": 38680 + }, + { + "epoch": 0.19321331369072886, + "grad_norm": 0.09765110164880753, + "learning_rate": 2.4264186838218728e-05, + "loss": 9.7963, + "step": 38690 + }, + { + "epoch": 0.1932632525156683, + "grad_norm": 0.09711901098489761, + "learning_rate": 2.4262684923277178e-05, + "loss": 9.788, + "step": 38700 + }, + { + "epoch": 0.19331319134060776, + "grad_norm": 0.09210279583930969, + "learning_rate": 2.426118300833563e-05, + "loss": 9.7852, + "step": 38710 + }, + { + "epoch": 0.1933631301655472, + "grad_norm": 0.09786038845777512, + "learning_rate": 2.425968109339408e-05, + "loss": 9.7915, + "step": 38720 + }, + { + "epoch": 0.19341306899048666, + "grad_norm": 0.09457087516784668, + "learning_rate": 2.425817917845253e-05, + "loss": 9.7967, + "step": 38730 + }, + { + "epoch": 0.1934630078154261, + "grad_norm": 0.09489721804857254, + "learning_rate": 2.4256677263510976e-05, + "loss": 9.7965, + "step": 38740 + }, + { + "epoch": 0.19351294664036556, + "grad_norm": 0.09461694210767746, + "learning_rate": 2.4255175348569426e-05, + "loss": 9.7884, + "step": 38750 + }, + { + "epoch": 0.193562885465305, + "grad_norm": 0.10121167451143265, + "learning_rate": 2.4253673433627876e-05, + "loss": 9.7838, + "step": 38760 + }, + { + "epoch": 0.19361282429024446, + "grad_norm": 0.09524286538362503, + "learning_rate": 2.4252171518686326e-05, + "loss": 9.7871, + "step": 38770 + }, + { + "epoch": 0.1936627631151839, + "grad_norm": 0.09742001444101334, + "learning_rate": 2.4250669603744776e-05, + "loss": 9.7947, + "step": 38780 + }, + { + "epoch": 0.19371270194012336, + "grad_norm": 0.09000395238399506, + "learning_rate": 2.4249167688803223e-05, + "loss": 9.7865, + "step": 38790 + }, + { + "epoch": 0.1937626407650628, + "grad_norm": 0.09661939740180969, + "learning_rate": 2.4247665773861677e-05, + "loss": 9.7887, + "step": 38800 + }, + { + "epoch": 0.19381257959000225, + "grad_norm": 0.09397201985120773, + "learning_rate": 2.4246163858920123e-05, + "loss": 9.7851, + "step": 38810 + }, + { + "epoch": 0.1938625184149417, + "grad_norm": 0.0975542664527893, + "learning_rate": 2.4244661943978574e-05, + "loss": 9.786, + "step": 38820 + }, + { + "epoch": 0.19391245723988115, + "grad_norm": 0.0956290140748024, + "learning_rate": 2.4243160029037024e-05, + "loss": 9.783, + "step": 38830 + }, + { + "epoch": 0.1939623960648206, + "grad_norm": 0.09244171530008316, + "learning_rate": 2.424165811409547e-05, + "loss": 9.7812, + "step": 38840 + }, + { + "epoch": 0.19401233488976005, + "grad_norm": 0.10087534040212631, + "learning_rate": 2.4240156199153924e-05, + "loss": 9.7849, + "step": 38850 + }, + { + "epoch": 0.1940622737146995, + "grad_norm": 0.09302230924367905, + "learning_rate": 2.423865428421237e-05, + "loss": 9.7873, + "step": 38860 + }, + { + "epoch": 0.19411221253963895, + "grad_norm": 0.09214049577713013, + "learning_rate": 2.423715236927082e-05, + "loss": 9.7873, + "step": 38870 + }, + { + "epoch": 0.1941621513645784, + "grad_norm": 0.09628362953662872, + "learning_rate": 2.423565045432927e-05, + "loss": 9.7885, + "step": 38880 + }, + { + "epoch": 0.19421209018951785, + "grad_norm": 0.09119916707277298, + "learning_rate": 2.4234148539387718e-05, + "loss": 9.7847, + "step": 38890 + }, + { + "epoch": 0.1942620290144573, + "grad_norm": 0.09237845242023468, + "learning_rate": 2.423264662444617e-05, + "loss": 9.7874, + "step": 38900 + }, + { + "epoch": 0.19431196783939675, + "grad_norm": 0.08875925093889236, + "learning_rate": 2.423114470950462e-05, + "loss": 9.7925, + "step": 38910 + }, + { + "epoch": 0.1943619066643362, + "grad_norm": 0.0932464450597763, + "learning_rate": 2.422964279456307e-05, + "loss": 9.7909, + "step": 38920 + }, + { + "epoch": 0.19441184548927565, + "grad_norm": 0.09021969884634018, + "learning_rate": 2.422814087962152e-05, + "loss": 9.7863, + "step": 38930 + }, + { + "epoch": 0.1944617843142151, + "grad_norm": 0.09389606863260269, + "learning_rate": 2.4226638964679966e-05, + "loss": 9.7808, + "step": 38940 + }, + { + "epoch": 0.19451172313915455, + "grad_norm": 0.09377970546483994, + "learning_rate": 2.422513704973842e-05, + "loss": 9.7729, + "step": 38950 + }, + { + "epoch": 0.194561661964094, + "grad_norm": 0.09550289064645767, + "learning_rate": 2.4223635134796866e-05, + "loss": 9.7801, + "step": 38960 + }, + { + "epoch": 0.19461160078903345, + "grad_norm": 0.0985620841383934, + "learning_rate": 2.4222133219855316e-05, + "loss": 9.7896, + "step": 38970 + }, + { + "epoch": 0.1946615396139729, + "grad_norm": 0.09124714881181717, + "learning_rate": 2.4220631304913766e-05, + "loss": 9.7848, + "step": 38980 + }, + { + "epoch": 0.19471147843891234, + "grad_norm": 0.09538350999355316, + "learning_rate": 2.4219129389972213e-05, + "loss": 9.7863, + "step": 38990 + }, + { + "epoch": 0.1947614172638518, + "grad_norm": 0.09843659400939941, + "learning_rate": 2.4217627475030667e-05, + "loss": 9.7772, + "step": 39000 + }, + { + "epoch": 0.19481135608879124, + "grad_norm": 0.09667788445949554, + "learning_rate": 2.4216125560089113e-05, + "loss": 9.7902, + "step": 39010 + }, + { + "epoch": 0.1948612949137307, + "grad_norm": 0.09687555581331253, + "learning_rate": 2.4214623645147564e-05, + "loss": 9.7835, + "step": 39020 + }, + { + "epoch": 0.19491123373867014, + "grad_norm": 0.09759131073951721, + "learning_rate": 2.4213121730206014e-05, + "loss": 9.7849, + "step": 39030 + }, + { + "epoch": 0.1949611725636096, + "grad_norm": 0.09392527490854263, + "learning_rate": 2.421161981526446e-05, + "loss": 9.784, + "step": 39040 + }, + { + "epoch": 0.19501111138854904, + "grad_norm": 0.09155687689781189, + "learning_rate": 2.4210117900322914e-05, + "loss": 9.7849, + "step": 39050 + }, + { + "epoch": 0.1950610502134885, + "grad_norm": 0.09576915949583054, + "learning_rate": 2.420861598538136e-05, + "loss": 9.7815, + "step": 39060 + }, + { + "epoch": 0.1951109890384279, + "grad_norm": 0.09315671026706696, + "learning_rate": 2.420711407043981e-05, + "loss": 9.7802, + "step": 39070 + }, + { + "epoch": 0.19516092786336736, + "grad_norm": 0.09243351221084595, + "learning_rate": 2.420561215549826e-05, + "loss": 9.7802, + "step": 39080 + }, + { + "epoch": 0.1952108666883068, + "grad_norm": 0.09323740005493164, + "learning_rate": 2.4204110240556708e-05, + "loss": 9.7835, + "step": 39090 + }, + { + "epoch": 0.19526080551324626, + "grad_norm": 0.09786425530910492, + "learning_rate": 2.420260832561516e-05, + "loss": 9.78, + "step": 39100 + }, + { + "epoch": 0.1953107443381857, + "grad_norm": 0.09116844832897186, + "learning_rate": 2.420110641067361e-05, + "loss": 9.7841, + "step": 39110 + }, + { + "epoch": 0.19536068316312516, + "grad_norm": 0.09829219430685043, + "learning_rate": 2.4199604495732062e-05, + "loss": 9.7839, + "step": 39120 + }, + { + "epoch": 0.1954106219880646, + "grad_norm": 0.0964910238981247, + "learning_rate": 2.419810258079051e-05, + "loss": 9.7754, + "step": 39130 + }, + { + "epoch": 0.19546056081300406, + "grad_norm": 0.09510182589292526, + "learning_rate": 2.4196600665848956e-05, + "loss": 9.779, + "step": 39140 + }, + { + "epoch": 0.1955104996379435, + "grad_norm": 0.09461788833141327, + "learning_rate": 2.419509875090741e-05, + "loss": 9.7828, + "step": 39150 + }, + { + "epoch": 0.19556043846288296, + "grad_norm": 0.09442439675331116, + "learning_rate": 2.4193596835965856e-05, + "loss": 45.9869, + "step": 39160 + }, + { + "epoch": 0.1956103772878224, + "grad_norm": 0.09443948417901993, + "learning_rate": 2.419209492102431e-05, + "loss": 9.7777, + "step": 39170 + }, + { + "epoch": 0.19566031611276186, + "grad_norm": 0.08871215581893921, + "learning_rate": 2.4190593006082756e-05, + "loss": 9.7808, + "step": 39180 + }, + { + "epoch": 0.1957102549377013, + "grad_norm": 0.09522920846939087, + "learning_rate": 2.4189091091141203e-05, + "loss": 9.7887, + "step": 39190 + }, + { + "epoch": 0.19576019376264076, + "grad_norm": 0.0978919193148613, + "learning_rate": 2.4187589176199657e-05, + "loss": 9.7724, + "step": 39200 + }, + { + "epoch": 0.1958101325875802, + "grad_norm": 0.09242391586303711, + "learning_rate": 2.4186087261258103e-05, + "loss": 9.7893, + "step": 39210 + }, + { + "epoch": 0.19586007141251965, + "grad_norm": 0.09181750565767288, + "learning_rate": 2.4184585346316557e-05, + "loss": 9.7748, + "step": 39220 + }, + { + "epoch": 0.1959100102374591, + "grad_norm": 0.0938679501414299, + "learning_rate": 2.4183083431375004e-05, + "loss": 9.7745, + "step": 39230 + }, + { + "epoch": 0.19595994906239855, + "grad_norm": 0.09096869826316833, + "learning_rate": 2.418158151643345e-05, + "loss": 9.7836, + "step": 39240 + }, + { + "epoch": 0.196009887887338, + "grad_norm": 0.09524725377559662, + "learning_rate": 2.4180079601491904e-05, + "loss": 9.787, + "step": 39250 + }, + { + "epoch": 0.19605982671227745, + "grad_norm": 0.09219633787870407, + "learning_rate": 2.417857768655035e-05, + "loss": 9.7777, + "step": 39260 + }, + { + "epoch": 0.1961097655372169, + "grad_norm": 0.09788725525140762, + "learning_rate": 2.4177075771608805e-05, + "loss": 9.7787, + "step": 39270 + }, + { + "epoch": 0.19615970436215635, + "grad_norm": 0.09663385152816772, + "learning_rate": 2.417557385666725e-05, + "loss": 9.7779, + "step": 39280 + }, + { + "epoch": 0.1962096431870958, + "grad_norm": 0.10418345034122467, + "learning_rate": 2.4174071941725698e-05, + "loss": 9.7713, + "step": 39290 + }, + { + "epoch": 0.19625958201203525, + "grad_norm": 0.09116250276565552, + "learning_rate": 2.417257002678415e-05, + "loss": 9.7772, + "step": 39300 + }, + { + "epoch": 0.1963095208369747, + "grad_norm": 0.09256359934806824, + "learning_rate": 2.41710681118426e-05, + "loss": 9.7782, + "step": 39310 + }, + { + "epoch": 0.19635945966191415, + "grad_norm": 0.10003110766410828, + "learning_rate": 2.4169566196901052e-05, + "loss": 9.7717, + "step": 39320 + }, + { + "epoch": 0.1964093984868536, + "grad_norm": 0.09774117916822433, + "learning_rate": 2.41680642819595e-05, + "loss": 9.777, + "step": 39330 + }, + { + "epoch": 0.19645933731179305, + "grad_norm": 0.09338162839412689, + "learning_rate": 2.4166562367017946e-05, + "loss": 9.7811, + "step": 39340 + }, + { + "epoch": 0.1965092761367325, + "grad_norm": 0.09204205125570297, + "learning_rate": 2.41650604520764e-05, + "loss": 9.7827, + "step": 39350 + }, + { + "epoch": 0.19655921496167195, + "grad_norm": 0.0937473401427269, + "learning_rate": 2.4163558537134846e-05, + "loss": 9.7759, + "step": 39360 + }, + { + "epoch": 0.1966091537866114, + "grad_norm": 0.09584249556064606, + "learning_rate": 2.41620566221933e-05, + "loss": 9.7817, + "step": 39370 + }, + { + "epoch": 0.19665909261155085, + "grad_norm": 0.09530872106552124, + "learning_rate": 2.4160554707251746e-05, + "loss": 9.7736, + "step": 39380 + }, + { + "epoch": 0.1967090314364903, + "grad_norm": 0.09426391869783401, + "learning_rate": 2.4159052792310193e-05, + "loss": 9.7796, + "step": 39390 + }, + { + "epoch": 0.19675897026142974, + "grad_norm": 0.09266220033168793, + "learning_rate": 2.4157550877368647e-05, + "loss": 9.7739, + "step": 39400 + }, + { + "epoch": 0.1968089090863692, + "grad_norm": 0.0935848280787468, + "learning_rate": 2.4156048962427093e-05, + "loss": 9.7755, + "step": 39410 + }, + { + "epoch": 0.19685884791130864, + "grad_norm": 0.0957341194152832, + "learning_rate": 2.4154547047485547e-05, + "loss": 9.7699, + "step": 39420 + }, + { + "epoch": 0.1969087867362481, + "grad_norm": 0.09587478637695312, + "learning_rate": 2.4153045132543994e-05, + "loss": 9.7725, + "step": 39430 + }, + { + "epoch": 0.19695872556118754, + "grad_norm": 0.09046075493097305, + "learning_rate": 2.4151543217602444e-05, + "loss": 9.776, + "step": 39440 + }, + { + "epoch": 0.197008664386127, + "grad_norm": 0.09086257219314575, + "learning_rate": 2.4150041302660894e-05, + "loss": 9.7735, + "step": 39450 + }, + { + "epoch": 0.19705860321106644, + "grad_norm": 0.10431148111820221, + "learning_rate": 2.414853938771934e-05, + "loss": 9.773, + "step": 39460 + }, + { + "epoch": 0.1971085420360059, + "grad_norm": 0.09601059556007385, + "learning_rate": 2.4147037472777795e-05, + "loss": 9.779, + "step": 39470 + }, + { + "epoch": 0.19715848086094534, + "grad_norm": 0.09748616814613342, + "learning_rate": 2.414553555783624e-05, + "loss": 9.7766, + "step": 39480 + }, + { + "epoch": 0.1972084196858848, + "grad_norm": 0.10166972130537033, + "learning_rate": 2.414403364289469e-05, + "loss": 9.7738, + "step": 39490 + }, + { + "epoch": 0.19725835851082424, + "grad_norm": 0.09647940844297409, + "learning_rate": 2.4142531727953142e-05, + "loss": 9.7646, + "step": 39500 + }, + { + "epoch": 0.1973082973357637, + "grad_norm": 0.09661668539047241, + "learning_rate": 2.414102981301159e-05, + "loss": 9.7793, + "step": 39510 + }, + { + "epoch": 0.19735823616070314, + "grad_norm": 0.09515753388404846, + "learning_rate": 2.4139527898070042e-05, + "loss": 9.7797, + "step": 39520 + }, + { + "epoch": 0.1974081749856426, + "grad_norm": 0.0913529172539711, + "learning_rate": 2.413802598312849e-05, + "loss": 9.7787, + "step": 39530 + }, + { + "epoch": 0.19745811381058204, + "grad_norm": 0.09632540494203568, + "learning_rate": 2.413652406818694e-05, + "loss": 9.7703, + "step": 39540 + }, + { + "epoch": 0.19750805263552149, + "grad_norm": 0.09521202743053436, + "learning_rate": 2.413502215324539e-05, + "loss": 9.7713, + "step": 39550 + }, + { + "epoch": 0.19755799146046094, + "grad_norm": 0.09425238519906998, + "learning_rate": 2.4133520238303836e-05, + "loss": 9.7705, + "step": 39560 + }, + { + "epoch": 0.19760793028540038, + "grad_norm": 0.09330090880393982, + "learning_rate": 2.413201832336229e-05, + "loss": 9.7747, + "step": 39570 + }, + { + "epoch": 0.19765786911033983, + "grad_norm": 0.09047570824623108, + "learning_rate": 2.4130516408420736e-05, + "loss": 9.7726, + "step": 39580 + }, + { + "epoch": 0.19770780793527928, + "grad_norm": 0.09425200521945953, + "learning_rate": 2.4129014493479187e-05, + "loss": 9.7723, + "step": 39590 + }, + { + "epoch": 0.19775774676021873, + "grad_norm": 0.09698259085416794, + "learning_rate": 2.4127512578537637e-05, + "loss": 9.7848, + "step": 39600 + }, + { + "epoch": 0.19780768558515818, + "grad_norm": 0.09237400442361832, + "learning_rate": 2.4126010663596083e-05, + "loss": 9.774, + "step": 39610 + }, + { + "epoch": 0.19785762441009763, + "grad_norm": 0.09415727108716965, + "learning_rate": 2.4124508748654537e-05, + "loss": 9.7657, + "step": 39620 + }, + { + "epoch": 0.19790756323503708, + "grad_norm": 0.09116394817829132, + "learning_rate": 2.4123006833712984e-05, + "loss": 9.7767, + "step": 39630 + }, + { + "epoch": 0.19795750205997653, + "grad_norm": 0.09226060658693314, + "learning_rate": 2.4121504918771434e-05, + "loss": 9.7691, + "step": 39640 + }, + { + "epoch": 0.19800744088491598, + "grad_norm": 0.09217618405818939, + "learning_rate": 2.4120003003829884e-05, + "loss": 9.7718, + "step": 39650 + }, + { + "epoch": 0.19805737970985543, + "grad_norm": 0.09849265962839127, + "learning_rate": 2.411850108888833e-05, + "loss": 9.7769, + "step": 39660 + }, + { + "epoch": 0.19810731853479488, + "grad_norm": 0.09423698484897614, + "learning_rate": 2.4116999173946785e-05, + "loss": 9.7673, + "step": 39670 + }, + { + "epoch": 0.19815725735973433, + "grad_norm": 0.09352479875087738, + "learning_rate": 2.411549725900523e-05, + "loss": 9.764, + "step": 39680 + }, + { + "epoch": 0.19820719618467378, + "grad_norm": 0.09337835013866425, + "learning_rate": 2.411399534406368e-05, + "loss": 9.7659, + "step": 39690 + }, + { + "epoch": 0.19825713500961323, + "grad_norm": 0.08959398418664932, + "learning_rate": 2.4112493429122132e-05, + "loss": 9.7724, + "step": 39700 + }, + { + "epoch": 0.19830707383455268, + "grad_norm": 0.09081783145666122, + "learning_rate": 2.411099151418058e-05, + "loss": 9.7665, + "step": 39710 + }, + { + "epoch": 0.19835701265949213, + "grad_norm": 0.09610147774219513, + "learning_rate": 2.4109489599239032e-05, + "loss": 9.759, + "step": 39720 + }, + { + "epoch": 0.19840695148443158, + "grad_norm": 0.09528447687625885, + "learning_rate": 2.410798768429748e-05, + "loss": 9.7684, + "step": 39730 + }, + { + "epoch": 0.19845689030937103, + "grad_norm": 0.0971222072839737, + "learning_rate": 2.410648576935593e-05, + "loss": 9.7688, + "step": 39740 + }, + { + "epoch": 0.19850682913431048, + "grad_norm": 0.09684096276760101, + "learning_rate": 2.410498385441438e-05, + "loss": 9.7715, + "step": 39750 + }, + { + "epoch": 0.19855676795924992, + "grad_norm": 0.0940563753247261, + "learning_rate": 2.410348193947283e-05, + "loss": 9.7577, + "step": 39760 + }, + { + "epoch": 0.19860670678418937, + "grad_norm": 0.09706791490316391, + "learning_rate": 2.410198002453128e-05, + "loss": 9.7708, + "step": 39770 + }, + { + "epoch": 0.19865664560912882, + "grad_norm": 0.10084111243486404, + "learning_rate": 2.4100478109589726e-05, + "loss": 9.7658, + "step": 39780 + }, + { + "epoch": 0.19870658443406827, + "grad_norm": 0.09000294655561447, + "learning_rate": 2.4098976194648177e-05, + "loss": 9.7698, + "step": 39790 + }, + { + "epoch": 0.19875652325900772, + "grad_norm": 0.09340784698724747, + "learning_rate": 2.4097474279706627e-05, + "loss": 9.7694, + "step": 39800 + }, + { + "epoch": 0.19880646208394717, + "grad_norm": 0.09671244770288467, + "learning_rate": 2.4095972364765077e-05, + "loss": 9.7608, + "step": 39810 + }, + { + "epoch": 0.19885640090888662, + "grad_norm": 0.09407161921262741, + "learning_rate": 2.4094470449823527e-05, + "loss": 9.7632, + "step": 39820 + }, + { + "epoch": 0.19890633973382607, + "grad_norm": 0.09390780329704285, + "learning_rate": 2.4092968534881974e-05, + "loss": 9.7639, + "step": 39830 + }, + { + "epoch": 0.19895627855876552, + "grad_norm": 0.10034262388944626, + "learning_rate": 2.4091466619940424e-05, + "loss": 9.7688, + "step": 39840 + }, + { + "epoch": 0.19900621738370497, + "grad_norm": 0.09268135577440262, + "learning_rate": 2.4089964704998874e-05, + "loss": 9.7654, + "step": 39850 + }, + { + "epoch": 0.19905615620864442, + "grad_norm": 0.09061171859502792, + "learning_rate": 2.4088462790057324e-05, + "loss": 9.7667, + "step": 39860 + }, + { + "epoch": 0.19910609503358387, + "grad_norm": 0.09159281104803085, + "learning_rate": 2.4086960875115775e-05, + "loss": 9.7727, + "step": 39870 + }, + { + "epoch": 0.19915603385852332, + "grad_norm": 0.09168130904436111, + "learning_rate": 2.408545896017422e-05, + "loss": 9.7752, + "step": 39880 + }, + { + "epoch": 0.19920597268346277, + "grad_norm": 0.09341683238744736, + "learning_rate": 2.408395704523267e-05, + "loss": 9.7705, + "step": 39890 + }, + { + "epoch": 0.19925591150840222, + "grad_norm": 0.10817967355251312, + "learning_rate": 2.4082455130291122e-05, + "loss": 9.7616, + "step": 39900 + }, + { + "epoch": 0.19930585033334167, + "grad_norm": 0.09794482588768005, + "learning_rate": 2.4080953215349572e-05, + "loss": 9.7589, + "step": 39910 + }, + { + "epoch": 0.19935578915828112, + "grad_norm": 0.09640230238437653, + "learning_rate": 2.4079451300408022e-05, + "loss": 9.7614, + "step": 39920 + }, + { + "epoch": 0.19940572798322057, + "grad_norm": 0.09363255649805069, + "learning_rate": 2.407794938546647e-05, + "loss": 9.7684, + "step": 39930 + }, + { + "epoch": 0.19945566680816001, + "grad_norm": 0.09409525245428085, + "learning_rate": 2.407644747052492e-05, + "loss": 9.7558, + "step": 39940 + }, + { + "epoch": 0.19950560563309946, + "grad_norm": 0.0943169966340065, + "learning_rate": 2.407494555558337e-05, + "loss": 9.7663, + "step": 39950 + }, + { + "epoch": 0.1995555444580389, + "grad_norm": 0.09789732098579407, + "learning_rate": 2.407344364064182e-05, + "loss": 9.7623, + "step": 39960 + }, + { + "epoch": 0.19960548328297836, + "grad_norm": 0.09631295502185822, + "learning_rate": 2.407194172570027e-05, + "loss": 9.7692, + "step": 39970 + }, + { + "epoch": 0.1996554221079178, + "grad_norm": 0.09948205947875977, + "learning_rate": 2.4070439810758716e-05, + "loss": 9.7623, + "step": 39980 + }, + { + "epoch": 0.19970536093285726, + "grad_norm": 0.0964735895395279, + "learning_rate": 2.4068937895817167e-05, + "loss": 9.7621, + "step": 39990 + }, + { + "epoch": 0.1997552997577967, + "grad_norm": 0.09828507900238037, + "learning_rate": 2.4067435980875617e-05, + "loss": 9.7603, + "step": 40000 + }, + { + "epoch": 0.19980523858273616, + "grad_norm": 0.09244830161333084, + "learning_rate": 2.4065934065934067e-05, + "loss": 9.7644, + "step": 40010 + }, + { + "epoch": 0.1998551774076756, + "grad_norm": 0.09815704822540283, + "learning_rate": 2.4064432150992517e-05, + "loss": 9.7665, + "step": 40020 + }, + { + "epoch": 0.19990511623261506, + "grad_norm": 0.0952480360865593, + "learning_rate": 2.4062930236050964e-05, + "loss": 9.7668, + "step": 40030 + }, + { + "epoch": 0.1999550550575545, + "grad_norm": 0.09597064554691315, + "learning_rate": 2.4061428321109414e-05, + "loss": 9.7613, + "step": 40040 + }, + { + "epoch": 0.20000499388249393, + "grad_norm": 0.09825017303228378, + "learning_rate": 2.4059926406167864e-05, + "loss": 9.76, + "step": 40050 + }, + { + "epoch": 0.20005493270743338, + "grad_norm": 0.09238851070404053, + "learning_rate": 2.4058424491226314e-05, + "loss": 9.765, + "step": 40060 + }, + { + "epoch": 0.20010487153237283, + "grad_norm": 0.09596622735261917, + "learning_rate": 2.4056922576284765e-05, + "loss": 9.7618, + "step": 40070 + }, + { + "epoch": 0.20015481035731228, + "grad_norm": 0.09516778588294983, + "learning_rate": 2.4055420661343215e-05, + "loss": 9.766, + "step": 40080 + }, + { + "epoch": 0.20020474918225173, + "grad_norm": 0.10546738654375076, + "learning_rate": 2.405391874640166e-05, + "loss": 9.7556, + "step": 40090 + }, + { + "epoch": 0.20025468800719118, + "grad_norm": 0.09358225017786026, + "learning_rate": 2.4052416831460112e-05, + "loss": 9.759, + "step": 40100 + }, + { + "epoch": 0.20030462683213063, + "grad_norm": 0.08909553289413452, + "learning_rate": 2.4050914916518562e-05, + "loss": 9.7602, + "step": 40110 + }, + { + "epoch": 0.20035456565707008, + "grad_norm": 0.1016073226928711, + "learning_rate": 2.4049413001577012e-05, + "loss": 9.7603, + "step": 40120 + }, + { + "epoch": 0.20040450448200953, + "grad_norm": 0.09752840548753738, + "learning_rate": 2.4047911086635462e-05, + "loss": 9.7584, + "step": 40130 + }, + { + "epoch": 0.20045444330694898, + "grad_norm": 0.09266415238380432, + "learning_rate": 2.404640917169391e-05, + "loss": 9.7625, + "step": 40140 + }, + { + "epoch": 0.20050438213188843, + "grad_norm": 0.09927397966384888, + "learning_rate": 2.404490725675236e-05, + "loss": 9.7559, + "step": 40150 + }, + { + "epoch": 0.20055432095682788, + "grad_norm": 0.09626518934965134, + "learning_rate": 2.404340534181081e-05, + "loss": 9.7664, + "step": 40160 + }, + { + "epoch": 0.20060425978176732, + "grad_norm": 0.09638325124979019, + "learning_rate": 2.404190342686926e-05, + "loss": 9.7583, + "step": 40170 + }, + { + "epoch": 0.20065419860670677, + "grad_norm": 0.09331472218036652, + "learning_rate": 2.404040151192771e-05, + "loss": 9.752, + "step": 40180 + }, + { + "epoch": 0.20070413743164622, + "grad_norm": 0.09364567697048187, + "learning_rate": 2.4038899596986157e-05, + "loss": 9.7648, + "step": 40190 + }, + { + "epoch": 0.20075407625658567, + "grad_norm": 0.09309536218643188, + "learning_rate": 2.4037397682044607e-05, + "loss": 9.7596, + "step": 40200 + }, + { + "epoch": 0.20080401508152512, + "grad_norm": 0.091143399477005, + "learning_rate": 2.4035895767103057e-05, + "loss": 9.7572, + "step": 40210 + }, + { + "epoch": 0.20085395390646457, + "grad_norm": 0.09167341887950897, + "learning_rate": 2.4034393852161507e-05, + "loss": 9.7644, + "step": 40220 + }, + { + "epoch": 0.20090389273140402, + "grad_norm": 0.09573984146118164, + "learning_rate": 2.4032891937219957e-05, + "loss": 9.7607, + "step": 40230 + }, + { + "epoch": 0.20095383155634347, + "grad_norm": 0.0927746593952179, + "learning_rate": 2.4031390022278404e-05, + "loss": 9.7661, + "step": 40240 + }, + { + "epoch": 0.20100377038128292, + "grad_norm": 0.09519543498754501, + "learning_rate": 2.4029888107336854e-05, + "loss": 9.758, + "step": 40250 + }, + { + "epoch": 0.20105370920622237, + "grad_norm": 0.09138039499521255, + "learning_rate": 2.4028386192395304e-05, + "loss": 9.7658, + "step": 40260 + }, + { + "epoch": 0.20110364803116182, + "grad_norm": 0.0913807824254036, + "learning_rate": 2.4026884277453755e-05, + "loss": 9.7553, + "step": 40270 + }, + { + "epoch": 0.20115358685610127, + "grad_norm": 0.09113917499780655, + "learning_rate": 2.4025382362512205e-05, + "loss": 9.7586, + "step": 40280 + }, + { + "epoch": 0.20120352568104072, + "grad_norm": 0.09379607439041138, + "learning_rate": 2.402388044757065e-05, + "loss": 9.7621, + "step": 40290 + }, + { + "epoch": 0.20125346450598017, + "grad_norm": 0.09333807975053787, + "learning_rate": 2.4022378532629102e-05, + "loss": 9.7623, + "step": 40300 + }, + { + "epoch": 0.20130340333091962, + "grad_norm": 0.09286303073167801, + "learning_rate": 2.4020876617687552e-05, + "loss": 9.7592, + "step": 40310 + }, + { + "epoch": 0.20135334215585907, + "grad_norm": 0.09583623707294464, + "learning_rate": 2.4019374702746002e-05, + "loss": 9.7556, + "step": 40320 + }, + { + "epoch": 0.20140328098079852, + "grad_norm": 0.09831235557794571, + "learning_rate": 2.4017872787804452e-05, + "loss": 9.7499, + "step": 40330 + }, + { + "epoch": 0.20145321980573797, + "grad_norm": 0.09651533514261246, + "learning_rate": 2.40163708728629e-05, + "loss": 9.7571, + "step": 40340 + }, + { + "epoch": 0.20150315863067741, + "grad_norm": 0.0948728695511818, + "learning_rate": 2.401486895792135e-05, + "loss": 9.7519, + "step": 40350 + }, + { + "epoch": 0.20155309745561686, + "grad_norm": 0.09901313483715057, + "learning_rate": 2.40133670429798e-05, + "loss": 9.7589, + "step": 40360 + }, + { + "epoch": 0.2016030362805563, + "grad_norm": 0.09512390196323395, + "learning_rate": 2.401186512803825e-05, + "loss": 9.7564, + "step": 40370 + }, + { + "epoch": 0.20165297510549576, + "grad_norm": 0.08876697719097137, + "learning_rate": 2.40103632130967e-05, + "loss": 9.7596, + "step": 40380 + }, + { + "epoch": 0.2017029139304352, + "grad_norm": 0.09231546521186829, + "learning_rate": 2.4008861298155147e-05, + "loss": 9.7513, + "step": 40390 + }, + { + "epoch": 0.20175285275537466, + "grad_norm": 0.08933798968791962, + "learning_rate": 2.40073593832136e-05, + "loss": 9.7478, + "step": 40400 + }, + { + "epoch": 0.2018027915803141, + "grad_norm": 0.09653082489967346, + "learning_rate": 2.4005857468272047e-05, + "loss": 9.751, + "step": 40410 + }, + { + "epoch": 0.20185273040525356, + "grad_norm": 0.09329882264137268, + "learning_rate": 2.4004355553330497e-05, + "loss": 9.7608, + "step": 40420 + }, + { + "epoch": 0.201902669230193, + "grad_norm": 0.09065824002027512, + "learning_rate": 2.4002853638388947e-05, + "loss": 9.7584, + "step": 40430 + }, + { + "epoch": 0.20195260805513246, + "grad_norm": 0.09478091448545456, + "learning_rate": 2.4001351723447394e-05, + "loss": 9.7633, + "step": 40440 + }, + { + "epoch": 0.2020025468800719, + "grad_norm": 0.09942101687192917, + "learning_rate": 2.3999849808505848e-05, + "loss": 9.7596, + "step": 40450 + }, + { + "epoch": 0.20205248570501136, + "grad_norm": 0.0919698178768158, + "learning_rate": 2.3998347893564294e-05, + "loss": 9.7657, + "step": 40460 + }, + { + "epoch": 0.2021024245299508, + "grad_norm": 0.08957584947347641, + "learning_rate": 2.3996845978622745e-05, + "loss": 9.7507, + "step": 40470 + }, + { + "epoch": 0.20215236335489026, + "grad_norm": 0.09440477192401886, + "learning_rate": 2.3995344063681195e-05, + "loss": 9.7558, + "step": 40480 + }, + { + "epoch": 0.2022023021798297, + "grad_norm": 0.0963861346244812, + "learning_rate": 2.399384214873964e-05, + "loss": 9.7496, + "step": 40490 + }, + { + "epoch": 0.20225224100476916, + "grad_norm": 0.09470155835151672, + "learning_rate": 2.3992340233798095e-05, + "loss": 9.7483, + "step": 40500 + }, + { + "epoch": 0.2023021798297086, + "grad_norm": 0.09395986050367355, + "learning_rate": 2.3990838318856542e-05, + "loss": 9.7426, + "step": 40510 + }, + { + "epoch": 0.20235211865464806, + "grad_norm": 0.09907002747058868, + "learning_rate": 2.3989336403914992e-05, + "loss": 9.7482, + "step": 40520 + }, + { + "epoch": 0.2024020574795875, + "grad_norm": 0.09406791627407074, + "learning_rate": 2.3987834488973442e-05, + "loss": 9.7551, + "step": 40530 + }, + { + "epoch": 0.20245199630452695, + "grad_norm": 0.09641828387975693, + "learning_rate": 2.398633257403189e-05, + "loss": 9.756, + "step": 40540 + }, + { + "epoch": 0.2025019351294664, + "grad_norm": 0.08803495764732361, + "learning_rate": 2.3984830659090343e-05, + "loss": 9.7517, + "step": 40550 + }, + { + "epoch": 0.20255187395440585, + "grad_norm": 0.089680016040802, + "learning_rate": 2.398332874414879e-05, + "loss": 9.7548, + "step": 40560 + }, + { + "epoch": 0.2026018127793453, + "grad_norm": 0.09483745694160461, + "learning_rate": 2.398182682920724e-05, + "loss": 9.7505, + "step": 40570 + }, + { + "epoch": 0.20265175160428475, + "grad_norm": 0.09230205416679382, + "learning_rate": 2.398032491426569e-05, + "loss": 9.7526, + "step": 40580 + }, + { + "epoch": 0.2027016904292242, + "grad_norm": 0.0983130931854248, + "learning_rate": 2.3978822999324137e-05, + "loss": 9.7463, + "step": 40590 + }, + { + "epoch": 0.20275162925416365, + "grad_norm": 0.0941585898399353, + "learning_rate": 2.397732108438259e-05, + "loss": 9.7575, + "step": 40600 + }, + { + "epoch": 0.2028015680791031, + "grad_norm": 0.09447518736124039, + "learning_rate": 2.3975819169441037e-05, + "loss": 9.7447, + "step": 40610 + }, + { + "epoch": 0.20285150690404255, + "grad_norm": 0.09139948338270187, + "learning_rate": 2.3974317254499487e-05, + "loss": 9.7499, + "step": 40620 + }, + { + "epoch": 0.202901445728982, + "grad_norm": 0.09922108799219131, + "learning_rate": 2.3972815339557937e-05, + "loss": 9.7463, + "step": 40630 + }, + { + "epoch": 0.20295138455392145, + "grad_norm": 0.09205441176891327, + "learning_rate": 2.3971313424616384e-05, + "loss": 9.7516, + "step": 40640 + }, + { + "epoch": 0.2030013233788609, + "grad_norm": 0.097032330930233, + "learning_rate": 2.3969811509674838e-05, + "loss": 9.7507, + "step": 40650 + }, + { + "epoch": 0.20305126220380035, + "grad_norm": 0.09273160994052887, + "learning_rate": 2.3968309594733284e-05, + "loss": 9.7485, + "step": 40660 + }, + { + "epoch": 0.2031012010287398, + "grad_norm": 0.09222143143415451, + "learning_rate": 2.3966807679791735e-05, + "loss": 9.7494, + "step": 40670 + }, + { + "epoch": 0.20315113985367925, + "grad_norm": 0.10249687731266022, + "learning_rate": 2.3965305764850185e-05, + "loss": 9.749, + "step": 40680 + }, + { + "epoch": 0.2032010786786187, + "grad_norm": 0.09170138835906982, + "learning_rate": 2.396380384990863e-05, + "loss": 9.7538, + "step": 40690 + }, + { + "epoch": 0.20325101750355815, + "grad_norm": 0.09489130973815918, + "learning_rate": 2.3962301934967085e-05, + "loss": 9.7539, + "step": 40700 + }, + { + "epoch": 0.2033009563284976, + "grad_norm": 0.09625539183616638, + "learning_rate": 2.3960800020025532e-05, + "loss": 9.7509, + "step": 40710 + }, + { + "epoch": 0.20335089515343704, + "grad_norm": 0.09620354324579239, + "learning_rate": 2.3959298105083986e-05, + "loss": 9.7504, + "step": 40720 + }, + { + "epoch": 0.2034008339783765, + "grad_norm": 0.10117631405591965, + "learning_rate": 2.3957796190142432e-05, + "loss": 9.7386, + "step": 40730 + }, + { + "epoch": 0.20345077280331594, + "grad_norm": 0.09158354997634888, + "learning_rate": 2.395629427520088e-05, + "loss": 9.7471, + "step": 40740 + }, + { + "epoch": 0.2035007116282554, + "grad_norm": 0.09191151708364487, + "learning_rate": 2.3954792360259333e-05, + "loss": 9.7529, + "step": 40750 + }, + { + "epoch": 0.20355065045319484, + "grad_norm": 0.09399069845676422, + "learning_rate": 2.395329044531778e-05, + "loss": 9.745, + "step": 40760 + }, + { + "epoch": 0.2036005892781343, + "grad_norm": 0.09574571996927261, + "learning_rate": 2.3951788530376233e-05, + "loss": 9.7506, + "step": 40770 + }, + { + "epoch": 0.20365052810307374, + "grad_norm": 0.09355718642473221, + "learning_rate": 2.395028661543468e-05, + "loss": 9.7426, + "step": 40780 + }, + { + "epoch": 0.2037004669280132, + "grad_norm": 0.09288044273853302, + "learning_rate": 2.3948784700493127e-05, + "loss": 9.752, + "step": 40790 + }, + { + "epoch": 0.20375040575295264, + "grad_norm": 0.09448196738958359, + "learning_rate": 2.394728278555158e-05, + "loss": 9.7449, + "step": 40800 + }, + { + "epoch": 0.2038003445778921, + "grad_norm": 0.09567010402679443, + "learning_rate": 2.3945780870610027e-05, + "loss": 9.7417, + "step": 40810 + }, + { + "epoch": 0.20385028340283154, + "grad_norm": 0.08981973677873611, + "learning_rate": 2.394427895566848e-05, + "loss": 9.7473, + "step": 40820 + }, + { + "epoch": 0.203900222227771, + "grad_norm": 0.09741527587175369, + "learning_rate": 2.3942777040726927e-05, + "loss": 9.7482, + "step": 40830 + }, + { + "epoch": 0.20395016105271044, + "grad_norm": 0.09928309917449951, + "learning_rate": 2.3941275125785374e-05, + "loss": 9.7452, + "step": 40840 + }, + { + "epoch": 0.2040000998776499, + "grad_norm": 0.09600803256034851, + "learning_rate": 2.3939773210843828e-05, + "loss": 9.7513, + "step": 40850 + }, + { + "epoch": 0.20405003870258934, + "grad_norm": 0.09777117520570755, + "learning_rate": 2.3938271295902274e-05, + "loss": 9.752, + "step": 40860 + }, + { + "epoch": 0.2040999775275288, + "grad_norm": 0.09423068910837173, + "learning_rate": 2.3936769380960728e-05, + "loss": 9.7462, + "step": 40870 + }, + { + "epoch": 0.20414991635246824, + "grad_norm": 0.09674641489982605, + "learning_rate": 2.3935267466019175e-05, + "loss": 9.745, + "step": 40880 + }, + { + "epoch": 0.20419985517740769, + "grad_norm": 0.095578134059906, + "learning_rate": 2.393376555107762e-05, + "loss": 9.7457, + "step": 40890 + }, + { + "epoch": 0.20424979400234713, + "grad_norm": 0.09196728467941284, + "learning_rate": 2.3932263636136075e-05, + "loss": 9.7413, + "step": 40900 + }, + { + "epoch": 0.20429973282728658, + "grad_norm": 0.09680663794279099, + "learning_rate": 2.3930761721194522e-05, + "loss": 9.7394, + "step": 40910 + }, + { + "epoch": 0.20434967165222603, + "grad_norm": 0.10243181139230728, + "learning_rate": 2.3929259806252976e-05, + "loss": 9.7504, + "step": 40920 + }, + { + "epoch": 0.20439961047716548, + "grad_norm": 0.09534817934036255, + "learning_rate": 2.3927757891311422e-05, + "loss": 9.7527, + "step": 40930 + }, + { + "epoch": 0.20444954930210493, + "grad_norm": 0.10091328620910645, + "learning_rate": 2.392625597636987e-05, + "loss": 9.7457, + "step": 40940 + }, + { + "epoch": 0.20449948812704438, + "grad_norm": 0.09642726182937622, + "learning_rate": 2.3924754061428323e-05, + "loss": 9.7483, + "step": 40950 + }, + { + "epoch": 0.20454942695198383, + "grad_norm": 0.09273668378591537, + "learning_rate": 2.392325214648677e-05, + "loss": 9.7487, + "step": 40960 + }, + { + "epoch": 0.20459936577692328, + "grad_norm": 0.09491418302059174, + "learning_rate": 2.3921750231545223e-05, + "loss": 9.7384, + "step": 40970 + }, + { + "epoch": 0.20464930460186273, + "grad_norm": 0.09379623830318451, + "learning_rate": 2.392024831660367e-05, + "loss": 9.7397, + "step": 40980 + }, + { + "epoch": 0.20469924342680218, + "grad_norm": 0.09489590674638748, + "learning_rate": 2.3918746401662117e-05, + "loss": 9.738, + "step": 40990 + }, + { + "epoch": 0.20474918225174163, + "grad_norm": 0.09257600456476212, + "learning_rate": 2.391724448672057e-05, + "loss": 9.743, + "step": 41000 + }, + { + "epoch": 0.20479912107668108, + "grad_norm": 0.09949919581413269, + "learning_rate": 2.3915742571779017e-05, + "loss": 9.7409, + "step": 41010 + }, + { + "epoch": 0.20484905990162053, + "grad_norm": 0.09563277661800385, + "learning_rate": 2.391424065683747e-05, + "loss": 9.738, + "step": 41020 + }, + { + "epoch": 0.20489899872655998, + "grad_norm": 0.09452064335346222, + "learning_rate": 2.3912738741895917e-05, + "loss": 9.736, + "step": 41030 + }, + { + "epoch": 0.2049489375514994, + "grad_norm": 0.09839780628681183, + "learning_rate": 2.3911236826954368e-05, + "loss": 9.7439, + "step": 41040 + }, + { + "epoch": 0.20499887637643885, + "grad_norm": 0.0967588871717453, + "learning_rate": 2.3909734912012818e-05, + "loss": 9.7426, + "step": 41050 + }, + { + "epoch": 0.2050488152013783, + "grad_norm": 0.09972970187664032, + "learning_rate": 2.3908232997071264e-05, + "loss": 9.7462, + "step": 41060 + }, + { + "epoch": 0.20509875402631775, + "grad_norm": 0.09793765842914581, + "learning_rate": 2.3906731082129718e-05, + "loss": 9.7425, + "step": 41070 + }, + { + "epoch": 0.2051486928512572, + "grad_norm": 0.09164223074913025, + "learning_rate": 2.3905229167188165e-05, + "loss": 9.7346, + "step": 41080 + }, + { + "epoch": 0.20519863167619665, + "grad_norm": 0.09696740657091141, + "learning_rate": 2.3903727252246615e-05, + "loss": 9.7373, + "step": 41090 + }, + { + "epoch": 0.2052485705011361, + "grad_norm": 0.09416086226701736, + "learning_rate": 2.3902225337305065e-05, + "loss": 9.74, + "step": 41100 + }, + { + "epoch": 0.20529850932607555, + "grad_norm": 0.09924998879432678, + "learning_rate": 2.3900723422363512e-05, + "loss": 9.7414, + "step": 41110 + }, + { + "epoch": 0.205348448151015, + "grad_norm": 0.08897435665130615, + "learning_rate": 2.3899221507421966e-05, + "loss": 9.7388, + "step": 41120 + }, + { + "epoch": 0.20539838697595444, + "grad_norm": 0.09157969057559967, + "learning_rate": 2.3897719592480412e-05, + "loss": 9.7346, + "step": 41130 + }, + { + "epoch": 0.2054483258008939, + "grad_norm": 0.09318001568317413, + "learning_rate": 2.3896217677538863e-05, + "loss": 9.7399, + "step": 41140 + }, + { + "epoch": 0.20549826462583334, + "grad_norm": 0.09765854477882385, + "learning_rate": 2.3894715762597313e-05, + "loss": 9.7369, + "step": 41150 + }, + { + "epoch": 0.2055482034507728, + "grad_norm": 0.09092031419277191, + "learning_rate": 2.389321384765576e-05, + "loss": 9.7477, + "step": 41160 + }, + { + "epoch": 0.20559814227571224, + "grad_norm": 0.09183992445468903, + "learning_rate": 2.3891711932714213e-05, + "loss": 9.741, + "step": 41170 + }, + { + "epoch": 0.2056480811006517, + "grad_norm": 0.0956728532910347, + "learning_rate": 2.389021001777266e-05, + "loss": 9.7374, + "step": 41180 + }, + { + "epoch": 0.20569801992559114, + "grad_norm": 0.09464164078235626, + "learning_rate": 2.388870810283111e-05, + "loss": 9.7397, + "step": 41190 + }, + { + "epoch": 0.2057479587505306, + "grad_norm": 0.09706345945596695, + "learning_rate": 2.388720618788956e-05, + "loss": 9.7364, + "step": 41200 + }, + { + "epoch": 0.20579789757547004, + "grad_norm": 0.09342235326766968, + "learning_rate": 2.3885704272948007e-05, + "loss": 9.7431, + "step": 41210 + }, + { + "epoch": 0.2058478364004095, + "grad_norm": 0.09735559672117233, + "learning_rate": 2.388420235800646e-05, + "loss": 9.737, + "step": 41220 + }, + { + "epoch": 0.20589777522534894, + "grad_norm": 0.0972183495759964, + "learning_rate": 2.3882700443064907e-05, + "loss": 9.735, + "step": 41230 + }, + { + "epoch": 0.2059477140502884, + "grad_norm": 0.09024586528539658, + "learning_rate": 2.3881198528123358e-05, + "loss": 9.7344, + "step": 41240 + }, + { + "epoch": 0.20599765287522784, + "grad_norm": 0.09795460104942322, + "learning_rate": 2.3879696613181808e-05, + "loss": 9.7426, + "step": 41250 + }, + { + "epoch": 0.2060475917001673, + "grad_norm": 0.10017476975917816, + "learning_rate": 2.3878194698240255e-05, + "loss": 9.7292, + "step": 41260 + }, + { + "epoch": 0.20609753052510674, + "grad_norm": 0.09363752603530884, + "learning_rate": 2.3876692783298708e-05, + "loss": 9.7424, + "step": 41270 + }, + { + "epoch": 0.2061474693500462, + "grad_norm": 0.09633088111877441, + "learning_rate": 2.3875190868357155e-05, + "loss": 9.7319, + "step": 41280 + }, + { + "epoch": 0.20619740817498564, + "grad_norm": 0.09266993403434753, + "learning_rate": 2.3873688953415605e-05, + "loss": 9.732, + "step": 41290 + }, + { + "epoch": 0.20624734699992509, + "grad_norm": 0.10170450061559677, + "learning_rate": 2.3872187038474055e-05, + "loss": 9.7402, + "step": 41300 + }, + { + "epoch": 0.20629728582486453, + "grad_norm": 0.093755342066288, + "learning_rate": 2.3870685123532502e-05, + "loss": 9.7369, + "step": 41310 + }, + { + "epoch": 0.20634722464980398, + "grad_norm": 0.09129472076892853, + "learning_rate": 2.3869183208590956e-05, + "loss": 9.7356, + "step": 41320 + }, + { + "epoch": 0.20639716347474343, + "grad_norm": 0.09476242959499359, + "learning_rate": 2.3867681293649402e-05, + "loss": 9.7394, + "step": 41330 + }, + { + "epoch": 0.20644710229968288, + "grad_norm": 0.09445454180240631, + "learning_rate": 2.3866179378707853e-05, + "loss": 9.7293, + "step": 41340 + }, + { + "epoch": 0.20649704112462233, + "grad_norm": 0.09727781265974045, + "learning_rate": 2.3864677463766303e-05, + "loss": 9.7333, + "step": 41350 + }, + { + "epoch": 0.20654697994956178, + "grad_norm": 0.093324214220047, + "learning_rate": 2.386317554882475e-05, + "loss": 9.7324, + "step": 41360 + }, + { + "epoch": 0.20659691877450123, + "grad_norm": 0.09454476833343506, + "learning_rate": 2.3861673633883203e-05, + "loss": 9.7345, + "step": 41370 + }, + { + "epoch": 0.20664685759944068, + "grad_norm": 0.09696672111749649, + "learning_rate": 2.386017171894165e-05, + "loss": 9.7345, + "step": 41380 + }, + { + "epoch": 0.20669679642438013, + "grad_norm": 0.09323279559612274, + "learning_rate": 2.38586698040001e-05, + "loss": 9.7358, + "step": 41390 + }, + { + "epoch": 0.20674673524931958, + "grad_norm": 0.09088480472564697, + "learning_rate": 2.385716788905855e-05, + "loss": 9.7327, + "step": 41400 + }, + { + "epoch": 0.20679667407425903, + "grad_norm": 0.08868908882141113, + "learning_rate": 2.3855665974117e-05, + "loss": 9.7327, + "step": 41410 + }, + { + "epoch": 0.20684661289919848, + "grad_norm": 0.0900205671787262, + "learning_rate": 2.385416405917545e-05, + "loss": 9.7318, + "step": 41420 + }, + { + "epoch": 0.20689655172413793, + "grad_norm": 0.0981634333729744, + "learning_rate": 2.3852662144233897e-05, + "loss": 9.7366, + "step": 41430 + }, + { + "epoch": 0.20694649054907738, + "grad_norm": 0.10074867308139801, + "learning_rate": 2.3851160229292348e-05, + "loss": 9.729, + "step": 41440 + }, + { + "epoch": 0.20699642937401683, + "grad_norm": 0.09191495180130005, + "learning_rate": 2.3849658314350798e-05, + "loss": 9.7327, + "step": 41450 + }, + { + "epoch": 0.20704636819895628, + "grad_norm": 0.09636077284812927, + "learning_rate": 2.3848156399409248e-05, + "loss": 9.7327, + "step": 41460 + }, + { + "epoch": 0.20709630702389573, + "grad_norm": 0.09494554251432419, + "learning_rate": 2.3846654484467698e-05, + "loss": 9.7348, + "step": 41470 + }, + { + "epoch": 0.20714624584883518, + "grad_norm": 0.09379441291093826, + "learning_rate": 2.3845152569526145e-05, + "loss": 9.7329, + "step": 41480 + }, + { + "epoch": 0.20719618467377462, + "grad_norm": 0.09855134785175323, + "learning_rate": 2.3843650654584595e-05, + "loss": 9.7449, + "step": 41490 + }, + { + "epoch": 0.20724612349871407, + "grad_norm": 0.09271756559610367, + "learning_rate": 2.3842148739643045e-05, + "loss": 9.7368, + "step": 41500 + }, + { + "epoch": 0.20729606232365352, + "grad_norm": 0.09612946957349777, + "learning_rate": 2.3840646824701495e-05, + "loss": 9.7314, + "step": 41510 + }, + { + "epoch": 0.20734600114859297, + "grad_norm": 0.09541863203048706, + "learning_rate": 2.3839144909759946e-05, + "loss": 9.7345, + "step": 41520 + }, + { + "epoch": 0.20739593997353242, + "grad_norm": 0.09798285365104675, + "learning_rate": 2.3837642994818392e-05, + "loss": 9.7369, + "step": 41530 + }, + { + "epoch": 0.20744587879847187, + "grad_norm": 0.09581144899129868, + "learning_rate": 2.3836141079876843e-05, + "loss": 9.7313, + "step": 41540 + }, + { + "epoch": 0.20749581762341132, + "grad_norm": 0.09617334604263306, + "learning_rate": 2.3834639164935293e-05, + "loss": 9.7242, + "step": 41550 + }, + { + "epoch": 0.20754575644835077, + "grad_norm": 0.09934971481561661, + "learning_rate": 2.3833137249993743e-05, + "loss": 9.7257, + "step": 41560 + }, + { + "epoch": 0.20759569527329022, + "grad_norm": 0.09321165084838867, + "learning_rate": 2.3831635335052193e-05, + "loss": 9.7286, + "step": 41570 + }, + { + "epoch": 0.20764563409822967, + "grad_norm": 0.09776625037193298, + "learning_rate": 2.383013342011064e-05, + "loss": 9.7297, + "step": 41580 + }, + { + "epoch": 0.20769557292316912, + "grad_norm": 0.09788148105144501, + "learning_rate": 2.382863150516909e-05, + "loss": 9.7298, + "step": 41590 + }, + { + "epoch": 0.20774551174810857, + "grad_norm": 0.09117887914180756, + "learning_rate": 2.382712959022754e-05, + "loss": 9.7353, + "step": 41600 + }, + { + "epoch": 0.20779545057304802, + "grad_norm": 0.09594250470399857, + "learning_rate": 2.382562767528599e-05, + "loss": 9.7353, + "step": 41610 + }, + { + "epoch": 0.20784538939798747, + "grad_norm": 0.08893455564975739, + "learning_rate": 2.382412576034444e-05, + "loss": 9.7268, + "step": 41620 + }, + { + "epoch": 0.20789532822292692, + "grad_norm": 0.0987037941813469, + "learning_rate": 2.3822623845402887e-05, + "loss": 9.7287, + "step": 41630 + }, + { + "epoch": 0.20794526704786637, + "grad_norm": 0.09531259536743164, + "learning_rate": 2.3821121930461338e-05, + "loss": 9.7335, + "step": 41640 + }, + { + "epoch": 0.20799520587280582, + "grad_norm": 0.09097182750701904, + "learning_rate": 2.3819620015519788e-05, + "loss": 9.7319, + "step": 41650 + }, + { + "epoch": 0.20804514469774527, + "grad_norm": 0.09137894958257675, + "learning_rate": 2.3818118100578238e-05, + "loss": 9.7361, + "step": 41660 + }, + { + "epoch": 0.20809508352268472, + "grad_norm": 0.09314315766096115, + "learning_rate": 2.3816616185636688e-05, + "loss": 9.727, + "step": 41670 + }, + { + "epoch": 0.20814502234762416, + "grad_norm": 0.09129998832941055, + "learning_rate": 2.3815114270695135e-05, + "loss": 9.7235, + "step": 41680 + }, + { + "epoch": 0.20819496117256361, + "grad_norm": 0.09588906913995743, + "learning_rate": 2.3813612355753585e-05, + "loss": 9.7317, + "step": 41690 + }, + { + "epoch": 0.20824489999750306, + "grad_norm": 0.089946448802948, + "learning_rate": 2.3812110440812035e-05, + "loss": 9.7317, + "step": 41700 + }, + { + "epoch": 0.2082948388224425, + "grad_norm": 0.09250712394714355, + "learning_rate": 2.3810608525870485e-05, + "loss": 9.7321, + "step": 41710 + }, + { + "epoch": 0.20834477764738196, + "grad_norm": 0.09257816523313522, + "learning_rate": 2.3809106610928936e-05, + "loss": 9.7294, + "step": 41720 + }, + { + "epoch": 0.2083947164723214, + "grad_norm": 0.09143438935279846, + "learning_rate": 2.3807604695987386e-05, + "loss": 9.731, + "step": 41730 + }, + { + "epoch": 0.20844465529726086, + "grad_norm": 0.09791240841150284, + "learning_rate": 2.3806102781045833e-05, + "loss": 9.7234, + "step": 41740 + }, + { + "epoch": 0.2084945941222003, + "grad_norm": 0.10063060373067856, + "learning_rate": 2.3804600866104283e-05, + "loss": 9.7279, + "step": 41750 + }, + { + "epoch": 0.20854453294713976, + "grad_norm": 0.09549122303724289, + "learning_rate": 2.3803098951162733e-05, + "loss": 9.7233, + "step": 41760 + }, + { + "epoch": 0.2085944717720792, + "grad_norm": 0.09345372021198273, + "learning_rate": 2.3801597036221183e-05, + "loss": 9.7282, + "step": 41770 + }, + { + "epoch": 0.20864441059701866, + "grad_norm": 0.0949343666434288, + "learning_rate": 2.3800095121279633e-05, + "loss": 9.721, + "step": 41780 + }, + { + "epoch": 0.2086943494219581, + "grad_norm": 0.09225095063447952, + "learning_rate": 2.3798593206338083e-05, + "loss": 9.7369, + "step": 41790 + }, + { + "epoch": 0.20874428824689756, + "grad_norm": 0.09359176456928253, + "learning_rate": 2.379709129139653e-05, + "loss": 9.7298, + "step": 41800 + }, + { + "epoch": 0.208794227071837, + "grad_norm": 0.09750591963529587, + "learning_rate": 2.379558937645498e-05, + "loss": 9.7263, + "step": 41810 + }, + { + "epoch": 0.20884416589677646, + "grad_norm": 0.0969536155462265, + "learning_rate": 2.379408746151343e-05, + "loss": 9.7193, + "step": 41820 + }, + { + "epoch": 0.2088941047217159, + "grad_norm": 0.09383013099431992, + "learning_rate": 2.379258554657188e-05, + "loss": 9.7168, + "step": 41830 + }, + { + "epoch": 0.20894404354665536, + "grad_norm": 0.09686887264251709, + "learning_rate": 2.379108363163033e-05, + "loss": 9.7198, + "step": 41840 + }, + { + "epoch": 0.2089939823715948, + "grad_norm": 0.09708838164806366, + "learning_rate": 2.3789581716688778e-05, + "loss": 9.7315, + "step": 41850 + }, + { + "epoch": 0.20904392119653425, + "grad_norm": 0.09624648839235306, + "learning_rate": 2.3788079801747228e-05, + "loss": 9.7148, + "step": 41860 + }, + { + "epoch": 0.2090938600214737, + "grad_norm": 0.09800230711698532, + "learning_rate": 2.3786577886805678e-05, + "loss": 9.7328, + "step": 41870 + }, + { + "epoch": 0.20914379884641315, + "grad_norm": 0.09165570139884949, + "learning_rate": 2.3785075971864128e-05, + "loss": 9.7219, + "step": 41880 + }, + { + "epoch": 0.2091937376713526, + "grad_norm": 0.09577257931232452, + "learning_rate": 2.378357405692258e-05, + "loss": 9.7289, + "step": 41890 + }, + { + "epoch": 0.20924367649629205, + "grad_norm": 0.09999013692140579, + "learning_rate": 2.3782072141981025e-05, + "loss": 9.7178, + "step": 41900 + }, + { + "epoch": 0.2092936153212315, + "grad_norm": 0.09558932483196259, + "learning_rate": 2.3780570227039475e-05, + "loss": 9.7261, + "step": 41910 + }, + { + "epoch": 0.20934355414617095, + "grad_norm": 0.09391254931688309, + "learning_rate": 2.3779068312097926e-05, + "loss": 9.7244, + "step": 41920 + }, + { + "epoch": 0.2093934929711104, + "grad_norm": 0.09899161756038666, + "learning_rate": 2.3777566397156376e-05, + "loss": 9.7294, + "step": 41930 + }, + { + "epoch": 0.20944343179604985, + "grad_norm": 0.09575141966342926, + "learning_rate": 2.3776064482214826e-05, + "loss": 9.72, + "step": 41940 + }, + { + "epoch": 0.2094933706209893, + "grad_norm": 0.10070329904556274, + "learning_rate": 2.3774562567273273e-05, + "loss": 9.7279, + "step": 41950 + }, + { + "epoch": 0.20954330944592875, + "grad_norm": 0.09497936069965363, + "learning_rate": 2.3773060652331723e-05, + "loss": 9.7317, + "step": 41960 + }, + { + "epoch": 0.2095932482708682, + "grad_norm": 0.09233599156141281, + "learning_rate": 2.3771558737390173e-05, + "loss": 9.7265, + "step": 41970 + }, + { + "epoch": 0.20964318709580765, + "grad_norm": 0.08788701891899109, + "learning_rate": 2.3770056822448623e-05, + "loss": 9.7191, + "step": 41980 + }, + { + "epoch": 0.2096931259207471, + "grad_norm": 0.09631108492612839, + "learning_rate": 2.3768554907507073e-05, + "loss": 9.7255, + "step": 41990 + }, + { + "epoch": 0.20974306474568655, + "grad_norm": 0.09688840806484222, + "learning_rate": 2.376705299256552e-05, + "loss": 9.7247, + "step": 42000 + }, + { + "epoch": 0.209793003570626, + "grad_norm": 0.09418191015720367, + "learning_rate": 2.376555107762397e-05, + "loss": 9.7267, + "step": 42010 + }, + { + "epoch": 0.20984294239556545, + "grad_norm": 0.0922936499118805, + "learning_rate": 2.376404916268242e-05, + "loss": 9.7249, + "step": 42020 + }, + { + "epoch": 0.20989288122050487, + "grad_norm": 0.09569290280342102, + "learning_rate": 2.376254724774087e-05, + "loss": 9.723, + "step": 42030 + }, + { + "epoch": 0.20994282004544432, + "grad_norm": 0.10295400768518448, + "learning_rate": 2.376104533279932e-05, + "loss": 9.7236, + "step": 42040 + }, + { + "epoch": 0.20999275887038377, + "grad_norm": 0.09752771258354187, + "learning_rate": 2.375954341785777e-05, + "loss": 9.7179, + "step": 42050 + }, + { + "epoch": 0.21004269769532322, + "grad_norm": 0.09303586930036545, + "learning_rate": 2.3758041502916218e-05, + "loss": 9.7193, + "step": 42060 + }, + { + "epoch": 0.21009263652026267, + "grad_norm": 0.10309559851884842, + "learning_rate": 2.3756539587974668e-05, + "loss": 9.7216, + "step": 42070 + }, + { + "epoch": 0.21014257534520212, + "grad_norm": 0.09949391335248947, + "learning_rate": 2.375503767303312e-05, + "loss": 9.7174, + "step": 42080 + }, + { + "epoch": 0.21019251417014156, + "grad_norm": 0.0931687206029892, + "learning_rate": 2.375353575809157e-05, + "loss": 9.7203, + "step": 42090 + }, + { + "epoch": 0.21024245299508101, + "grad_norm": 0.09643073379993439, + "learning_rate": 2.375203384315002e-05, + "loss": 9.7225, + "step": 42100 + }, + { + "epoch": 0.21029239182002046, + "grad_norm": 0.09048034995794296, + "learning_rate": 2.3750531928208465e-05, + "loss": 9.7207, + "step": 42110 + }, + { + "epoch": 0.2103423306449599, + "grad_norm": 0.10079729557037354, + "learning_rate": 2.3749030013266916e-05, + "loss": 9.7121, + "step": 42120 + }, + { + "epoch": 0.21039226946989936, + "grad_norm": 0.09450411796569824, + "learning_rate": 2.3747528098325366e-05, + "loss": 9.7202, + "step": 42130 + }, + { + "epoch": 0.2104422082948388, + "grad_norm": 0.09755988419055939, + "learning_rate": 2.3746026183383816e-05, + "loss": 9.7198, + "step": 42140 + }, + { + "epoch": 0.21049214711977826, + "grad_norm": 0.09404526650905609, + "learning_rate": 2.3744524268442266e-05, + "loss": 9.7194, + "step": 42150 + }, + { + "epoch": 0.2105420859447177, + "grad_norm": 0.09205637127161026, + "learning_rate": 2.3743022353500713e-05, + "loss": 9.7256, + "step": 42160 + }, + { + "epoch": 0.21059202476965716, + "grad_norm": 0.09348385035991669, + "learning_rate": 2.3741520438559163e-05, + "loss": 9.7176, + "step": 42170 + }, + { + "epoch": 0.2106419635945966, + "grad_norm": 0.09955950081348419, + "learning_rate": 2.3740018523617613e-05, + "loss": 9.7243, + "step": 42180 + }, + { + "epoch": 0.21069190241953606, + "grad_norm": 0.09449559450149536, + "learning_rate": 2.3738516608676063e-05, + "loss": 9.7126, + "step": 42190 + }, + { + "epoch": 0.2107418412444755, + "grad_norm": 0.09414367377758026, + "learning_rate": 2.3737014693734514e-05, + "loss": 9.7162, + "step": 42200 + }, + { + "epoch": 0.21079178006941496, + "grad_norm": 0.09569656103849411, + "learning_rate": 2.373551277879296e-05, + "loss": 9.7146, + "step": 42210 + }, + { + "epoch": 0.2108417188943544, + "grad_norm": 0.10219884663820267, + "learning_rate": 2.373401086385141e-05, + "loss": 9.7196, + "step": 42220 + }, + { + "epoch": 0.21089165771929386, + "grad_norm": 0.091194286942482, + "learning_rate": 2.373250894890986e-05, + "loss": 9.7179, + "step": 42230 + }, + { + "epoch": 0.2109415965442333, + "grad_norm": 0.09087838232517242, + "learning_rate": 2.373100703396831e-05, + "loss": 9.7237, + "step": 42240 + }, + { + "epoch": 0.21099153536917276, + "grad_norm": 0.09350999444723129, + "learning_rate": 2.372950511902676e-05, + "loss": 9.7151, + "step": 42250 + }, + { + "epoch": 0.2110414741941122, + "grad_norm": 0.08830499649047852, + "learning_rate": 2.3728003204085208e-05, + "loss": 9.7138, + "step": 42260 + }, + { + "epoch": 0.21109141301905165, + "grad_norm": 0.09413766860961914, + "learning_rate": 2.3726501289143658e-05, + "loss": 9.7158, + "step": 42270 + }, + { + "epoch": 0.2111413518439911, + "grad_norm": 0.09809092432260513, + "learning_rate": 2.372499937420211e-05, + "loss": 9.7141, + "step": 42280 + }, + { + "epoch": 0.21119129066893055, + "grad_norm": 0.09166799485683441, + "learning_rate": 2.372349745926056e-05, + "loss": 9.7125, + "step": 42290 + }, + { + "epoch": 0.21124122949387, + "grad_norm": 0.09736365079879761, + "learning_rate": 2.372199554431901e-05, + "loss": 9.7256, + "step": 42300 + }, + { + "epoch": 0.21129116831880945, + "grad_norm": 0.10044820606708527, + "learning_rate": 2.3720493629377455e-05, + "loss": 9.7196, + "step": 42310 + }, + { + "epoch": 0.2113411071437489, + "grad_norm": 0.09292640537023544, + "learning_rate": 2.3718991714435906e-05, + "loss": 9.7137, + "step": 42320 + }, + { + "epoch": 0.21139104596868835, + "grad_norm": 0.09433227777481079, + "learning_rate": 2.3717489799494356e-05, + "loss": 9.7101, + "step": 42330 + }, + { + "epoch": 0.2114409847936278, + "grad_norm": 0.0979892909526825, + "learning_rate": 2.3715987884552806e-05, + "loss": 9.7208, + "step": 42340 + }, + { + "epoch": 0.21149092361856725, + "grad_norm": 0.09797633439302444, + "learning_rate": 2.3714485969611256e-05, + "loss": 9.7147, + "step": 42350 + }, + { + "epoch": 0.2115408624435067, + "grad_norm": 0.09626011550426483, + "learning_rate": 2.3712984054669703e-05, + "loss": 9.71, + "step": 42360 + }, + { + "epoch": 0.21159080126844615, + "grad_norm": 0.10008444637060165, + "learning_rate": 2.3711482139728157e-05, + "loss": 9.7133, + "step": 42370 + }, + { + "epoch": 0.2116407400933856, + "grad_norm": 0.09693645685911179, + "learning_rate": 2.3709980224786603e-05, + "loss": 9.7155, + "step": 42380 + }, + { + "epoch": 0.21169067891832505, + "grad_norm": 0.09394808113574982, + "learning_rate": 2.3708478309845054e-05, + "loss": 9.7087, + "step": 42390 + }, + { + "epoch": 0.2117406177432645, + "grad_norm": 0.0966886356472969, + "learning_rate": 2.3706976394903504e-05, + "loss": 9.7124, + "step": 42400 + }, + { + "epoch": 0.21179055656820395, + "grad_norm": 0.09753970056772232, + "learning_rate": 2.370547447996195e-05, + "loss": 9.712, + "step": 42410 + }, + { + "epoch": 0.2118404953931434, + "grad_norm": 0.095003142952919, + "learning_rate": 2.3703972565020404e-05, + "loss": 9.7176, + "step": 42420 + }, + { + "epoch": 0.21189043421808285, + "grad_norm": 0.09311114996671677, + "learning_rate": 2.370247065007885e-05, + "loss": 9.7169, + "step": 42430 + }, + { + "epoch": 0.2119403730430223, + "grad_norm": 0.08979105204343796, + "learning_rate": 2.37009687351373e-05, + "loss": 9.7247, + "step": 42440 + }, + { + "epoch": 0.21199031186796174, + "grad_norm": 0.09097933769226074, + "learning_rate": 2.369946682019575e-05, + "loss": 9.7103, + "step": 42450 + }, + { + "epoch": 0.2120402506929012, + "grad_norm": 0.09349437057971954, + "learning_rate": 2.3697964905254198e-05, + "loss": 9.7147, + "step": 42460 + }, + { + "epoch": 0.21209018951784064, + "grad_norm": 0.09845960140228271, + "learning_rate": 2.369646299031265e-05, + "loss": 9.7105, + "step": 42470 + }, + { + "epoch": 0.2121401283427801, + "grad_norm": 0.09305304288864136, + "learning_rate": 2.36949610753711e-05, + "loss": 9.7152, + "step": 42480 + }, + { + "epoch": 0.21219006716771954, + "grad_norm": 0.09439215064048767, + "learning_rate": 2.369345916042955e-05, + "loss": 9.7083, + "step": 42490 + }, + { + "epoch": 0.212240005992659, + "grad_norm": 0.09390342235565186, + "learning_rate": 2.3691957245488e-05, + "loss": 9.7082, + "step": 42500 + }, + { + "epoch": 0.21228994481759844, + "grad_norm": 0.08965789526700974, + "learning_rate": 2.3690455330546445e-05, + "loss": 9.7106, + "step": 42510 + }, + { + "epoch": 0.2123398836425379, + "grad_norm": 0.09593262523412704, + "learning_rate": 2.36889534156049e-05, + "loss": 9.7163, + "step": 42520 + }, + { + "epoch": 0.21238982246747734, + "grad_norm": 0.09541445225477219, + "learning_rate": 2.3687451500663346e-05, + "loss": 9.7153, + "step": 42530 + }, + { + "epoch": 0.2124397612924168, + "grad_norm": 0.09254483878612518, + "learning_rate": 2.3685949585721796e-05, + "loss": 9.7166, + "step": 42540 + }, + { + "epoch": 0.21248970011735624, + "grad_norm": 0.09330740571022034, + "learning_rate": 2.3684447670780246e-05, + "loss": 9.7049, + "step": 42550 + }, + { + "epoch": 0.2125396389422957, + "grad_norm": 0.10075175762176514, + "learning_rate": 2.3682945755838693e-05, + "loss": 9.7118, + "step": 42560 + }, + { + "epoch": 0.21258957776723514, + "grad_norm": 0.09620960056781769, + "learning_rate": 2.3681443840897147e-05, + "loss": 9.711, + "step": 42570 + }, + { + "epoch": 0.2126395165921746, + "grad_norm": 0.09333734214305878, + "learning_rate": 2.3679941925955593e-05, + "loss": 9.7069, + "step": 42580 + }, + { + "epoch": 0.21268945541711404, + "grad_norm": 0.09951247274875641, + "learning_rate": 2.3678440011014044e-05, + "loss": 9.7065, + "step": 42590 + }, + { + "epoch": 0.2127393942420535, + "grad_norm": 0.09395639598369598, + "learning_rate": 2.3676938096072494e-05, + "loss": 9.7071, + "step": 42600 + }, + { + "epoch": 0.21278933306699294, + "grad_norm": 0.10100211948156357, + "learning_rate": 2.367543618113094e-05, + "loss": 9.7104, + "step": 42610 + }, + { + "epoch": 0.21283927189193239, + "grad_norm": 0.09487060457468033, + "learning_rate": 2.3673934266189394e-05, + "loss": 9.7075, + "step": 42620 + }, + { + "epoch": 0.21288921071687184, + "grad_norm": 0.09256965667009354, + "learning_rate": 2.367243235124784e-05, + "loss": 9.7168, + "step": 42630 + }, + { + "epoch": 0.21293914954181128, + "grad_norm": 0.0953928530216217, + "learning_rate": 2.367093043630629e-05, + "loss": 9.708, + "step": 42640 + }, + { + "epoch": 0.21298908836675073, + "grad_norm": 0.09185754507780075, + "learning_rate": 2.366942852136474e-05, + "loss": 9.7165, + "step": 42650 + }, + { + "epoch": 0.21303902719169018, + "grad_norm": 0.09027005732059479, + "learning_rate": 2.3667926606423188e-05, + "loss": 9.71, + "step": 42660 + }, + { + "epoch": 0.21308896601662963, + "grad_norm": 0.0954156145453453, + "learning_rate": 2.366642469148164e-05, + "loss": 9.709, + "step": 42670 + }, + { + "epoch": 0.21313890484156908, + "grad_norm": 0.08864131569862366, + "learning_rate": 2.366492277654009e-05, + "loss": 9.7154, + "step": 42680 + }, + { + "epoch": 0.21318884366650853, + "grad_norm": 0.09630046784877777, + "learning_rate": 2.3663420861598542e-05, + "loss": 9.712, + "step": 42690 + }, + { + "epoch": 0.21323878249144798, + "grad_norm": 0.09661024063825607, + "learning_rate": 2.366191894665699e-05, + "loss": 9.7077, + "step": 42700 + }, + { + "epoch": 0.21328872131638743, + "grad_norm": 0.09176916629076004, + "learning_rate": 2.3660417031715436e-05, + "loss": 9.7138, + "step": 42710 + }, + { + "epoch": 0.21333866014132688, + "grad_norm": 0.09320518374443054, + "learning_rate": 2.365891511677389e-05, + "loss": 9.712, + "step": 42720 + }, + { + "epoch": 0.21338859896626633, + "grad_norm": 0.10201375186443329, + "learning_rate": 2.3657413201832336e-05, + "loss": 9.7025, + "step": 42730 + }, + { + "epoch": 0.21343853779120578, + "grad_norm": 0.09826554358005524, + "learning_rate": 2.365591128689079e-05, + "loss": 9.7106, + "step": 42740 + }, + { + "epoch": 0.21348847661614523, + "grad_norm": 0.09512682259082794, + "learning_rate": 2.3654409371949236e-05, + "loss": 9.7113, + "step": 42750 + }, + { + "epoch": 0.21353841544108468, + "grad_norm": 0.10446712374687195, + "learning_rate": 2.3652907457007683e-05, + "loss": 9.7097, + "step": 42760 + }, + { + "epoch": 0.21358835426602413, + "grad_norm": 0.09374655038118362, + "learning_rate": 2.3651405542066137e-05, + "loss": 9.7101, + "step": 42770 + }, + { + "epoch": 0.21363829309096358, + "grad_norm": 0.10137982666492462, + "learning_rate": 2.3649903627124583e-05, + "loss": 9.7063, + "step": 42780 + }, + { + "epoch": 0.21368823191590303, + "grad_norm": 0.09552907198667526, + "learning_rate": 2.3648401712183037e-05, + "loss": 9.7035, + "step": 42790 + }, + { + "epoch": 0.21373817074084248, + "grad_norm": 0.10118672996759415, + "learning_rate": 2.3646899797241484e-05, + "loss": 9.7137, + "step": 42800 + }, + { + "epoch": 0.21378810956578193, + "grad_norm": 0.09724298119544983, + "learning_rate": 2.364539788229993e-05, + "loss": 9.7052, + "step": 42810 + }, + { + "epoch": 0.21383804839072137, + "grad_norm": 0.09356910735368729, + "learning_rate": 2.3643895967358384e-05, + "loss": 9.7068, + "step": 42820 + }, + { + "epoch": 0.21388798721566082, + "grad_norm": 0.09422975033521652, + "learning_rate": 2.364239405241683e-05, + "loss": 9.7097, + "step": 42830 + }, + { + "epoch": 0.21393792604060027, + "grad_norm": 0.09154810756444931, + "learning_rate": 2.3640892137475284e-05, + "loss": 9.7082, + "step": 42840 + }, + { + "epoch": 0.21398786486553972, + "grad_norm": 0.08905761688947678, + "learning_rate": 2.363939022253373e-05, + "loss": 9.7014, + "step": 42850 + }, + { + "epoch": 0.21403780369047917, + "grad_norm": 0.0964711457490921, + "learning_rate": 2.3637888307592178e-05, + "loss": 9.7035, + "step": 42860 + }, + { + "epoch": 0.21408774251541862, + "grad_norm": 0.09149517863988876, + "learning_rate": 2.363638639265063e-05, + "loss": 9.7052, + "step": 42870 + }, + { + "epoch": 0.21413768134035807, + "grad_norm": 0.09464892745018005, + "learning_rate": 2.363488447770908e-05, + "loss": 9.7134, + "step": 42880 + }, + { + "epoch": 0.21418762016529752, + "grad_norm": 0.09502291679382324, + "learning_rate": 2.3633382562767532e-05, + "loss": 9.7036, + "step": 42890 + }, + { + "epoch": 0.21423755899023697, + "grad_norm": 0.09215651452541351, + "learning_rate": 2.363188064782598e-05, + "loss": 9.7038, + "step": 42900 + }, + { + "epoch": 0.21428749781517642, + "grad_norm": 0.09255354106426239, + "learning_rate": 2.3630378732884426e-05, + "loss": 9.6993, + "step": 42910 + }, + { + "epoch": 0.21433743664011587, + "grad_norm": 0.09328746050596237, + "learning_rate": 2.362887681794288e-05, + "loss": 9.7027, + "step": 42920 + }, + { + "epoch": 0.21438737546505532, + "grad_norm": 0.0980675145983696, + "learning_rate": 2.3627374903001326e-05, + "loss": 9.7081, + "step": 42930 + }, + { + "epoch": 0.21443731428999477, + "grad_norm": 0.09392713755369186, + "learning_rate": 2.362587298805978e-05, + "loss": 9.7009, + "step": 42940 + }, + { + "epoch": 0.21448725311493422, + "grad_norm": 0.09184177964925766, + "learning_rate": 2.3624371073118226e-05, + "loss": 9.7055, + "step": 42950 + }, + { + "epoch": 0.21453719193987367, + "grad_norm": 0.10423033684492111, + "learning_rate": 2.3622869158176673e-05, + "loss": 9.7083, + "step": 42960 + }, + { + "epoch": 0.21458713076481312, + "grad_norm": 0.09163152426481247, + "learning_rate": 2.3621367243235127e-05, + "loss": 10.5286, + "step": 42970 + }, + { + "epoch": 0.21463706958975257, + "grad_norm": 0.09652361273765564, + "learning_rate": 2.3619865328293573e-05, + "loss": 41.2823, + "step": 42980 + }, + { + "epoch": 0.21468700841469202, + "grad_norm": 0.09223783016204834, + "learning_rate": 2.3618363413352027e-05, + "loss": 9.7045, + "step": 42990 + }, + { + "epoch": 0.21473694723963146, + "grad_norm": 0.09330345690250397, + "learning_rate": 2.3616861498410474e-05, + "loss": 9.7024, + "step": 43000 + }, + { + "epoch": 0.21478688606457091, + "grad_norm": 0.09638207405805588, + "learning_rate": 2.3615359583468924e-05, + "loss": 9.7073, + "step": 43010 + }, + { + "epoch": 0.21483682488951034, + "grad_norm": 0.09385758638381958, + "learning_rate": 2.3613857668527374e-05, + "loss": 9.706, + "step": 43020 + }, + { + "epoch": 0.21488676371444979, + "grad_norm": 0.09166792035102844, + "learning_rate": 2.361235575358582e-05, + "loss": 9.6945, + "step": 43030 + }, + { + "epoch": 0.21493670253938923, + "grad_norm": 0.09677714109420776, + "learning_rate": 2.3610853838644274e-05, + "loss": 9.7021, + "step": 43040 + }, + { + "epoch": 0.21498664136432868, + "grad_norm": 0.09546297788619995, + "learning_rate": 2.360935192370272e-05, + "loss": 9.6937, + "step": 43050 + }, + { + "epoch": 0.21503658018926813, + "grad_norm": 0.09486702084541321, + "learning_rate": 2.360785000876117e-05, + "loss": 9.7021, + "step": 43060 + }, + { + "epoch": 0.21508651901420758, + "grad_norm": 0.09048479050397873, + "learning_rate": 2.360634809381962e-05, + "loss": 9.7101, + "step": 43070 + }, + { + "epoch": 0.21513645783914703, + "grad_norm": 0.0977143719792366, + "learning_rate": 2.360484617887807e-05, + "loss": 9.6991, + "step": 43080 + }, + { + "epoch": 0.21518639666408648, + "grad_norm": 0.0984925925731659, + "learning_rate": 2.3603344263936522e-05, + "loss": 9.6967, + "step": 43090 + }, + { + "epoch": 0.21523633548902593, + "grad_norm": 0.09045634418725967, + "learning_rate": 2.360184234899497e-05, + "loss": 9.6997, + "step": 43100 + }, + { + "epoch": 0.21528627431396538, + "grad_norm": 0.09143086522817612, + "learning_rate": 2.360034043405342e-05, + "loss": 9.7057, + "step": 43110 + }, + { + "epoch": 0.21533621313890483, + "grad_norm": 0.09460347890853882, + "learning_rate": 2.359883851911187e-05, + "loss": 9.6983, + "step": 43120 + }, + { + "epoch": 0.21538615196384428, + "grad_norm": 0.09431096911430359, + "learning_rate": 2.3597336604170316e-05, + "loss": 9.7025, + "step": 43130 + }, + { + "epoch": 0.21543609078878373, + "grad_norm": 0.1000736653804779, + "learning_rate": 2.359583468922877e-05, + "loss": 9.694, + "step": 43140 + }, + { + "epoch": 0.21548602961372318, + "grad_norm": 0.09227129071950912, + "learning_rate": 2.3594332774287216e-05, + "loss": 9.7007, + "step": 43150 + }, + { + "epoch": 0.21553596843866263, + "grad_norm": 0.09356602281332016, + "learning_rate": 2.3592830859345666e-05, + "loss": 9.6967, + "step": 43160 + }, + { + "epoch": 0.21558590726360208, + "grad_norm": 0.09989676624536514, + "learning_rate": 2.3591328944404117e-05, + "loss": 9.7074, + "step": 43170 + }, + { + "epoch": 0.21563584608854153, + "grad_norm": 0.0949828028678894, + "learning_rate": 2.3589827029462563e-05, + "loss": 9.6986, + "step": 43180 + }, + { + "epoch": 0.21568578491348098, + "grad_norm": 0.09488669037818909, + "learning_rate": 2.3588325114521017e-05, + "loss": 9.7024, + "step": 43190 + }, + { + "epoch": 0.21573572373842043, + "grad_norm": 0.09980147331953049, + "learning_rate": 2.3586823199579464e-05, + "loss": 9.6998, + "step": 43200 + }, + { + "epoch": 0.21578566256335988, + "grad_norm": 0.09408322721719742, + "learning_rate": 2.3585321284637914e-05, + "loss": 9.6962, + "step": 43210 + }, + { + "epoch": 0.21583560138829933, + "grad_norm": 0.09337528049945831, + "learning_rate": 2.3583819369696364e-05, + "loss": 9.7064, + "step": 43220 + }, + { + "epoch": 0.21588554021323877, + "grad_norm": 0.09422304481267929, + "learning_rate": 2.358231745475481e-05, + "loss": 9.6981, + "step": 43230 + }, + { + "epoch": 0.21593547903817822, + "grad_norm": 0.09392604231834412, + "learning_rate": 2.3580815539813264e-05, + "loss": 9.702, + "step": 43240 + }, + { + "epoch": 0.21598541786311767, + "grad_norm": 0.09568940103054047, + "learning_rate": 2.357931362487171e-05, + "loss": 9.6984, + "step": 43250 + }, + { + "epoch": 0.21603535668805712, + "grad_norm": 0.09230421483516693, + "learning_rate": 2.357781170993016e-05, + "loss": 9.6961, + "step": 43260 + }, + { + "epoch": 0.21608529551299657, + "grad_norm": 0.0997990295290947, + "learning_rate": 2.357630979498861e-05, + "loss": 9.6982, + "step": 43270 + }, + { + "epoch": 0.21613523433793602, + "grad_norm": 0.09223617613315582, + "learning_rate": 2.357480788004706e-05, + "loss": 9.6975, + "step": 43280 + }, + { + "epoch": 0.21618517316287547, + "grad_norm": 0.09891097992658615, + "learning_rate": 2.3573305965105512e-05, + "loss": 9.6933, + "step": 43290 + }, + { + "epoch": 0.21623511198781492, + "grad_norm": 0.09234698116779327, + "learning_rate": 2.357180405016396e-05, + "loss": 9.6913, + "step": 43300 + }, + { + "epoch": 0.21628505081275437, + "grad_norm": 0.0998927652835846, + "learning_rate": 2.357030213522241e-05, + "loss": 9.6922, + "step": 43310 + }, + { + "epoch": 0.21633498963769382, + "grad_norm": 0.09355306625366211, + "learning_rate": 2.356880022028086e-05, + "loss": 9.6943, + "step": 43320 + }, + { + "epoch": 0.21638492846263327, + "grad_norm": 0.09233734756708145, + "learning_rate": 2.356729830533931e-05, + "loss": 9.6983, + "step": 43330 + }, + { + "epoch": 0.21643486728757272, + "grad_norm": 0.09967396408319473, + "learning_rate": 2.356579639039776e-05, + "loss": 9.6938, + "step": 43340 + }, + { + "epoch": 0.21648480611251217, + "grad_norm": 0.09385713189840317, + "learning_rate": 2.3564294475456206e-05, + "loss": 9.6936, + "step": 43350 + }, + { + "epoch": 0.21653474493745162, + "grad_norm": 0.09764021635055542, + "learning_rate": 2.3562792560514656e-05, + "loss": 9.6984, + "step": 43360 + }, + { + "epoch": 0.21658468376239107, + "grad_norm": 0.0969097763299942, + "learning_rate": 2.3561290645573107e-05, + "loss": 9.6953, + "step": 43370 + }, + { + "epoch": 0.21663462258733052, + "grad_norm": 0.09680244326591492, + "learning_rate": 2.3559788730631557e-05, + "loss": 9.6967, + "step": 43380 + }, + { + "epoch": 0.21668456141226997, + "grad_norm": 0.09880103170871735, + "learning_rate": 2.3558286815690007e-05, + "loss": 9.703, + "step": 43390 + }, + { + "epoch": 0.21673450023720942, + "grad_norm": 0.09156890958547592, + "learning_rate": 2.3556784900748454e-05, + "loss": 9.6993, + "step": 43400 + }, + { + "epoch": 0.21678443906214886, + "grad_norm": 0.09816037863492966, + "learning_rate": 2.3555282985806904e-05, + "loss": 9.6899, + "step": 43410 + }, + { + "epoch": 0.21683437788708831, + "grad_norm": 0.09547406435012817, + "learning_rate": 2.3553781070865354e-05, + "loss": 9.6893, + "step": 43420 + }, + { + "epoch": 0.21688431671202776, + "grad_norm": 0.10225875675678253, + "learning_rate": 2.3552279155923804e-05, + "loss": 9.6887, + "step": 43430 + }, + { + "epoch": 0.2169342555369672, + "grad_norm": 0.09828753769397736, + "learning_rate": 2.3550777240982254e-05, + "loss": 9.6982, + "step": 43440 + }, + { + "epoch": 0.21698419436190666, + "grad_norm": 0.09586260467767715, + "learning_rate": 2.35492753260407e-05, + "loss": 9.6886, + "step": 43450 + }, + { + "epoch": 0.2170341331868461, + "grad_norm": 0.09193310141563416, + "learning_rate": 2.354777341109915e-05, + "loss": 9.7037, + "step": 43460 + }, + { + "epoch": 0.21708407201178556, + "grad_norm": 0.09209885448217392, + "learning_rate": 2.35462714961576e-05, + "loss": 9.692, + "step": 43470 + }, + { + "epoch": 0.217134010836725, + "grad_norm": 0.09702476859092712, + "learning_rate": 2.3544769581216052e-05, + "loss": 9.6904, + "step": 43480 + }, + { + "epoch": 0.21718394966166446, + "grad_norm": 0.0922791138291359, + "learning_rate": 2.3543267666274502e-05, + "loss": 9.6992, + "step": 43490 + }, + { + "epoch": 0.2172338884866039, + "grad_norm": 0.09711603820323944, + "learning_rate": 2.354176575133295e-05, + "loss": 9.6947, + "step": 43500 + }, + { + "epoch": 0.21728382731154336, + "grad_norm": 0.0968707948923111, + "learning_rate": 2.35402638363914e-05, + "loss": 9.6859, + "step": 43510 + }, + { + "epoch": 0.2173337661364828, + "grad_norm": 0.09466083347797394, + "learning_rate": 2.353876192144985e-05, + "loss": 9.6967, + "step": 43520 + }, + { + "epoch": 0.21738370496142226, + "grad_norm": 0.09528151154518127, + "learning_rate": 2.35372600065083e-05, + "loss": 9.6887, + "step": 43530 + }, + { + "epoch": 0.2174336437863617, + "grad_norm": 0.09486556053161621, + "learning_rate": 2.353575809156675e-05, + "loss": 9.688, + "step": 43540 + }, + { + "epoch": 0.21748358261130116, + "grad_norm": 0.09180723130702972, + "learning_rate": 2.3534256176625196e-05, + "loss": 9.6934, + "step": 43550 + }, + { + "epoch": 0.2175335214362406, + "grad_norm": 0.09143299609422684, + "learning_rate": 2.3532754261683646e-05, + "loss": 9.6973, + "step": 43560 + }, + { + "epoch": 0.21758346026118006, + "grad_norm": 0.09400318562984467, + "learning_rate": 2.3531252346742097e-05, + "loss": 9.6916, + "step": 43570 + }, + { + "epoch": 0.2176333990861195, + "grad_norm": 0.09261635690927505, + "learning_rate": 2.3529750431800547e-05, + "loss": 9.701, + "step": 43580 + }, + { + "epoch": 0.21768333791105896, + "grad_norm": 0.09949515014886856, + "learning_rate": 2.3528248516858997e-05, + "loss": 9.6943, + "step": 43590 + }, + { + "epoch": 0.2177332767359984, + "grad_norm": 0.09125660359859467, + "learning_rate": 2.3526746601917444e-05, + "loss": 9.6932, + "step": 43600 + }, + { + "epoch": 0.21778321556093785, + "grad_norm": 0.08959287405014038, + "learning_rate": 2.3525244686975894e-05, + "loss": 9.6986, + "step": 43610 + }, + { + "epoch": 0.2178331543858773, + "grad_norm": 0.0992596298456192, + "learning_rate": 2.3523742772034344e-05, + "loss": 9.6865, + "step": 43620 + }, + { + "epoch": 0.21788309321081675, + "grad_norm": 0.09467126429080963, + "learning_rate": 2.3522240857092794e-05, + "loss": 9.6988, + "step": 43630 + }, + { + "epoch": 0.2179330320357562, + "grad_norm": 0.09459062665700912, + "learning_rate": 2.3520738942151244e-05, + "loss": 9.6991, + "step": 43640 + }, + { + "epoch": 0.21798297086069565, + "grad_norm": 0.09231843799352646, + "learning_rate": 2.3519237027209695e-05, + "loss": 9.689, + "step": 43650 + }, + { + "epoch": 0.2180329096856351, + "grad_norm": 0.09330347180366516, + "learning_rate": 2.351773511226814e-05, + "loss": 9.6886, + "step": 43660 + }, + { + "epoch": 0.21808284851057455, + "grad_norm": 0.09178127348423004, + "learning_rate": 2.351623319732659e-05, + "loss": 9.6855, + "step": 43670 + }, + { + "epoch": 0.218132787335514, + "grad_norm": 0.09343292564153671, + "learning_rate": 2.3514731282385042e-05, + "loss": 9.6845, + "step": 43680 + }, + { + "epoch": 0.21818272616045345, + "grad_norm": 0.09548097103834152, + "learning_rate": 2.3513229367443492e-05, + "loss": 9.6896, + "step": 43690 + }, + { + "epoch": 0.2182326649853929, + "grad_norm": 0.0939573422074318, + "learning_rate": 2.3511727452501942e-05, + "loss": 9.6855, + "step": 43700 + }, + { + "epoch": 0.21828260381033235, + "grad_norm": 0.09515061974525452, + "learning_rate": 2.351022553756039e-05, + "loss": 9.6916, + "step": 43710 + }, + { + "epoch": 0.2183325426352718, + "grad_norm": 0.09717291593551636, + "learning_rate": 2.350872362261884e-05, + "loss": 9.6881, + "step": 43720 + }, + { + "epoch": 0.21838248146021125, + "grad_norm": 0.09339723736047745, + "learning_rate": 2.350722170767729e-05, + "loss": 9.6886, + "step": 43730 + }, + { + "epoch": 0.2184324202851507, + "grad_norm": 0.0933520719408989, + "learning_rate": 2.350571979273574e-05, + "loss": 9.686, + "step": 43740 + }, + { + "epoch": 0.21848235911009015, + "grad_norm": 0.09928298741579056, + "learning_rate": 2.350421787779419e-05, + "loss": 9.6908, + "step": 43750 + }, + { + "epoch": 0.2185322979350296, + "grad_norm": 0.09567858278751373, + "learning_rate": 2.3502715962852636e-05, + "loss": 9.6884, + "step": 43760 + }, + { + "epoch": 0.21858223675996905, + "grad_norm": 0.09107061475515366, + "learning_rate": 2.3501214047911087e-05, + "loss": 9.6938, + "step": 43770 + }, + { + "epoch": 0.2186321755849085, + "grad_norm": 0.09627822786569595, + "learning_rate": 2.3499712132969537e-05, + "loss": 9.6838, + "step": 43780 + }, + { + "epoch": 0.21868211440984794, + "grad_norm": 0.09654753655195236, + "learning_rate": 2.3498210218027987e-05, + "loss": 9.6865, + "step": 43790 + }, + { + "epoch": 0.2187320532347874, + "grad_norm": 0.09376217424869537, + "learning_rate": 2.3496708303086437e-05, + "loss": 9.6885, + "step": 43800 + }, + { + "epoch": 0.21878199205972684, + "grad_norm": 0.08824321627616882, + "learning_rate": 2.3495206388144884e-05, + "loss": 9.6851, + "step": 43810 + }, + { + "epoch": 0.2188319308846663, + "grad_norm": 0.09631256759166718, + "learning_rate": 2.3493704473203334e-05, + "loss": 9.6929, + "step": 43820 + }, + { + "epoch": 0.21888186970960574, + "grad_norm": 0.0864676758646965, + "learning_rate": 2.3492202558261784e-05, + "loss": 9.6841, + "step": 43830 + }, + { + "epoch": 0.2189318085345452, + "grad_norm": 0.09546799957752228, + "learning_rate": 2.3490700643320235e-05, + "loss": 9.691, + "step": 43840 + }, + { + "epoch": 0.21898174735948464, + "grad_norm": 0.09540455043315887, + "learning_rate": 2.3489198728378685e-05, + "loss": 9.6935, + "step": 43850 + }, + { + "epoch": 0.2190316861844241, + "grad_norm": 0.0951889380812645, + "learning_rate": 2.348769681343713e-05, + "loss": 9.6776, + "step": 43860 + }, + { + "epoch": 0.21908162500936354, + "grad_norm": 0.09509312361478806, + "learning_rate": 2.348619489849558e-05, + "loss": 9.6906, + "step": 43870 + }, + { + "epoch": 0.219131563834303, + "grad_norm": 0.09178054332733154, + "learning_rate": 2.3484692983554032e-05, + "loss": 9.6842, + "step": 43880 + }, + { + "epoch": 0.21918150265924244, + "grad_norm": 0.09988975524902344, + "learning_rate": 2.3483191068612482e-05, + "loss": 9.6777, + "step": 43890 + }, + { + "epoch": 0.2192314414841819, + "grad_norm": 0.09594890475273132, + "learning_rate": 2.3481689153670932e-05, + "loss": 9.6763, + "step": 43900 + }, + { + "epoch": 0.21928138030912134, + "grad_norm": 0.09186796098947525, + "learning_rate": 2.348018723872938e-05, + "loss": 9.6859, + "step": 43910 + }, + { + "epoch": 0.2193313191340608, + "grad_norm": 0.10070665925741196, + "learning_rate": 2.347868532378783e-05, + "loss": 9.6894, + "step": 43920 + }, + { + "epoch": 0.21938125795900024, + "grad_norm": 0.09850089251995087, + "learning_rate": 2.347718340884628e-05, + "loss": 9.6834, + "step": 43930 + }, + { + "epoch": 0.21943119678393969, + "grad_norm": 0.09155581891536713, + "learning_rate": 2.347568149390473e-05, + "loss": 9.6808, + "step": 43940 + }, + { + "epoch": 0.21948113560887914, + "grad_norm": 0.0951627716422081, + "learning_rate": 2.347417957896318e-05, + "loss": 9.6904, + "step": 43950 + }, + { + "epoch": 0.21953107443381858, + "grad_norm": 0.09084520488977432, + "learning_rate": 2.3472677664021626e-05, + "loss": 9.6849, + "step": 43960 + }, + { + "epoch": 0.21958101325875803, + "grad_norm": 0.09903666377067566, + "learning_rate": 2.347117574908008e-05, + "loss": 9.6876, + "step": 43970 + }, + { + "epoch": 0.21963095208369748, + "grad_norm": 0.09081403911113739, + "learning_rate": 2.3469673834138527e-05, + "loss": 9.6861, + "step": 43980 + }, + { + "epoch": 0.21968089090863693, + "grad_norm": 0.09616552293300629, + "learning_rate": 2.3468171919196977e-05, + "loss": 9.6771, + "step": 43990 + }, + { + "epoch": 0.21973082973357638, + "grad_norm": 0.09797076880931854, + "learning_rate": 2.3466670004255427e-05, + "loss": 9.6872, + "step": 44000 + }, + { + "epoch": 0.2197807685585158, + "grad_norm": 0.09519995003938675, + "learning_rate": 2.3465168089313874e-05, + "loss": 9.6835, + "step": 44010 + }, + { + "epoch": 0.21983070738345525, + "grad_norm": 0.0880652442574501, + "learning_rate": 2.3463666174372328e-05, + "loss": 9.68, + "step": 44020 + }, + { + "epoch": 0.2198806462083947, + "grad_norm": 0.09858622401952744, + "learning_rate": 2.3462164259430774e-05, + "loss": 9.688, + "step": 44030 + }, + { + "epoch": 0.21993058503333415, + "grad_norm": 0.09564345329999924, + "learning_rate": 2.3460662344489225e-05, + "loss": 9.6826, + "step": 44040 + }, + { + "epoch": 0.2199805238582736, + "grad_norm": 0.0934138298034668, + "learning_rate": 2.3459160429547675e-05, + "loss": 9.6803, + "step": 44050 + }, + { + "epoch": 0.22003046268321305, + "grad_norm": 0.09328287839889526, + "learning_rate": 2.345765851460612e-05, + "loss": 9.6871, + "step": 44060 + }, + { + "epoch": 0.2200804015081525, + "grad_norm": 0.09457116574048996, + "learning_rate": 2.3456156599664575e-05, + "loss": 9.6918, + "step": 44070 + }, + { + "epoch": 0.22013034033309195, + "grad_norm": 0.10091323405504227, + "learning_rate": 2.3454654684723022e-05, + "loss": 9.6813, + "step": 44080 + }, + { + "epoch": 0.2201802791580314, + "grad_norm": 0.09307578206062317, + "learning_rate": 2.3453152769781472e-05, + "loss": 9.6833, + "step": 44090 + }, + { + "epoch": 0.22023021798297085, + "grad_norm": 0.09910399466753006, + "learning_rate": 2.3451650854839922e-05, + "loss": 9.6762, + "step": 44100 + }, + { + "epoch": 0.2202801568079103, + "grad_norm": 0.10042911767959595, + "learning_rate": 2.345014893989837e-05, + "loss": 9.6861, + "step": 44110 + }, + { + "epoch": 0.22033009563284975, + "grad_norm": 0.09451974183320999, + "learning_rate": 2.3448647024956823e-05, + "loss": 9.6838, + "step": 44120 + }, + { + "epoch": 0.2203800344577892, + "grad_norm": 0.09194231033325195, + "learning_rate": 2.344714511001527e-05, + "loss": 9.6873, + "step": 44130 + }, + { + "epoch": 0.22042997328272865, + "grad_norm": 0.09405980259180069, + "learning_rate": 2.344564319507372e-05, + "loss": 9.6875, + "step": 44140 + }, + { + "epoch": 0.2204799121076681, + "grad_norm": 0.09576237946748734, + "learning_rate": 2.344414128013217e-05, + "loss": 9.6794, + "step": 44150 + }, + { + "epoch": 0.22052985093260755, + "grad_norm": 0.09872204065322876, + "learning_rate": 2.3442639365190616e-05, + "loss": 9.6833, + "step": 44160 + }, + { + "epoch": 0.220579789757547, + "grad_norm": 0.09933184832334518, + "learning_rate": 2.344113745024907e-05, + "loss": 9.6823, + "step": 44170 + }, + { + "epoch": 0.22062972858248645, + "grad_norm": 0.09750817716121674, + "learning_rate": 2.3439635535307517e-05, + "loss": 9.6704, + "step": 44180 + }, + { + "epoch": 0.2206796674074259, + "grad_norm": 0.09613807499408722, + "learning_rate": 2.3438133620365967e-05, + "loss": 9.6826, + "step": 44190 + }, + { + "epoch": 0.22072960623236534, + "grad_norm": 0.10209544748067856, + "learning_rate": 2.3436631705424417e-05, + "loss": 9.682, + "step": 44200 + }, + { + "epoch": 0.2207795450573048, + "grad_norm": 0.0971246138215065, + "learning_rate": 2.3435129790482864e-05, + "loss": 9.6785, + "step": 44210 + }, + { + "epoch": 0.22082948388224424, + "grad_norm": 0.10060384124517441, + "learning_rate": 2.3433627875541318e-05, + "loss": 9.6795, + "step": 44220 + }, + { + "epoch": 0.2208794227071837, + "grad_norm": 0.09796610474586487, + "learning_rate": 2.3432125960599764e-05, + "loss": 9.6724, + "step": 44230 + }, + { + "epoch": 0.22092936153212314, + "grad_norm": 0.09403125196695328, + "learning_rate": 2.3430624045658215e-05, + "loss": 9.6751, + "step": 44240 + }, + { + "epoch": 0.2209793003570626, + "grad_norm": 0.0945911630988121, + "learning_rate": 2.3429122130716665e-05, + "loss": 9.6734, + "step": 44250 + }, + { + "epoch": 0.22102923918200204, + "grad_norm": 0.09785491228103638, + "learning_rate": 2.342762021577511e-05, + "loss": 9.678, + "step": 44260 + }, + { + "epoch": 0.2210791780069415, + "grad_norm": 0.09269008040428162, + "learning_rate": 2.3426118300833565e-05, + "loss": 9.682, + "step": 44270 + }, + { + "epoch": 0.22112911683188094, + "grad_norm": 0.09193340688943863, + "learning_rate": 2.3424616385892012e-05, + "loss": 9.6794, + "step": 44280 + }, + { + "epoch": 0.2211790556568204, + "grad_norm": 0.09782993048429489, + "learning_rate": 2.3423114470950462e-05, + "loss": 9.6675, + "step": 44290 + }, + { + "epoch": 0.22122899448175984, + "grad_norm": 0.09436007589101791, + "learning_rate": 2.3421612556008912e-05, + "loss": 9.6741, + "step": 44300 + }, + { + "epoch": 0.2212789333066993, + "grad_norm": 0.09482413530349731, + "learning_rate": 2.342011064106736e-05, + "loss": 9.6703, + "step": 44310 + }, + { + "epoch": 0.22132887213163874, + "grad_norm": 0.09317035228013992, + "learning_rate": 2.3418608726125813e-05, + "loss": 9.6756, + "step": 44320 + }, + { + "epoch": 0.2213788109565782, + "grad_norm": 0.09680955857038498, + "learning_rate": 2.341710681118426e-05, + "loss": 9.6781, + "step": 44330 + }, + { + "epoch": 0.22142874978151764, + "grad_norm": 0.09705173224210739, + "learning_rate": 2.3415604896242713e-05, + "loss": 9.6713, + "step": 44340 + }, + { + "epoch": 0.22147868860645709, + "grad_norm": 0.09335210174322128, + "learning_rate": 2.341410298130116e-05, + "loss": 9.6774, + "step": 44350 + }, + { + "epoch": 0.22152862743139654, + "grad_norm": 0.09018994867801666, + "learning_rate": 2.3412601066359607e-05, + "loss": 9.673, + "step": 44360 + }, + { + "epoch": 0.22157856625633598, + "grad_norm": 0.09429071098566055, + "learning_rate": 2.341109915141806e-05, + "loss": 9.681, + "step": 44370 + }, + { + "epoch": 0.22162850508127543, + "grad_norm": 0.09190639108419418, + "learning_rate": 2.3409597236476507e-05, + "loss": 9.6753, + "step": 44380 + }, + { + "epoch": 0.22167844390621488, + "grad_norm": 0.10482806712388992, + "learning_rate": 2.340809532153496e-05, + "loss": 9.6804, + "step": 44390 + }, + { + "epoch": 0.22172838273115433, + "grad_norm": 0.09960668534040451, + "learning_rate": 2.3406593406593407e-05, + "loss": 9.6752, + "step": 44400 + }, + { + "epoch": 0.22177832155609378, + "grad_norm": 0.09253636002540588, + "learning_rate": 2.3405091491651854e-05, + "loss": 9.6777, + "step": 44410 + }, + { + "epoch": 0.22182826038103323, + "grad_norm": 0.09326852858066559, + "learning_rate": 2.3403589576710308e-05, + "loss": 9.673, + "step": 44420 + }, + { + "epoch": 0.22187819920597268, + "grad_norm": 0.0913088470697403, + "learning_rate": 2.3402087661768754e-05, + "loss": 9.6732, + "step": 44430 + }, + { + "epoch": 0.22192813803091213, + "grad_norm": 0.0977054163813591, + "learning_rate": 2.3400585746827208e-05, + "loss": 9.671, + "step": 44440 + }, + { + "epoch": 0.22197807685585158, + "grad_norm": 0.09495818614959717, + "learning_rate": 2.3399083831885655e-05, + "loss": 9.6742, + "step": 44450 + }, + { + "epoch": 0.22202801568079103, + "grad_norm": 0.09410752356052399, + "learning_rate": 2.33975819169441e-05, + "loss": 9.6658, + "step": 44460 + }, + { + "epoch": 0.22207795450573048, + "grad_norm": 0.09136299043893814, + "learning_rate": 2.3396080002002555e-05, + "loss": 9.6762, + "step": 44470 + }, + { + "epoch": 0.22212789333066993, + "grad_norm": 0.09776278585195541, + "learning_rate": 2.3394578087061002e-05, + "loss": 9.6718, + "step": 44480 + }, + { + "epoch": 0.22217783215560938, + "grad_norm": 0.09050683677196503, + "learning_rate": 2.3393076172119455e-05, + "loss": 9.6827, + "step": 44490 + }, + { + "epoch": 0.22222777098054883, + "grad_norm": 0.09264349937438965, + "learning_rate": 2.3391574257177902e-05, + "loss": 9.6754, + "step": 44500 + }, + { + "epoch": 0.22227770980548828, + "grad_norm": 0.09283478558063507, + "learning_rate": 2.339007234223635e-05, + "loss": 9.6718, + "step": 44510 + }, + { + "epoch": 0.22232764863042773, + "grad_norm": 0.09404191374778748, + "learning_rate": 2.3388570427294803e-05, + "loss": 9.6742, + "step": 44520 + }, + { + "epoch": 0.22237758745536718, + "grad_norm": 0.09505549818277359, + "learning_rate": 2.338706851235325e-05, + "loss": 9.6828, + "step": 44530 + }, + { + "epoch": 0.22242752628030663, + "grad_norm": 0.09772829711437225, + "learning_rate": 2.3385566597411703e-05, + "loss": 9.6717, + "step": 44540 + }, + { + "epoch": 0.22247746510524607, + "grad_norm": 0.09645674377679825, + "learning_rate": 2.338406468247015e-05, + "loss": 9.6705, + "step": 44550 + }, + { + "epoch": 0.22252740393018552, + "grad_norm": 0.0913274735212326, + "learning_rate": 2.3382562767528597e-05, + "loss": 9.6726, + "step": 44560 + }, + { + "epoch": 0.22257734275512497, + "grad_norm": 0.09277801215648651, + "learning_rate": 2.338106085258705e-05, + "loss": 9.6706, + "step": 44570 + }, + { + "epoch": 0.22262728158006442, + "grad_norm": 0.09037318825721741, + "learning_rate": 2.3379558937645497e-05, + "loss": 9.6727, + "step": 44580 + }, + { + "epoch": 0.22267722040500387, + "grad_norm": 0.09477484226226807, + "learning_rate": 2.337805702270395e-05, + "loss": 9.6708, + "step": 44590 + }, + { + "epoch": 0.22272715922994332, + "grad_norm": 0.09787924587726593, + "learning_rate": 2.3376555107762397e-05, + "loss": 9.6842, + "step": 44600 + }, + { + "epoch": 0.22277709805488277, + "grad_norm": 0.09797964245080948, + "learning_rate": 2.3375053192820844e-05, + "loss": 9.6674, + "step": 44610 + }, + { + "epoch": 0.22282703687982222, + "grad_norm": 0.09463818371295929, + "learning_rate": 2.3373551277879298e-05, + "loss": 9.6736, + "step": 44620 + }, + { + "epoch": 0.22287697570476167, + "grad_norm": 0.09300590306520462, + "learning_rate": 2.3372049362937744e-05, + "loss": 9.6651, + "step": 44630 + }, + { + "epoch": 0.22292691452970112, + "grad_norm": 0.09273765981197357, + "learning_rate": 2.3370547447996198e-05, + "loss": 9.6731, + "step": 44640 + }, + { + "epoch": 0.22297685335464057, + "grad_norm": 0.09590218961238861, + "learning_rate": 2.3369045533054645e-05, + "loss": 9.6727, + "step": 44650 + }, + { + "epoch": 0.22302679217958002, + "grad_norm": 0.09408383816480637, + "learning_rate": 2.3367543618113095e-05, + "loss": 9.6691, + "step": 44660 + }, + { + "epoch": 0.22307673100451947, + "grad_norm": 0.101545050740242, + "learning_rate": 2.3366041703171545e-05, + "loss": 9.6705, + "step": 44670 + }, + { + "epoch": 0.22312666982945892, + "grad_norm": 0.0954471305012703, + "learning_rate": 2.3364539788229992e-05, + "loss": 9.6699, + "step": 44680 + }, + { + "epoch": 0.22317660865439837, + "grad_norm": 0.0932590514421463, + "learning_rate": 2.3363037873288445e-05, + "loss": 9.669, + "step": 44690 + }, + { + "epoch": 0.22322654747933782, + "grad_norm": 0.09442409873008728, + "learning_rate": 2.3361535958346892e-05, + "loss": 9.6754, + "step": 44700 + }, + { + "epoch": 0.22327648630427727, + "grad_norm": 0.09490916877985, + "learning_rate": 2.3360034043405342e-05, + "loss": 9.6672, + "step": 44710 + }, + { + "epoch": 0.22332642512921672, + "grad_norm": 0.0963781476020813, + "learning_rate": 2.3358532128463793e-05, + "loss": 9.6703, + "step": 44720 + }, + { + "epoch": 0.22337636395415617, + "grad_norm": 0.09573322534561157, + "learning_rate": 2.335703021352224e-05, + "loss": 9.6784, + "step": 44730 + }, + { + "epoch": 0.22342630277909561, + "grad_norm": 0.09953943639993668, + "learning_rate": 2.3355528298580693e-05, + "loss": 9.6667, + "step": 44740 + }, + { + "epoch": 0.22347624160403506, + "grad_norm": 0.0922081395983696, + "learning_rate": 2.335402638363914e-05, + "loss": 9.6731, + "step": 44750 + }, + { + "epoch": 0.2235261804289745, + "grad_norm": 0.09071052074432373, + "learning_rate": 2.335252446869759e-05, + "loss": 9.6642, + "step": 44760 + }, + { + "epoch": 0.22357611925391396, + "grad_norm": 0.09215296804904938, + "learning_rate": 2.335102255375604e-05, + "loss": 9.667, + "step": 44770 + }, + { + "epoch": 0.2236260580788534, + "grad_norm": 0.10084545612335205, + "learning_rate": 2.3349520638814487e-05, + "loss": 9.6648, + "step": 44780 + }, + { + "epoch": 0.22367599690379286, + "grad_norm": 0.09967309981584549, + "learning_rate": 2.334801872387294e-05, + "loss": 9.659, + "step": 44790 + }, + { + "epoch": 0.2237259357287323, + "grad_norm": 0.09629634022712708, + "learning_rate": 2.3346516808931387e-05, + "loss": 9.6709, + "step": 44800 + }, + { + "epoch": 0.22377587455367176, + "grad_norm": 0.09285489469766617, + "learning_rate": 2.3345014893989837e-05, + "loss": 9.6672, + "step": 44810 + }, + { + "epoch": 0.2238258133786112, + "grad_norm": 0.09779229015111923, + "learning_rate": 2.3343512979048288e-05, + "loss": 9.6669, + "step": 44820 + }, + { + "epoch": 0.22387575220355066, + "grad_norm": 0.09266447275876999, + "learning_rate": 2.3342011064106734e-05, + "loss": 9.6644, + "step": 44830 + }, + { + "epoch": 0.2239256910284901, + "grad_norm": 0.09615839272737503, + "learning_rate": 2.3340509149165188e-05, + "loss": 9.6617, + "step": 44840 + }, + { + "epoch": 0.22397562985342956, + "grad_norm": 0.09608431905508041, + "learning_rate": 2.3339007234223635e-05, + "loss": 9.663, + "step": 44850 + }, + { + "epoch": 0.224025568678369, + "grad_norm": 0.09576740115880966, + "learning_rate": 2.3337505319282085e-05, + "loss": 9.6596, + "step": 44860 + }, + { + "epoch": 0.22407550750330846, + "grad_norm": 0.09225677698850632, + "learning_rate": 2.3336003404340535e-05, + "loss": 9.6597, + "step": 44870 + }, + { + "epoch": 0.2241254463282479, + "grad_norm": 0.09069118648767471, + "learning_rate": 2.3334501489398982e-05, + "loss": 9.6613, + "step": 44880 + }, + { + "epoch": 0.22417538515318736, + "grad_norm": 0.10063677281141281, + "learning_rate": 2.3332999574457435e-05, + "loss": 9.6712, + "step": 44890 + }, + { + "epoch": 0.2242253239781268, + "grad_norm": 0.0957692414522171, + "learning_rate": 2.3331497659515882e-05, + "loss": 9.6627, + "step": 44900 + }, + { + "epoch": 0.22427526280306626, + "grad_norm": 0.0920589417219162, + "learning_rate": 2.3329995744574332e-05, + "loss": 9.6683, + "step": 44910 + }, + { + "epoch": 0.2243252016280057, + "grad_norm": 0.09941014647483826, + "learning_rate": 2.3328493829632783e-05, + "loss": 9.6632, + "step": 44920 + }, + { + "epoch": 0.22437514045294515, + "grad_norm": 0.09040158987045288, + "learning_rate": 2.332699191469123e-05, + "loss": 9.6632, + "step": 44930 + }, + { + "epoch": 0.2244250792778846, + "grad_norm": 0.09432369470596313, + "learning_rate": 2.3325489999749683e-05, + "loss": 9.6624, + "step": 44940 + }, + { + "epoch": 0.22447501810282405, + "grad_norm": 0.09329992532730103, + "learning_rate": 2.332398808480813e-05, + "loss": 9.6735, + "step": 44950 + }, + { + "epoch": 0.2245249569277635, + "grad_norm": 0.09206376224756241, + "learning_rate": 2.332248616986658e-05, + "loss": 9.6687, + "step": 44960 + }, + { + "epoch": 0.22457489575270295, + "grad_norm": 0.09406663477420807, + "learning_rate": 2.332098425492503e-05, + "loss": 9.6687, + "step": 44970 + }, + { + "epoch": 0.2246248345776424, + "grad_norm": 0.09512316435575485, + "learning_rate": 2.331948233998348e-05, + "loss": 9.6635, + "step": 44980 + }, + { + "epoch": 0.22467477340258185, + "grad_norm": 0.09398701786994934, + "learning_rate": 2.331798042504193e-05, + "loss": 9.6649, + "step": 44990 + }, + { + "epoch": 0.22472471222752127, + "grad_norm": 0.09211496263742447, + "learning_rate": 2.3316478510100377e-05, + "loss": 9.6654, + "step": 45000 + }, + { + "epoch": 0.22477465105246072, + "grad_norm": 0.09733974188566208, + "learning_rate": 2.3314976595158827e-05, + "loss": 9.6564, + "step": 45010 + }, + { + "epoch": 0.22482458987740017, + "grad_norm": 0.09194665402173996, + "learning_rate": 2.3313474680217278e-05, + "loss": 9.6596, + "step": 45020 + }, + { + "epoch": 0.22487452870233962, + "grad_norm": 0.08959387242794037, + "learning_rate": 2.3311972765275728e-05, + "loss": 9.6623, + "step": 45030 + }, + { + "epoch": 0.22492446752727907, + "grad_norm": 0.09784001857042313, + "learning_rate": 2.3310470850334178e-05, + "loss": 9.6599, + "step": 45040 + }, + { + "epoch": 0.22497440635221852, + "grad_norm": 0.09329735487699509, + "learning_rate": 2.3308968935392625e-05, + "loss": 9.6656, + "step": 45050 + }, + { + "epoch": 0.22502434517715797, + "grad_norm": 0.08951118588447571, + "learning_rate": 2.3307467020451075e-05, + "loss": 9.6618, + "step": 45060 + }, + { + "epoch": 0.22507428400209742, + "grad_norm": 0.09492070972919464, + "learning_rate": 2.3305965105509525e-05, + "loss": 9.6589, + "step": 45070 + }, + { + "epoch": 0.22512422282703687, + "grad_norm": 0.09987737983465195, + "learning_rate": 2.3304463190567975e-05, + "loss": 9.6615, + "step": 45080 + }, + { + "epoch": 0.22517416165197632, + "grad_norm": 0.10113034397363663, + "learning_rate": 2.3302961275626425e-05, + "loss": 9.6608, + "step": 45090 + }, + { + "epoch": 0.22522410047691577, + "grad_norm": 0.09234467148780823, + "learning_rate": 2.3301459360684872e-05, + "loss": 9.669, + "step": 45100 + }, + { + "epoch": 0.22527403930185522, + "grad_norm": 0.09484550356864929, + "learning_rate": 2.3299957445743322e-05, + "loss": 9.6537, + "step": 45110 + }, + { + "epoch": 0.22532397812679467, + "grad_norm": 0.09227019548416138, + "learning_rate": 2.3298455530801773e-05, + "loss": 9.6642, + "step": 45120 + }, + { + "epoch": 0.22537391695173412, + "grad_norm": 0.0964643582701683, + "learning_rate": 2.3296953615860223e-05, + "loss": 9.6621, + "step": 45130 + }, + { + "epoch": 0.22542385577667357, + "grad_norm": 0.09387196600437164, + "learning_rate": 2.3295451700918673e-05, + "loss": 9.6668, + "step": 45140 + }, + { + "epoch": 0.22547379460161301, + "grad_norm": 0.08830858767032623, + "learning_rate": 2.329394978597712e-05, + "loss": 9.6616, + "step": 45150 + }, + { + "epoch": 0.22552373342655246, + "grad_norm": 0.09885809570550919, + "learning_rate": 2.329244787103557e-05, + "loss": 9.6554, + "step": 45160 + }, + { + "epoch": 0.2255736722514919, + "grad_norm": 0.09711761027574539, + "learning_rate": 2.329094595609402e-05, + "loss": 9.6619, + "step": 45170 + }, + { + "epoch": 0.22562361107643136, + "grad_norm": 0.0906030535697937, + "learning_rate": 2.328944404115247e-05, + "loss": 9.661, + "step": 45180 + }, + { + "epoch": 0.2256735499013708, + "grad_norm": 0.09631696343421936, + "learning_rate": 2.328794212621092e-05, + "loss": 9.6565, + "step": 45190 + }, + { + "epoch": 0.22572348872631026, + "grad_norm": 0.09392791241407394, + "learning_rate": 2.3286440211269367e-05, + "loss": 9.6611, + "step": 45200 + }, + { + "epoch": 0.2257734275512497, + "grad_norm": 0.10017602145671844, + "learning_rate": 2.3284938296327817e-05, + "loss": 9.6605, + "step": 45210 + }, + { + "epoch": 0.22582336637618916, + "grad_norm": 0.09598571807146072, + "learning_rate": 2.3283436381386268e-05, + "loss": 9.664, + "step": 45220 + }, + { + "epoch": 0.2258733052011286, + "grad_norm": 0.09706757217645645, + "learning_rate": 2.3281934466444718e-05, + "loss": 9.6531, + "step": 45230 + }, + { + "epoch": 0.22592324402606806, + "grad_norm": 0.09351912885904312, + "learning_rate": 2.3280432551503168e-05, + "loss": 9.6569, + "step": 45240 + }, + { + "epoch": 0.2259731828510075, + "grad_norm": 0.08977209031581879, + "learning_rate": 2.3278930636561615e-05, + "loss": 9.6535, + "step": 45250 + }, + { + "epoch": 0.22602312167594696, + "grad_norm": 0.09070810675621033, + "learning_rate": 2.3277428721620065e-05, + "loss": 9.6611, + "step": 45260 + }, + { + "epoch": 0.2260730605008864, + "grad_norm": 0.09817781299352646, + "learning_rate": 2.3275926806678515e-05, + "loss": 9.654, + "step": 45270 + }, + { + "epoch": 0.22612299932582586, + "grad_norm": 0.09307067096233368, + "learning_rate": 2.3274424891736965e-05, + "loss": 9.6607, + "step": 45280 + }, + { + "epoch": 0.2261729381507653, + "grad_norm": 0.09242681413888931, + "learning_rate": 2.3272922976795415e-05, + "loss": 9.6639, + "step": 45290 + }, + { + "epoch": 0.22622287697570476, + "grad_norm": 0.09181907773017883, + "learning_rate": 2.3271421061853866e-05, + "loss": 9.653, + "step": 45300 + }, + { + "epoch": 0.2262728158006442, + "grad_norm": 0.09517987072467804, + "learning_rate": 2.3269919146912312e-05, + "loss": 9.6602, + "step": 45310 + }, + { + "epoch": 0.22632275462558366, + "grad_norm": 0.0962613895535469, + "learning_rate": 2.3268417231970763e-05, + "loss": 9.6629, + "step": 45320 + }, + { + "epoch": 0.2263726934505231, + "grad_norm": 0.09088754653930664, + "learning_rate": 2.3266915317029213e-05, + "loss": 9.6521, + "step": 45330 + }, + { + "epoch": 0.22642263227546255, + "grad_norm": 0.09162914752960205, + "learning_rate": 2.3265413402087663e-05, + "loss": 9.6579, + "step": 45340 + }, + { + "epoch": 0.226472571100402, + "grad_norm": 0.09387911111116409, + "learning_rate": 2.3263911487146113e-05, + "loss": 9.6558, + "step": 45350 + }, + { + "epoch": 0.22652250992534145, + "grad_norm": 0.08971641957759857, + "learning_rate": 2.326240957220456e-05, + "loss": 9.6652, + "step": 45360 + }, + { + "epoch": 0.2265724487502809, + "grad_norm": 0.090997114777565, + "learning_rate": 2.326090765726301e-05, + "loss": 9.6571, + "step": 45370 + }, + { + "epoch": 0.22662238757522035, + "grad_norm": 0.09356195479631424, + "learning_rate": 2.325940574232146e-05, + "loss": 9.6564, + "step": 45380 + }, + { + "epoch": 0.2266723264001598, + "grad_norm": 0.09340345114469528, + "learning_rate": 2.325790382737991e-05, + "loss": 9.6616, + "step": 45390 + }, + { + "epoch": 0.22672226522509925, + "grad_norm": 0.09831879287958145, + "learning_rate": 2.325640191243836e-05, + "loss": 9.6553, + "step": 45400 + }, + { + "epoch": 0.2267722040500387, + "grad_norm": 0.0913265198469162, + "learning_rate": 2.3254899997496807e-05, + "loss": 9.6529, + "step": 45410 + }, + { + "epoch": 0.22682214287497815, + "grad_norm": 0.09678717702627182, + "learning_rate": 2.3253398082555258e-05, + "loss": 9.657, + "step": 45420 + }, + { + "epoch": 0.2268720816999176, + "grad_norm": 0.0980067104101181, + "learning_rate": 2.3251896167613708e-05, + "loss": 9.6614, + "step": 45430 + }, + { + "epoch": 0.22692202052485705, + "grad_norm": 0.09428383409976959, + "learning_rate": 2.3250394252672158e-05, + "loss": 9.6596, + "step": 45440 + }, + { + "epoch": 0.2269719593497965, + "grad_norm": 0.09209676086902618, + "learning_rate": 2.3248892337730608e-05, + "loss": 9.6553, + "step": 45450 + }, + { + "epoch": 0.22702189817473595, + "grad_norm": 0.09065406024456024, + "learning_rate": 2.3247390422789055e-05, + "loss": 9.6595, + "step": 45460 + }, + { + "epoch": 0.2270718369996754, + "grad_norm": 0.0918533131480217, + "learning_rate": 2.3245888507847505e-05, + "loss": 9.6571, + "step": 45470 + }, + { + "epoch": 0.22712177582461485, + "grad_norm": 0.0916934683918953, + "learning_rate": 2.3244386592905955e-05, + "loss": 9.667, + "step": 45480 + }, + { + "epoch": 0.2271717146495543, + "grad_norm": 0.09149292856454849, + "learning_rate": 2.3242884677964406e-05, + "loss": 9.6524, + "step": 45490 + }, + { + "epoch": 0.22722165347449375, + "grad_norm": 0.09491834789514542, + "learning_rate": 2.3241382763022856e-05, + "loss": 9.6487, + "step": 45500 + }, + { + "epoch": 0.2272715922994332, + "grad_norm": 0.09356304258108139, + "learning_rate": 2.3239880848081302e-05, + "loss": 9.6529, + "step": 45510 + }, + { + "epoch": 0.22732153112437264, + "grad_norm": 0.09925433993339539, + "learning_rate": 2.3238378933139753e-05, + "loss": 9.6515, + "step": 45520 + }, + { + "epoch": 0.2273714699493121, + "grad_norm": 0.09688141942024231, + "learning_rate": 2.3236877018198203e-05, + "loss": 9.656, + "step": 45530 + }, + { + "epoch": 0.22742140877425154, + "grad_norm": 0.08831819891929626, + "learning_rate": 2.3235375103256653e-05, + "loss": 9.6533, + "step": 45540 + }, + { + "epoch": 0.227471347599191, + "grad_norm": 0.0944901704788208, + "learning_rate": 2.3233873188315103e-05, + "loss": 9.657, + "step": 45550 + }, + { + "epoch": 0.22752128642413044, + "grad_norm": 0.0912630707025528, + "learning_rate": 2.323237127337355e-05, + "loss": 9.6442, + "step": 45560 + }, + { + "epoch": 0.2275712252490699, + "grad_norm": 0.09489890933036804, + "learning_rate": 2.3230869358432e-05, + "loss": 9.6535, + "step": 45570 + }, + { + "epoch": 0.22762116407400934, + "grad_norm": 0.09343596547842026, + "learning_rate": 2.322936744349045e-05, + "loss": 9.6522, + "step": 45580 + }, + { + "epoch": 0.2276711028989488, + "grad_norm": 0.0928681418299675, + "learning_rate": 2.32278655285489e-05, + "loss": 9.6557, + "step": 45590 + }, + { + "epoch": 0.22772104172388824, + "grad_norm": 0.09620653092861176, + "learning_rate": 2.322636361360735e-05, + "loss": 9.6563, + "step": 45600 + }, + { + "epoch": 0.2277709805488277, + "grad_norm": 0.09333667159080505, + "learning_rate": 2.3224861698665797e-05, + "loss": 9.6608, + "step": 45610 + }, + { + "epoch": 0.22782091937376714, + "grad_norm": 0.0948384627699852, + "learning_rate": 2.322335978372425e-05, + "loss": 9.6551, + "step": 45620 + }, + { + "epoch": 0.2278708581987066, + "grad_norm": 0.09374862164258957, + "learning_rate": 2.3221857868782698e-05, + "loss": 9.6495, + "step": 45630 + }, + { + "epoch": 0.22792079702364604, + "grad_norm": 0.09943927079439163, + "learning_rate": 2.3220355953841148e-05, + "loss": 9.657, + "step": 45640 + }, + { + "epoch": 0.2279707358485855, + "grad_norm": 0.09763931483030319, + "learning_rate": 2.3218854038899598e-05, + "loss": 9.6523, + "step": 45650 + }, + { + "epoch": 0.22802067467352494, + "grad_norm": 0.09670069813728333, + "learning_rate": 2.3217352123958045e-05, + "loss": 9.652, + "step": 45660 + }, + { + "epoch": 0.2280706134984644, + "grad_norm": 0.09232018142938614, + "learning_rate": 2.32158502090165e-05, + "loss": 9.657, + "step": 45670 + }, + { + "epoch": 0.22812055232340384, + "grad_norm": 0.09801393002271652, + "learning_rate": 2.3214348294074945e-05, + "loss": 9.6503, + "step": 45680 + }, + { + "epoch": 0.22817049114834329, + "grad_norm": 0.09959478676319122, + "learning_rate": 2.3212846379133396e-05, + "loss": 9.6495, + "step": 45690 + }, + { + "epoch": 0.22822042997328273, + "grad_norm": 0.09530535340309143, + "learning_rate": 2.3211344464191846e-05, + "loss": 9.6492, + "step": 45700 + }, + { + "epoch": 0.22827036879822218, + "grad_norm": 0.08995451778173447, + "learning_rate": 2.3209842549250292e-05, + "loss": 9.6496, + "step": 45710 + }, + { + "epoch": 0.22832030762316163, + "grad_norm": 0.09303654730319977, + "learning_rate": 2.3208340634308746e-05, + "loss": 9.6446, + "step": 45720 + }, + { + "epoch": 0.22837024644810108, + "grad_norm": 0.09486031532287598, + "learning_rate": 2.3206838719367193e-05, + "loss": 9.6531, + "step": 45730 + }, + { + "epoch": 0.22842018527304053, + "grad_norm": 0.0937868058681488, + "learning_rate": 2.3205336804425643e-05, + "loss": 9.65, + "step": 45740 + }, + { + "epoch": 0.22847012409797998, + "grad_norm": 0.09792537987232208, + "learning_rate": 2.3203834889484093e-05, + "loss": 9.651, + "step": 45750 + }, + { + "epoch": 0.22852006292291943, + "grad_norm": 0.09761232137680054, + "learning_rate": 2.320233297454254e-05, + "loss": 9.6487, + "step": 45760 + }, + { + "epoch": 0.22857000174785888, + "grad_norm": 0.09970896691083908, + "learning_rate": 2.3200831059600994e-05, + "loss": 9.6473, + "step": 45770 + }, + { + "epoch": 0.22861994057279833, + "grad_norm": 0.096064992249012, + "learning_rate": 2.319932914465944e-05, + "loss": 9.6548, + "step": 45780 + }, + { + "epoch": 0.22866987939773778, + "grad_norm": 0.09543237090110779, + "learning_rate": 2.319782722971789e-05, + "loss": 9.6464, + "step": 45790 + }, + { + "epoch": 0.22871981822267723, + "grad_norm": 0.09486628323793411, + "learning_rate": 2.319632531477634e-05, + "loss": 9.6529, + "step": 45800 + }, + { + "epoch": 0.22876975704761668, + "grad_norm": 0.09722953289747238, + "learning_rate": 2.3194823399834788e-05, + "loss": 9.6543, + "step": 45810 + }, + { + "epoch": 0.22881969587255613, + "grad_norm": 0.09415905922651291, + "learning_rate": 2.319332148489324e-05, + "loss": 9.6438, + "step": 45820 + }, + { + "epoch": 0.22886963469749558, + "grad_norm": 0.08979658782482147, + "learning_rate": 2.3191819569951688e-05, + "loss": 9.653, + "step": 45830 + }, + { + "epoch": 0.22891957352243503, + "grad_norm": 0.09261351078748703, + "learning_rate": 2.3190317655010138e-05, + "loss": 9.6534, + "step": 45840 + }, + { + "epoch": 0.22896951234737448, + "grad_norm": 0.09331527352333069, + "learning_rate": 2.3188815740068588e-05, + "loss": 9.6481, + "step": 45850 + }, + { + "epoch": 0.22901945117231393, + "grad_norm": 0.09803377836942673, + "learning_rate": 2.3187313825127035e-05, + "loss": 9.6467, + "step": 45860 + }, + { + "epoch": 0.22906938999725338, + "grad_norm": 0.09804876148700714, + "learning_rate": 2.318581191018549e-05, + "loss": 9.6431, + "step": 45870 + }, + { + "epoch": 0.22911932882219282, + "grad_norm": 0.09359174221754074, + "learning_rate": 2.3184309995243935e-05, + "loss": 9.6487, + "step": 45880 + }, + { + "epoch": 0.22916926764713227, + "grad_norm": 0.09482156485319138, + "learning_rate": 2.3182808080302386e-05, + "loss": 9.6482, + "step": 45890 + }, + { + "epoch": 0.22921920647207172, + "grad_norm": 0.09959374368190765, + "learning_rate": 2.3181306165360836e-05, + "loss": 9.6447, + "step": 45900 + }, + { + "epoch": 0.22926914529701117, + "grad_norm": 0.09299067407846451, + "learning_rate": 2.3179804250419283e-05, + "loss": 9.6411, + "step": 45910 + }, + { + "epoch": 0.22931908412195062, + "grad_norm": 0.0913899689912796, + "learning_rate": 2.3178302335477736e-05, + "loss": 9.6476, + "step": 45920 + }, + { + "epoch": 0.22936902294689007, + "grad_norm": 0.09546446055173874, + "learning_rate": 2.3176800420536183e-05, + "loss": 9.6434, + "step": 45930 + }, + { + "epoch": 0.22941896177182952, + "grad_norm": 0.09306006133556366, + "learning_rate": 2.3175298505594636e-05, + "loss": 9.6393, + "step": 45940 + }, + { + "epoch": 0.22946890059676897, + "grad_norm": 0.09446984529495239, + "learning_rate": 2.3173796590653083e-05, + "loss": 9.6481, + "step": 45950 + }, + { + "epoch": 0.22951883942170842, + "grad_norm": 0.09467601031064987, + "learning_rate": 2.317229467571153e-05, + "loss": 9.6485, + "step": 45960 + }, + { + "epoch": 0.22956877824664787, + "grad_norm": 0.09725523740053177, + "learning_rate": 2.3170792760769984e-05, + "loss": 9.6504, + "step": 45970 + }, + { + "epoch": 0.22961871707158732, + "grad_norm": 0.09193247556686401, + "learning_rate": 2.316929084582843e-05, + "loss": 9.6495, + "step": 45980 + }, + { + "epoch": 0.22966865589652674, + "grad_norm": 0.09462161362171173, + "learning_rate": 2.3167788930886884e-05, + "loss": 9.6387, + "step": 45990 + }, + { + "epoch": 0.2297185947214662, + "grad_norm": 0.09702228009700775, + "learning_rate": 2.316628701594533e-05, + "loss": 9.6417, + "step": 46000 + }, + { + "epoch": 0.22976853354640564, + "grad_norm": 0.09509331732988358, + "learning_rate": 2.316478510100378e-05, + "loss": 9.6414, + "step": 46010 + }, + { + "epoch": 0.2298184723713451, + "grad_norm": 0.09766579419374466, + "learning_rate": 2.316328318606223e-05, + "loss": 9.6406, + "step": 46020 + }, + { + "epoch": 0.22986841119628454, + "grad_norm": 0.10346722602844238, + "learning_rate": 2.3161781271120678e-05, + "loss": 9.6464, + "step": 46030 + }, + { + "epoch": 0.229918350021224, + "grad_norm": 0.09228979051113129, + "learning_rate": 2.316027935617913e-05, + "loss": 9.6473, + "step": 46040 + }, + { + "epoch": 0.22996828884616344, + "grad_norm": 0.10133613646030426, + "learning_rate": 2.3158777441237578e-05, + "loss": 9.6507, + "step": 46050 + }, + { + "epoch": 0.2300182276711029, + "grad_norm": 0.09652470797300339, + "learning_rate": 2.315727552629603e-05, + "loss": 9.6408, + "step": 46060 + }, + { + "epoch": 0.23006816649604234, + "grad_norm": 0.09428387135267258, + "learning_rate": 2.315577361135448e-05, + "loss": 9.6406, + "step": 46070 + }, + { + "epoch": 0.2301181053209818, + "grad_norm": 0.09347372502088547, + "learning_rate": 2.3154271696412925e-05, + "loss": 9.6449, + "step": 46080 + }, + { + "epoch": 0.23016804414592124, + "grad_norm": 0.09821302443742752, + "learning_rate": 2.315276978147138e-05, + "loss": 9.6412, + "step": 46090 + }, + { + "epoch": 0.23021798297086069, + "grad_norm": 0.09411518275737762, + "learning_rate": 2.3151267866529826e-05, + "loss": 9.6504, + "step": 46100 + }, + { + "epoch": 0.23026792179580013, + "grad_norm": 0.09326085448265076, + "learning_rate": 2.3149765951588276e-05, + "loss": 9.6424, + "step": 46110 + }, + { + "epoch": 0.23031786062073958, + "grad_norm": 0.09685725718736649, + "learning_rate": 2.3148264036646726e-05, + "loss": 9.6439, + "step": 46120 + }, + { + "epoch": 0.23036779944567903, + "grad_norm": 0.09456616640090942, + "learning_rate": 2.3146762121705173e-05, + "loss": 9.649, + "step": 46130 + }, + { + "epoch": 0.23041773827061848, + "grad_norm": 0.08801116794347763, + "learning_rate": 2.3145260206763626e-05, + "loss": 9.6457, + "step": 46140 + }, + { + "epoch": 0.23046767709555793, + "grad_norm": 0.09126181155443192, + "learning_rate": 2.3143758291822073e-05, + "loss": 9.6452, + "step": 46150 + }, + { + "epoch": 0.23051761592049738, + "grad_norm": 0.0932285413146019, + "learning_rate": 2.3142256376880523e-05, + "loss": 9.6401, + "step": 46160 + }, + { + "epoch": 0.23056755474543683, + "grad_norm": 0.10316822677850723, + "learning_rate": 2.3140754461938974e-05, + "loss": 9.6444, + "step": 46170 + }, + { + "epoch": 0.23061749357037628, + "grad_norm": 0.0956098735332489, + "learning_rate": 2.313925254699742e-05, + "loss": 9.6408, + "step": 46180 + }, + { + "epoch": 0.23066743239531573, + "grad_norm": 0.09577090293169022, + "learning_rate": 2.3137750632055874e-05, + "loss": 9.6406, + "step": 46190 + }, + { + "epoch": 0.23071737122025518, + "grad_norm": 0.09496666491031647, + "learning_rate": 2.313624871711432e-05, + "loss": 9.6419, + "step": 46200 + }, + { + "epoch": 0.23076731004519463, + "grad_norm": 0.09271476417779922, + "learning_rate": 2.313474680217277e-05, + "loss": 9.6484, + "step": 46210 + }, + { + "epoch": 0.23081724887013408, + "grad_norm": 0.09295427054166794, + "learning_rate": 2.313324488723122e-05, + "loss": 9.6385, + "step": 46220 + }, + { + "epoch": 0.23086718769507353, + "grad_norm": 0.09634546935558319, + "learning_rate": 2.3131742972289668e-05, + "loss": 9.6381, + "step": 46230 + }, + { + "epoch": 0.23091712652001298, + "grad_norm": 0.09890441596508026, + "learning_rate": 2.313024105734812e-05, + "loss": 9.6422, + "step": 46240 + }, + { + "epoch": 0.23096706534495243, + "grad_norm": 0.0954676941037178, + "learning_rate": 2.3128739142406568e-05, + "loss": 9.6382, + "step": 46250 + }, + { + "epoch": 0.23101700416989188, + "grad_norm": 0.09203564375638962, + "learning_rate": 2.312723722746502e-05, + "loss": 9.6359, + "step": 46260 + }, + { + "epoch": 0.23106694299483133, + "grad_norm": 0.09297526627779007, + "learning_rate": 2.312573531252347e-05, + "loss": 9.6386, + "step": 46270 + }, + { + "epoch": 0.23111688181977078, + "grad_norm": 0.09271712601184845, + "learning_rate": 2.3124233397581915e-05, + "loss": 9.6389, + "step": 46280 + }, + { + "epoch": 0.23116682064471022, + "grad_norm": 0.09680858999490738, + "learning_rate": 2.312273148264037e-05, + "loss": 9.6354, + "step": 46290 + }, + { + "epoch": 0.23121675946964967, + "grad_norm": 0.09343792498111725, + "learning_rate": 2.3121229567698816e-05, + "loss": 9.6396, + "step": 46300 + }, + { + "epoch": 0.23126669829458912, + "grad_norm": 0.10061474144458771, + "learning_rate": 2.311972765275727e-05, + "loss": 9.6373, + "step": 46310 + }, + { + "epoch": 0.23131663711952857, + "grad_norm": 0.09283909946680069, + "learning_rate": 2.3118225737815716e-05, + "loss": 9.6334, + "step": 46320 + }, + { + "epoch": 0.23136657594446802, + "grad_norm": 0.09406852722167969, + "learning_rate": 2.3116723822874163e-05, + "loss": 9.6364, + "step": 46330 + }, + { + "epoch": 0.23141651476940747, + "grad_norm": 0.0919579491019249, + "learning_rate": 2.3115221907932616e-05, + "loss": 9.6377, + "step": 46340 + }, + { + "epoch": 0.23146645359434692, + "grad_norm": 0.09919621050357819, + "learning_rate": 2.3113719992991063e-05, + "loss": 9.6411, + "step": 46350 + }, + { + "epoch": 0.23151639241928637, + "grad_norm": 0.09455198049545288, + "learning_rate": 2.3112218078049517e-05, + "loss": 9.6333, + "step": 46360 + }, + { + "epoch": 0.23156633124422582, + "grad_norm": 0.09368839114904404, + "learning_rate": 2.3110716163107964e-05, + "loss": 9.6295, + "step": 46370 + }, + { + "epoch": 0.23161627006916527, + "grad_norm": 0.09375040233135223, + "learning_rate": 2.310921424816641e-05, + "loss": 9.6388, + "step": 46380 + }, + { + "epoch": 0.23166620889410472, + "grad_norm": 0.09624538570642471, + "learning_rate": 2.3107712333224864e-05, + "loss": 9.6401, + "step": 46390 + }, + { + "epoch": 0.23171614771904417, + "grad_norm": 0.09258920699357986, + "learning_rate": 2.310621041828331e-05, + "loss": 9.6337, + "step": 46400 + }, + { + "epoch": 0.23176608654398362, + "grad_norm": 0.09907475113868713, + "learning_rate": 2.3104708503341764e-05, + "loss": 9.632, + "step": 46410 + }, + { + "epoch": 0.23181602536892307, + "grad_norm": 0.094297394156456, + "learning_rate": 2.310320658840021e-05, + "loss": 9.6353, + "step": 46420 + }, + { + "epoch": 0.23186596419386252, + "grad_norm": 0.09265825897455215, + "learning_rate": 2.3101704673458658e-05, + "loss": 9.6436, + "step": 46430 + }, + { + "epoch": 0.23191590301880197, + "grad_norm": 0.09331150352954865, + "learning_rate": 2.310020275851711e-05, + "loss": 9.6316, + "step": 46440 + }, + { + "epoch": 0.23196584184374142, + "grad_norm": 0.0924522653222084, + "learning_rate": 2.3098700843575558e-05, + "loss": 9.6313, + "step": 46450 + }, + { + "epoch": 0.23201578066868087, + "grad_norm": 0.09283218532800674, + "learning_rate": 2.3097198928634012e-05, + "loss": 9.6353, + "step": 46460 + }, + { + "epoch": 0.23206571949362031, + "grad_norm": 0.09375794231891632, + "learning_rate": 2.309569701369246e-05, + "loss": 9.6385, + "step": 46470 + }, + { + "epoch": 0.23211565831855976, + "grad_norm": 0.0972442626953125, + "learning_rate": 2.3094195098750905e-05, + "loss": 9.6413, + "step": 46480 + }, + { + "epoch": 0.2321655971434992, + "grad_norm": 0.09976401925086975, + "learning_rate": 2.309269318380936e-05, + "loss": 9.6418, + "step": 46490 + }, + { + "epoch": 0.23221553596843866, + "grad_norm": 0.09310724586248398, + "learning_rate": 2.3091191268867806e-05, + "loss": 9.642, + "step": 46500 + }, + { + "epoch": 0.2322654747933781, + "grad_norm": 0.09604420512914658, + "learning_rate": 2.308968935392626e-05, + "loss": 9.6406, + "step": 46510 + }, + { + "epoch": 0.23231541361831756, + "grad_norm": 0.09983832389116287, + "learning_rate": 2.3088187438984706e-05, + "loss": 9.6365, + "step": 46520 + }, + { + "epoch": 0.232365352443257, + "grad_norm": 0.09731082618236542, + "learning_rate": 2.3086685524043153e-05, + "loss": 9.6396, + "step": 46530 + }, + { + "epoch": 0.23241529126819646, + "grad_norm": 0.09260038286447525, + "learning_rate": 2.3085183609101606e-05, + "loss": 9.6343, + "step": 46540 + }, + { + "epoch": 0.2324652300931359, + "grad_norm": 0.09418473392724991, + "learning_rate": 2.3083681694160053e-05, + "loss": 9.6387, + "step": 46550 + }, + { + "epoch": 0.23251516891807536, + "grad_norm": 0.09961055219173431, + "learning_rate": 2.3082179779218507e-05, + "loss": 9.6389, + "step": 46560 + }, + { + "epoch": 0.2325651077430148, + "grad_norm": 0.0885944813489914, + "learning_rate": 2.3080677864276954e-05, + "loss": 9.6307, + "step": 46570 + }, + { + "epoch": 0.23261504656795426, + "grad_norm": 0.0929674431681633, + "learning_rate": 2.3079175949335404e-05, + "loss": 9.6414, + "step": 46580 + }, + { + "epoch": 0.2326649853928937, + "grad_norm": 0.09185917675495148, + "learning_rate": 2.3077674034393854e-05, + "loss": 9.6348, + "step": 46590 + }, + { + "epoch": 0.23271492421783316, + "grad_norm": 0.09937243163585663, + "learning_rate": 2.30761721194523e-05, + "loss": 9.6294, + "step": 46600 + }, + { + "epoch": 0.2327648630427726, + "grad_norm": 0.09624093770980835, + "learning_rate": 2.3074670204510754e-05, + "loss": 9.6273, + "step": 46610 + }, + { + "epoch": 0.23281480186771206, + "grad_norm": 0.09259884804487228, + "learning_rate": 2.30731682895692e-05, + "loss": 9.6259, + "step": 46620 + }, + { + "epoch": 0.2328647406926515, + "grad_norm": 0.09767475724220276, + "learning_rate": 2.307166637462765e-05, + "loss": 9.6371, + "step": 46630 + }, + { + "epoch": 0.23291467951759096, + "grad_norm": 0.09269337356090546, + "learning_rate": 2.30701644596861e-05, + "loss": 9.6273, + "step": 46640 + }, + { + "epoch": 0.2329646183425304, + "grad_norm": 0.09703583270311356, + "learning_rate": 2.3068662544744548e-05, + "loss": 9.6257, + "step": 46650 + }, + { + "epoch": 0.23301455716746985, + "grad_norm": 0.09539482742547989, + "learning_rate": 2.3067160629803002e-05, + "loss": 9.631, + "step": 46660 + }, + { + "epoch": 0.2330644959924093, + "grad_norm": 0.09522981196641922, + "learning_rate": 2.306565871486145e-05, + "loss": 9.6335, + "step": 46670 + }, + { + "epoch": 0.23311443481734875, + "grad_norm": 0.09748247265815735, + "learning_rate": 2.30641567999199e-05, + "loss": 9.6364, + "step": 46680 + }, + { + "epoch": 0.2331643736422882, + "grad_norm": 0.09750822186470032, + "learning_rate": 2.306265488497835e-05, + "loss": 9.6327, + "step": 46690 + }, + { + "epoch": 0.23321431246722765, + "grad_norm": 0.09520017355680466, + "learning_rate": 2.3061152970036796e-05, + "loss": 9.6306, + "step": 46700 + }, + { + "epoch": 0.2332642512921671, + "grad_norm": 0.09403938800096512, + "learning_rate": 2.305965105509525e-05, + "loss": 9.6276, + "step": 46710 + }, + { + "epoch": 0.23331419011710655, + "grad_norm": 0.09592479467391968, + "learning_rate": 2.3058149140153696e-05, + "loss": 9.6331, + "step": 46720 + }, + { + "epoch": 0.233364128942046, + "grad_norm": 0.09444668143987656, + "learning_rate": 2.3056647225212146e-05, + "loss": 9.6345, + "step": 46730 + }, + { + "epoch": 0.23341406776698545, + "grad_norm": 0.09617168456315994, + "learning_rate": 2.3055145310270596e-05, + "loss": 9.6335, + "step": 46740 + }, + { + "epoch": 0.2334640065919249, + "grad_norm": 0.09481420367956161, + "learning_rate": 2.3053643395329043e-05, + "loss": 9.6353, + "step": 46750 + }, + { + "epoch": 0.23351394541686435, + "grad_norm": 0.09595254808664322, + "learning_rate": 2.3052141480387497e-05, + "loss": 9.6378, + "step": 46760 + }, + { + "epoch": 0.2335638842418038, + "grad_norm": 0.09526467323303223, + "learning_rate": 2.3050639565445944e-05, + "loss": 9.6292, + "step": 46770 + }, + { + "epoch": 0.23361382306674325, + "grad_norm": 0.09057611972093582, + "learning_rate": 2.3049137650504394e-05, + "loss": 9.6269, + "step": 46780 + }, + { + "epoch": 0.2336637618916827, + "grad_norm": 0.09435748308897018, + "learning_rate": 2.3047635735562844e-05, + "loss": 9.638, + "step": 46790 + }, + { + "epoch": 0.23371370071662215, + "grad_norm": 0.09341932833194733, + "learning_rate": 2.304613382062129e-05, + "loss": 9.6302, + "step": 46800 + }, + { + "epoch": 0.2337636395415616, + "grad_norm": 0.09780159592628479, + "learning_rate": 2.3044631905679744e-05, + "loss": 9.6343, + "step": 46810 + }, + { + "epoch": 0.23381357836650105, + "grad_norm": 0.09608551114797592, + "learning_rate": 2.304312999073819e-05, + "loss": 9.6324, + "step": 46820 + }, + { + "epoch": 0.2338635171914405, + "grad_norm": 0.10504171252250671, + "learning_rate": 2.304162807579664e-05, + "loss": 9.6361, + "step": 46830 + }, + { + "epoch": 0.23391345601637994, + "grad_norm": 0.09910661727190018, + "learning_rate": 2.304012616085509e-05, + "loss": 9.6292, + "step": 46840 + }, + { + "epoch": 0.2339633948413194, + "grad_norm": 0.10046257078647614, + "learning_rate": 2.3038624245913538e-05, + "loss": 9.6332, + "step": 46850 + }, + { + "epoch": 0.23401333366625884, + "grad_norm": 0.09717267006635666, + "learning_rate": 2.3037122330971992e-05, + "loss": 9.6163, + "step": 46860 + }, + { + "epoch": 0.2340632724911983, + "grad_norm": 0.09229366481304169, + "learning_rate": 2.303562041603044e-05, + "loss": 9.6261, + "step": 46870 + }, + { + "epoch": 0.23411321131613774, + "grad_norm": 0.09404976665973663, + "learning_rate": 2.303411850108889e-05, + "loss": 9.6323, + "step": 46880 + }, + { + "epoch": 0.2341631501410772, + "grad_norm": 0.09583253413438797, + "learning_rate": 2.303261658614734e-05, + "loss": 9.6264, + "step": 46890 + }, + { + "epoch": 0.23421308896601664, + "grad_norm": 0.09781666100025177, + "learning_rate": 2.303111467120579e-05, + "loss": 9.6268, + "step": 46900 + }, + { + "epoch": 0.2342630277909561, + "grad_norm": 0.09339626133441925, + "learning_rate": 2.302961275626424e-05, + "loss": 9.6277, + "step": 46910 + }, + { + "epoch": 0.23431296661589554, + "grad_norm": 0.09981784224510193, + "learning_rate": 2.3028110841322686e-05, + "loss": 9.6338, + "step": 46920 + }, + { + "epoch": 0.234362905440835, + "grad_norm": 0.09911654889583588, + "learning_rate": 2.3026608926381136e-05, + "loss": 9.6334, + "step": 46930 + }, + { + "epoch": 0.23441284426577444, + "grad_norm": 0.09392616897821426, + "learning_rate": 2.3025107011439587e-05, + "loss": 9.6212, + "step": 46940 + }, + { + "epoch": 0.2344627830907139, + "grad_norm": 0.10090331733226776, + "learning_rate": 2.3023605096498037e-05, + "loss": 9.6232, + "step": 46950 + }, + { + "epoch": 0.23451272191565334, + "grad_norm": 0.09081251919269562, + "learning_rate": 2.3022103181556487e-05, + "loss": 9.6258, + "step": 46960 + }, + { + "epoch": 0.2345626607405928, + "grad_norm": 0.09683696180582047, + "learning_rate": 2.3020601266614934e-05, + "loss": 9.6245, + "step": 46970 + }, + { + "epoch": 0.2346125995655322, + "grad_norm": 0.09155721217393875, + "learning_rate": 2.3019099351673384e-05, + "loss": 9.6267, + "step": 46980 + }, + { + "epoch": 0.23466253839047166, + "grad_norm": 0.09573863446712494, + "learning_rate": 2.3017597436731834e-05, + "loss": 9.6289, + "step": 46990 + }, + { + "epoch": 0.2347124772154111, + "grad_norm": 0.09463366866111755, + "learning_rate": 2.3016095521790284e-05, + "loss": 9.6195, + "step": 47000 + }, + { + "epoch": 0.23476241604035056, + "grad_norm": 0.10097618401050568, + "learning_rate": 2.3014593606848734e-05, + "loss": 9.6101, + "step": 47010 + }, + { + "epoch": 0.23481235486529, + "grad_norm": 0.09264184534549713, + "learning_rate": 2.301309169190718e-05, + "loss": 9.621, + "step": 47020 + }, + { + "epoch": 0.23486229369022946, + "grad_norm": 0.09526827186346054, + "learning_rate": 2.301158977696563e-05, + "loss": 9.6284, + "step": 47030 + }, + { + "epoch": 0.2349122325151689, + "grad_norm": 0.09512627869844437, + "learning_rate": 2.301008786202408e-05, + "loss": 9.6286, + "step": 47040 + }, + { + "epoch": 0.23496217134010836, + "grad_norm": 0.09627030789852142, + "learning_rate": 2.300858594708253e-05, + "loss": 9.6311, + "step": 47050 + }, + { + "epoch": 0.2350121101650478, + "grad_norm": 0.08953950554132462, + "learning_rate": 2.3007084032140982e-05, + "loss": 9.6195, + "step": 47060 + }, + { + "epoch": 0.23506204898998725, + "grad_norm": 0.09324587136507034, + "learning_rate": 2.300558211719943e-05, + "loss": 9.6292, + "step": 47070 + }, + { + "epoch": 0.2351119878149267, + "grad_norm": 0.09066016227006912, + "learning_rate": 2.300408020225788e-05, + "loss": 9.6237, + "step": 47080 + }, + { + "epoch": 0.23516192663986615, + "grad_norm": 0.08983506262302399, + "learning_rate": 2.300257828731633e-05, + "loss": 9.6283, + "step": 47090 + }, + { + "epoch": 0.2352118654648056, + "grad_norm": 0.09250736236572266, + "learning_rate": 2.300107637237478e-05, + "loss": 9.6245, + "step": 47100 + }, + { + "epoch": 0.23526180428974505, + "grad_norm": 0.09599629789590836, + "learning_rate": 2.299957445743323e-05, + "loss": 9.6202, + "step": 47110 + }, + { + "epoch": 0.2353117431146845, + "grad_norm": 0.09286179393529892, + "learning_rate": 2.2998072542491676e-05, + "loss": 9.6168, + "step": 47120 + }, + { + "epoch": 0.23536168193962395, + "grad_norm": 0.09755680710077286, + "learning_rate": 2.2996570627550126e-05, + "loss": 9.6249, + "step": 47130 + }, + { + "epoch": 0.2354116207645634, + "grad_norm": 0.09816092252731323, + "learning_rate": 2.2995068712608577e-05, + "loss": 9.6213, + "step": 47140 + }, + { + "epoch": 0.23546155958950285, + "grad_norm": 0.08990190178155899, + "learning_rate": 2.2993566797667027e-05, + "loss": 9.6237, + "step": 47150 + }, + { + "epoch": 0.2355114984144423, + "grad_norm": 0.09264282137155533, + "learning_rate": 2.2992064882725477e-05, + "loss": 9.6222, + "step": 47160 + }, + { + "epoch": 0.23556143723938175, + "grad_norm": 0.0953594446182251, + "learning_rate": 2.2990562967783924e-05, + "loss": 9.6219, + "step": 47170 + }, + { + "epoch": 0.2356113760643212, + "grad_norm": 0.09457553923130035, + "learning_rate": 2.2989061052842374e-05, + "loss": 9.6167, + "step": 47180 + }, + { + "epoch": 0.23566131488926065, + "grad_norm": 0.09653682261705399, + "learning_rate": 2.2987559137900824e-05, + "loss": 9.6214, + "step": 47190 + }, + { + "epoch": 0.2357112537142001, + "grad_norm": 0.09378449618816376, + "learning_rate": 2.2986057222959274e-05, + "loss": 9.6282, + "step": 47200 + }, + { + "epoch": 0.23576119253913955, + "grad_norm": 0.0928981825709343, + "learning_rate": 2.2984555308017724e-05, + "loss": 9.6326, + "step": 47210 + }, + { + "epoch": 0.235811131364079, + "grad_norm": 0.0937650203704834, + "learning_rate": 2.298305339307617e-05, + "loss": 9.6249, + "step": 47220 + }, + { + "epoch": 0.23586107018901845, + "grad_norm": 0.09575259685516357, + "learning_rate": 2.298155147813462e-05, + "loss": 9.6179, + "step": 47230 + }, + { + "epoch": 0.2359110090139579, + "grad_norm": 75396.46875, + "learning_rate": 2.298004956319307e-05, + "loss": 9.9622, + "step": 47240 + }, + { + "epoch": 0.23596094783889734, + "grad_norm": 0.09559078514575958, + "learning_rate": 2.2978547648251522e-05, + "loss": 9.6256, + "step": 47250 + }, + { + "epoch": 0.2360108866638368, + "grad_norm": 0.09144776314496994, + "learning_rate": 2.2977045733309972e-05, + "loss": 17.3369, + "step": 47260 + }, + { + "epoch": 0.23606082548877624, + "grad_norm": 0.09429620951414108, + "learning_rate": 2.2975543818368422e-05, + "loss": 12.2017, + "step": 47270 + }, + { + "epoch": 0.2361107643137157, + "grad_norm": 0.09179428964853287, + "learning_rate": 2.297404190342687e-05, + "loss": 9.6117, + "step": 47280 + }, + { + "epoch": 0.23616070313865514, + "grad_norm": 0.09685272723436356, + "learning_rate": 2.297253998848532e-05, + "loss": 9.6269, + "step": 47290 + }, + { + "epoch": 0.2362106419635946, + "grad_norm": 0.09493184089660645, + "learning_rate": 2.297103807354377e-05, + "loss": 9.6188, + "step": 47300 + }, + { + "epoch": 0.23626058078853404, + "grad_norm": 0.10096865147352219, + "learning_rate": 2.296953615860222e-05, + "loss": 9.6169, + "step": 47310 + }, + { + "epoch": 0.2363105196134735, + "grad_norm": 0.10004733502864838, + "learning_rate": 2.296803424366067e-05, + "loss": 9.6244, + "step": 47320 + }, + { + "epoch": 0.23636045843841294, + "grad_norm": 0.09608258306980133, + "learning_rate": 2.2966532328719116e-05, + "loss": 9.6154, + "step": 47330 + }, + { + "epoch": 0.2364103972633524, + "grad_norm": 0.09426455199718475, + "learning_rate": 2.2965030413777567e-05, + "loss": 9.6204, + "step": 47340 + }, + { + "epoch": 0.23646033608829184, + "grad_norm": 0.09753237664699554, + "learning_rate": 2.2963528498836017e-05, + "loss": 9.6185, + "step": 47350 + }, + { + "epoch": 0.2365102749132313, + "grad_norm": 0.09157487750053406, + "learning_rate": 2.2962026583894467e-05, + "loss": 9.6327, + "step": 47360 + }, + { + "epoch": 0.23656021373817074, + "grad_norm": 0.10023976117372513, + "learning_rate": 2.2960524668952917e-05, + "loss": 9.6226, + "step": 47370 + }, + { + "epoch": 0.2366101525631102, + "grad_norm": 0.09586095809936523, + "learning_rate": 2.2959022754011364e-05, + "loss": 9.6217, + "step": 47380 + }, + { + "epoch": 0.23666009138804964, + "grad_norm": 0.0944489985704422, + "learning_rate": 2.2957520839069814e-05, + "loss": 9.6157, + "step": 47390 + }, + { + "epoch": 0.2367100302129891, + "grad_norm": 0.09229391068220139, + "learning_rate": 2.2956018924128264e-05, + "loss": 9.6213, + "step": 47400 + }, + { + "epoch": 0.23675996903792854, + "grad_norm": 0.09394244849681854, + "learning_rate": 2.2954517009186714e-05, + "loss": 9.6235, + "step": 47410 + }, + { + "epoch": 0.23680990786286799, + "grad_norm": 0.0892910584807396, + "learning_rate": 2.2953015094245165e-05, + "loss": 9.6202, + "step": 47420 + }, + { + "epoch": 0.23685984668780743, + "grad_norm": 0.09789809584617615, + "learning_rate": 2.295151317930361e-05, + "loss": 9.6224, + "step": 47430 + }, + { + "epoch": 0.23690978551274688, + "grad_norm": 0.0950971245765686, + "learning_rate": 2.295001126436206e-05, + "loss": 9.6107, + "step": 47440 + }, + { + "epoch": 0.23695972433768633, + "grad_norm": 0.09064780175685883, + "learning_rate": 2.2948509349420512e-05, + "loss": 9.6138, + "step": 47450 + }, + { + "epoch": 0.23700966316262578, + "grad_norm": 0.09859991818666458, + "learning_rate": 2.2947007434478962e-05, + "loss": 9.6197, + "step": 47460 + }, + { + "epoch": 0.23705960198756523, + "grad_norm": 0.0948246568441391, + "learning_rate": 2.2945505519537412e-05, + "loss": 9.6189, + "step": 47470 + }, + { + "epoch": 0.23710954081250468, + "grad_norm": 0.09848882257938385, + "learning_rate": 2.294400360459586e-05, + "loss": 9.611, + "step": 47480 + }, + { + "epoch": 0.23715947963744413, + "grad_norm": 0.10083547234535217, + "learning_rate": 2.294250168965431e-05, + "loss": 9.6214, + "step": 47490 + }, + { + "epoch": 0.23720941846238358, + "grad_norm": 0.09797154366970062, + "learning_rate": 2.294099977471276e-05, + "loss": 9.6184, + "step": 47500 + }, + { + "epoch": 0.23725935728732303, + "grad_norm": 0.0983353927731514, + "learning_rate": 2.293949785977121e-05, + "loss": 9.6119, + "step": 47510 + }, + { + "epoch": 0.23730929611226248, + "grad_norm": 0.10210006684064865, + "learning_rate": 2.293799594482966e-05, + "loss": 9.6136, + "step": 47520 + }, + { + "epoch": 0.23735923493720193, + "grad_norm": 0.10113011300563812, + "learning_rate": 2.2936494029888106e-05, + "loss": 9.6137, + "step": 47530 + }, + { + "epoch": 0.23740917376214138, + "grad_norm": 0.0893891304731369, + "learning_rate": 2.2934992114946557e-05, + "loss": 9.6155, + "step": 47540 + }, + { + "epoch": 0.23745911258708083, + "grad_norm": 0.09196850657463074, + "learning_rate": 2.2933490200005007e-05, + "loss": 9.6032, + "step": 47550 + }, + { + "epoch": 0.23750905141202028, + "grad_norm": 0.08714939653873444, + "learning_rate": 2.2931988285063457e-05, + "loss": 9.6155, + "step": 47560 + }, + { + "epoch": 0.23755899023695973, + "grad_norm": 0.0882970318198204, + "learning_rate": 2.2930486370121907e-05, + "loss": 9.6081, + "step": 47570 + }, + { + "epoch": 0.23760892906189918, + "grad_norm": 0.09797061234712601, + "learning_rate": 2.2928984455180354e-05, + "loss": 9.6092, + "step": 47580 + }, + { + "epoch": 0.23765886788683863, + "grad_norm": 0.09515173733234406, + "learning_rate": 2.2927482540238807e-05, + "loss": 9.6153, + "step": 47590 + }, + { + "epoch": 0.23770880671177808, + "grad_norm": 0.09541980177164078, + "learning_rate": 2.2925980625297254e-05, + "loss": 9.6167, + "step": 47600 + }, + { + "epoch": 0.23775874553671753, + "grad_norm": 0.09236780554056168, + "learning_rate": 2.2924478710355704e-05, + "loss": 9.6251, + "step": 47610 + }, + { + "epoch": 0.23780868436165697, + "grad_norm": 0.09183859080076218, + "learning_rate": 2.2922976795414155e-05, + "loss": 9.6104, + "step": 47620 + }, + { + "epoch": 0.23785862318659642, + "grad_norm": 0.1002270057797432, + "learning_rate": 2.29214748804726e-05, + "loss": 9.6142, + "step": 47630 + }, + { + "epoch": 0.23790856201153587, + "grad_norm": 0.09763851016759872, + "learning_rate": 2.2919972965531055e-05, + "loss": 9.6067, + "step": 47640 + }, + { + "epoch": 0.23795850083647532, + "grad_norm": 0.09223950654268265, + "learning_rate": 2.2918471050589502e-05, + "loss": 9.6198, + "step": 47650 + }, + { + "epoch": 0.23800843966141477, + "grad_norm": 0.09290538728237152, + "learning_rate": 2.2916969135647952e-05, + "loss": 9.6121, + "step": 47660 + }, + { + "epoch": 0.23805837848635422, + "grad_norm": 0.09840866923332214, + "learning_rate": 2.2915467220706402e-05, + "loss": 9.6148, + "step": 47670 + }, + { + "epoch": 0.23810831731129367, + "grad_norm": 0.0904998853802681, + "learning_rate": 2.291396530576485e-05, + "loss": 9.6134, + "step": 47680 + }, + { + "epoch": 0.23815825613623312, + "grad_norm": 0.09012880176305771, + "learning_rate": 2.2912463390823302e-05, + "loss": 9.6154, + "step": 47690 + }, + { + "epoch": 0.23820819496117257, + "grad_norm": 0.08958422392606735, + "learning_rate": 2.291096147588175e-05, + "loss": 9.6192, + "step": 47700 + }, + { + "epoch": 0.23825813378611202, + "grad_norm": 0.09270545840263367, + "learning_rate": 2.29094595609402e-05, + "loss": 9.6096, + "step": 47710 + }, + { + "epoch": 0.23830807261105147, + "grad_norm": 0.09306894987821579, + "learning_rate": 2.290795764599865e-05, + "loss": 9.604, + "step": 47720 + }, + { + "epoch": 0.23835801143599092, + "grad_norm": 0.1006632074713707, + "learning_rate": 2.2906455731057096e-05, + "loss": 9.6153, + "step": 47730 + }, + { + "epoch": 0.23840795026093037, + "grad_norm": 0.08890628069639206, + "learning_rate": 2.290495381611555e-05, + "loss": 9.6092, + "step": 47740 + }, + { + "epoch": 0.23845788908586982, + "grad_norm": 0.09797194600105286, + "learning_rate": 2.2903451901173997e-05, + "loss": 9.6124, + "step": 47750 + }, + { + "epoch": 0.23850782791080927, + "grad_norm": 0.0914837047457695, + "learning_rate": 2.2901949986232447e-05, + "loss": 9.6155, + "step": 47760 + }, + { + "epoch": 0.23855776673574872, + "grad_norm": 0.09398198127746582, + "learning_rate": 2.2900448071290897e-05, + "loss": 9.6183, + "step": 47770 + }, + { + "epoch": 0.23860770556068817, + "grad_norm": 0.10565483570098877, + "learning_rate": 2.2898946156349344e-05, + "loss": 9.6146, + "step": 47780 + }, + { + "epoch": 0.23865764438562762, + "grad_norm": 0.09536195546388626, + "learning_rate": 2.2897444241407797e-05, + "loss": 9.6134, + "step": 47790 + }, + { + "epoch": 0.23870758321056706, + "grad_norm": 0.0907558798789978, + "learning_rate": 2.2895942326466244e-05, + "loss": 9.6117, + "step": 47800 + }, + { + "epoch": 0.23875752203550651, + "grad_norm": 0.09679032117128372, + "learning_rate": 2.2894440411524694e-05, + "loss": 9.6129, + "step": 47810 + }, + { + "epoch": 0.23880746086044596, + "grad_norm": 0.08943892270326614, + "learning_rate": 2.2892938496583145e-05, + "loss": 9.618, + "step": 47820 + }, + { + "epoch": 0.2388573996853854, + "grad_norm": 0.0918293297290802, + "learning_rate": 2.289143658164159e-05, + "loss": 9.6098, + "step": 47830 + }, + { + "epoch": 0.23890733851032486, + "grad_norm": 0.09785201400518417, + "learning_rate": 2.2889934666700045e-05, + "loss": 9.6052, + "step": 47840 + }, + { + "epoch": 0.2389572773352643, + "grad_norm": 0.09538784623146057, + "learning_rate": 2.2888432751758492e-05, + "loss": 9.5993, + "step": 47850 + }, + { + "epoch": 0.23900721616020376, + "grad_norm": 0.09343953430652618, + "learning_rate": 2.2886930836816942e-05, + "loss": 9.6063, + "step": 47860 + }, + { + "epoch": 0.2390571549851432, + "grad_norm": 0.0926864743232727, + "learning_rate": 2.2885428921875392e-05, + "loss": 9.6103, + "step": 47870 + }, + { + "epoch": 0.23910709381008266, + "grad_norm": 0.09175824373960495, + "learning_rate": 2.288392700693384e-05, + "loss": 9.6072, + "step": 47880 + }, + { + "epoch": 0.2391570326350221, + "grad_norm": 0.09742142260074615, + "learning_rate": 2.2882425091992292e-05, + "loss": 9.6058, + "step": 47890 + }, + { + "epoch": 0.23920697145996156, + "grad_norm": 0.09516909718513489, + "learning_rate": 2.288092317705074e-05, + "loss": 9.6085, + "step": 47900 + }, + { + "epoch": 0.239256910284901, + "grad_norm": 0.09717977792024612, + "learning_rate": 2.2879421262109193e-05, + "loss": 9.6036, + "step": 47910 + }, + { + "epoch": 0.23930684910984046, + "grad_norm": 0.09463629871606827, + "learning_rate": 2.287791934716764e-05, + "loss": 9.6029, + "step": 47920 + }, + { + "epoch": 0.2393567879347799, + "grad_norm": 0.09392336010932922, + "learning_rate": 2.2876417432226086e-05, + "loss": 9.6177, + "step": 47930 + }, + { + "epoch": 0.23940672675971936, + "grad_norm": 0.09007874131202698, + "learning_rate": 2.287491551728454e-05, + "loss": 9.6079, + "step": 47940 + }, + { + "epoch": 0.2394566655846588, + "grad_norm": 0.09397672861814499, + "learning_rate": 2.2873413602342987e-05, + "loss": 9.6104, + "step": 47950 + }, + { + "epoch": 0.23950660440959823, + "grad_norm": 0.08829968422651291, + "learning_rate": 2.287191168740144e-05, + "loss": 9.6029, + "step": 47960 + }, + { + "epoch": 0.23955654323453768, + "grad_norm": 0.09608025848865509, + "learning_rate": 2.2870409772459887e-05, + "loss": 9.6063, + "step": 47970 + }, + { + "epoch": 0.23960648205947713, + "grad_norm": 0.09699834138154984, + "learning_rate": 2.2868907857518334e-05, + "loss": 9.6119, + "step": 47980 + }, + { + "epoch": 0.23965642088441658, + "grad_norm": 0.08946532756090164, + "learning_rate": 2.2867405942576787e-05, + "loss": 9.6014, + "step": 47990 + }, + { + "epoch": 0.23970635970935603, + "grad_norm": 0.0944380983710289, + "learning_rate": 2.2865904027635234e-05, + "loss": 9.6076, + "step": 48000 + }, + { + "epoch": 0.23975629853429548, + "grad_norm": 0.09391778707504272, + "learning_rate": 2.2864402112693688e-05, + "loss": 9.6034, + "step": 48010 + }, + { + "epoch": 0.23980623735923493, + "grad_norm": 0.09647521376609802, + "learning_rate": 2.2862900197752135e-05, + "loss": 9.6077, + "step": 48020 + }, + { + "epoch": 0.23985617618417437, + "grad_norm": 0.09497778117656708, + "learning_rate": 2.286139828281058e-05, + "loss": 9.6047, + "step": 48030 + }, + { + "epoch": 0.23990611500911382, + "grad_norm": 0.10870006680488586, + "learning_rate": 2.2859896367869035e-05, + "loss": 9.6094, + "step": 48040 + }, + { + "epoch": 0.23995605383405327, + "grad_norm": 0.09138607233762741, + "learning_rate": 2.2858394452927482e-05, + "loss": 9.6093, + "step": 48050 + }, + { + "epoch": 0.24000599265899272, + "grad_norm": 0.09472063183784485, + "learning_rate": 2.2856892537985935e-05, + "loss": 9.6105, + "step": 48060 + }, + { + "epoch": 0.24005593148393217, + "grad_norm": 0.09620670229196548, + "learning_rate": 2.2855390623044382e-05, + "loss": 9.5972, + "step": 48070 + }, + { + "epoch": 0.24010587030887162, + "grad_norm": 0.09534239768981934, + "learning_rate": 2.285388870810283e-05, + "loss": 9.6064, + "step": 48080 + }, + { + "epoch": 0.24015580913381107, + "grad_norm": 0.09170860797166824, + "learning_rate": 2.2852386793161282e-05, + "loss": 9.5999, + "step": 48090 + }, + { + "epoch": 0.24020574795875052, + "grad_norm": 0.09515348076820374, + "learning_rate": 2.285088487821973e-05, + "loss": 9.6172, + "step": 48100 + }, + { + "epoch": 0.24025568678368997, + "grad_norm": 0.09469642490148544, + "learning_rate": 2.2849382963278183e-05, + "loss": 9.6089, + "step": 48110 + }, + { + "epoch": 0.24030562560862942, + "grad_norm": 0.09230419993400574, + "learning_rate": 2.284788104833663e-05, + "loss": 9.6098, + "step": 48120 + }, + { + "epoch": 0.24035556443356887, + "grad_norm": 0.09317916631698608, + "learning_rate": 2.2846379133395076e-05, + "loss": 9.5989, + "step": 48130 + }, + { + "epoch": 0.24040550325850832, + "grad_norm": 0.0909963995218277, + "learning_rate": 2.284487721845353e-05, + "loss": 9.6125, + "step": 48140 + }, + { + "epoch": 0.24045544208344777, + "grad_norm": 0.09574473649263382, + "learning_rate": 2.2843375303511977e-05, + "loss": 9.601, + "step": 48150 + }, + { + "epoch": 0.24050538090838722, + "grad_norm": 0.09136365354061127, + "learning_rate": 2.284187338857043e-05, + "loss": 9.6029, + "step": 48160 + }, + { + "epoch": 0.24055531973332667, + "grad_norm": 0.0943564847111702, + "learning_rate": 2.2840371473628877e-05, + "loss": 9.6088, + "step": 48170 + }, + { + "epoch": 0.24060525855826612, + "grad_norm": 0.09551718086004257, + "learning_rate": 2.2838869558687324e-05, + "loss": 9.6124, + "step": 48180 + }, + { + "epoch": 0.24065519738320557, + "grad_norm": 0.09354714304208755, + "learning_rate": 2.2837367643745777e-05, + "loss": 9.6024, + "step": 48190 + }, + { + "epoch": 0.24070513620814502, + "grad_norm": 0.09227943420410156, + "learning_rate": 2.2835865728804224e-05, + "loss": 9.6047, + "step": 48200 + }, + { + "epoch": 0.24075507503308446, + "grad_norm": 0.09799333661794662, + "learning_rate": 2.2834363813862678e-05, + "loss": 9.6027, + "step": 48210 + }, + { + "epoch": 0.24080501385802391, + "grad_norm": 0.09457400441169739, + "learning_rate": 2.2832861898921125e-05, + "loss": 9.6017, + "step": 48220 + }, + { + "epoch": 0.24085495268296336, + "grad_norm": 0.09242776036262512, + "learning_rate": 2.2831359983979575e-05, + "loss": 9.6013, + "step": 48230 + }, + { + "epoch": 0.2409048915079028, + "grad_norm": 0.09647472202777863, + "learning_rate": 2.2829858069038025e-05, + "loss": 9.5992, + "step": 48240 + }, + { + "epoch": 0.24095483033284226, + "grad_norm": 0.09526066482067108, + "learning_rate": 2.2828356154096472e-05, + "loss": 9.6049, + "step": 48250 + }, + { + "epoch": 0.2410047691577817, + "grad_norm": 0.09362996369600296, + "learning_rate": 2.2826854239154925e-05, + "loss": 9.6031, + "step": 48260 + }, + { + "epoch": 0.24105470798272116, + "grad_norm": 0.0959758386015892, + "learning_rate": 2.2825352324213372e-05, + "loss": 9.6, + "step": 48270 + }, + { + "epoch": 0.2411046468076606, + "grad_norm": 0.09466012567281723, + "learning_rate": 2.2823850409271822e-05, + "loss": 9.6062, + "step": 48280 + }, + { + "epoch": 0.24115458563260006, + "grad_norm": 0.08923938125371933, + "learning_rate": 2.2822348494330272e-05, + "loss": 9.5986, + "step": 48290 + }, + { + "epoch": 0.2412045244575395, + "grad_norm": 0.09768188744783401, + "learning_rate": 2.282084657938872e-05, + "loss": 9.5975, + "step": 48300 + }, + { + "epoch": 0.24125446328247896, + "grad_norm": 0.0997987762093544, + "learning_rate": 2.2819344664447173e-05, + "loss": 9.5978, + "step": 48310 + }, + { + "epoch": 0.2413044021074184, + "grad_norm": 0.0951227992773056, + "learning_rate": 2.281784274950562e-05, + "loss": 9.6114, + "step": 48320 + }, + { + "epoch": 0.24135434093235786, + "grad_norm": 0.08892112970352173, + "learning_rate": 2.281634083456407e-05, + "loss": 9.6082, + "step": 48330 + }, + { + "epoch": 0.2414042797572973, + "grad_norm": 0.0971013680100441, + "learning_rate": 2.281483891962252e-05, + "loss": 9.6065, + "step": 48340 + }, + { + "epoch": 0.24145421858223676, + "grad_norm": 0.09072565287351608, + "learning_rate": 2.2813337004680967e-05, + "loss": 9.5978, + "step": 48350 + }, + { + "epoch": 0.2415041574071762, + "grad_norm": 0.08944379538297653, + "learning_rate": 2.281183508973942e-05, + "loss": 9.5961, + "step": 48360 + }, + { + "epoch": 0.24155409623211566, + "grad_norm": 0.09630691260099411, + "learning_rate": 2.2810333174797867e-05, + "loss": 9.6005, + "step": 48370 + }, + { + "epoch": 0.2416040350570551, + "grad_norm": 0.09116937965154648, + "learning_rate": 2.2808831259856317e-05, + "loss": 9.6018, + "step": 48380 + }, + { + "epoch": 0.24165397388199455, + "grad_norm": 0.09356780350208282, + "learning_rate": 2.2807329344914768e-05, + "loss": 9.6053, + "step": 48390 + }, + { + "epoch": 0.241703912706934, + "grad_norm": 0.09499246627092361, + "learning_rate": 2.2805827429973214e-05, + "loss": 9.6004, + "step": 48400 + }, + { + "epoch": 0.24175385153187345, + "grad_norm": 0.09654933959245682, + "learning_rate": 2.2804325515031668e-05, + "loss": 9.5968, + "step": 48410 + }, + { + "epoch": 0.2418037903568129, + "grad_norm": 0.09912944585084915, + "learning_rate": 2.2802823600090115e-05, + "loss": 9.5996, + "step": 48420 + }, + { + "epoch": 0.24185372918175235, + "grad_norm": 0.09671138226985931, + "learning_rate": 2.2801321685148565e-05, + "loss": 9.6, + "step": 48430 + }, + { + "epoch": 0.2419036680066918, + "grad_norm": 0.09061242640018463, + "learning_rate": 2.2799819770207015e-05, + "loss": 9.6022, + "step": 48440 + }, + { + "epoch": 0.24195360683163125, + "grad_norm": 0.09459752589464188, + "learning_rate": 2.2798317855265462e-05, + "loss": 9.5978, + "step": 48450 + }, + { + "epoch": 0.2420035456565707, + "grad_norm": 0.08927514404058456, + "learning_rate": 2.2796815940323915e-05, + "loss": 9.6072, + "step": 48460 + }, + { + "epoch": 0.24205348448151015, + "grad_norm": 0.09183401614427567, + "learning_rate": 2.2795314025382362e-05, + "loss": 9.5982, + "step": 48470 + }, + { + "epoch": 0.2421034233064496, + "grad_norm": 0.09792063385248184, + "learning_rate": 2.2793812110440812e-05, + "loss": 9.5965, + "step": 48480 + }, + { + "epoch": 0.24215336213138905, + "grad_norm": 0.09104972332715988, + "learning_rate": 2.2792310195499263e-05, + "loss": 9.5929, + "step": 48490 + }, + { + "epoch": 0.2422033009563285, + "grad_norm": 0.09637004882097244, + "learning_rate": 2.279080828055771e-05, + "loss": 9.6063, + "step": 48500 + }, + { + "epoch": 0.24225323978126795, + "grad_norm": 0.09254331886768341, + "learning_rate": 2.2789306365616163e-05, + "loss": 9.6029, + "step": 48510 + }, + { + "epoch": 0.2423031786062074, + "grad_norm": 0.09216886013746262, + "learning_rate": 2.278780445067461e-05, + "loss": 9.5954, + "step": 48520 + }, + { + "epoch": 0.24235311743114685, + "grad_norm": 0.09522795677185059, + "learning_rate": 2.278630253573306e-05, + "loss": 9.6019, + "step": 48530 + }, + { + "epoch": 0.2424030562560863, + "grad_norm": 0.09067758917808533, + "learning_rate": 2.278480062079151e-05, + "loss": 9.5966, + "step": 48540 + }, + { + "epoch": 0.24245299508102575, + "grad_norm": 0.10162653774023056, + "learning_rate": 2.278329870584996e-05, + "loss": 9.5896, + "step": 48550 + }, + { + "epoch": 0.2425029339059652, + "grad_norm": 0.09732117503881454, + "learning_rate": 2.278179679090841e-05, + "loss": 9.5988, + "step": 48560 + }, + { + "epoch": 0.24255287273090465, + "grad_norm": 0.09825015068054199, + "learning_rate": 2.2780294875966857e-05, + "loss": 9.586, + "step": 48570 + }, + { + "epoch": 0.2426028115558441, + "grad_norm": 0.09816539287567139, + "learning_rate": 2.2778792961025307e-05, + "loss": 9.6021, + "step": 48580 + }, + { + "epoch": 0.24265275038078354, + "grad_norm": 0.09649891406297684, + "learning_rate": 2.2777291046083758e-05, + "loss": 9.5972, + "step": 48590 + }, + { + "epoch": 0.242702689205723, + "grad_norm": 0.09952486306428909, + "learning_rate": 2.2775789131142208e-05, + "loss": 9.6009, + "step": 48600 + }, + { + "epoch": 0.24275262803066244, + "grad_norm": 0.0953533872961998, + "learning_rate": 2.2774287216200658e-05, + "loss": 9.602, + "step": 48610 + }, + { + "epoch": 0.2428025668556019, + "grad_norm": 0.09693508595228195, + "learning_rate": 2.2772785301259105e-05, + "loss": 9.5935, + "step": 48620 + }, + { + "epoch": 0.24285250568054134, + "grad_norm": 0.08956372737884521, + "learning_rate": 2.2771283386317555e-05, + "loss": 9.5966, + "step": 48630 + }, + { + "epoch": 0.2429024445054808, + "grad_norm": 0.10089200735092163, + "learning_rate": 2.2769781471376005e-05, + "loss": 9.5974, + "step": 48640 + }, + { + "epoch": 0.24295238333042024, + "grad_norm": 0.09470240026712418, + "learning_rate": 2.2768279556434455e-05, + "loss": 9.5923, + "step": 48650 + }, + { + "epoch": 0.2430023221553597, + "grad_norm": 0.08645424246788025, + "learning_rate": 2.2766777641492905e-05, + "loss": 9.597, + "step": 48660 + }, + { + "epoch": 0.24305226098029914, + "grad_norm": 0.09936508536338806, + "learning_rate": 2.2765275726551352e-05, + "loss": 9.5877, + "step": 48670 + }, + { + "epoch": 0.2431021998052386, + "grad_norm": 0.09653277695178986, + "learning_rate": 2.2763773811609802e-05, + "loss": 9.5927, + "step": 48680 + }, + { + "epoch": 0.24315213863017804, + "grad_norm": 0.09702669084072113, + "learning_rate": 2.2762271896668253e-05, + "loss": 9.5947, + "step": 48690 + }, + { + "epoch": 0.2432020774551175, + "grad_norm": 0.09212882816791534, + "learning_rate": 2.2760769981726703e-05, + "loss": 9.5822, + "step": 48700 + }, + { + "epoch": 0.24325201628005694, + "grad_norm": 0.09269695729017258, + "learning_rate": 2.2759268066785153e-05, + "loss": 9.5927, + "step": 48710 + }, + { + "epoch": 0.2433019551049964, + "grad_norm": 0.09830940514802933, + "learning_rate": 2.27577661518436e-05, + "loss": 9.5933, + "step": 48720 + }, + { + "epoch": 0.24335189392993584, + "grad_norm": 0.08732043951749802, + "learning_rate": 2.275626423690205e-05, + "loss": 9.5939, + "step": 48730 + }, + { + "epoch": 0.24340183275487529, + "grad_norm": 0.09308458864688873, + "learning_rate": 2.27547623219605e-05, + "loss": 9.5926, + "step": 48740 + }, + { + "epoch": 0.24345177157981474, + "grad_norm": 0.09478411078453064, + "learning_rate": 2.275326040701895e-05, + "loss": 9.5954, + "step": 48750 + }, + { + "epoch": 0.24350171040475418, + "grad_norm": 0.09362474828958511, + "learning_rate": 2.27517584920774e-05, + "loss": 9.5937, + "step": 48760 + }, + { + "epoch": 0.24355164922969363, + "grad_norm": 0.09029746055603027, + "learning_rate": 2.2750256577135847e-05, + "loss": 9.5973, + "step": 48770 + }, + { + "epoch": 0.24360158805463308, + "grad_norm": 0.10020599514245987, + "learning_rate": 2.2748754662194297e-05, + "loss": 9.5866, + "step": 48780 + }, + { + "epoch": 0.24365152687957253, + "grad_norm": 0.1007223054766655, + "learning_rate": 2.2747252747252748e-05, + "loss": 9.5955, + "step": 48790 + }, + { + "epoch": 0.24370146570451198, + "grad_norm": 0.0981803610920906, + "learning_rate": 2.2745750832311198e-05, + "loss": 9.5805, + "step": 48800 + }, + { + "epoch": 0.24375140452945143, + "grad_norm": 0.09510128200054169, + "learning_rate": 2.2744248917369648e-05, + "loss": 9.5954, + "step": 48810 + }, + { + "epoch": 0.24380134335439088, + "grad_norm": 0.09191947430372238, + "learning_rate": 2.2742747002428095e-05, + "loss": 9.6015, + "step": 48820 + }, + { + "epoch": 0.24385128217933033, + "grad_norm": 0.09448053687810898, + "learning_rate": 2.2741245087486545e-05, + "loss": 9.5898, + "step": 48830 + }, + { + "epoch": 0.24390122100426978, + "grad_norm": 0.09542269259691238, + "learning_rate": 2.2739743172544995e-05, + "loss": 9.591, + "step": 48840 + }, + { + "epoch": 0.24395115982920923, + "grad_norm": 0.09461547434329987, + "learning_rate": 2.2738241257603445e-05, + "loss": 9.5968, + "step": 48850 + }, + { + "epoch": 0.24400109865414868, + "grad_norm": 0.09337666630744934, + "learning_rate": 2.2736739342661895e-05, + "loss": 9.5898, + "step": 48860 + }, + { + "epoch": 0.24405103747908813, + "grad_norm": 0.09441894292831421, + "learning_rate": 2.2735237427720346e-05, + "loss": 9.5903, + "step": 48870 + }, + { + "epoch": 0.24410097630402758, + "grad_norm": 0.0947941318154335, + "learning_rate": 2.2733735512778792e-05, + "loss": 9.5884, + "step": 48880 + }, + { + "epoch": 0.24415091512896703, + "grad_norm": 0.10225453972816467, + "learning_rate": 2.2732233597837243e-05, + "loss": 9.5906, + "step": 48890 + }, + { + "epoch": 0.24420085395390648, + "grad_norm": 0.09397625923156738, + "learning_rate": 2.2730731682895693e-05, + "loss": 9.5955, + "step": 48900 + }, + { + "epoch": 0.24425079277884593, + "grad_norm": 0.0943833664059639, + "learning_rate": 2.2729229767954143e-05, + "loss": 9.5859, + "step": 48910 + }, + { + "epoch": 0.24430073160378538, + "grad_norm": 0.09139799326658249, + "learning_rate": 2.2727727853012593e-05, + "loss": 9.5942, + "step": 48920 + }, + { + "epoch": 0.24435067042872483, + "grad_norm": 0.09776920080184937, + "learning_rate": 2.272622593807104e-05, + "loss": 9.5919, + "step": 48930 + }, + { + "epoch": 0.24440060925366427, + "grad_norm": 0.09115762263536453, + "learning_rate": 2.272472402312949e-05, + "loss": 9.5915, + "step": 48940 + }, + { + "epoch": 0.2444505480786037, + "grad_norm": 0.09906817972660065, + "learning_rate": 2.272322210818794e-05, + "loss": 9.593, + "step": 48950 + }, + { + "epoch": 0.24450048690354315, + "grad_norm": 0.09307815879583359, + "learning_rate": 2.272172019324639e-05, + "loss": 9.5819, + "step": 48960 + }, + { + "epoch": 0.2445504257284826, + "grad_norm": 0.10123563557863235, + "learning_rate": 2.272021827830484e-05, + "loss": 9.59, + "step": 48970 + }, + { + "epoch": 0.24460036455342204, + "grad_norm": 0.09160536527633667, + "learning_rate": 2.2718716363363287e-05, + "loss": 9.5937, + "step": 48980 + }, + { + "epoch": 0.2446503033783615, + "grad_norm": 0.0956059992313385, + "learning_rate": 2.2717214448421738e-05, + "loss": 9.5891, + "step": 48990 + }, + { + "epoch": 0.24470024220330094, + "grad_norm": 0.08904523402452469, + "learning_rate": 2.2715712533480188e-05, + "loss": 9.5975, + "step": 49000 + }, + { + "epoch": 0.2447501810282404, + "grad_norm": 0.09728097915649414, + "learning_rate": 2.2714210618538638e-05, + "loss": 9.5892, + "step": 49010 + }, + { + "epoch": 0.24480011985317984, + "grad_norm": 0.0932324156165123, + "learning_rate": 2.2712708703597088e-05, + "loss": 9.5944, + "step": 49020 + }, + { + "epoch": 0.2448500586781193, + "grad_norm": 0.09640751779079437, + "learning_rate": 2.2711206788655535e-05, + "loss": 9.5948, + "step": 49030 + }, + { + "epoch": 0.24489999750305874, + "grad_norm": 0.09052955359220505, + "learning_rate": 2.2709704873713985e-05, + "loss": 9.5832, + "step": 49040 + }, + { + "epoch": 0.2449499363279982, + "grad_norm": 0.09775557368993759, + "learning_rate": 2.2708202958772435e-05, + "loss": 9.5852, + "step": 49050 + }, + { + "epoch": 0.24499987515293764, + "grad_norm": 0.0953780934214592, + "learning_rate": 2.2706701043830885e-05, + "loss": 9.5839, + "step": 49060 + }, + { + "epoch": 0.2450498139778771, + "grad_norm": 0.09656697511672974, + "learning_rate": 2.2705199128889336e-05, + "loss": 9.5869, + "step": 49070 + }, + { + "epoch": 0.24509975280281654, + "grad_norm": 0.09594185650348663, + "learning_rate": 2.2703697213947782e-05, + "loss": 9.5924, + "step": 49080 + }, + { + "epoch": 0.245149691627756, + "grad_norm": 0.09402401000261307, + "learning_rate": 2.2702195299006233e-05, + "loss": 9.5812, + "step": 49090 + }, + { + "epoch": 0.24519963045269544, + "grad_norm": 0.09970910847187042, + "learning_rate": 2.2700693384064683e-05, + "loss": 9.5868, + "step": 49100 + }, + { + "epoch": 0.2452495692776349, + "grad_norm": 0.09406423568725586, + "learning_rate": 2.2699191469123133e-05, + "loss": 9.5864, + "step": 49110 + }, + { + "epoch": 0.24529950810257434, + "grad_norm": 0.09630411863327026, + "learning_rate": 2.2697689554181583e-05, + "loss": 9.586, + "step": 49120 + }, + { + "epoch": 0.2453494469275138, + "grad_norm": 0.09320022165775299, + "learning_rate": 2.269618763924003e-05, + "loss": 9.5852, + "step": 49130 + }, + { + "epoch": 0.24539938575245324, + "grad_norm": 0.0887378454208374, + "learning_rate": 2.269468572429848e-05, + "loss": 9.5917, + "step": 49140 + }, + { + "epoch": 0.24544932457739269, + "grad_norm": 0.09708340466022491, + "learning_rate": 2.269318380935693e-05, + "loss": 9.5828, + "step": 49150 + }, + { + "epoch": 0.24549926340233214, + "grad_norm": 0.09893514215946198, + "learning_rate": 2.269168189441538e-05, + "loss": 9.5893, + "step": 49160 + }, + { + "epoch": 0.24554920222727158, + "grad_norm": 0.09499365836381912, + "learning_rate": 2.269017997947383e-05, + "loss": 9.5846, + "step": 49170 + }, + { + "epoch": 0.24559914105221103, + "grad_norm": 0.09623656421899796, + "learning_rate": 2.2688678064532277e-05, + "loss": 9.5843, + "step": 49180 + }, + { + "epoch": 0.24564907987715048, + "grad_norm": 0.09261438250541687, + "learning_rate": 2.268717614959073e-05, + "loss": 9.5892, + "step": 49190 + }, + { + "epoch": 0.24569901870208993, + "grad_norm": 0.08927629142999649, + "learning_rate": 2.2685674234649178e-05, + "loss": 9.5861, + "step": 49200 + }, + { + "epoch": 0.24574895752702938, + "grad_norm": 0.08987867087125778, + "learning_rate": 2.2684172319707628e-05, + "loss": 9.5839, + "step": 49210 + }, + { + "epoch": 0.24579889635196883, + "grad_norm": 0.09199423342943192, + "learning_rate": 2.2682670404766078e-05, + "loss": 9.5921, + "step": 49220 + }, + { + "epoch": 0.24584883517690828, + "grad_norm": 0.09492474794387817, + "learning_rate": 2.2681168489824525e-05, + "loss": 9.5821, + "step": 49230 + }, + { + "epoch": 0.24589877400184773, + "grad_norm": 0.09393247216939926, + "learning_rate": 2.267966657488298e-05, + "loss": 9.5838, + "step": 49240 + }, + { + "epoch": 0.24594871282678718, + "grad_norm": 0.09099004417657852, + "learning_rate": 2.2678164659941425e-05, + "loss": 9.5917, + "step": 49250 + }, + { + "epoch": 0.24599865165172663, + "grad_norm": 0.09554600715637207, + "learning_rate": 2.2676662744999875e-05, + "loss": 9.5849, + "step": 49260 + }, + { + "epoch": 0.24604859047666608, + "grad_norm": 0.09146859496831894, + "learning_rate": 2.2675160830058326e-05, + "loss": 9.5806, + "step": 49270 + }, + { + "epoch": 0.24609852930160553, + "grad_norm": 0.09467651695013046, + "learning_rate": 2.2673658915116772e-05, + "loss": 9.5945, + "step": 49280 + }, + { + "epoch": 0.24614846812654498, + "grad_norm": 0.09306614845991135, + "learning_rate": 2.2672157000175226e-05, + "loss": 9.5833, + "step": 49290 + }, + { + "epoch": 0.24619840695148443, + "grad_norm": 0.09221172332763672, + "learning_rate": 2.2670655085233673e-05, + "loss": 9.576, + "step": 49300 + }, + { + "epoch": 0.24624834577642388, + "grad_norm": 0.09125102311372757, + "learning_rate": 2.2669153170292123e-05, + "loss": 9.5789, + "step": 49310 + }, + { + "epoch": 0.24629828460136333, + "grad_norm": 0.09555398672819138, + "learning_rate": 2.2667651255350573e-05, + "loss": 9.5835, + "step": 49320 + }, + { + "epoch": 0.24634822342630278, + "grad_norm": 0.09381726384162903, + "learning_rate": 2.266614934040902e-05, + "loss": 9.5803, + "step": 49330 + }, + { + "epoch": 0.24639816225124223, + "grad_norm": 0.09570520371198654, + "learning_rate": 2.2664647425467473e-05, + "loss": 9.5808, + "step": 49340 + }, + { + "epoch": 0.24644810107618167, + "grad_norm": 0.08948148041963577, + "learning_rate": 2.266314551052592e-05, + "loss": 9.5837, + "step": 49350 + }, + { + "epoch": 0.24649803990112112, + "grad_norm": 0.09439452737569809, + "learning_rate": 2.266164359558437e-05, + "loss": 9.5819, + "step": 49360 + }, + { + "epoch": 0.24654797872606057, + "grad_norm": 0.09754544496536255, + "learning_rate": 2.266014168064282e-05, + "loss": 9.5824, + "step": 49370 + }, + { + "epoch": 0.24659791755100002, + "grad_norm": 0.09239448606967926, + "learning_rate": 2.2658639765701267e-05, + "loss": 9.5796, + "step": 49380 + }, + { + "epoch": 0.24664785637593947, + "grad_norm": 0.09098103642463684, + "learning_rate": 2.265713785075972e-05, + "loss": 9.5934, + "step": 49390 + }, + { + "epoch": 0.24669779520087892, + "grad_norm": 0.09144534915685654, + "learning_rate": 2.2655635935818168e-05, + "loss": 9.5866, + "step": 49400 + }, + { + "epoch": 0.24674773402581837, + "grad_norm": 0.09623269736766815, + "learning_rate": 2.2654134020876618e-05, + "loss": 9.5856, + "step": 49410 + }, + { + "epoch": 0.24679767285075782, + "grad_norm": 0.09903118759393692, + "learning_rate": 2.2652632105935068e-05, + "loss": 9.5846, + "step": 49420 + }, + { + "epoch": 0.24684761167569727, + "grad_norm": 0.09627585858106613, + "learning_rate": 2.2651130190993515e-05, + "loss": 9.578, + "step": 49430 + }, + { + "epoch": 0.24689755050063672, + "grad_norm": 0.09699337929487228, + "learning_rate": 2.264962827605197e-05, + "loss": 9.5867, + "step": 49440 + }, + { + "epoch": 0.24694748932557617, + "grad_norm": 0.08975996822118759, + "learning_rate": 2.2648126361110415e-05, + "loss": 9.5786, + "step": 49450 + }, + { + "epoch": 0.24699742815051562, + "grad_norm": 0.09180519729852676, + "learning_rate": 2.2646624446168865e-05, + "loss": 9.5793, + "step": 49460 + }, + { + "epoch": 0.24704736697545507, + "grad_norm": 0.09850934892892838, + "learning_rate": 2.2645122531227316e-05, + "loss": 9.581, + "step": 49470 + }, + { + "epoch": 0.24709730580039452, + "grad_norm": 0.09343817830085754, + "learning_rate": 2.2643620616285762e-05, + "loss": 9.5812, + "step": 49480 + }, + { + "epoch": 0.24714724462533397, + "grad_norm": 0.09533870965242386, + "learning_rate": 2.2642118701344216e-05, + "loss": 9.5801, + "step": 49490 + }, + { + "epoch": 0.24719718345027342, + "grad_norm": 0.08947062492370605, + "learning_rate": 2.2640616786402663e-05, + "loss": 9.5813, + "step": 49500 + }, + { + "epoch": 0.24724712227521287, + "grad_norm": 0.09306909888982773, + "learning_rate": 2.2639114871461116e-05, + "loss": 9.5802, + "step": 49510 + }, + { + "epoch": 0.24729706110015232, + "grad_norm": 0.09695802628993988, + "learning_rate": 2.2637612956519563e-05, + "loss": 9.5719, + "step": 49520 + }, + { + "epoch": 0.24734699992509177, + "grad_norm": 0.09378542751073837, + "learning_rate": 2.263611104157801e-05, + "loss": 9.5747, + "step": 49530 + }, + { + "epoch": 0.24739693875003121, + "grad_norm": 0.0942843034863472, + "learning_rate": 2.2634609126636463e-05, + "loss": 9.5746, + "step": 49540 + }, + { + "epoch": 0.24744687757497066, + "grad_norm": 0.09266909956932068, + "learning_rate": 2.263310721169491e-05, + "loss": 9.5729, + "step": 49550 + }, + { + "epoch": 0.2474968163999101, + "grad_norm": 0.09565820544958115, + "learning_rate": 2.2631605296753364e-05, + "loss": 9.5685, + "step": 49560 + }, + { + "epoch": 0.24754675522484956, + "grad_norm": 0.09224898368120193, + "learning_rate": 2.263010338181181e-05, + "loss": 9.586, + "step": 49570 + }, + { + "epoch": 0.247596694049789, + "grad_norm": 0.09254671633243561, + "learning_rate": 2.2628601466870257e-05, + "loss": 9.5773, + "step": 49580 + }, + { + "epoch": 0.24764663287472846, + "grad_norm": 0.09858168661594391, + "learning_rate": 2.262709955192871e-05, + "loss": 9.5796, + "step": 49590 + }, + { + "epoch": 0.2476965716996679, + "grad_norm": 0.0942915603518486, + "learning_rate": 2.2625597636987158e-05, + "loss": 9.5813, + "step": 49600 + }, + { + "epoch": 0.24774651052460736, + "grad_norm": 0.09052585065364838, + "learning_rate": 2.262409572204561e-05, + "loss": 9.5741, + "step": 49610 + }, + { + "epoch": 0.2477964493495468, + "grad_norm": 0.09947668015956879, + "learning_rate": 2.2622593807104058e-05, + "loss": 9.5746, + "step": 49620 + }, + { + "epoch": 0.24784638817448626, + "grad_norm": 0.09705556184053421, + "learning_rate": 2.2621091892162505e-05, + "loss": 9.5732, + "step": 49630 + }, + { + "epoch": 0.2478963269994257, + "grad_norm": 0.0956898182630539, + "learning_rate": 2.261958997722096e-05, + "loss": 9.5757, + "step": 49640 + }, + { + "epoch": 0.24794626582436516, + "grad_norm": 0.09402977675199509, + "learning_rate": 2.2618088062279405e-05, + "loss": 9.5683, + "step": 49650 + }, + { + "epoch": 0.2479962046493046, + "grad_norm": 0.09913989156484604, + "learning_rate": 2.261658614733786e-05, + "loss": 9.5776, + "step": 49660 + }, + { + "epoch": 0.24804614347424406, + "grad_norm": 0.09860474616289139, + "learning_rate": 2.2615084232396306e-05, + "loss": 9.5795, + "step": 49670 + }, + { + "epoch": 0.2480960822991835, + "grad_norm": 0.0972980335354805, + "learning_rate": 2.2613582317454752e-05, + "loss": 9.567, + "step": 49680 + }, + { + "epoch": 0.24814602112412296, + "grad_norm": 0.09505248814821243, + "learning_rate": 2.2612080402513206e-05, + "loss": 9.5719, + "step": 49690 + }, + { + "epoch": 0.2481959599490624, + "grad_norm": 0.09183008968830109, + "learning_rate": 2.2610578487571653e-05, + "loss": 9.5819, + "step": 49700 + }, + { + "epoch": 0.24824589877400186, + "grad_norm": 0.09852754324674606, + "learning_rate": 2.2609076572630106e-05, + "loss": 9.577, + "step": 49710 + }, + { + "epoch": 0.2482958375989413, + "grad_norm": 0.09413760155439377, + "learning_rate": 2.2607574657688553e-05, + "loss": 9.5743, + "step": 49720 + }, + { + "epoch": 0.24834577642388075, + "grad_norm": 0.09190001338720322, + "learning_rate": 2.2606072742747e-05, + "loss": 9.5768, + "step": 49730 + }, + { + "epoch": 0.2483957152488202, + "grad_norm": 0.09404946118593216, + "learning_rate": 2.2604570827805453e-05, + "loss": 9.5756, + "step": 49740 + }, + { + "epoch": 0.24844565407375965, + "grad_norm": 0.09731113910675049, + "learning_rate": 2.26030689128639e-05, + "loss": 9.5738, + "step": 49750 + }, + { + "epoch": 0.2484955928986991, + "grad_norm": 0.09584534913301468, + "learning_rate": 2.2601566997922354e-05, + "loss": 9.5745, + "step": 49760 + }, + { + "epoch": 0.24854553172363855, + "grad_norm": 0.09506174921989441, + "learning_rate": 2.26000650829808e-05, + "loss": 9.5702, + "step": 49770 + }, + { + "epoch": 0.248595470548578, + "grad_norm": 0.09768600016832352, + "learning_rate": 2.2598563168039247e-05, + "loss": 9.5804, + "step": 49780 + }, + { + "epoch": 0.24864540937351745, + "grad_norm": 0.09207095205783844, + "learning_rate": 2.25970612530977e-05, + "loss": 9.569, + "step": 49790 + }, + { + "epoch": 0.2486953481984569, + "grad_norm": 0.09260823577642441, + "learning_rate": 2.2595559338156148e-05, + "loss": 9.5739, + "step": 49800 + }, + { + "epoch": 0.24874528702339635, + "grad_norm": 0.09007450938224792, + "learning_rate": 2.25940574232146e-05, + "loss": 9.5792, + "step": 49810 + }, + { + "epoch": 0.2487952258483358, + "grad_norm": 0.10160695761442184, + "learning_rate": 2.2592555508273048e-05, + "loss": 9.5803, + "step": 49820 + }, + { + "epoch": 0.24884516467327525, + "grad_norm": 0.09343059360980988, + "learning_rate": 2.25910535933315e-05, + "loss": 9.5758, + "step": 49830 + }, + { + "epoch": 0.2488951034982147, + "grad_norm": 0.09455156326293945, + "learning_rate": 2.258955167838995e-05, + "loss": 9.5728, + "step": 49840 + }, + { + "epoch": 0.24894504232315415, + "grad_norm": 0.09680726379156113, + "learning_rate": 2.2588049763448395e-05, + "loss": 9.5752, + "step": 49850 + }, + { + "epoch": 0.2489949811480936, + "grad_norm": 0.09316403418779373, + "learning_rate": 2.258654784850685e-05, + "loss": 9.5631, + "step": 49860 + }, + { + "epoch": 0.24904491997303305, + "grad_norm": 0.0938340425491333, + "learning_rate": 2.2585045933565296e-05, + "loss": 9.5747, + "step": 49870 + }, + { + "epoch": 0.2490948587979725, + "grad_norm": 0.094419926404953, + "learning_rate": 2.2583544018623746e-05, + "loss": 9.5744, + "step": 49880 + }, + { + "epoch": 0.24914479762291195, + "grad_norm": 0.09464523941278458, + "learning_rate": 2.2582042103682196e-05, + "loss": 9.5676, + "step": 49890 + }, + { + "epoch": 0.2491947364478514, + "grad_norm": 0.09369165450334549, + "learning_rate": 2.2580540188740643e-05, + "loss": 9.5752, + "step": 49900 + }, + { + "epoch": 0.24924467527279084, + "grad_norm": 0.09760770201683044, + "learning_rate": 2.2579038273799096e-05, + "loss": 9.5642, + "step": 49910 + }, + { + "epoch": 0.2492946140977303, + "grad_norm": 0.09786707162857056, + "learning_rate": 2.2577536358857543e-05, + "loss": 9.5733, + "step": 49920 + }, + { + "epoch": 0.24934455292266974, + "grad_norm": 0.09324178099632263, + "learning_rate": 2.2576034443915993e-05, + "loss": 9.5683, + "step": 49930 + }, + { + "epoch": 0.24939449174760916, + "grad_norm": 0.09547551721334457, + "learning_rate": 2.2574532528974444e-05, + "loss": 9.5693, + "step": 49940 + }, + { + "epoch": 0.24944443057254861, + "grad_norm": 0.09578131884336472, + "learning_rate": 2.257303061403289e-05, + "loss": 9.5682, + "step": 49950 + }, + { + "epoch": 0.24949436939748806, + "grad_norm": 0.09277050942182541, + "learning_rate": 2.2571528699091344e-05, + "loss": 9.5733, + "step": 49960 + }, + { + "epoch": 0.2495443082224275, + "grad_norm": 0.09631931036710739, + "learning_rate": 2.257002678414979e-05, + "loss": 9.5709, + "step": 49970 + }, + { + "epoch": 0.24959424704736696, + "grad_norm": 0.09734215587377548, + "learning_rate": 2.256852486920824e-05, + "loss": 9.5697, + "step": 49980 + }, + { + "epoch": 0.2496441858723064, + "grad_norm": 0.09608133882284164, + "learning_rate": 2.256702295426669e-05, + "loss": 9.5712, + "step": 49990 + }, + { + "epoch": 0.24969412469724586, + "grad_norm": 0.09397196024656296, + "learning_rate": 2.2565521039325138e-05, + "loss": 9.571, + "step": 50000 + }, + { + "epoch": 0.2497440635221853, + "grad_norm": 0.09108542650938034, + "learning_rate": 2.256401912438359e-05, + "loss": 9.5636, + "step": 50010 + }, + { + "epoch": 0.24979400234712476, + "grad_norm": 0.09850990772247314, + "learning_rate": 2.2562517209442038e-05, + "loss": 9.5599, + "step": 50020 + }, + { + "epoch": 0.2498439411720642, + "grad_norm": 0.10261461138725281, + "learning_rate": 2.256101529450049e-05, + "loss": 9.5668, + "step": 50030 + }, + { + "epoch": 0.24989387999700366, + "grad_norm": 0.09602981060743332, + "learning_rate": 2.255951337955894e-05, + "loss": 9.5784, + "step": 50040 + }, + { + "epoch": 0.2499438188219431, + "grad_norm": 0.10035931318998337, + "learning_rate": 2.2558011464617385e-05, + "loss": 9.5678, + "step": 50050 + }, + { + "epoch": 0.24999375764688256, + "grad_norm": 0.09186173975467682, + "learning_rate": 2.255650954967584e-05, + "loss": 9.5682, + "step": 50060 + }, + { + "epoch": 0.250043696471822, + "grad_norm": 0.09531144052743912, + "learning_rate": 2.2555007634734286e-05, + "loss": 9.5753, + "step": 50070 + }, + { + "epoch": 0.2500936352967615, + "grad_norm": 0.0925971195101738, + "learning_rate": 2.2553505719792736e-05, + "loss": 9.5576, + "step": 50080 + }, + { + "epoch": 0.2501435741217009, + "grad_norm": 0.10341840982437134, + "learning_rate": 2.2552003804851186e-05, + "loss": 9.5649, + "step": 50090 + }, + { + "epoch": 0.2501935129466404, + "grad_norm": 0.08949480205774307, + "learning_rate": 2.2550501889909633e-05, + "loss": 9.5681, + "step": 50100 + }, + { + "epoch": 0.2502434517715798, + "grad_norm": 0.0952087789773941, + "learning_rate": 2.2548999974968086e-05, + "loss": 9.5719, + "step": 50110 + }, + { + "epoch": 0.2502933905965193, + "grad_norm": 0.09297273308038712, + "learning_rate": 2.2547498060026533e-05, + "loss": 9.5602, + "step": 50120 + }, + { + "epoch": 0.2503433294214587, + "grad_norm": 0.09558509290218353, + "learning_rate": 2.2545996145084983e-05, + "loss": 9.5775, + "step": 50130 + }, + { + "epoch": 0.2503932682463982, + "grad_norm": 0.09406739473342896, + "learning_rate": 2.2544494230143434e-05, + "loss": 9.5728, + "step": 50140 + }, + { + "epoch": 0.2504432070713376, + "grad_norm": 0.08946365118026733, + "learning_rate": 2.254299231520188e-05, + "loss": 9.5646, + "step": 50150 + }, + { + "epoch": 0.2504931458962771, + "grad_norm": 0.0948345735669136, + "learning_rate": 2.2541490400260334e-05, + "loss": 9.5688, + "step": 50160 + }, + { + "epoch": 0.2505430847212165, + "grad_norm": 0.09642423689365387, + "learning_rate": 2.253998848531878e-05, + "loss": 9.5646, + "step": 50170 + }, + { + "epoch": 0.250593023546156, + "grad_norm": 0.09099261462688446, + "learning_rate": 2.253848657037723e-05, + "loss": 9.5648, + "step": 50180 + }, + { + "epoch": 0.2506429623710954, + "grad_norm": 0.09220701456069946, + "learning_rate": 2.253698465543568e-05, + "loss": 9.5665, + "step": 50190 + }, + { + "epoch": 0.2506929011960349, + "grad_norm": 0.09915616363286972, + "learning_rate": 2.253548274049413e-05, + "loss": 9.5697, + "step": 50200 + }, + { + "epoch": 0.2507428400209743, + "grad_norm": 0.0929388552904129, + "learning_rate": 2.253398082555258e-05, + "loss": 9.5623, + "step": 50210 + }, + { + "epoch": 0.2507927788459138, + "grad_norm": 0.09379485994577408, + "learning_rate": 2.2532478910611028e-05, + "loss": 9.5631, + "step": 50220 + }, + { + "epoch": 0.2508427176708532, + "grad_norm": 0.0981220230460167, + "learning_rate": 2.253097699566948e-05, + "loss": 9.5638, + "step": 50230 + }, + { + "epoch": 0.2508926564957927, + "grad_norm": 0.09549273550510406, + "learning_rate": 2.252947508072793e-05, + "loss": 9.5607, + "step": 50240 + }, + { + "epoch": 0.2509425953207321, + "grad_norm": 0.09716612845659256, + "learning_rate": 2.252797316578638e-05, + "loss": 9.5685, + "step": 50250 + }, + { + "epoch": 0.2509925341456716, + "grad_norm": 0.09582599252462387, + "learning_rate": 2.252647125084483e-05, + "loss": 9.5682, + "step": 50260 + }, + { + "epoch": 0.251042472970611, + "grad_norm": 0.09806785732507706, + "learning_rate": 2.2524969335903276e-05, + "loss": 9.5639, + "step": 50270 + }, + { + "epoch": 0.2510924117955505, + "grad_norm": 0.10009253770112991, + "learning_rate": 2.2523467420961726e-05, + "loss": 9.5696, + "step": 50280 + }, + { + "epoch": 0.2511423506204899, + "grad_norm": 0.10564589500427246, + "learning_rate": 2.2521965506020176e-05, + "loss": 9.5722, + "step": 50290 + }, + { + "epoch": 0.2511922894454294, + "grad_norm": 0.09391903877258301, + "learning_rate": 2.2520463591078626e-05, + "loss": 9.5661, + "step": 50300 + }, + { + "epoch": 0.2512422282703688, + "grad_norm": 0.09433561563491821, + "learning_rate": 2.2518961676137076e-05, + "loss": 9.5701, + "step": 50310 + }, + { + "epoch": 0.25129216709530827, + "grad_norm": 0.09320453554391861, + "learning_rate": 2.2517459761195523e-05, + "loss": 9.5589, + "step": 50320 + }, + { + "epoch": 0.2513421059202477, + "grad_norm": 0.0926797166466713, + "learning_rate": 2.2515957846253973e-05, + "loss": 9.5641, + "step": 50330 + }, + { + "epoch": 0.25139204474518717, + "grad_norm": 0.09965445101261139, + "learning_rate": 2.2514455931312424e-05, + "loss": 9.572, + "step": 50340 + }, + { + "epoch": 0.2514419835701266, + "grad_norm": 0.09860392659902573, + "learning_rate": 2.2512954016370874e-05, + "loss": 9.569, + "step": 50350 + }, + { + "epoch": 0.25149192239506607, + "grad_norm": 0.09547075629234314, + "learning_rate": 2.2511452101429324e-05, + "loss": 9.5689, + "step": 50360 + }, + { + "epoch": 0.2515418612200055, + "grad_norm": 0.09795694053173065, + "learning_rate": 2.250995018648777e-05, + "loss": 9.5628, + "step": 50370 + }, + { + "epoch": 0.25159180004494497, + "grad_norm": 0.09307505935430527, + "learning_rate": 2.250844827154622e-05, + "loss": 9.5662, + "step": 50380 + }, + { + "epoch": 0.2516417388698844, + "grad_norm": 0.10071365535259247, + "learning_rate": 2.250694635660467e-05, + "loss": 9.569, + "step": 50390 + }, + { + "epoch": 0.25169167769482387, + "grad_norm": 0.09713767468929291, + "learning_rate": 2.250544444166312e-05, + "loss": 9.5577, + "step": 50400 + }, + { + "epoch": 0.2517416165197633, + "grad_norm": 0.09521119296550751, + "learning_rate": 2.250394252672157e-05, + "loss": 9.5673, + "step": 50410 + }, + { + "epoch": 0.25179155534470277, + "grad_norm": 0.09349004924297333, + "learning_rate": 2.2502440611780018e-05, + "loss": 9.5571, + "step": 50420 + }, + { + "epoch": 0.2518414941696422, + "grad_norm": 0.0950050801038742, + "learning_rate": 2.250093869683847e-05, + "loss": 9.5636, + "step": 50430 + }, + { + "epoch": 0.2518914329945816, + "grad_norm": 0.09199751913547516, + "learning_rate": 2.249943678189692e-05, + "loss": 9.5595, + "step": 50440 + }, + { + "epoch": 0.2519413718195211, + "grad_norm": 0.09100238978862762, + "learning_rate": 2.249793486695537e-05, + "loss": 9.5609, + "step": 50450 + }, + { + "epoch": 0.2519913106444605, + "grad_norm": 0.09246649593114853, + "learning_rate": 2.249643295201382e-05, + "loss": 9.5659, + "step": 50460 + }, + { + "epoch": 0.2520412494694, + "grad_norm": 0.09442534297704697, + "learning_rate": 2.2494931037072266e-05, + "loss": 9.5646, + "step": 50470 + }, + { + "epoch": 0.2520911882943394, + "grad_norm": 0.08858856558799744, + "learning_rate": 2.249342912213072e-05, + "loss": 9.5523, + "step": 50480 + }, + { + "epoch": 0.2521411271192789, + "grad_norm": 0.09395717829465866, + "learning_rate": 2.2491927207189166e-05, + "loss": 9.5661, + "step": 50490 + }, + { + "epoch": 0.2521910659442183, + "grad_norm": 0.101358562707901, + "learning_rate": 2.2490425292247616e-05, + "loss": 9.5592, + "step": 50500 + }, + { + "epoch": 0.2522410047691578, + "grad_norm": 0.09509819746017456, + "learning_rate": 2.2488923377306066e-05, + "loss": 9.5605, + "step": 50510 + }, + { + "epoch": 0.2522909435940972, + "grad_norm": 0.09509771317243576, + "learning_rate": 2.2487421462364517e-05, + "loss": 9.561, + "step": 50520 + }, + { + "epoch": 0.2523408824190367, + "grad_norm": 0.09046350419521332, + "learning_rate": 2.2485919547422967e-05, + "loss": 9.5595, + "step": 50530 + }, + { + "epoch": 0.2523908212439761, + "grad_norm": 0.09777220338582993, + "learning_rate": 2.2484417632481414e-05, + "loss": 9.5526, + "step": 50540 + }, + { + "epoch": 0.2524407600689156, + "grad_norm": 0.09290176630020142, + "learning_rate": 2.2482915717539864e-05, + "loss": 9.562, + "step": 50550 + }, + { + "epoch": 0.252490698893855, + "grad_norm": 0.0935697928071022, + "learning_rate": 2.2481413802598314e-05, + "loss": 9.5649, + "step": 50560 + }, + { + "epoch": 0.2525406377187945, + "grad_norm": 0.09431051462888718, + "learning_rate": 2.2479911887656764e-05, + "loss": 9.5558, + "step": 50570 + }, + { + "epoch": 0.2525905765437339, + "grad_norm": 0.09408137947320938, + "learning_rate": 2.2478409972715214e-05, + "loss": 9.5587, + "step": 50580 + }, + { + "epoch": 0.2526405153686734, + "grad_norm": 0.0971836969256401, + "learning_rate": 2.247690805777366e-05, + "loss": 9.5572, + "step": 50590 + }, + { + "epoch": 0.2526904541936128, + "grad_norm": 0.09811126440763474, + "learning_rate": 2.247540614283211e-05, + "loss": 9.5591, + "step": 50600 + }, + { + "epoch": 0.2527403930185523, + "grad_norm": 0.09719320386648178, + "learning_rate": 2.247390422789056e-05, + "loss": 9.5642, + "step": 50610 + }, + { + "epoch": 0.2527903318434917, + "grad_norm": 0.09279009699821472, + "learning_rate": 2.247240231294901e-05, + "loss": 9.554, + "step": 50620 + }, + { + "epoch": 0.2528402706684312, + "grad_norm": 0.09156560897827148, + "learning_rate": 2.2470900398007462e-05, + "loss": 9.5613, + "step": 50630 + }, + { + "epoch": 0.2528902094933706, + "grad_norm": 0.08856366574764252, + "learning_rate": 2.246939848306591e-05, + "loss": 9.5541, + "step": 50640 + }, + { + "epoch": 0.2529401483183101, + "grad_norm": 0.09104395657777786, + "learning_rate": 2.246789656812436e-05, + "loss": 9.5616, + "step": 50650 + }, + { + "epoch": 0.2529900871432495, + "grad_norm": 0.09743545204401016, + "learning_rate": 2.246639465318281e-05, + "loss": 9.5614, + "step": 50660 + }, + { + "epoch": 0.253040025968189, + "grad_norm": 0.09513422101736069, + "learning_rate": 2.246489273824126e-05, + "loss": 9.5536, + "step": 50670 + }, + { + "epoch": 0.2530899647931284, + "grad_norm": 0.09526988118886948, + "learning_rate": 2.246339082329971e-05, + "loss": 9.5638, + "step": 50680 + }, + { + "epoch": 0.2531399036180679, + "grad_norm": 0.0918373093008995, + "learning_rate": 2.2461888908358156e-05, + "loss": 9.5538, + "step": 50690 + }, + { + "epoch": 0.2531898424430073, + "grad_norm": 0.09240086376667023, + "learning_rate": 2.2460386993416606e-05, + "loss": 9.5696, + "step": 50700 + }, + { + "epoch": 0.2532397812679468, + "grad_norm": 0.09589746594429016, + "learning_rate": 2.2458885078475056e-05, + "loss": 9.5527, + "step": 50710 + }, + { + "epoch": 0.2532897200928862, + "grad_norm": 0.0918283686041832, + "learning_rate": 2.2457383163533507e-05, + "loss": 9.5575, + "step": 50720 + }, + { + "epoch": 0.25333965891782567, + "grad_norm": 0.09194007515907288, + "learning_rate": 2.2455881248591957e-05, + "loss": 9.5578, + "step": 50730 + }, + { + "epoch": 0.2533895977427651, + "grad_norm": 0.09658579528331757, + "learning_rate": 2.2454379333650404e-05, + "loss": 9.5613, + "step": 50740 + }, + { + "epoch": 0.25343953656770457, + "grad_norm": 0.09084783494472504, + "learning_rate": 2.2452877418708854e-05, + "loss": 9.5606, + "step": 50750 + }, + { + "epoch": 0.253489475392644, + "grad_norm": 0.09604687243700027, + "learning_rate": 2.2451375503767304e-05, + "loss": 9.5488, + "step": 50760 + }, + { + "epoch": 0.25353941421758347, + "grad_norm": 0.08728381246328354, + "learning_rate": 2.2449873588825754e-05, + "loss": 9.555, + "step": 50770 + }, + { + "epoch": 0.2535893530425229, + "grad_norm": 0.09549268335103989, + "learning_rate": 2.2448371673884204e-05, + "loss": 9.5534, + "step": 50780 + }, + { + "epoch": 0.25363929186746237, + "grad_norm": 0.0957789346575737, + "learning_rate": 2.244686975894265e-05, + "loss": 9.5565, + "step": 50790 + }, + { + "epoch": 0.2536892306924018, + "grad_norm": 0.09618537127971649, + "learning_rate": 2.24453678440011e-05, + "loss": 9.5515, + "step": 50800 + }, + { + "epoch": 0.25373916951734127, + "grad_norm": 0.0949334055185318, + "learning_rate": 2.244386592905955e-05, + "loss": 9.5482, + "step": 50810 + }, + { + "epoch": 0.2537891083422807, + "grad_norm": 0.09194260835647583, + "learning_rate": 2.2442364014118e-05, + "loss": 9.5547, + "step": 50820 + }, + { + "epoch": 0.25383904716722017, + "grad_norm": 0.09208869934082031, + "learning_rate": 2.2440862099176452e-05, + "loss": 9.561, + "step": 50830 + }, + { + "epoch": 0.2538889859921596, + "grad_norm": 0.0971684604883194, + "learning_rate": 2.2439360184234902e-05, + "loss": 9.559, + "step": 50840 + }, + { + "epoch": 0.25393892481709907, + "grad_norm": 0.09714303910732269, + "learning_rate": 2.243785826929335e-05, + "loss": 9.5505, + "step": 50850 + }, + { + "epoch": 0.2539888636420385, + "grad_norm": 0.09106447547674179, + "learning_rate": 2.24363563543518e-05, + "loss": 9.5475, + "step": 50860 + }, + { + "epoch": 0.25403880246697796, + "grad_norm": 0.09171400219202042, + "learning_rate": 2.243485443941025e-05, + "loss": 9.5539, + "step": 50870 + }, + { + "epoch": 0.2540887412919174, + "grad_norm": 0.09836923331022263, + "learning_rate": 2.24333525244687e-05, + "loss": 9.5509, + "step": 50880 + }, + { + "epoch": 0.25413868011685686, + "grad_norm": 0.09668246656656265, + "learning_rate": 2.243185060952715e-05, + "loss": 9.5561, + "step": 50890 + }, + { + "epoch": 0.2541886189417963, + "grad_norm": 0.09654278308153152, + "learning_rate": 2.2430348694585596e-05, + "loss": 9.5565, + "step": 50900 + }, + { + "epoch": 0.25423855776673576, + "grad_norm": 0.09737430512905121, + "learning_rate": 2.2428846779644046e-05, + "loss": 9.5577, + "step": 50910 + }, + { + "epoch": 0.2542884965916752, + "grad_norm": 0.09896323084831238, + "learning_rate": 2.2427344864702497e-05, + "loss": 9.5504, + "step": 50920 + }, + { + "epoch": 0.25433843541661466, + "grad_norm": 0.09133461862802505, + "learning_rate": 2.2425842949760947e-05, + "loss": 9.5474, + "step": 50930 + }, + { + "epoch": 0.2543883742415541, + "grad_norm": 0.09348398447036743, + "learning_rate": 2.2424341034819397e-05, + "loss": 9.5559, + "step": 50940 + }, + { + "epoch": 0.25443831306649356, + "grad_norm": 0.09214718639850616, + "learning_rate": 2.2422839119877844e-05, + "loss": 9.5534, + "step": 50950 + }, + { + "epoch": 0.254488251891433, + "grad_norm": 0.09419059008359909, + "learning_rate": 2.2421337204936294e-05, + "loss": 9.5525, + "step": 50960 + }, + { + "epoch": 0.25453819071637246, + "grad_norm": 0.10455143451690674, + "learning_rate": 2.2419835289994744e-05, + "loss": 9.5567, + "step": 50970 + }, + { + "epoch": 0.2545881295413119, + "grad_norm": 0.09650573879480362, + "learning_rate": 2.2418333375053194e-05, + "loss": 9.5585, + "step": 50980 + }, + { + "epoch": 0.25463806836625136, + "grad_norm": 0.09631022065877914, + "learning_rate": 2.2416831460111644e-05, + "loss": 9.5525, + "step": 50990 + }, + { + "epoch": 0.2546880071911908, + "grad_norm": 0.09867773205041885, + "learning_rate": 2.241532954517009e-05, + "loss": 9.5604, + "step": 51000 + }, + { + "epoch": 0.25473794601613026, + "grad_norm": 0.09457637369632721, + "learning_rate": 2.241382763022854e-05, + "loss": 9.5552, + "step": 51010 + }, + { + "epoch": 0.2547878848410697, + "grad_norm": 0.09107904881238937, + "learning_rate": 2.241232571528699e-05, + "loss": 9.5587, + "step": 51020 + }, + { + "epoch": 0.25483782366600916, + "grad_norm": 0.0893888846039772, + "learning_rate": 2.2410823800345442e-05, + "loss": 9.5496, + "step": 51030 + }, + { + "epoch": 0.2548877624909486, + "grad_norm": 0.09376450628042221, + "learning_rate": 2.2409321885403892e-05, + "loss": 9.5533, + "step": 51040 + }, + { + "epoch": 0.25493770131588805, + "grad_norm": 0.09373288601636887, + "learning_rate": 2.240781997046234e-05, + "loss": 9.5508, + "step": 51050 + }, + { + "epoch": 0.2549876401408275, + "grad_norm": 0.08769457787275314, + "learning_rate": 2.240631805552079e-05, + "loss": 9.5622, + "step": 51060 + }, + { + "epoch": 0.25503757896576695, + "grad_norm": 0.09537973999977112, + "learning_rate": 2.240481614057924e-05, + "loss": 9.5541, + "step": 51070 + }, + { + "epoch": 0.2550875177907064, + "grad_norm": 0.09325198084115982, + "learning_rate": 2.240331422563769e-05, + "loss": 9.5471, + "step": 51080 + }, + { + "epoch": 0.25513745661564585, + "grad_norm": 0.08798488229513168, + "learning_rate": 2.240181231069614e-05, + "loss": 9.5516, + "step": 51090 + }, + { + "epoch": 0.2551873954405853, + "grad_norm": 0.09604234993457794, + "learning_rate": 2.2400310395754586e-05, + "loss": 9.5472, + "step": 51100 + }, + { + "epoch": 0.25523733426552475, + "grad_norm": 0.0939023569226265, + "learning_rate": 2.2398808480813036e-05, + "loss": 9.5409, + "step": 51110 + }, + { + "epoch": 0.2552872730904642, + "grad_norm": 0.09348843991756439, + "learning_rate": 2.2397306565871487e-05, + "loss": 9.5484, + "step": 51120 + }, + { + "epoch": 0.25533721191540365, + "grad_norm": 0.0899486169219017, + "learning_rate": 2.2395804650929937e-05, + "loss": 9.547, + "step": 51130 + }, + { + "epoch": 0.25538715074034307, + "grad_norm": 0.0945720225572586, + "learning_rate": 2.2394302735988387e-05, + "loss": 9.5554, + "step": 51140 + }, + { + "epoch": 0.25543708956528255, + "grad_norm": 0.0940125435590744, + "learning_rate": 2.2392800821046834e-05, + "loss": 9.5527, + "step": 51150 + }, + { + "epoch": 0.25548702839022197, + "grad_norm": 0.09849490225315094, + "learning_rate": 2.2391298906105287e-05, + "loss": 9.5505, + "step": 51160 + }, + { + "epoch": 0.25553696721516145, + "grad_norm": 0.08866614103317261, + "learning_rate": 2.2389796991163734e-05, + "loss": 9.5502, + "step": 51170 + }, + { + "epoch": 0.25558690604010087, + "grad_norm": 0.09326382726430893, + "learning_rate": 2.2388295076222184e-05, + "loss": 9.5456, + "step": 51180 + }, + { + "epoch": 0.25563684486504035, + "grad_norm": 0.09560631960630417, + "learning_rate": 2.2386793161280634e-05, + "loss": 9.544, + "step": 51190 + }, + { + "epoch": 0.25568678368997977, + "grad_norm": 0.09622514992952347, + "learning_rate": 2.238529124633908e-05, + "loss": 9.5561, + "step": 51200 + }, + { + "epoch": 0.25573672251491925, + "grad_norm": 0.09464744478464127, + "learning_rate": 2.2383789331397535e-05, + "loss": 9.5411, + "step": 51210 + }, + { + "epoch": 0.25578666133985867, + "grad_norm": 0.09231311827898026, + "learning_rate": 2.238228741645598e-05, + "loss": 9.5537, + "step": 51220 + }, + { + "epoch": 0.25583660016479814, + "grad_norm": 0.09209968149662018, + "learning_rate": 2.2380785501514432e-05, + "loss": 9.5526, + "step": 51230 + }, + { + "epoch": 0.25588653898973757, + "grad_norm": 0.09474463015794754, + "learning_rate": 2.2379283586572882e-05, + "loss": 9.5419, + "step": 51240 + }, + { + "epoch": 0.25593647781467704, + "grad_norm": 0.09012725204229355, + "learning_rate": 2.237778167163133e-05, + "loss": 9.5522, + "step": 51250 + }, + { + "epoch": 0.25598641663961647, + "grad_norm": 0.09033194929361343, + "learning_rate": 2.2376279756689782e-05, + "loss": 9.5544, + "step": 51260 + }, + { + "epoch": 0.25603635546455594, + "grad_norm": 0.0948658287525177, + "learning_rate": 2.237477784174823e-05, + "loss": 9.546, + "step": 51270 + }, + { + "epoch": 0.25608629428949536, + "grad_norm": 0.09429352730512619, + "learning_rate": 2.237327592680668e-05, + "loss": 9.5531, + "step": 51280 + }, + { + "epoch": 0.25613623311443484, + "grad_norm": 0.09330032765865326, + "learning_rate": 2.237177401186513e-05, + "loss": 9.5449, + "step": 51290 + }, + { + "epoch": 0.25618617193937426, + "grad_norm": 0.08929108828306198, + "learning_rate": 2.2370272096923576e-05, + "loss": 9.551, + "step": 51300 + }, + { + "epoch": 0.25623611076431374, + "grad_norm": 0.0942007303237915, + "learning_rate": 2.236877018198203e-05, + "loss": 9.5506, + "step": 51310 + }, + { + "epoch": 0.25628604958925316, + "grad_norm": 0.09233163297176361, + "learning_rate": 2.2367268267040477e-05, + "loss": 9.5422, + "step": 51320 + }, + { + "epoch": 0.25633598841419264, + "grad_norm": 0.09945209324359894, + "learning_rate": 2.2365766352098927e-05, + "loss": 9.539, + "step": 51330 + }, + { + "epoch": 0.25638592723913206, + "grad_norm": 0.09467910975217819, + "learning_rate": 2.2364264437157377e-05, + "loss": 9.551, + "step": 51340 + }, + { + "epoch": 0.25643586606407154, + "grad_norm": 0.09829029440879822, + "learning_rate": 2.2362762522215824e-05, + "loss": 9.5447, + "step": 51350 + }, + { + "epoch": 0.25648580488901096, + "grad_norm": 0.09287157654762268, + "learning_rate": 2.2361260607274277e-05, + "loss": 9.5401, + "step": 51360 + }, + { + "epoch": 0.25653574371395044, + "grad_norm": 0.09296029806137085, + "learning_rate": 2.2359758692332724e-05, + "loss": 9.5441, + "step": 51370 + }, + { + "epoch": 0.25658568253888986, + "grad_norm": 0.09422246366739273, + "learning_rate": 2.2358256777391174e-05, + "loss": 9.5485, + "step": 51380 + }, + { + "epoch": 0.25663562136382934, + "grad_norm": 0.0900813490152359, + "learning_rate": 2.2356754862449624e-05, + "loss": 9.5456, + "step": 51390 + }, + { + "epoch": 0.25668556018876876, + "grad_norm": 0.09812017530202866, + "learning_rate": 2.235525294750807e-05, + "loss": 9.5497, + "step": 51400 + }, + { + "epoch": 0.25673549901370823, + "grad_norm": 0.09745459258556366, + "learning_rate": 2.2353751032566525e-05, + "loss": 9.5479, + "step": 51410 + }, + { + "epoch": 0.25678543783864766, + "grad_norm": 0.09394165873527527, + "learning_rate": 2.235224911762497e-05, + "loss": 9.5428, + "step": 51420 + }, + { + "epoch": 0.2568353766635871, + "grad_norm": 0.09442657977342606, + "learning_rate": 2.2350747202683422e-05, + "loss": 9.5507, + "step": 51430 + }, + { + "epoch": 0.25688531548852656, + "grad_norm": 0.10503870993852615, + "learning_rate": 2.2349245287741872e-05, + "loss": 9.5499, + "step": 51440 + }, + { + "epoch": 0.256935254313466, + "grad_norm": 0.09587554633617401, + "learning_rate": 2.234774337280032e-05, + "loss": 9.5433, + "step": 51450 + }, + { + "epoch": 0.25698519313840545, + "grad_norm": 0.09741827845573425, + "learning_rate": 2.2346241457858772e-05, + "loss": 9.5378, + "step": 51460 + }, + { + "epoch": 0.2570351319633449, + "grad_norm": 0.09930811822414398, + "learning_rate": 2.234473954291722e-05, + "loss": 9.5404, + "step": 51470 + }, + { + "epoch": 0.25708507078828435, + "grad_norm": 0.09290921688079834, + "learning_rate": 2.2343237627975673e-05, + "loss": 9.5358, + "step": 51480 + }, + { + "epoch": 0.2571350096132238, + "grad_norm": 0.09306775778532028, + "learning_rate": 2.234173571303412e-05, + "loss": 9.5465, + "step": 51490 + }, + { + "epoch": 0.25718494843816325, + "grad_norm": 0.09969160705804825, + "learning_rate": 2.2340233798092566e-05, + "loss": 9.5441, + "step": 51500 + }, + { + "epoch": 0.2572348872631027, + "grad_norm": 0.0934043824672699, + "learning_rate": 2.233873188315102e-05, + "loss": 9.5415, + "step": 51510 + }, + { + "epoch": 0.25728482608804215, + "grad_norm": 0.10059630125761032, + "learning_rate": 2.2337229968209467e-05, + "loss": 9.5438, + "step": 51520 + }, + { + "epoch": 0.2573347649129816, + "grad_norm": 0.09175658226013184, + "learning_rate": 2.233572805326792e-05, + "loss": 9.5439, + "step": 51530 + }, + { + "epoch": 0.25738470373792105, + "grad_norm": 0.09227610379457474, + "learning_rate": 2.2334226138326367e-05, + "loss": 9.545, + "step": 51540 + }, + { + "epoch": 0.25743464256286047, + "grad_norm": 0.09750757366418839, + "learning_rate": 2.2332724223384814e-05, + "loss": 9.5387, + "step": 51550 + }, + { + "epoch": 0.25748458138779995, + "grad_norm": 0.09199635684490204, + "learning_rate": 2.2331222308443267e-05, + "loss": 9.5451, + "step": 51560 + }, + { + "epoch": 0.25753452021273937, + "grad_norm": 0.09396953880786896, + "learning_rate": 2.2329720393501714e-05, + "loss": 9.5419, + "step": 51570 + }, + { + "epoch": 0.25758445903767885, + "grad_norm": 0.0948280543088913, + "learning_rate": 2.2328218478560168e-05, + "loss": 9.543, + "step": 51580 + }, + { + "epoch": 0.25763439786261827, + "grad_norm": 0.09518975019454956, + "learning_rate": 2.2326716563618615e-05, + "loss": 9.5468, + "step": 51590 + }, + { + "epoch": 0.25768433668755775, + "grad_norm": 0.0924508273601532, + "learning_rate": 2.232521464867706e-05, + "loss": 9.5395, + "step": 51600 + }, + { + "epoch": 0.25773427551249717, + "grad_norm": 0.09529732912778854, + "learning_rate": 2.2323712733735515e-05, + "loss": 9.54, + "step": 51610 + }, + { + "epoch": 0.25778421433743665, + "grad_norm": 0.09561322629451752, + "learning_rate": 2.232221081879396e-05, + "loss": 9.5452, + "step": 51620 + }, + { + "epoch": 0.25783415316237607, + "grad_norm": 0.0942932739853859, + "learning_rate": 2.2320708903852415e-05, + "loss": 9.5309, + "step": 51630 + }, + { + "epoch": 0.25788409198731554, + "grad_norm": 0.09694642573595047, + "learning_rate": 2.2319206988910862e-05, + "loss": 9.5398, + "step": 51640 + }, + { + "epoch": 0.25793403081225497, + "grad_norm": 0.09523008018732071, + "learning_rate": 2.231770507396931e-05, + "loss": 9.536, + "step": 51650 + }, + { + "epoch": 0.25798396963719444, + "grad_norm": 0.09009785205125809, + "learning_rate": 2.2316203159027762e-05, + "loss": 9.5383, + "step": 51660 + }, + { + "epoch": 0.25803390846213387, + "grad_norm": 0.09198308736085892, + "learning_rate": 2.231470124408621e-05, + "loss": 9.5422, + "step": 51670 + }, + { + "epoch": 0.25808384728707334, + "grad_norm": 0.0955272763967514, + "learning_rate": 2.2313199329144663e-05, + "loss": 9.5352, + "step": 51680 + }, + { + "epoch": 0.25813378611201276, + "grad_norm": 0.0900355726480484, + "learning_rate": 2.231169741420311e-05, + "loss": 9.5293, + "step": 51690 + }, + { + "epoch": 0.25818372493695224, + "grad_norm": 0.0917731374502182, + "learning_rate": 2.2310195499261556e-05, + "loss": 9.5424, + "step": 51700 + }, + { + "epoch": 0.25823366376189166, + "grad_norm": 0.09225501120090485, + "learning_rate": 2.230869358432001e-05, + "loss": 9.5442, + "step": 51710 + }, + { + "epoch": 0.25828360258683114, + "grad_norm": 0.09712720662355423, + "learning_rate": 2.2307191669378457e-05, + "loss": 9.5388, + "step": 51720 + }, + { + "epoch": 0.25833354141177056, + "grad_norm": 0.09137700498104095, + "learning_rate": 2.230568975443691e-05, + "loss": 9.5402, + "step": 51730 + }, + { + "epoch": 0.25838348023671004, + "grad_norm": 0.09159164875745773, + "learning_rate": 2.2304187839495357e-05, + "loss": 9.5458, + "step": 51740 + }, + { + "epoch": 0.25843341906164946, + "grad_norm": 0.09769763052463531, + "learning_rate": 2.2302685924553804e-05, + "loss": 9.5476, + "step": 51750 + }, + { + "epoch": 0.25848335788658894, + "grad_norm": 0.09766492992639542, + "learning_rate": 2.2301184009612257e-05, + "loss": 9.5393, + "step": 51760 + }, + { + "epoch": 0.25853329671152836, + "grad_norm": 0.09677687287330627, + "learning_rate": 2.2299682094670704e-05, + "loss": 9.534, + "step": 51770 + }, + { + "epoch": 0.25858323553646784, + "grad_norm": 0.09470915794372559, + "learning_rate": 2.2298180179729158e-05, + "loss": 9.5395, + "step": 51780 + }, + { + "epoch": 0.25863317436140726, + "grad_norm": 0.08983705192804337, + "learning_rate": 2.2296678264787605e-05, + "loss": 9.5329, + "step": 51790 + }, + { + "epoch": 0.25868311318634674, + "grad_norm": 0.10104523599147797, + "learning_rate": 2.2295176349846055e-05, + "loss": 9.5324, + "step": 51800 + }, + { + "epoch": 0.25873305201128616, + "grad_norm": 0.09755957871675491, + "learning_rate": 2.2293674434904505e-05, + "loss": 9.5345, + "step": 51810 + }, + { + "epoch": 0.25878299083622563, + "grad_norm": 0.09510654956102371, + "learning_rate": 2.229217251996295e-05, + "loss": 9.5396, + "step": 51820 + }, + { + "epoch": 0.25883292966116506, + "grad_norm": 0.10025627911090851, + "learning_rate": 2.2290670605021405e-05, + "loss": 9.541, + "step": 51830 + }, + { + "epoch": 0.25888286848610453, + "grad_norm": 0.0947519913315773, + "learning_rate": 2.2289168690079852e-05, + "loss": 9.5382, + "step": 51840 + }, + { + "epoch": 0.25893280731104396, + "grad_norm": 0.09344780445098877, + "learning_rate": 2.2287666775138302e-05, + "loss": 9.5367, + "step": 51850 + }, + { + "epoch": 0.25898274613598343, + "grad_norm": 0.09429247677326202, + "learning_rate": 2.2286164860196752e-05, + "loss": 9.535, + "step": 51860 + }, + { + "epoch": 0.25903268496092285, + "grad_norm": 0.09451945871114731, + "learning_rate": 2.22846629452552e-05, + "loss": 9.5271, + "step": 51870 + }, + { + "epoch": 0.25908262378586233, + "grad_norm": 0.09508265554904938, + "learning_rate": 2.2283161030313653e-05, + "loss": 9.5307, + "step": 51880 + }, + { + "epoch": 0.25913256261080175, + "grad_norm": 0.09736274182796478, + "learning_rate": 2.22816591153721e-05, + "loss": 9.5394, + "step": 51890 + }, + { + "epoch": 0.25918250143574123, + "grad_norm": 0.09334583580493927, + "learning_rate": 2.228015720043055e-05, + "loss": 9.5279, + "step": 51900 + }, + { + "epoch": 0.25923244026068065, + "grad_norm": 0.0936061292886734, + "learning_rate": 2.2278655285489e-05, + "loss": 9.5434, + "step": 51910 + }, + { + "epoch": 0.25928237908562013, + "grad_norm": 0.09247171133756638, + "learning_rate": 2.2277153370547447e-05, + "loss": 9.5407, + "step": 51920 + }, + { + "epoch": 0.25933231791055955, + "grad_norm": 0.09118923544883728, + "learning_rate": 2.22756514556059e-05, + "loss": 9.533, + "step": 51930 + }, + { + "epoch": 0.25938225673549903, + "grad_norm": 0.09702643752098083, + "learning_rate": 2.2274149540664347e-05, + "loss": 9.5396, + "step": 51940 + }, + { + "epoch": 0.25943219556043845, + "grad_norm": 0.0896030142903328, + "learning_rate": 2.2272647625722797e-05, + "loss": 9.5384, + "step": 51950 + }, + { + "epoch": 0.2594821343853779, + "grad_norm": 0.09771367162466049, + "learning_rate": 2.2271145710781247e-05, + "loss": 9.531, + "step": 51960 + }, + { + "epoch": 0.25953207321031735, + "grad_norm": 0.0948578342795372, + "learning_rate": 2.2269643795839694e-05, + "loss": 9.5386, + "step": 51970 + }, + { + "epoch": 0.2595820120352568, + "grad_norm": 0.10098248720169067, + "learning_rate": 2.2268141880898148e-05, + "loss": 9.5335, + "step": 51980 + }, + { + "epoch": 0.25963195086019625, + "grad_norm": 0.08970336616039276, + "learning_rate": 2.2266639965956595e-05, + "loss": 9.5386, + "step": 51990 + }, + { + "epoch": 0.2596818896851357, + "grad_norm": 0.09408001601696014, + "learning_rate": 2.2265138051015045e-05, + "loss": 9.5355, + "step": 52000 + }, + { + "epoch": 0.25973182851007515, + "grad_norm": 0.08999434113502502, + "learning_rate": 2.2263636136073495e-05, + "loss": 9.5373, + "step": 52010 + }, + { + "epoch": 0.2597817673350146, + "grad_norm": 0.09277333319187164, + "learning_rate": 2.226213422113194e-05, + "loss": 9.5405, + "step": 52020 + }, + { + "epoch": 0.25983170615995405, + "grad_norm": 0.09205369651317596, + "learning_rate": 2.2260632306190395e-05, + "loss": 9.5254, + "step": 52030 + }, + { + "epoch": 0.2598816449848935, + "grad_norm": 0.09553728252649307, + "learning_rate": 2.2259130391248842e-05, + "loss": 9.5262, + "step": 52040 + }, + { + "epoch": 0.25993158380983294, + "grad_norm": 0.08813092112541199, + "learning_rate": 2.2257628476307292e-05, + "loss": 9.536, + "step": 52050 + }, + { + "epoch": 0.2599815226347724, + "grad_norm": 0.09862591326236725, + "learning_rate": 2.2256126561365742e-05, + "loss": 9.537, + "step": 52060 + }, + { + "epoch": 0.26003146145971184, + "grad_norm": 0.09814270585775375, + "learning_rate": 2.225462464642419e-05, + "loss": 9.535, + "step": 52070 + }, + { + "epoch": 0.2600814002846513, + "grad_norm": 0.08804214745759964, + "learning_rate": 2.2253122731482643e-05, + "loss": 9.5421, + "step": 52080 + }, + { + "epoch": 0.26013133910959074, + "grad_norm": 0.09738943725824356, + "learning_rate": 2.225162081654109e-05, + "loss": 9.5307, + "step": 52090 + }, + { + "epoch": 0.2601812779345302, + "grad_norm": 0.09532274305820465, + "learning_rate": 2.225011890159954e-05, + "loss": 9.541, + "step": 52100 + }, + { + "epoch": 0.26023121675946964, + "grad_norm": 0.09409879148006439, + "learning_rate": 2.224861698665799e-05, + "loss": 9.5364, + "step": 52110 + }, + { + "epoch": 0.2602811555844091, + "grad_norm": 0.09654781222343445, + "learning_rate": 2.224711507171644e-05, + "loss": 9.5443, + "step": 52120 + }, + { + "epoch": 0.26033109440934854, + "grad_norm": 0.09525051712989807, + "learning_rate": 2.224561315677489e-05, + "loss": 9.5404, + "step": 52130 + }, + { + "epoch": 0.260381033234288, + "grad_norm": 0.09068022668361664, + "learning_rate": 2.2244111241833337e-05, + "loss": 9.529, + "step": 52140 + }, + { + "epoch": 0.26043097205922744, + "grad_norm": 0.09324584156274796, + "learning_rate": 2.2242609326891787e-05, + "loss": 9.5402, + "step": 52150 + }, + { + "epoch": 0.2604809108841669, + "grad_norm": 0.0990164652466774, + "learning_rate": 2.2241107411950237e-05, + "loss": 9.5354, + "step": 52160 + }, + { + "epoch": 0.26053084970910634, + "grad_norm": 0.09122282266616821, + "learning_rate": 2.2239605497008688e-05, + "loss": 9.538, + "step": 52170 + }, + { + "epoch": 0.2605807885340458, + "grad_norm": 0.09253876656293869, + "learning_rate": 2.2238103582067138e-05, + "loss": 9.5287, + "step": 52180 + }, + { + "epoch": 0.26063072735898524, + "grad_norm": 0.09256315976381302, + "learning_rate": 2.2236601667125585e-05, + "loss": 9.5346, + "step": 52190 + }, + { + "epoch": 0.2606806661839247, + "grad_norm": 0.09512501209974289, + "learning_rate": 2.2235099752184035e-05, + "loss": 9.533, + "step": 52200 + }, + { + "epoch": 0.26073060500886414, + "grad_norm": 0.09339733421802521, + "learning_rate": 2.2233597837242485e-05, + "loss": 9.5352, + "step": 52210 + }, + { + "epoch": 0.2607805438338036, + "grad_norm": 0.10032928735017776, + "learning_rate": 2.2232095922300935e-05, + "loss": 9.5266, + "step": 52220 + }, + { + "epoch": 0.26083048265874303, + "grad_norm": 0.09287399053573608, + "learning_rate": 2.2230594007359385e-05, + "loss": 9.5329, + "step": 52230 + }, + { + "epoch": 0.2608804214836825, + "grad_norm": 0.09904525429010391, + "learning_rate": 2.2229092092417832e-05, + "loss": 9.534, + "step": 52240 + }, + { + "epoch": 0.26093036030862193, + "grad_norm": 0.10111285001039505, + "learning_rate": 2.2227590177476282e-05, + "loss": 9.5349, + "step": 52250 + }, + { + "epoch": 0.2609802991335614, + "grad_norm": 0.10239645838737488, + "learning_rate": 2.2226088262534732e-05, + "loss": 9.5331, + "step": 52260 + }, + { + "epoch": 0.26103023795850083, + "grad_norm": 0.09370391070842743, + "learning_rate": 2.2224586347593183e-05, + "loss": 9.5251, + "step": 52270 + }, + { + "epoch": 0.2610801767834403, + "grad_norm": 0.09311920404434204, + "learning_rate": 2.2223084432651633e-05, + "loss": 9.5298, + "step": 52280 + }, + { + "epoch": 0.26113011560837973, + "grad_norm": 0.0964302271604538, + "learning_rate": 2.222158251771008e-05, + "loss": 9.5224, + "step": 52290 + }, + { + "epoch": 0.2611800544333192, + "grad_norm": 0.09276770055294037, + "learning_rate": 2.222008060276853e-05, + "loss": 9.5257, + "step": 52300 + }, + { + "epoch": 0.26122999325825863, + "grad_norm": 0.09495028853416443, + "learning_rate": 2.221857868782698e-05, + "loss": 9.5282, + "step": 52310 + }, + { + "epoch": 0.2612799320831981, + "grad_norm": 0.09358720481395721, + "learning_rate": 2.221707677288543e-05, + "loss": 9.5337, + "step": 52320 + }, + { + "epoch": 0.26132987090813753, + "grad_norm": 0.09454384446144104, + "learning_rate": 2.221557485794388e-05, + "loss": 9.5262, + "step": 52330 + }, + { + "epoch": 0.261379809733077, + "grad_norm": 0.09213569760322571, + "learning_rate": 2.2214072943002327e-05, + "loss": 9.5294, + "step": 52340 + }, + { + "epoch": 0.26142974855801643, + "grad_norm": 0.08967851847410202, + "learning_rate": 2.2212571028060777e-05, + "loss": 9.5267, + "step": 52350 + }, + { + "epoch": 0.2614796873829559, + "grad_norm": 0.09236017614603043, + "learning_rate": 2.2211069113119227e-05, + "loss": 9.5257, + "step": 52360 + }, + { + "epoch": 0.2615296262078953, + "grad_norm": 0.09722200036048889, + "learning_rate": 2.2209567198177678e-05, + "loss": 9.526, + "step": 52370 + }, + { + "epoch": 0.2615795650328348, + "grad_norm": 0.09535818547010422, + "learning_rate": 2.2208065283236128e-05, + "loss": 9.5367, + "step": 52380 + }, + { + "epoch": 0.2616295038577742, + "grad_norm": 0.09433511644601822, + "learning_rate": 2.2206563368294575e-05, + "loss": 9.5386, + "step": 52390 + }, + { + "epoch": 0.2616794426827137, + "grad_norm": 0.09581523388624191, + "learning_rate": 2.2205061453353025e-05, + "loss": 9.5319, + "step": 52400 + }, + { + "epoch": 0.2617293815076531, + "grad_norm": 0.0966779887676239, + "learning_rate": 2.2203559538411475e-05, + "loss": 9.5217, + "step": 52410 + }, + { + "epoch": 0.26177932033259255, + "grad_norm": 0.09320307523012161, + "learning_rate": 2.2202057623469925e-05, + "loss": 9.5237, + "step": 52420 + }, + { + "epoch": 0.261829259157532, + "grad_norm": 0.09114668518304825, + "learning_rate": 2.2200555708528375e-05, + "loss": 9.53, + "step": 52430 + }, + { + "epoch": 0.26187919798247145, + "grad_norm": 0.09709704667329788, + "learning_rate": 2.2199053793586825e-05, + "loss": 9.5339, + "step": 52440 + }, + { + "epoch": 0.2619291368074109, + "grad_norm": 0.09141276776790619, + "learning_rate": 2.2197551878645272e-05, + "loss": 9.52, + "step": 52450 + }, + { + "epoch": 0.26197907563235034, + "grad_norm": 0.09864918142557144, + "learning_rate": 2.2196049963703722e-05, + "loss": 9.525, + "step": 52460 + }, + { + "epoch": 0.2620290144572898, + "grad_norm": 0.09360401332378387, + "learning_rate": 2.2194548048762173e-05, + "loss": 9.5368, + "step": 52470 + }, + { + "epoch": 0.26207895328222924, + "grad_norm": 0.09955708682537079, + "learning_rate": 2.2193046133820623e-05, + "loss": 9.5265, + "step": 52480 + }, + { + "epoch": 0.2621288921071687, + "grad_norm": 0.09114803373813629, + "learning_rate": 2.2191544218879073e-05, + "loss": 9.5272, + "step": 52490 + }, + { + "epoch": 0.26217883093210814, + "grad_norm": 0.09551452845335007, + "learning_rate": 2.219004230393752e-05, + "loss": 9.5356, + "step": 52500 + }, + { + "epoch": 0.2622287697570476, + "grad_norm": 0.09264279901981354, + "learning_rate": 2.218854038899597e-05, + "loss": 9.5254, + "step": 52510 + }, + { + "epoch": 0.26227870858198704, + "grad_norm": 0.09141562134027481, + "learning_rate": 2.218703847405442e-05, + "loss": 9.5245, + "step": 52520 + }, + { + "epoch": 0.2623286474069265, + "grad_norm": 0.0920877605676651, + "learning_rate": 2.218553655911287e-05, + "loss": 9.5334, + "step": 52530 + }, + { + "epoch": 0.26237858623186594, + "grad_norm": 0.09428731352090836, + "learning_rate": 2.218403464417132e-05, + "loss": 9.5331, + "step": 52540 + }, + { + "epoch": 0.2624285250568054, + "grad_norm": 0.09918078780174255, + "learning_rate": 2.2182532729229767e-05, + "loss": 9.515, + "step": 52550 + }, + { + "epoch": 0.26247846388174484, + "grad_norm": 0.08825034648180008, + "learning_rate": 2.2181030814288217e-05, + "loss": 9.5283, + "step": 52560 + }, + { + "epoch": 0.2625284027066843, + "grad_norm": 0.09419302642345428, + "learning_rate": 2.2179528899346668e-05, + "loss": 9.5242, + "step": 52570 + }, + { + "epoch": 0.26257834153162374, + "grad_norm": 0.09222215414047241, + "learning_rate": 2.2178026984405118e-05, + "loss": 9.5165, + "step": 52580 + }, + { + "epoch": 0.2626282803565632, + "grad_norm": 0.0944628119468689, + "learning_rate": 2.2176525069463568e-05, + "loss": 9.519, + "step": 52590 + }, + { + "epoch": 0.26267821918150264, + "grad_norm": 0.09280452877283096, + "learning_rate": 2.2175023154522015e-05, + "loss": 9.5191, + "step": 52600 + }, + { + "epoch": 0.2627281580064421, + "grad_norm": 0.09857330471277237, + "learning_rate": 2.2173521239580465e-05, + "loss": 9.5195, + "step": 52610 + }, + { + "epoch": 0.26277809683138154, + "grad_norm": 0.10611597448587418, + "learning_rate": 2.2172019324638915e-05, + "loss": 9.5201, + "step": 52620 + }, + { + "epoch": 0.262828035656321, + "grad_norm": 0.09467349946498871, + "learning_rate": 2.2170517409697365e-05, + "loss": 9.5355, + "step": 52630 + }, + { + "epoch": 0.26287797448126043, + "grad_norm": 0.0893920361995697, + "learning_rate": 2.2169015494755815e-05, + "loss": 9.5229, + "step": 52640 + }, + { + "epoch": 0.2629279133061999, + "grad_norm": 0.09524576365947723, + "learning_rate": 2.2167513579814262e-05, + "loss": 9.5212, + "step": 52650 + }, + { + "epoch": 0.26297785213113933, + "grad_norm": 0.09462094306945801, + "learning_rate": 2.2166011664872712e-05, + "loss": 9.5331, + "step": 52660 + }, + { + "epoch": 0.2630277909560788, + "grad_norm": 0.1007109135389328, + "learning_rate": 2.2164509749931163e-05, + "loss": 9.5255, + "step": 52670 + }, + { + "epoch": 0.26307772978101823, + "grad_norm": 0.09446932375431061, + "learning_rate": 2.2163007834989613e-05, + "loss": 9.5272, + "step": 52680 + }, + { + "epoch": 0.2631276686059577, + "grad_norm": 0.08853420615196228, + "learning_rate": 2.2161505920048063e-05, + "loss": 9.5298, + "step": 52690 + }, + { + "epoch": 0.26317760743089713, + "grad_norm": 0.09556553512811661, + "learning_rate": 2.216000400510651e-05, + "loss": 9.5255, + "step": 52700 + }, + { + "epoch": 0.2632275462558366, + "grad_norm": 0.09378375113010406, + "learning_rate": 2.215850209016496e-05, + "loss": 9.5139, + "step": 52710 + }, + { + "epoch": 0.26327748508077603, + "grad_norm": 0.09631894528865814, + "learning_rate": 2.215700017522341e-05, + "loss": 9.5168, + "step": 52720 + }, + { + "epoch": 0.2633274239057155, + "grad_norm": 0.09347805380821228, + "learning_rate": 2.215549826028186e-05, + "loss": 9.5312, + "step": 52730 + }, + { + "epoch": 0.26337736273065493, + "grad_norm": 0.09734681993722916, + "learning_rate": 2.215399634534031e-05, + "loss": 9.5144, + "step": 52740 + }, + { + "epoch": 0.2634273015555944, + "grad_norm": 0.08855658769607544, + "learning_rate": 2.2152494430398757e-05, + "loss": 9.5155, + "step": 52750 + }, + { + "epoch": 0.26347724038053383, + "grad_norm": 0.09233860671520233, + "learning_rate": 2.215099251545721e-05, + "loss": 9.5132, + "step": 52760 + }, + { + "epoch": 0.2635271792054733, + "grad_norm": 0.0949973613023758, + "learning_rate": 2.2149490600515658e-05, + "loss": 9.5156, + "step": 52770 + }, + { + "epoch": 0.2635771180304127, + "grad_norm": 0.10060823708772659, + "learning_rate": 2.2147988685574108e-05, + "loss": 9.5138, + "step": 52780 + }, + { + "epoch": 0.2636270568553522, + "grad_norm": 0.09591439366340637, + "learning_rate": 2.2146486770632558e-05, + "loss": 9.5292, + "step": 52790 + }, + { + "epoch": 0.2636769956802916, + "grad_norm": 0.09454584121704102, + "learning_rate": 2.2144984855691005e-05, + "loss": 9.5177, + "step": 52800 + }, + { + "epoch": 0.2637269345052311, + "grad_norm": 0.09631575644016266, + "learning_rate": 2.214348294074946e-05, + "loss": 9.5153, + "step": 52810 + }, + { + "epoch": 0.2637768733301705, + "grad_norm": 0.09541364014148712, + "learning_rate": 2.2141981025807905e-05, + "loss": 9.5259, + "step": 52820 + }, + { + "epoch": 0.26382681215511, + "grad_norm": 0.09758414328098297, + "learning_rate": 2.2140479110866355e-05, + "loss": 9.5273, + "step": 52830 + }, + { + "epoch": 0.2638767509800494, + "grad_norm": 0.09002380818128586, + "learning_rate": 2.2138977195924805e-05, + "loss": 9.5144, + "step": 52840 + }, + { + "epoch": 0.2639266898049889, + "grad_norm": 0.09718296676874161, + "learning_rate": 2.2137475280983252e-05, + "loss": 9.515, + "step": 52850 + }, + { + "epoch": 0.2639766286299283, + "grad_norm": 0.09190072864294052, + "learning_rate": 2.2135973366041706e-05, + "loss": 9.5239, + "step": 52860 + }, + { + "epoch": 0.2640265674548678, + "grad_norm": 0.0970335304737091, + "learning_rate": 2.2134471451100153e-05, + "loss": 9.5219, + "step": 52870 + }, + { + "epoch": 0.2640765062798072, + "grad_norm": 0.09237813204526901, + "learning_rate": 2.2132969536158603e-05, + "loss": 9.5324, + "step": 52880 + }, + { + "epoch": 0.2641264451047467, + "grad_norm": 0.09487534314393997, + "learning_rate": 2.2131467621217053e-05, + "loss": 9.5275, + "step": 52890 + }, + { + "epoch": 0.2641763839296861, + "grad_norm": 0.09346019476652145, + "learning_rate": 2.21299657062755e-05, + "loss": 9.521, + "step": 52900 + }, + { + "epoch": 0.2642263227546256, + "grad_norm": 0.09257887303829193, + "learning_rate": 2.2128463791333953e-05, + "loss": 9.5117, + "step": 52910 + }, + { + "epoch": 0.264276261579565, + "grad_norm": 0.09638823568820953, + "learning_rate": 2.21269618763924e-05, + "loss": 9.5236, + "step": 52920 + }, + { + "epoch": 0.2643262004045045, + "grad_norm": 0.09802195429801941, + "learning_rate": 2.212545996145085e-05, + "loss": 9.5187, + "step": 52930 + }, + { + "epoch": 0.2643761392294439, + "grad_norm": 0.09367150813341141, + "learning_rate": 2.21239580465093e-05, + "loss": 9.5275, + "step": 52940 + }, + { + "epoch": 0.2644260780543834, + "grad_norm": 0.09576943516731262, + "learning_rate": 2.2122456131567747e-05, + "loss": 9.5152, + "step": 52950 + }, + { + "epoch": 0.2644760168793228, + "grad_norm": 0.10221412777900696, + "learning_rate": 2.21209542166262e-05, + "loss": 9.5211, + "step": 52960 + }, + { + "epoch": 0.2645259557042623, + "grad_norm": 0.08980675041675568, + "learning_rate": 2.2119452301684648e-05, + "loss": 9.5274, + "step": 52970 + }, + { + "epoch": 0.2645758945292017, + "grad_norm": 0.0930297002196312, + "learning_rate": 2.2117950386743098e-05, + "loss": 9.5114, + "step": 52980 + }, + { + "epoch": 0.2646258333541412, + "grad_norm": 0.09479690343141556, + "learning_rate": 2.2116448471801548e-05, + "loss": 9.5105, + "step": 52990 + }, + { + "epoch": 0.2646757721790806, + "grad_norm": 0.0916314572095871, + "learning_rate": 2.2114946556859995e-05, + "loss": 9.5178, + "step": 53000 + }, + { + "epoch": 0.2647257110040201, + "grad_norm": 0.09060303866863251, + "learning_rate": 2.211344464191845e-05, + "loss": 9.5146, + "step": 53010 + }, + { + "epoch": 0.2647756498289595, + "grad_norm": 0.09311266988515854, + "learning_rate": 2.2111942726976895e-05, + "loss": 9.5126, + "step": 53020 + }, + { + "epoch": 0.264825588653899, + "grad_norm": 0.09283456206321716, + "learning_rate": 2.2110440812035345e-05, + "loss": 9.5175, + "step": 53030 + }, + { + "epoch": 0.2648755274788384, + "grad_norm": 0.09691060334444046, + "learning_rate": 2.2108938897093796e-05, + "loss": 9.5163, + "step": 53040 + }, + { + "epoch": 0.2649254663037779, + "grad_norm": 0.09444165974855423, + "learning_rate": 2.2107436982152242e-05, + "loss": 9.5194, + "step": 53050 + }, + { + "epoch": 0.2649754051287173, + "grad_norm": 0.0917515829205513, + "learning_rate": 2.2105935067210696e-05, + "loss": 9.522, + "step": 53060 + }, + { + "epoch": 0.2650253439536568, + "grad_norm": 0.093546062707901, + "learning_rate": 2.2104433152269143e-05, + "loss": 9.5124, + "step": 53070 + }, + { + "epoch": 0.2650752827785962, + "grad_norm": 0.09106338769197464, + "learning_rate": 2.2102931237327593e-05, + "loss": 9.5194, + "step": 53080 + }, + { + "epoch": 0.2651252216035357, + "grad_norm": 0.09387896209955215, + "learning_rate": 2.2101429322386043e-05, + "loss": 9.5174, + "step": 53090 + }, + { + "epoch": 0.2651751604284751, + "grad_norm": 0.09550568461418152, + "learning_rate": 2.209992740744449e-05, + "loss": 9.5169, + "step": 53100 + }, + { + "epoch": 0.2652250992534146, + "grad_norm": 0.09244140982627869, + "learning_rate": 2.2098425492502943e-05, + "loss": 9.513, + "step": 53110 + }, + { + "epoch": 0.265275038078354, + "grad_norm": 0.09430118650197983, + "learning_rate": 2.209692357756139e-05, + "loss": 9.5206, + "step": 53120 + }, + { + "epoch": 0.2653249769032935, + "grad_norm": 0.10021936148405075, + "learning_rate": 2.2095421662619844e-05, + "loss": 9.5203, + "step": 53130 + }, + { + "epoch": 0.2653749157282329, + "grad_norm": 0.09453583508729935, + "learning_rate": 2.209391974767829e-05, + "loss": 9.5158, + "step": 53140 + }, + { + "epoch": 0.2654248545531724, + "grad_norm": 0.09471450746059418, + "learning_rate": 2.2092417832736737e-05, + "loss": 9.5179, + "step": 53150 + }, + { + "epoch": 0.2654747933781118, + "grad_norm": 0.0972927138209343, + "learning_rate": 2.209091591779519e-05, + "loss": 9.5145, + "step": 53160 + }, + { + "epoch": 0.2655247322030513, + "grad_norm": 0.09311800450086594, + "learning_rate": 2.2089414002853638e-05, + "loss": 9.5106, + "step": 53170 + }, + { + "epoch": 0.2655746710279907, + "grad_norm": 0.09145248681306839, + "learning_rate": 2.208791208791209e-05, + "loss": 9.5156, + "step": 53180 + }, + { + "epoch": 0.2656246098529302, + "grad_norm": 0.09487327933311462, + "learning_rate": 2.2086410172970538e-05, + "loss": 9.5138, + "step": 53190 + }, + { + "epoch": 0.2656745486778696, + "grad_norm": 0.09175003319978714, + "learning_rate": 2.2084908258028985e-05, + "loss": 9.5251, + "step": 53200 + }, + { + "epoch": 0.2657244875028091, + "grad_norm": 0.09299546480178833, + "learning_rate": 2.208340634308744e-05, + "loss": 9.5187, + "step": 53210 + }, + { + "epoch": 0.2657744263277485, + "grad_norm": 0.09463711827993393, + "learning_rate": 2.2081904428145885e-05, + "loss": 9.5196, + "step": 53220 + }, + { + "epoch": 0.265824365152688, + "grad_norm": 0.09696915000677109, + "learning_rate": 2.208040251320434e-05, + "loss": 9.5094, + "step": 53230 + }, + { + "epoch": 0.2658743039776274, + "grad_norm": 0.1047021821141243, + "learning_rate": 2.2078900598262786e-05, + "loss": 9.5022, + "step": 53240 + }, + { + "epoch": 0.2659242428025669, + "grad_norm": 0.09355685114860535, + "learning_rate": 2.2077398683321232e-05, + "loss": 9.5123, + "step": 53250 + }, + { + "epoch": 0.2659741816275063, + "grad_norm": 0.09120495617389679, + "learning_rate": 2.2075896768379686e-05, + "loss": 9.5105, + "step": 53260 + }, + { + "epoch": 0.2660241204524458, + "grad_norm": 0.08929228782653809, + "learning_rate": 2.2074394853438133e-05, + "loss": 9.5146, + "step": 53270 + }, + { + "epoch": 0.2660740592773852, + "grad_norm": 0.09182194620370865, + "learning_rate": 2.2072892938496586e-05, + "loss": 9.5202, + "step": 53280 + }, + { + "epoch": 0.2661239981023247, + "grad_norm": 0.0908316969871521, + "learning_rate": 2.2071391023555033e-05, + "loss": 9.515, + "step": 53290 + }, + { + "epoch": 0.2661739369272641, + "grad_norm": 0.09303124248981476, + "learning_rate": 2.206988910861348e-05, + "loss": 9.5074, + "step": 53300 + }, + { + "epoch": 0.2662238757522036, + "grad_norm": 0.09067052602767944, + "learning_rate": 2.2068387193671933e-05, + "loss": 9.5062, + "step": 53310 + }, + { + "epoch": 0.266273814577143, + "grad_norm": 0.09253434091806412, + "learning_rate": 2.206688527873038e-05, + "loss": 9.5053, + "step": 53320 + }, + { + "epoch": 0.2663237534020825, + "grad_norm": 0.09222152829170227, + "learning_rate": 2.2065383363788834e-05, + "loss": 9.5116, + "step": 53330 + }, + { + "epoch": 0.2663736922270219, + "grad_norm": 0.09831732511520386, + "learning_rate": 2.206388144884728e-05, + "loss": 9.5086, + "step": 53340 + }, + { + "epoch": 0.2664236310519614, + "grad_norm": 0.08993234485387802, + "learning_rate": 2.2062379533905727e-05, + "loss": 9.5213, + "step": 53350 + }, + { + "epoch": 0.2664735698769008, + "grad_norm": 0.09357697516679764, + "learning_rate": 2.206087761896418e-05, + "loss": 9.5129, + "step": 53360 + }, + { + "epoch": 0.2665235087018403, + "grad_norm": 0.09632600098848343, + "learning_rate": 2.2059375704022628e-05, + "loss": 9.5079, + "step": 53370 + }, + { + "epoch": 0.2665734475267797, + "grad_norm": 0.09463771432638168, + "learning_rate": 2.205787378908108e-05, + "loss": 9.5108, + "step": 53380 + }, + { + "epoch": 0.26662338635171917, + "grad_norm": 0.09664889425039291, + "learning_rate": 2.2056371874139528e-05, + "loss": 9.5136, + "step": 53390 + }, + { + "epoch": 0.2666733251766586, + "grad_norm": 0.0904850959777832, + "learning_rate": 2.2054869959197975e-05, + "loss": 9.5074, + "step": 53400 + }, + { + "epoch": 0.266723264001598, + "grad_norm": 0.0935087651014328, + "learning_rate": 2.205336804425643e-05, + "loss": 9.5163, + "step": 53410 + }, + { + "epoch": 0.2667732028265375, + "grad_norm": 0.09283655881881714, + "learning_rate": 2.2051866129314875e-05, + "loss": 9.517, + "step": 53420 + }, + { + "epoch": 0.2668231416514769, + "grad_norm": 0.09471392631530762, + "learning_rate": 2.205036421437333e-05, + "loss": 9.5124, + "step": 53430 + }, + { + "epoch": 0.2668730804764164, + "grad_norm": 0.10181424766778946, + "learning_rate": 2.2048862299431776e-05, + "loss": 9.4963, + "step": 53440 + }, + { + "epoch": 0.2669230193013558, + "grad_norm": 0.09600277245044708, + "learning_rate": 2.2047360384490226e-05, + "loss": 9.5115, + "step": 53450 + }, + { + "epoch": 0.2669729581262953, + "grad_norm": 0.10142569988965988, + "learning_rate": 2.2045858469548676e-05, + "loss": 9.5058, + "step": 53460 + }, + { + "epoch": 0.2670228969512347, + "grad_norm": 0.09453926235437393, + "learning_rate": 2.2044356554607123e-05, + "loss": 9.5034, + "step": 53470 + }, + { + "epoch": 0.2670728357761742, + "grad_norm": 0.09854920208454132, + "learning_rate": 2.2042854639665576e-05, + "loss": 9.5109, + "step": 53480 + }, + { + "epoch": 0.2671227746011136, + "grad_norm": 0.09525490552186966, + "learning_rate": 2.2041352724724023e-05, + "loss": 9.5093, + "step": 53490 + }, + { + "epoch": 0.2671727134260531, + "grad_norm": 0.10124125331640244, + "learning_rate": 2.2039850809782473e-05, + "loss": 9.5103, + "step": 53500 + }, + { + "epoch": 0.2672226522509925, + "grad_norm": 0.09206513315439224, + "learning_rate": 2.2038348894840923e-05, + "loss": 9.5106, + "step": 53510 + }, + { + "epoch": 0.267272591075932, + "grad_norm": 0.09180595725774765, + "learning_rate": 2.203684697989937e-05, + "loss": 9.5057, + "step": 53520 + }, + { + "epoch": 0.2673225299008714, + "grad_norm": 0.09658707678318024, + "learning_rate": 2.2035345064957824e-05, + "loss": 9.5024, + "step": 53530 + }, + { + "epoch": 0.2673724687258109, + "grad_norm": 0.08750133216381073, + "learning_rate": 2.203384315001627e-05, + "loss": 9.5091, + "step": 53540 + }, + { + "epoch": 0.2674224075507503, + "grad_norm": 0.09664824604988098, + "learning_rate": 2.203234123507472e-05, + "loss": 9.5102, + "step": 53550 + }, + { + "epoch": 0.2674723463756898, + "grad_norm": 0.09974256902933121, + "learning_rate": 2.203083932013317e-05, + "loss": 9.5205, + "step": 53560 + }, + { + "epoch": 0.2675222852006292, + "grad_norm": 0.08979537338018417, + "learning_rate": 2.2029337405191618e-05, + "loss": 9.5085, + "step": 53570 + }, + { + "epoch": 0.2675722240255687, + "grad_norm": 0.09149536490440369, + "learning_rate": 2.202783549025007e-05, + "loss": 9.5113, + "step": 53580 + }, + { + "epoch": 0.2676221628505081, + "grad_norm": 0.09277680516242981, + "learning_rate": 2.2026333575308518e-05, + "loss": 9.5072, + "step": 53590 + }, + { + "epoch": 0.2676721016754476, + "grad_norm": 0.09466489404439926, + "learning_rate": 2.2024831660366968e-05, + "loss": 9.5135, + "step": 53600 + }, + { + "epoch": 0.267722040500387, + "grad_norm": 0.10129041969776154, + "learning_rate": 2.202332974542542e-05, + "loss": 9.5097, + "step": 53610 + }, + { + "epoch": 0.2677719793253265, + "grad_norm": 0.09101355820894241, + "learning_rate": 2.2021827830483865e-05, + "loss": 9.5139, + "step": 53620 + }, + { + "epoch": 0.2678219181502659, + "grad_norm": 0.09977221488952637, + "learning_rate": 2.202032591554232e-05, + "loss": 9.5064, + "step": 53630 + }, + { + "epoch": 0.2678718569752054, + "grad_norm": 0.09725436568260193, + "learning_rate": 2.2018824000600766e-05, + "loss": 9.505, + "step": 53640 + }, + { + "epoch": 0.2679217958001448, + "grad_norm": 0.09925449639558792, + "learning_rate": 2.2017322085659216e-05, + "loss": 9.5171, + "step": 53650 + }, + { + "epoch": 0.2679717346250843, + "grad_norm": 0.09515231102705002, + "learning_rate": 2.2015820170717666e-05, + "loss": 9.5013, + "step": 53660 + }, + { + "epoch": 0.2680216734500237, + "grad_norm": 0.09482384473085403, + "learning_rate": 2.2014318255776113e-05, + "loss": 9.4974, + "step": 53670 + }, + { + "epoch": 0.2680716122749632, + "grad_norm": 0.09810610860586166, + "learning_rate": 2.2012816340834566e-05, + "loss": 9.5044, + "step": 53680 + }, + { + "epoch": 0.2681215510999026, + "grad_norm": 0.09548275917768478, + "learning_rate": 2.2011314425893013e-05, + "loss": 9.5023, + "step": 53690 + }, + { + "epoch": 0.2681714899248421, + "grad_norm": 0.10010819137096405, + "learning_rate": 2.2009812510951463e-05, + "loss": 9.5103, + "step": 53700 + }, + { + "epoch": 0.2682214287497815, + "grad_norm": 0.09645957499742508, + "learning_rate": 2.2008310596009913e-05, + "loss": 9.4937, + "step": 53710 + }, + { + "epoch": 0.268271367574721, + "grad_norm": 0.09398103505373001, + "learning_rate": 2.200680868106836e-05, + "loss": 9.496, + "step": 53720 + }, + { + "epoch": 0.2683213063996604, + "grad_norm": 0.09286051243543625, + "learning_rate": 2.2005306766126814e-05, + "loss": 9.5036, + "step": 53730 + }, + { + "epoch": 0.2683712452245999, + "grad_norm": 0.09161993116140366, + "learning_rate": 2.200380485118526e-05, + "loss": 9.5114, + "step": 53740 + }, + { + "epoch": 0.2684211840495393, + "grad_norm": 0.09624673426151276, + "learning_rate": 2.200230293624371e-05, + "loss": 9.5007, + "step": 53750 + }, + { + "epoch": 0.2684711228744788, + "grad_norm": 0.08799513429403305, + "learning_rate": 2.200080102130216e-05, + "loss": 9.5013, + "step": 53760 + }, + { + "epoch": 0.2685210616994182, + "grad_norm": 0.09283116459846497, + "learning_rate": 2.199929910636061e-05, + "loss": 9.5102, + "step": 53770 + }, + { + "epoch": 0.2685710005243577, + "grad_norm": 0.09318029880523682, + "learning_rate": 2.199779719141906e-05, + "loss": 9.509, + "step": 53780 + }, + { + "epoch": 0.2686209393492971, + "grad_norm": 0.09814724326133728, + "learning_rate": 2.1996295276477508e-05, + "loss": 9.502, + "step": 53790 + }, + { + "epoch": 0.26867087817423657, + "grad_norm": 0.08983851969242096, + "learning_rate": 2.1994793361535958e-05, + "loss": 9.504, + "step": 53800 + }, + { + "epoch": 0.268720816999176, + "grad_norm": 0.09457878768444061, + "learning_rate": 2.199329144659441e-05, + "loss": 9.4922, + "step": 53810 + }, + { + "epoch": 0.26877075582411547, + "grad_norm": 0.09290546178817749, + "learning_rate": 2.199178953165286e-05, + "loss": 9.497, + "step": 53820 + }, + { + "epoch": 0.2688206946490549, + "grad_norm": 0.09579872339963913, + "learning_rate": 2.199028761671131e-05, + "loss": 9.5007, + "step": 53830 + }, + { + "epoch": 0.26887063347399437, + "grad_norm": 0.09526251256465912, + "learning_rate": 2.1988785701769756e-05, + "loss": 9.4973, + "step": 53840 + }, + { + "epoch": 0.2689205722989338, + "grad_norm": 0.09075421094894409, + "learning_rate": 2.1987283786828206e-05, + "loss": 9.4985, + "step": 53850 + }, + { + "epoch": 0.26897051112387327, + "grad_norm": 0.09878144413232803, + "learning_rate": 2.1985781871886656e-05, + "loss": 9.4944, + "step": 53860 + }, + { + "epoch": 0.2690204499488127, + "grad_norm": 0.092621810734272, + "learning_rate": 2.1984279956945106e-05, + "loss": 9.4981, + "step": 53870 + }, + { + "epoch": 0.26907038877375217, + "grad_norm": 0.0957510843873024, + "learning_rate": 2.1982778042003556e-05, + "loss": 9.5005, + "step": 53880 + }, + { + "epoch": 0.2691203275986916, + "grad_norm": 0.0936402752995491, + "learning_rate": 2.1981276127062003e-05, + "loss": 9.4948, + "step": 53890 + }, + { + "epoch": 0.26917026642363107, + "grad_norm": 0.09587475657463074, + "learning_rate": 2.1979774212120453e-05, + "loss": 9.5018, + "step": 53900 + }, + { + "epoch": 0.2692202052485705, + "grad_norm": 0.09456077963113785, + "learning_rate": 2.1978272297178903e-05, + "loss": 9.496, + "step": 53910 + }, + { + "epoch": 0.26927014407350996, + "grad_norm": 0.10105657577514648, + "learning_rate": 2.1976770382237354e-05, + "loss": 9.4946, + "step": 53920 + }, + { + "epoch": 0.2693200828984494, + "grad_norm": 0.09980333596467972, + "learning_rate": 2.1975268467295804e-05, + "loss": 9.4955, + "step": 53930 + }, + { + "epoch": 0.26937002172338886, + "grad_norm": 0.09290049970149994, + "learning_rate": 2.197376655235425e-05, + "loss": 9.5054, + "step": 53940 + }, + { + "epoch": 0.2694199605483283, + "grad_norm": 0.09242033958435059, + "learning_rate": 2.19722646374127e-05, + "loss": 9.4997, + "step": 53950 + }, + { + "epoch": 0.26946989937326776, + "grad_norm": 0.09912257641553879, + "learning_rate": 2.197076272247115e-05, + "loss": 9.4969, + "step": 53960 + }, + { + "epoch": 0.2695198381982072, + "grad_norm": 0.10186777263879776, + "learning_rate": 2.19692608075296e-05, + "loss": 9.4975, + "step": 53970 + }, + { + "epoch": 0.26956977702314666, + "grad_norm": 0.09263597428798676, + "learning_rate": 2.196775889258805e-05, + "loss": 9.4902, + "step": 53980 + }, + { + "epoch": 0.2696197158480861, + "grad_norm": 0.09329408407211304, + "learning_rate": 2.1966256977646498e-05, + "loss": 9.4953, + "step": 53990 + }, + { + "epoch": 0.26966965467302556, + "grad_norm": 0.09728638082742691, + "learning_rate": 2.1964755062704948e-05, + "loss": 9.5026, + "step": 54000 + }, + { + "epoch": 0.269719593497965, + "grad_norm": 0.0942746102809906, + "learning_rate": 2.19632531477634e-05, + "loss": 9.5005, + "step": 54010 + }, + { + "epoch": 0.26976953232290446, + "grad_norm": 0.08841151744127274, + "learning_rate": 2.196175123282185e-05, + "loss": 9.501, + "step": 54020 + }, + { + "epoch": 0.2698194711478439, + "grad_norm": 0.09076721221208572, + "learning_rate": 2.19602493178803e-05, + "loss": 9.4855, + "step": 54030 + }, + { + "epoch": 0.26986940997278336, + "grad_norm": 0.09586405754089355, + "learning_rate": 2.1958747402938746e-05, + "loss": 9.5019, + "step": 54040 + }, + { + "epoch": 0.2699193487977228, + "grad_norm": 0.09588458389043808, + "learning_rate": 2.1957245487997196e-05, + "loss": 9.4985, + "step": 54050 + }, + { + "epoch": 0.26996928762266226, + "grad_norm": 0.09645280987024307, + "learning_rate": 2.1955743573055646e-05, + "loss": 9.5033, + "step": 54060 + }, + { + "epoch": 0.2700192264476017, + "grad_norm": 0.0946224108338356, + "learning_rate": 2.1954241658114096e-05, + "loss": 9.507, + "step": 54070 + }, + { + "epoch": 0.27006916527254116, + "grad_norm": 0.09653602540493011, + "learning_rate": 2.1952739743172546e-05, + "loss": 9.5072, + "step": 54080 + }, + { + "epoch": 0.2701191040974806, + "grad_norm": 0.09602764248847961, + "learning_rate": 2.1951237828230996e-05, + "loss": 9.4974, + "step": 54090 + }, + { + "epoch": 0.27016904292242006, + "grad_norm": 0.09846575558185577, + "learning_rate": 2.1949735913289443e-05, + "loss": 9.4918, + "step": 54100 + }, + { + "epoch": 0.2702189817473595, + "grad_norm": 0.09469395130872726, + "learning_rate": 2.1948233998347893e-05, + "loss": 9.4976, + "step": 54110 + }, + { + "epoch": 0.27026892057229895, + "grad_norm": 0.09206046909093857, + "learning_rate": 2.1946732083406344e-05, + "loss": 9.494, + "step": 54120 + }, + { + "epoch": 0.2703188593972384, + "grad_norm": 0.10043230652809143, + "learning_rate": 2.1945230168464794e-05, + "loss": 9.4947, + "step": 54130 + }, + { + "epoch": 0.27036879822217785, + "grad_norm": 0.09006008505821228, + "learning_rate": 2.1943728253523244e-05, + "loss": 9.5057, + "step": 54140 + }, + { + "epoch": 0.2704187370471173, + "grad_norm": 0.09778618067502975, + "learning_rate": 2.194222633858169e-05, + "loss": 9.5036, + "step": 54150 + }, + { + "epoch": 0.27046867587205675, + "grad_norm": 0.09430201351642609, + "learning_rate": 2.194072442364014e-05, + "loss": 9.5066, + "step": 54160 + }, + { + "epoch": 0.2705186146969962, + "grad_norm": 0.09432802349328995, + "learning_rate": 2.193922250869859e-05, + "loss": 9.4916, + "step": 54170 + }, + { + "epoch": 0.27056855352193565, + "grad_norm": 0.09633664041757584, + "learning_rate": 2.193772059375704e-05, + "loss": 9.4962, + "step": 54180 + }, + { + "epoch": 0.2706184923468751, + "grad_norm": 0.09365957230329514, + "learning_rate": 2.193621867881549e-05, + "loss": 9.5004, + "step": 54190 + }, + { + "epoch": 0.27066843117181455, + "grad_norm": 0.0957445502281189, + "learning_rate": 2.1934716763873938e-05, + "loss": 9.5006, + "step": 54200 + }, + { + "epoch": 0.27071836999675397, + "grad_norm": 0.09612950682640076, + "learning_rate": 2.193321484893239e-05, + "loss": 9.4985, + "step": 54210 + }, + { + "epoch": 0.27076830882169345, + "grad_norm": 0.09785423427820206, + "learning_rate": 2.193171293399084e-05, + "loss": 9.4971, + "step": 54220 + }, + { + "epoch": 0.27081824764663287, + "grad_norm": 0.09333615750074387, + "learning_rate": 2.193021101904929e-05, + "loss": 9.4973, + "step": 54230 + }, + { + "epoch": 0.27086818647157235, + "grad_norm": 0.10054484754800797, + "learning_rate": 2.192870910410774e-05, + "loss": 9.5062, + "step": 54240 + }, + { + "epoch": 0.27091812529651177, + "grad_norm": 0.09158135205507278, + "learning_rate": 2.1927207189166186e-05, + "loss": 9.5018, + "step": 54250 + }, + { + "epoch": 0.27096806412145125, + "grad_norm": 0.09675116837024689, + "learning_rate": 2.1925705274224636e-05, + "loss": 9.5022, + "step": 54260 + }, + { + "epoch": 0.27101800294639067, + "grad_norm": 0.09301751852035522, + "learning_rate": 2.1924203359283086e-05, + "loss": 9.4876, + "step": 54270 + }, + { + "epoch": 0.27106794177133015, + "grad_norm": 0.09641415625810623, + "learning_rate": 2.1922701444341536e-05, + "loss": 9.489, + "step": 54280 + }, + { + "epoch": 0.27111788059626957, + "grad_norm": 0.09570831805467606, + "learning_rate": 2.1921199529399986e-05, + "loss": 9.4929, + "step": 54290 + }, + { + "epoch": 0.27116781942120904, + "grad_norm": 0.09328965097665787, + "learning_rate": 2.1919697614458433e-05, + "loss": 9.4963, + "step": 54300 + }, + { + "epoch": 0.27121775824614847, + "grad_norm": 0.09763366729021072, + "learning_rate": 2.1918195699516883e-05, + "loss": 9.4935, + "step": 54310 + }, + { + "epoch": 0.27126769707108794, + "grad_norm": 0.097484290599823, + "learning_rate": 2.1916693784575334e-05, + "loss": 9.4983, + "step": 54320 + }, + { + "epoch": 0.27131763589602736, + "grad_norm": 0.09273017942905426, + "learning_rate": 2.1915191869633784e-05, + "loss": 9.5023, + "step": 54330 + }, + { + "epoch": 0.27136757472096684, + "grad_norm": 0.08936069160699844, + "learning_rate": 2.1913689954692234e-05, + "loss": 9.4918, + "step": 54340 + }, + { + "epoch": 0.27141751354590626, + "grad_norm": 0.08930017799139023, + "learning_rate": 2.191218803975068e-05, + "loss": 9.4937, + "step": 54350 + }, + { + "epoch": 0.27146745237084574, + "grad_norm": 0.0880388543009758, + "learning_rate": 2.191068612480913e-05, + "loss": 9.4962, + "step": 54360 + }, + { + "epoch": 0.27151739119578516, + "grad_norm": 0.10103661566972733, + "learning_rate": 2.190918420986758e-05, + "loss": 9.4915, + "step": 54370 + }, + { + "epoch": 0.27156733002072464, + "grad_norm": 0.09336966276168823, + "learning_rate": 2.190768229492603e-05, + "loss": 9.487, + "step": 54380 + }, + { + "epoch": 0.27161726884566406, + "grad_norm": 0.09124721586704254, + "learning_rate": 2.190618037998448e-05, + "loss": 9.4872, + "step": 54390 + }, + { + "epoch": 0.2716672076706035, + "grad_norm": 0.09562219679355621, + "learning_rate": 2.1904678465042928e-05, + "loss": 9.4953, + "step": 54400 + }, + { + "epoch": 0.27171714649554296, + "grad_norm": 0.0949045866727829, + "learning_rate": 2.1903176550101382e-05, + "loss": 9.505, + "step": 54410 + }, + { + "epoch": 0.2717670853204824, + "grad_norm": 0.09756708145141602, + "learning_rate": 2.190167463515983e-05, + "loss": 9.5009, + "step": 54420 + }, + { + "epoch": 0.27181702414542186, + "grad_norm": 0.08797673135995865, + "learning_rate": 2.190017272021828e-05, + "loss": 9.4954, + "step": 54430 + }, + { + "epoch": 0.2718669629703613, + "grad_norm": 0.09841335564851761, + "learning_rate": 2.189867080527673e-05, + "loss": 9.4911, + "step": 54440 + }, + { + "epoch": 0.27191690179530076, + "grad_norm": 0.09503602236509323, + "learning_rate": 2.1897168890335176e-05, + "loss": 9.5034, + "step": 54450 + }, + { + "epoch": 0.2719668406202402, + "grad_norm": 0.09067325294017792, + "learning_rate": 2.189566697539363e-05, + "loss": 9.4863, + "step": 54460 + }, + { + "epoch": 0.27201677944517966, + "grad_norm": 0.100099578499794, + "learning_rate": 2.1894165060452076e-05, + "loss": 9.5065, + "step": 54470 + }, + { + "epoch": 0.2720667182701191, + "grad_norm": 0.09303244203329086, + "learning_rate": 2.1892663145510526e-05, + "loss": 9.4933, + "step": 54480 + }, + { + "epoch": 0.27211665709505856, + "grad_norm": 0.09559056162834167, + "learning_rate": 2.1891161230568977e-05, + "loss": 9.492, + "step": 54490 + }, + { + "epoch": 0.272166595919998, + "grad_norm": 0.09210564941167831, + "learning_rate": 2.1889659315627423e-05, + "loss": 9.5017, + "step": 54500 + }, + { + "epoch": 0.27221653474493746, + "grad_norm": 0.09366162121295929, + "learning_rate": 2.1888157400685877e-05, + "loss": 9.493, + "step": 54510 + }, + { + "epoch": 0.2722664735698769, + "grad_norm": 0.09506445378065109, + "learning_rate": 2.1886655485744324e-05, + "loss": 9.4811, + "step": 54520 + }, + { + "epoch": 0.27231641239481635, + "grad_norm": 0.10406965017318726, + "learning_rate": 2.1885153570802774e-05, + "loss": 9.4821, + "step": 54530 + }, + { + "epoch": 0.2723663512197558, + "grad_norm": 0.08874404430389404, + "learning_rate": 2.1883651655861224e-05, + "loss": 9.5002, + "step": 54540 + }, + { + "epoch": 0.27241629004469525, + "grad_norm": 0.09706084430217743, + "learning_rate": 2.188214974091967e-05, + "loss": 9.4876, + "step": 54550 + }, + { + "epoch": 0.2724662288696347, + "grad_norm": 0.09322942048311234, + "learning_rate": 2.1880647825978124e-05, + "loss": 9.4887, + "step": 54560 + }, + { + "epoch": 0.27251616769457415, + "grad_norm": 0.09634638577699661, + "learning_rate": 2.187914591103657e-05, + "loss": 9.4933, + "step": 54570 + }, + { + "epoch": 0.2725661065195136, + "grad_norm": 0.09559839218854904, + "learning_rate": 2.187764399609502e-05, + "loss": 9.4836, + "step": 54580 + }, + { + "epoch": 0.27261604534445305, + "grad_norm": 0.08715943992137909, + "learning_rate": 2.187614208115347e-05, + "loss": 9.4903, + "step": 54590 + }, + { + "epoch": 0.2726659841693925, + "grad_norm": 0.09356392920017242, + "learning_rate": 2.1874640166211918e-05, + "loss": 9.4909, + "step": 54600 + }, + { + "epoch": 0.27271592299433195, + "grad_norm": 0.09131593257188797, + "learning_rate": 2.1873138251270372e-05, + "loss": 9.4938, + "step": 54610 + }, + { + "epoch": 0.27276586181927137, + "grad_norm": 0.09553367644548416, + "learning_rate": 2.187163633632882e-05, + "loss": 9.4918, + "step": 54620 + }, + { + "epoch": 0.27281580064421085, + "grad_norm": 0.09892634302377701, + "learning_rate": 2.187013442138727e-05, + "loss": 9.484, + "step": 54630 + }, + { + "epoch": 0.27286573946915027, + "grad_norm": 0.09537822008132935, + "learning_rate": 2.186863250644572e-05, + "loss": 9.4901, + "step": 54640 + }, + { + "epoch": 0.27291567829408975, + "grad_norm": 0.09602360427379608, + "learning_rate": 2.1867130591504166e-05, + "loss": 9.4821, + "step": 54650 + }, + { + "epoch": 0.27296561711902917, + "grad_norm": 0.09913076460361481, + "learning_rate": 2.186562867656262e-05, + "loss": 9.488, + "step": 54660 + }, + { + "epoch": 0.27301555594396865, + "grad_norm": 0.09501135349273682, + "learning_rate": 2.1864126761621066e-05, + "loss": 9.4864, + "step": 54670 + }, + { + "epoch": 0.27306549476890807, + "grad_norm": 0.0974724143743515, + "learning_rate": 2.1862624846679516e-05, + "loss": 9.4942, + "step": 54680 + }, + { + "epoch": 0.27311543359384755, + "grad_norm": 0.09023966640233994, + "learning_rate": 2.1861122931737967e-05, + "loss": 9.4859, + "step": 54690 + }, + { + "epoch": 0.27316537241878697, + "grad_norm": 0.09034571051597595, + "learning_rate": 2.1859621016796417e-05, + "loss": 9.4907, + "step": 54700 + }, + { + "epoch": 0.27321531124372644, + "grad_norm": 0.09617186337709427, + "learning_rate": 2.1858119101854867e-05, + "loss": 9.483, + "step": 54710 + }, + { + "epoch": 0.27326525006866587, + "grad_norm": 0.09283477067947388, + "learning_rate": 2.1856617186913314e-05, + "loss": 9.4723, + "step": 54720 + }, + { + "epoch": 0.27331518889360534, + "grad_norm": 0.0881299078464508, + "learning_rate": 2.1855115271971767e-05, + "loss": 9.4951, + "step": 54730 + }, + { + "epoch": 0.27336512771854476, + "grad_norm": 0.09953461587429047, + "learning_rate": 2.1853613357030214e-05, + "loss": 9.4853, + "step": 54740 + }, + { + "epoch": 0.27341506654348424, + "grad_norm": 0.09831681102514267, + "learning_rate": 2.1852111442088664e-05, + "loss": 9.4849, + "step": 54750 + }, + { + "epoch": 0.27346500536842366, + "grad_norm": 0.0924922525882721, + "learning_rate": 2.1850609527147114e-05, + "loss": 9.4929, + "step": 54760 + }, + { + "epoch": 0.27351494419336314, + "grad_norm": 0.09693264961242676, + "learning_rate": 2.184910761220556e-05, + "loss": 9.4778, + "step": 54770 + }, + { + "epoch": 0.27356488301830256, + "grad_norm": 0.0940731093287468, + "learning_rate": 2.1847605697264015e-05, + "loss": 9.4909, + "step": 54780 + }, + { + "epoch": 0.27361482184324204, + "grad_norm": 0.09551883488893509, + "learning_rate": 2.184610378232246e-05, + "loss": 9.485, + "step": 54790 + }, + { + "epoch": 0.27366476066818146, + "grad_norm": 0.09278986603021622, + "learning_rate": 2.1844601867380912e-05, + "loss": 9.4857, + "step": 54800 + }, + { + "epoch": 0.27371469949312094, + "grad_norm": 0.09064249694347382, + "learning_rate": 2.1843099952439362e-05, + "loss": 9.4826, + "step": 54810 + }, + { + "epoch": 0.27376463831806036, + "grad_norm": 0.0937790721654892, + "learning_rate": 2.184159803749781e-05, + "loss": 9.4923, + "step": 54820 + }, + { + "epoch": 0.27381457714299984, + "grad_norm": 0.0964457243680954, + "learning_rate": 2.1840096122556262e-05, + "loss": 9.4783, + "step": 54830 + }, + { + "epoch": 0.27386451596793926, + "grad_norm": 0.08797242492437363, + "learning_rate": 2.183859420761471e-05, + "loss": 9.481, + "step": 54840 + }, + { + "epoch": 0.27391445479287874, + "grad_norm": 0.09727727621793747, + "learning_rate": 2.183709229267316e-05, + "loss": 9.4801, + "step": 54850 + }, + { + "epoch": 0.27396439361781816, + "grad_norm": 0.0918155238032341, + "learning_rate": 2.183559037773161e-05, + "loss": 9.4905, + "step": 54860 + }, + { + "epoch": 0.27401433244275764, + "grad_norm": 0.09824883192777634, + "learning_rate": 2.1834088462790056e-05, + "loss": 9.4761, + "step": 54870 + }, + { + "epoch": 0.27406427126769706, + "grad_norm": 0.09386686235666275, + "learning_rate": 2.183258654784851e-05, + "loss": 9.4937, + "step": 54880 + }, + { + "epoch": 0.27411421009263653, + "grad_norm": 0.09294228255748749, + "learning_rate": 2.1831084632906957e-05, + "loss": 9.4793, + "step": 54890 + }, + { + "epoch": 0.27416414891757596, + "grad_norm": 0.10264691710472107, + "learning_rate": 2.1829582717965407e-05, + "loss": 9.4796, + "step": 54900 + }, + { + "epoch": 0.27421408774251543, + "grad_norm": 0.09530775249004364, + "learning_rate": 2.1828080803023857e-05, + "loss": 9.479, + "step": 54910 + }, + { + "epoch": 0.27426402656745485, + "grad_norm": 0.0969926044344902, + "learning_rate": 2.1826578888082304e-05, + "loss": 9.4762, + "step": 54920 + }, + { + "epoch": 0.27431396539239433, + "grad_norm": 0.0955069363117218, + "learning_rate": 2.1825076973140757e-05, + "loss": 9.4781, + "step": 54930 + }, + { + "epoch": 0.27436390421733375, + "grad_norm": 0.09909159690141678, + "learning_rate": 2.1823575058199204e-05, + "loss": 9.4861, + "step": 54940 + }, + { + "epoch": 0.27441384304227323, + "grad_norm": 0.09283483028411865, + "learning_rate": 2.1822073143257654e-05, + "loss": 9.4774, + "step": 54950 + }, + { + "epoch": 0.27446378186721265, + "grad_norm": 0.09053994715213776, + "learning_rate": 2.1820571228316104e-05, + "loss": 9.4813, + "step": 54960 + }, + { + "epoch": 0.27451372069215213, + "grad_norm": 0.0921054258942604, + "learning_rate": 2.181906931337455e-05, + "loss": 9.4889, + "step": 54970 + }, + { + "epoch": 0.27456365951709155, + "grad_norm": 0.09481295198202133, + "learning_rate": 2.1817567398433005e-05, + "loss": 9.485, + "step": 54980 + }, + { + "epoch": 0.27461359834203103, + "grad_norm": 0.10501012206077576, + "learning_rate": 2.181606548349145e-05, + "loss": 9.4831, + "step": 54990 + }, + { + "epoch": 0.27466353716697045, + "grad_norm": 0.09528646618127823, + "learning_rate": 2.1814563568549902e-05, + "loss": 9.4872, + "step": 55000 + }, + { + "epoch": 0.27471347599190993, + "grad_norm": 0.09487199038267136, + "learning_rate": 2.1813061653608352e-05, + "loss": 9.4929, + "step": 55010 + }, + { + "epoch": 0.27476341481684935, + "grad_norm": 0.09505639970302582, + "learning_rate": 2.18115597386668e-05, + "loss": 9.4858, + "step": 55020 + }, + { + "epoch": 0.2748133536417888, + "grad_norm": 0.0927814468741417, + "learning_rate": 2.1810057823725252e-05, + "loss": 9.4791, + "step": 55030 + }, + { + "epoch": 0.27486329246672825, + "grad_norm": 0.09519551694393158, + "learning_rate": 2.18085559087837e-05, + "loss": 9.4805, + "step": 55040 + }, + { + "epoch": 0.2749132312916677, + "grad_norm": 0.09184595942497253, + "learning_rate": 2.1807053993842153e-05, + "loss": 9.4717, + "step": 55050 + }, + { + "epoch": 0.27496317011660715, + "grad_norm": 0.09657662361860275, + "learning_rate": 2.18055520789006e-05, + "loss": 9.4875, + "step": 55060 + }, + { + "epoch": 0.2750131089415466, + "grad_norm": 0.0927683562040329, + "learning_rate": 2.1804050163959046e-05, + "loss": 9.4826, + "step": 55070 + }, + { + "epoch": 0.27506304776648605, + "grad_norm": 0.09400233626365662, + "learning_rate": 2.18025482490175e-05, + "loss": 9.4751, + "step": 55080 + }, + { + "epoch": 0.2751129865914255, + "grad_norm": 0.09439916908740997, + "learning_rate": 2.1801046334075947e-05, + "loss": 9.4739, + "step": 55090 + }, + { + "epoch": 0.27516292541636495, + "grad_norm": 0.09644033014774323, + "learning_rate": 2.17995444191344e-05, + "loss": 9.4795, + "step": 55100 + }, + { + "epoch": 0.2752128642413044, + "grad_norm": 0.09258447587490082, + "learning_rate": 2.1798042504192847e-05, + "loss": 9.4774, + "step": 55110 + }, + { + "epoch": 0.27526280306624384, + "grad_norm": 0.09144683927297592, + "learning_rate": 2.1796540589251294e-05, + "loss": 9.4767, + "step": 55120 + }, + { + "epoch": 0.2753127418911833, + "grad_norm": 0.09092001616954803, + "learning_rate": 2.1795038674309747e-05, + "loss": 9.4839, + "step": 55130 + }, + { + "epoch": 0.27536268071612274, + "grad_norm": 0.0951116681098938, + "learning_rate": 2.1793536759368194e-05, + "loss": 9.4774, + "step": 55140 + }, + { + "epoch": 0.2754126195410622, + "grad_norm": 0.09364614635705948, + "learning_rate": 2.1792034844426648e-05, + "loss": 9.4739, + "step": 55150 + }, + { + "epoch": 0.27546255836600164, + "grad_norm": 0.09528619796037674, + "learning_rate": 2.1790532929485094e-05, + "loss": 9.4745, + "step": 55160 + }, + { + "epoch": 0.2755124971909411, + "grad_norm": 0.09560272097587585, + "learning_rate": 2.178903101454354e-05, + "loss": 9.4794, + "step": 55170 + }, + { + "epoch": 0.27556243601588054, + "grad_norm": 0.09209144115447998, + "learning_rate": 2.1787529099601995e-05, + "loss": 9.4788, + "step": 55180 + }, + { + "epoch": 0.27561237484082, + "grad_norm": 0.09365902096033096, + "learning_rate": 2.178602718466044e-05, + "loss": 9.472, + "step": 55190 + }, + { + "epoch": 0.27566231366575944, + "grad_norm": 0.09534572809934616, + "learning_rate": 2.1784525269718895e-05, + "loss": 9.4737, + "step": 55200 + }, + { + "epoch": 0.2757122524906989, + "grad_norm": 0.09372628480195999, + "learning_rate": 2.1783023354777342e-05, + "loss": 9.4766, + "step": 55210 + }, + { + "epoch": 0.27576219131563834, + "grad_norm": 0.09953497350215912, + "learning_rate": 2.178152143983579e-05, + "loss": 9.4827, + "step": 55220 + }, + { + "epoch": 0.2758121301405778, + "grad_norm": 0.0936177521944046, + "learning_rate": 2.1780019524894242e-05, + "loss": 9.4838, + "step": 55230 + }, + { + "epoch": 0.27586206896551724, + "grad_norm": 0.0956781804561615, + "learning_rate": 2.177851760995269e-05, + "loss": 9.4818, + "step": 55240 + }, + { + "epoch": 0.2759120077904567, + "grad_norm": 0.09287876635789871, + "learning_rate": 2.1777015695011143e-05, + "loss": 9.4868, + "step": 55250 + }, + { + "epoch": 0.27596194661539614, + "grad_norm": 0.09168554097414017, + "learning_rate": 2.177551378006959e-05, + "loss": 9.4796, + "step": 55260 + }, + { + "epoch": 0.2760118854403356, + "grad_norm": 0.09445545822381973, + "learning_rate": 2.1774011865128036e-05, + "loss": 9.4829, + "step": 55270 + }, + { + "epoch": 0.27606182426527504, + "grad_norm": 0.09545770287513733, + "learning_rate": 2.177250995018649e-05, + "loss": 9.4689, + "step": 55280 + }, + { + "epoch": 0.2761117630902145, + "grad_norm": 0.09333658963441849, + "learning_rate": 2.1771008035244937e-05, + "loss": 9.4856, + "step": 55290 + }, + { + "epoch": 0.27616170191515393, + "grad_norm": 0.09187939018011093, + "learning_rate": 2.176950612030339e-05, + "loss": 9.4839, + "step": 55300 + }, + { + "epoch": 0.2762116407400934, + "grad_norm": 0.09599092602729797, + "learning_rate": 2.1768004205361837e-05, + "loss": 9.4787, + "step": 55310 + }, + { + "epoch": 0.27626157956503283, + "grad_norm": 0.09836266934871674, + "learning_rate": 2.1766502290420284e-05, + "loss": 9.4733, + "step": 55320 + }, + { + "epoch": 0.2763115183899723, + "grad_norm": 0.08872494846582413, + "learning_rate": 2.1765000375478737e-05, + "loss": 9.4781, + "step": 55330 + }, + { + "epoch": 0.27636145721491173, + "grad_norm": 0.09891574829816818, + "learning_rate": 2.1763498460537184e-05, + "loss": 9.4845, + "step": 55340 + }, + { + "epoch": 0.2764113960398512, + "grad_norm": 0.09147507697343826, + "learning_rate": 2.1761996545595638e-05, + "loss": 9.4784, + "step": 55350 + }, + { + "epoch": 0.27646133486479063, + "grad_norm": 0.09677154570817947, + "learning_rate": 2.1760494630654084e-05, + "loss": 9.4795, + "step": 55360 + }, + { + "epoch": 0.2765112736897301, + "grad_norm": 0.09255880117416382, + "learning_rate": 2.1758992715712535e-05, + "loss": 9.4825, + "step": 55370 + }, + { + "epoch": 0.27656121251466953, + "grad_norm": 0.09383849054574966, + "learning_rate": 2.1757490800770985e-05, + "loss": 9.4684, + "step": 55380 + }, + { + "epoch": 0.27661115133960895, + "grad_norm": 0.09055565297603607, + "learning_rate": 2.175598888582943e-05, + "loss": 9.4714, + "step": 55390 + }, + { + "epoch": 0.27666109016454843, + "grad_norm": 0.09910639375448227, + "learning_rate": 2.1754486970887885e-05, + "loss": 9.4788, + "step": 55400 + }, + { + "epoch": 0.27671102898948785, + "grad_norm": 0.09024091809988022, + "learning_rate": 2.1752985055946332e-05, + "loss": 9.4775, + "step": 55410 + }, + { + "epoch": 0.2767609678144273, + "grad_norm": 0.09249179810285568, + "learning_rate": 2.1751483141004782e-05, + "loss": 9.4655, + "step": 55420 + }, + { + "epoch": 0.27681090663936675, + "grad_norm": 0.09385978430509567, + "learning_rate": 2.1749981226063232e-05, + "loss": 9.4792, + "step": 55430 + }, + { + "epoch": 0.2768608454643062, + "grad_norm": 0.09004578739404678, + "learning_rate": 2.174847931112168e-05, + "loss": 9.4759, + "step": 55440 + }, + { + "epoch": 0.27691078428924565, + "grad_norm": 0.09427126497030258, + "learning_rate": 2.1746977396180133e-05, + "loss": 9.4672, + "step": 55450 + }, + { + "epoch": 0.2769607231141851, + "grad_norm": 0.09121444821357727, + "learning_rate": 2.174547548123858e-05, + "loss": 9.4653, + "step": 55460 + }, + { + "epoch": 0.27701066193912455, + "grad_norm": 0.0954887717962265, + "learning_rate": 2.174397356629703e-05, + "loss": 9.47, + "step": 55470 + }, + { + "epoch": 0.277060600764064, + "grad_norm": 0.09227491915225983, + "learning_rate": 2.174247165135548e-05, + "loss": 9.4773, + "step": 55480 + }, + { + "epoch": 0.27711053958900345, + "grad_norm": 0.09483451396226883, + "learning_rate": 2.1740969736413927e-05, + "loss": 9.4778, + "step": 55490 + }, + { + "epoch": 0.2771604784139429, + "grad_norm": 0.09309407323598862, + "learning_rate": 2.173946782147238e-05, + "loss": 9.4802, + "step": 55500 + }, + { + "epoch": 0.27721041723888235, + "grad_norm": 0.09302882105112076, + "learning_rate": 2.1737965906530827e-05, + "loss": 9.4772, + "step": 55510 + }, + { + "epoch": 0.2772603560638218, + "grad_norm": 0.09708143770694733, + "learning_rate": 2.1736463991589277e-05, + "loss": 9.4642, + "step": 55520 + }, + { + "epoch": 0.27731029488876124, + "grad_norm": 0.09857852011919022, + "learning_rate": 2.1734962076647727e-05, + "loss": 9.4724, + "step": 55530 + }, + { + "epoch": 0.2773602337137007, + "grad_norm": 0.08941424638032913, + "learning_rate": 2.1733460161706174e-05, + "loss": 9.4653, + "step": 55540 + }, + { + "epoch": 0.27741017253864014, + "grad_norm": 0.09356237202882767, + "learning_rate": 2.1731958246764628e-05, + "loss": 9.4737, + "step": 55550 + }, + { + "epoch": 0.2774601113635796, + "grad_norm": 0.09721656143665314, + "learning_rate": 2.1730456331823074e-05, + "loss": 9.4649, + "step": 55560 + }, + { + "epoch": 0.27751005018851904, + "grad_norm": 0.0922091156244278, + "learning_rate": 2.1728954416881525e-05, + "loss": 9.4746, + "step": 55570 + }, + { + "epoch": 0.2775599890134585, + "grad_norm": 0.09237723052501678, + "learning_rate": 2.1727452501939975e-05, + "loss": 9.4742, + "step": 55580 + }, + { + "epoch": 0.27760992783839794, + "grad_norm": 0.09540941566228867, + "learning_rate": 2.172595058699842e-05, + "loss": 9.4836, + "step": 55590 + }, + { + "epoch": 0.2776598666633374, + "grad_norm": 0.09385842829942703, + "learning_rate": 2.1724448672056875e-05, + "loss": 9.4653, + "step": 55600 + }, + { + "epoch": 0.27770980548827684, + "grad_norm": 0.09224699437618256, + "learning_rate": 2.1722946757115322e-05, + "loss": 9.4769, + "step": 55610 + }, + { + "epoch": 0.2777597443132163, + "grad_norm": 0.08767042309045792, + "learning_rate": 2.1721444842173772e-05, + "loss": 9.4674, + "step": 55620 + }, + { + "epoch": 0.27780968313815574, + "grad_norm": 0.09636397659778595, + "learning_rate": 2.1719942927232222e-05, + "loss": 9.4676, + "step": 55630 + }, + { + "epoch": 0.2778596219630952, + "grad_norm": 0.09190484136343002, + "learning_rate": 2.171844101229067e-05, + "loss": 9.4758, + "step": 55640 + }, + { + "epoch": 0.27790956078803464, + "grad_norm": 0.09460058063268661, + "learning_rate": 2.1716939097349123e-05, + "loss": 9.4664, + "step": 55650 + }, + { + "epoch": 0.2779594996129741, + "grad_norm": 0.0935700535774231, + "learning_rate": 2.171543718240757e-05, + "loss": 9.4654, + "step": 55660 + }, + { + "epoch": 0.27800943843791354, + "grad_norm": 0.09616656601428986, + "learning_rate": 2.171393526746602e-05, + "loss": 9.4798, + "step": 55670 + }, + { + "epoch": 0.278059377262853, + "grad_norm": 0.09719350188970566, + "learning_rate": 2.171243335252447e-05, + "loss": 9.4636, + "step": 55680 + }, + { + "epoch": 0.27810931608779244, + "grad_norm": 0.09644226729869843, + "learning_rate": 2.171093143758292e-05, + "loss": 9.471, + "step": 55690 + }, + { + "epoch": 0.2781592549127319, + "grad_norm": 0.09436850249767303, + "learning_rate": 2.170942952264137e-05, + "loss": 9.4831, + "step": 55700 + }, + { + "epoch": 0.27820919373767133, + "grad_norm": 0.09048687666654587, + "learning_rate": 2.1707927607699817e-05, + "loss": 9.4731, + "step": 55710 + }, + { + "epoch": 0.2782591325626108, + "grad_norm": 0.10046453028917313, + "learning_rate": 2.1706425692758267e-05, + "loss": 9.4598, + "step": 55720 + }, + { + "epoch": 0.27830907138755023, + "grad_norm": 0.09537816792726517, + "learning_rate": 2.1704923777816717e-05, + "loss": 9.4707, + "step": 55730 + }, + { + "epoch": 0.2783590102124897, + "grad_norm": 0.09480351954698563, + "learning_rate": 2.1703421862875167e-05, + "loss": 9.478, + "step": 55740 + }, + { + "epoch": 0.27840894903742913, + "grad_norm": 0.09742586314678192, + "learning_rate": 2.1701919947933618e-05, + "loss": 9.4817, + "step": 55750 + }, + { + "epoch": 0.2784588878623686, + "grad_norm": 0.1001049056649208, + "learning_rate": 2.1700418032992064e-05, + "loss": 9.4723, + "step": 55760 + }, + { + "epoch": 0.27850882668730803, + "grad_norm": 0.09015222638845444, + "learning_rate": 2.1698916118050515e-05, + "loss": 9.4722, + "step": 55770 + }, + { + "epoch": 0.2785587655122475, + "grad_norm": 0.09560934454202652, + "learning_rate": 2.1697414203108965e-05, + "loss": 9.4617, + "step": 55780 + }, + { + "epoch": 0.27860870433718693, + "grad_norm": 0.0906476080417633, + "learning_rate": 2.1695912288167415e-05, + "loss": 9.4799, + "step": 55790 + }, + { + "epoch": 0.2786586431621264, + "grad_norm": 0.09162773191928864, + "learning_rate": 2.1694410373225865e-05, + "loss": 9.4484, + "step": 55800 + }, + { + "epoch": 0.27870858198706583, + "grad_norm": 0.08697135001420975, + "learning_rate": 2.1692908458284312e-05, + "loss": 9.4698, + "step": 55810 + }, + { + "epoch": 0.2787585208120053, + "grad_norm": 0.09465092420578003, + "learning_rate": 2.1691406543342762e-05, + "loss": 9.4725, + "step": 55820 + }, + { + "epoch": 0.2788084596369447, + "grad_norm": 0.09739962965250015, + "learning_rate": 2.1689904628401212e-05, + "loss": 9.4746, + "step": 55830 + }, + { + "epoch": 0.2788583984618842, + "grad_norm": 0.09323502331972122, + "learning_rate": 2.1688402713459662e-05, + "loss": 9.4643, + "step": 55840 + }, + { + "epoch": 0.2789083372868236, + "grad_norm": 0.09480603039264679, + "learning_rate": 2.1686900798518113e-05, + "loss": 9.4724, + "step": 55850 + }, + { + "epoch": 0.2789582761117631, + "grad_norm": 0.09424508363008499, + "learning_rate": 2.168539888357656e-05, + "loss": 9.4578, + "step": 55860 + }, + { + "epoch": 0.2790082149367025, + "grad_norm": 0.10481972247362137, + "learning_rate": 2.168389696863501e-05, + "loss": 9.4665, + "step": 55870 + }, + { + "epoch": 0.279058153761642, + "grad_norm": 0.09700514376163483, + "learning_rate": 2.168239505369346e-05, + "loss": 9.4596, + "step": 55880 + }, + { + "epoch": 0.2791080925865814, + "grad_norm": 0.09476059675216675, + "learning_rate": 2.168089313875191e-05, + "loss": 9.4671, + "step": 55890 + }, + { + "epoch": 0.2791580314115209, + "grad_norm": 0.09556054323911667, + "learning_rate": 2.167939122381036e-05, + "loss": 9.4661, + "step": 55900 + }, + { + "epoch": 0.2792079702364603, + "grad_norm": 0.09373265504837036, + "learning_rate": 2.1677889308868807e-05, + "loss": 9.4751, + "step": 55910 + }, + { + "epoch": 0.2792579090613998, + "grad_norm": 0.10191362351179123, + "learning_rate": 2.1676387393927257e-05, + "loss": 9.4675, + "step": 55920 + }, + { + "epoch": 0.2793078478863392, + "grad_norm": 0.09132136404514313, + "learning_rate": 2.1674885478985707e-05, + "loss": 9.4671, + "step": 55930 + }, + { + "epoch": 0.2793577867112787, + "grad_norm": 0.08985847234725952, + "learning_rate": 2.1673383564044157e-05, + "loss": 9.4687, + "step": 55940 + }, + { + "epoch": 0.2794077255362181, + "grad_norm": 0.10665588080883026, + "learning_rate": 2.1671881649102608e-05, + "loss": 9.4769, + "step": 55950 + }, + { + "epoch": 0.2794576643611576, + "grad_norm": 0.10030438750982285, + "learning_rate": 2.1670379734161054e-05, + "loss": 9.4622, + "step": 55960 + }, + { + "epoch": 0.279507603186097, + "grad_norm": 0.09504380822181702, + "learning_rate": 2.1668877819219505e-05, + "loss": 9.468, + "step": 55970 + }, + { + "epoch": 0.2795575420110365, + "grad_norm": 0.09228894859552383, + "learning_rate": 2.1667375904277955e-05, + "loss": 9.4615, + "step": 55980 + }, + { + "epoch": 0.2796074808359759, + "grad_norm": 0.09327695518732071, + "learning_rate": 2.1665873989336405e-05, + "loss": 9.4575, + "step": 55990 + }, + { + "epoch": 0.2796574196609154, + "grad_norm": 0.09014037996530533, + "learning_rate": 2.1664372074394855e-05, + "loss": 9.4682, + "step": 56000 + }, + { + "epoch": 0.2797073584858548, + "grad_norm": 0.09762946516275406, + "learning_rate": 2.1662870159453302e-05, + "loss": 9.466, + "step": 56010 + }, + { + "epoch": 0.2797572973107943, + "grad_norm": 0.08341635018587112, + "learning_rate": 2.1661368244511752e-05, + "loss": 9.4691, + "step": 56020 + }, + { + "epoch": 0.2798072361357337, + "grad_norm": 0.09523497521877289, + "learning_rate": 2.1659866329570202e-05, + "loss": 9.4618, + "step": 56030 + }, + { + "epoch": 0.2798571749606732, + "grad_norm": 0.10125718265771866, + "learning_rate": 2.1658364414628653e-05, + "loss": 9.4623, + "step": 56040 + }, + { + "epoch": 0.2799071137856126, + "grad_norm": 0.10129445791244507, + "learning_rate": 2.1656862499687103e-05, + "loss": 9.458, + "step": 56050 + }, + { + "epoch": 0.2799570526105521, + "grad_norm": 0.08802901953458786, + "learning_rate": 2.1655360584745553e-05, + "loss": 9.465, + "step": 56060 + }, + { + "epoch": 0.2800069914354915, + "grad_norm": 0.09462311863899231, + "learning_rate": 2.1653858669804e-05, + "loss": 9.4684, + "step": 56070 + }, + { + "epoch": 0.280056930260431, + "grad_norm": 0.09879063069820404, + "learning_rate": 2.165235675486245e-05, + "loss": 9.4562, + "step": 56080 + }, + { + "epoch": 0.2801068690853704, + "grad_norm": 0.09174858778715134, + "learning_rate": 2.16508548399209e-05, + "loss": 9.4649, + "step": 56090 + }, + { + "epoch": 0.2801568079103099, + "grad_norm": 0.09843375533819199, + "learning_rate": 2.164935292497935e-05, + "loss": 9.4666, + "step": 56100 + }, + { + "epoch": 0.2802067467352493, + "grad_norm": 0.09890784323215485, + "learning_rate": 2.16478510100378e-05, + "loss": 9.464, + "step": 56110 + }, + { + "epoch": 0.2802566855601888, + "grad_norm": 0.09890832751989365, + "learning_rate": 2.1646349095096247e-05, + "loss": 9.47, + "step": 56120 + }, + { + "epoch": 0.2803066243851282, + "grad_norm": 0.09250981360673904, + "learning_rate": 2.1644847180154697e-05, + "loss": 9.4576, + "step": 56130 + }, + { + "epoch": 0.2803565632100677, + "grad_norm": 0.09066881984472275, + "learning_rate": 2.1643345265213148e-05, + "loss": 9.4633, + "step": 56140 + }, + { + "epoch": 0.2804065020350071, + "grad_norm": 0.09215233474969864, + "learning_rate": 2.1641843350271598e-05, + "loss": 9.4647, + "step": 56150 + }, + { + "epoch": 0.2804564408599466, + "grad_norm": 0.09696812182664871, + "learning_rate": 2.1640341435330048e-05, + "loss": 9.4715, + "step": 56160 + }, + { + "epoch": 0.280506379684886, + "grad_norm": 0.09409933537244797, + "learning_rate": 2.1638839520388495e-05, + "loss": 9.4697, + "step": 56170 + }, + { + "epoch": 0.2805563185098255, + "grad_norm": 0.09850755333900452, + "learning_rate": 2.1637337605446945e-05, + "loss": 9.4676, + "step": 56180 + }, + { + "epoch": 0.2806062573347649, + "grad_norm": 0.09551995247602463, + "learning_rate": 2.1635835690505395e-05, + "loss": 9.4628, + "step": 56190 + }, + { + "epoch": 0.2806561961597044, + "grad_norm": 0.08868438005447388, + "learning_rate": 2.1634333775563845e-05, + "loss": 9.4624, + "step": 56200 + }, + { + "epoch": 0.2807061349846438, + "grad_norm": 0.09749383479356766, + "learning_rate": 2.1632831860622295e-05, + "loss": 9.4605, + "step": 56210 + }, + { + "epoch": 0.2807560738095833, + "grad_norm": 0.09395034611225128, + "learning_rate": 2.1631329945680742e-05, + "loss": 9.4619, + "step": 56220 + }, + { + "epoch": 0.2808060126345227, + "grad_norm": 0.09494924545288086, + "learning_rate": 2.1629828030739192e-05, + "loss": 9.4666, + "step": 56230 + }, + { + "epoch": 0.2808559514594622, + "grad_norm": 0.09371868520975113, + "learning_rate": 2.1628326115797643e-05, + "loss": 9.4607, + "step": 56240 + }, + { + "epoch": 0.2809058902844016, + "grad_norm": 0.0938858613371849, + "learning_rate": 2.1626824200856093e-05, + "loss": 9.4649, + "step": 56250 + }, + { + "epoch": 0.2809558291093411, + "grad_norm": 0.09127645194530487, + "learning_rate": 2.1625322285914543e-05, + "loss": 9.4614, + "step": 56260 + }, + { + "epoch": 0.2810057679342805, + "grad_norm": 0.09199647605419159, + "learning_rate": 2.162382037097299e-05, + "loss": 9.4671, + "step": 56270 + }, + { + "epoch": 0.28105570675922, + "grad_norm": 0.09332824498414993, + "learning_rate": 2.162231845603144e-05, + "loss": 9.4663, + "step": 56280 + }, + { + "epoch": 0.2811056455841594, + "grad_norm": 0.09723930805921555, + "learning_rate": 2.162081654108989e-05, + "loss": 9.459, + "step": 56290 + }, + { + "epoch": 0.2811555844090989, + "grad_norm": 0.09130632877349854, + "learning_rate": 2.161931462614834e-05, + "loss": 9.4603, + "step": 56300 + }, + { + "epoch": 0.2812055232340383, + "grad_norm": 0.09806390851736069, + "learning_rate": 2.161781271120679e-05, + "loss": 9.463, + "step": 56310 + }, + { + "epoch": 0.2812554620589778, + "grad_norm": 0.09688523411750793, + "learning_rate": 2.1616310796265237e-05, + "loss": 9.459, + "step": 56320 + }, + { + "epoch": 0.2813054008839172, + "grad_norm": 0.09443571418523788, + "learning_rate": 2.1614808881323687e-05, + "loss": 9.4629, + "step": 56330 + }, + { + "epoch": 0.2813553397088567, + "grad_norm": 0.09263693541288376, + "learning_rate": 2.1613306966382138e-05, + "loss": 9.4674, + "step": 56340 + }, + { + "epoch": 0.2814052785337961, + "grad_norm": 0.08993376046419144, + "learning_rate": 2.1611805051440588e-05, + "loss": 9.462, + "step": 56350 + }, + { + "epoch": 0.2814552173587356, + "grad_norm": 0.09446647763252258, + "learning_rate": 2.1610303136499038e-05, + "loss": 9.4597, + "step": 56360 + }, + { + "epoch": 0.281505156183675, + "grad_norm": 0.09347580373287201, + "learning_rate": 2.1608801221557485e-05, + "loss": 9.4585, + "step": 56370 + }, + { + "epoch": 0.2815550950086144, + "grad_norm": 0.09771845489740372, + "learning_rate": 2.1607299306615938e-05, + "loss": 9.4685, + "step": 56380 + }, + { + "epoch": 0.2816050338335539, + "grad_norm": 0.09477143734693527, + "learning_rate": 2.1605797391674385e-05, + "loss": 9.4516, + "step": 56390 + }, + { + "epoch": 0.2816549726584933, + "grad_norm": 0.09481296688318253, + "learning_rate": 2.1604295476732835e-05, + "loss": 9.4627, + "step": 56400 + }, + { + "epoch": 0.2817049114834328, + "grad_norm": 0.09837774932384491, + "learning_rate": 2.1602793561791285e-05, + "loss": 9.4678, + "step": 56410 + }, + { + "epoch": 0.2817548503083722, + "grad_norm": 0.09405245631933212, + "learning_rate": 2.1601291646849732e-05, + "loss": 9.4549, + "step": 56420 + }, + { + "epoch": 0.2818047891333117, + "grad_norm": 0.09251265972852707, + "learning_rate": 2.1599789731908186e-05, + "loss": 9.466, + "step": 56430 + }, + { + "epoch": 0.2818547279582511, + "grad_norm": 0.0892677903175354, + "learning_rate": 2.1598287816966633e-05, + "loss": 9.4575, + "step": 56440 + }, + { + "epoch": 0.2819046667831906, + "grad_norm": 0.09336142987012863, + "learning_rate": 2.1596785902025083e-05, + "loss": 9.4523, + "step": 56450 + }, + { + "epoch": 0.28195460560813, + "grad_norm": 0.09530828148126602, + "learning_rate": 2.1595283987083533e-05, + "loss": 9.4588, + "step": 56460 + }, + { + "epoch": 0.2820045444330695, + "grad_norm": 0.09306281059980392, + "learning_rate": 2.159378207214198e-05, + "loss": 9.4528, + "step": 56470 + }, + { + "epoch": 0.2820544832580089, + "grad_norm": 0.0939321219921112, + "learning_rate": 2.1592280157200433e-05, + "loss": 9.4545, + "step": 56480 + }, + { + "epoch": 0.2821044220829484, + "grad_norm": 0.1002638041973114, + "learning_rate": 2.159077824225888e-05, + "loss": 9.4582, + "step": 56490 + }, + { + "epoch": 0.2821543609078878, + "grad_norm": 0.09518595784902573, + "learning_rate": 2.158927632731733e-05, + "loss": 9.4531, + "step": 56500 + }, + { + "epoch": 0.2822042997328273, + "grad_norm": 0.0943618193268776, + "learning_rate": 2.158777441237578e-05, + "loss": 9.4467, + "step": 56510 + }, + { + "epoch": 0.2822542385577667, + "grad_norm": 0.09317539632320404, + "learning_rate": 2.1586272497434227e-05, + "loss": 9.4538, + "step": 56520 + }, + { + "epoch": 0.2823041773827062, + "grad_norm": 0.09565674513578415, + "learning_rate": 2.158477058249268e-05, + "loss": 9.457, + "step": 56530 + }, + { + "epoch": 0.2823541162076456, + "grad_norm": 0.09357752650976181, + "learning_rate": 2.1583268667551128e-05, + "loss": 9.4566, + "step": 56540 + }, + { + "epoch": 0.2824040550325851, + "grad_norm": 0.09761801362037659, + "learning_rate": 2.1581766752609578e-05, + "loss": 9.46, + "step": 56550 + }, + { + "epoch": 0.2824539938575245, + "grad_norm": 0.08818177133798599, + "learning_rate": 2.1580264837668028e-05, + "loss": 9.453, + "step": 56560 + }, + { + "epoch": 0.282503932682464, + "grad_norm": 0.09338049590587616, + "learning_rate": 2.1578762922726475e-05, + "loss": 9.4508, + "step": 56570 + }, + { + "epoch": 0.2825538715074034, + "grad_norm": 0.09481517225503922, + "learning_rate": 2.1577261007784928e-05, + "loss": 9.4517, + "step": 56580 + }, + { + "epoch": 0.2826038103323429, + "grad_norm": 0.0910092368721962, + "learning_rate": 2.1575759092843375e-05, + "loss": 9.4635, + "step": 56590 + }, + { + "epoch": 0.2826537491572823, + "grad_norm": 0.0879504382610321, + "learning_rate": 2.1574257177901825e-05, + "loss": 9.4676, + "step": 56600 + }, + { + "epoch": 0.2827036879822218, + "grad_norm": 0.09749528020620346, + "learning_rate": 2.1572755262960275e-05, + "loss": 9.4548, + "step": 56610 + }, + { + "epoch": 0.2827536268071612, + "grad_norm": 0.09335489571094513, + "learning_rate": 2.1571253348018722e-05, + "loss": 9.459, + "step": 56620 + }, + { + "epoch": 0.2828035656321007, + "grad_norm": 0.09115227311849594, + "learning_rate": 2.1569751433077176e-05, + "loss": 9.4516, + "step": 56630 + }, + { + "epoch": 0.2828535044570401, + "grad_norm": 0.09703268110752106, + "learning_rate": 2.1568249518135623e-05, + "loss": 9.4488, + "step": 56640 + }, + { + "epoch": 0.2829034432819796, + "grad_norm": 0.08919961750507355, + "learning_rate": 2.1566747603194073e-05, + "loss": 9.4534, + "step": 56650 + }, + { + "epoch": 0.282953382106919, + "grad_norm": 0.09611677378416061, + "learning_rate": 2.1565245688252523e-05, + "loss": 9.4558, + "step": 56660 + }, + { + "epoch": 0.2830033209318585, + "grad_norm": 0.09488637745380402, + "learning_rate": 2.156374377331097e-05, + "loss": 9.4519, + "step": 56670 + }, + { + "epoch": 0.2830532597567979, + "grad_norm": 0.09528210014104843, + "learning_rate": 2.1562241858369423e-05, + "loss": 9.4565, + "step": 56680 + }, + { + "epoch": 0.2831031985817374, + "grad_norm": 0.0950738713145256, + "learning_rate": 2.156073994342787e-05, + "loss": 9.4551, + "step": 56690 + }, + { + "epoch": 0.2831531374066768, + "grad_norm": 0.09509290009737015, + "learning_rate": 2.1559238028486324e-05, + "loss": 9.4597, + "step": 56700 + }, + { + "epoch": 0.2832030762316163, + "grad_norm": 0.09353553503751755, + "learning_rate": 2.155773611354477e-05, + "loss": 9.443, + "step": 56710 + }, + { + "epoch": 0.2832530150565557, + "grad_norm": 0.09463495761156082, + "learning_rate": 2.1556234198603217e-05, + "loss": 9.454, + "step": 56720 + }, + { + "epoch": 0.2833029538814952, + "grad_norm": 0.0871269553899765, + "learning_rate": 2.155473228366167e-05, + "loss": 9.4553, + "step": 56730 + }, + { + "epoch": 0.2833528927064346, + "grad_norm": 0.10174693167209625, + "learning_rate": 2.1553230368720118e-05, + "loss": 9.4618, + "step": 56740 + }, + { + "epoch": 0.2834028315313741, + "grad_norm": 0.09311925619840622, + "learning_rate": 2.155172845377857e-05, + "loss": 9.4507, + "step": 56750 + }, + { + "epoch": 0.2834527703563135, + "grad_norm": 0.09324943274259567, + "learning_rate": 2.1550226538837018e-05, + "loss": 9.4522, + "step": 56760 + }, + { + "epoch": 0.283502709181253, + "grad_norm": 0.0928729698061943, + "learning_rate": 2.1548724623895465e-05, + "loss": 9.4504, + "step": 56770 + }, + { + "epoch": 0.2835526480061924, + "grad_norm": 0.09641097486019135, + "learning_rate": 2.1547222708953918e-05, + "loss": 9.4463, + "step": 56780 + }, + { + "epoch": 0.2836025868311319, + "grad_norm": 0.09734059870243073, + "learning_rate": 2.1545720794012365e-05, + "loss": 9.4444, + "step": 56790 + }, + { + "epoch": 0.2836525256560713, + "grad_norm": 0.09037147462368011, + "learning_rate": 2.154421887907082e-05, + "loss": 9.4487, + "step": 56800 + }, + { + "epoch": 0.2837024644810108, + "grad_norm": 0.09848001599311829, + "learning_rate": 2.1542716964129265e-05, + "loss": 9.4588, + "step": 56810 + }, + { + "epoch": 0.2837524033059502, + "grad_norm": 0.092610202729702, + "learning_rate": 2.1541215049187712e-05, + "loss": 9.453, + "step": 56820 + }, + { + "epoch": 0.2838023421308897, + "grad_norm": 0.09564469754695892, + "learning_rate": 2.1539713134246166e-05, + "loss": 9.4589, + "step": 56830 + }, + { + "epoch": 0.2838522809558291, + "grad_norm": 0.08878234773874283, + "learning_rate": 2.1538211219304613e-05, + "loss": 9.4545, + "step": 56840 + }, + { + "epoch": 0.28390221978076857, + "grad_norm": 0.09450232982635498, + "learning_rate": 2.1536709304363066e-05, + "loss": 9.4541, + "step": 56850 + }, + { + "epoch": 0.283952158605708, + "grad_norm": 0.09449663758277893, + "learning_rate": 2.1535207389421513e-05, + "loss": 9.4419, + "step": 56860 + }, + { + "epoch": 0.28400209743064747, + "grad_norm": 0.09098829329013824, + "learning_rate": 2.153370547447996e-05, + "loss": 9.452, + "step": 56870 + }, + { + "epoch": 0.2840520362555869, + "grad_norm": 0.09370938688516617, + "learning_rate": 2.1532203559538413e-05, + "loss": 9.45, + "step": 56880 + }, + { + "epoch": 0.28410197508052637, + "grad_norm": 0.09251664578914642, + "learning_rate": 2.153070164459686e-05, + "loss": 9.453, + "step": 56890 + }, + { + "epoch": 0.2841519139054658, + "grad_norm": 0.0982016995549202, + "learning_rate": 2.1529199729655314e-05, + "loss": 9.4479, + "step": 56900 + }, + { + "epoch": 0.28420185273040527, + "grad_norm": 0.0964377224445343, + "learning_rate": 2.152769781471376e-05, + "loss": 9.4496, + "step": 56910 + }, + { + "epoch": 0.2842517915553447, + "grad_norm": 0.09132152795791626, + "learning_rate": 2.1526195899772207e-05, + "loss": 9.4547, + "step": 56920 + }, + { + "epoch": 0.28430173038028417, + "grad_norm": 0.09878633171319962, + "learning_rate": 2.152469398483066e-05, + "loss": 9.4533, + "step": 56930 + }, + { + "epoch": 0.2843516692052236, + "grad_norm": 0.09209530800580978, + "learning_rate": 2.1523192069889108e-05, + "loss": 9.4452, + "step": 56940 + }, + { + "epoch": 0.28440160803016307, + "grad_norm": 0.09105639159679413, + "learning_rate": 2.152169015494756e-05, + "loss": 9.4402, + "step": 56950 + }, + { + "epoch": 0.2844515468551025, + "grad_norm": 0.09028221666812897, + "learning_rate": 2.1520188240006008e-05, + "loss": 9.4486, + "step": 56960 + }, + { + "epoch": 0.28450148568004197, + "grad_norm": 0.09749405086040497, + "learning_rate": 2.1518686325064455e-05, + "loss": 9.452, + "step": 56970 + }, + { + "epoch": 0.2845514245049814, + "grad_norm": 0.0886070504784584, + "learning_rate": 2.1517184410122908e-05, + "loss": 9.4518, + "step": 56980 + }, + { + "epoch": 0.28460136332992086, + "grad_norm": 0.09163407981395721, + "learning_rate": 2.1515682495181355e-05, + "loss": 9.4533, + "step": 56990 + }, + { + "epoch": 0.2846513021548603, + "grad_norm": 0.09811826795339584, + "learning_rate": 2.151418058023981e-05, + "loss": 9.4422, + "step": 57000 + }, + { + "epoch": 0.28470124097979976, + "grad_norm": 0.08839601278305054, + "learning_rate": 2.1512678665298255e-05, + "loss": 9.4462, + "step": 57010 + }, + { + "epoch": 0.2847511798047392, + "grad_norm": 0.09699650853872299, + "learning_rate": 2.1511176750356706e-05, + "loss": 9.4547, + "step": 57020 + }, + { + "epoch": 0.28480111862967866, + "grad_norm": 0.0974980965256691, + "learning_rate": 2.1509674835415156e-05, + "loss": 9.4466, + "step": 57030 + }, + { + "epoch": 0.2848510574546181, + "grad_norm": 0.09446827322244644, + "learning_rate": 2.1508172920473603e-05, + "loss": 9.4488, + "step": 57040 + }, + { + "epoch": 0.28490099627955756, + "grad_norm": 0.09437047690153122, + "learning_rate": 2.1506671005532056e-05, + "loss": 9.4458, + "step": 57050 + }, + { + "epoch": 0.284950935104497, + "grad_norm": 0.09088893234729767, + "learning_rate": 2.1505169090590503e-05, + "loss": 9.4443, + "step": 57060 + }, + { + "epoch": 0.28500087392943646, + "grad_norm": 0.0951780378818512, + "learning_rate": 2.1503667175648953e-05, + "loss": 9.4552, + "step": 57070 + }, + { + "epoch": 0.2850508127543759, + "grad_norm": 0.08948690444231033, + "learning_rate": 2.1502165260707403e-05, + "loss": 9.4513, + "step": 57080 + }, + { + "epoch": 0.28510075157931536, + "grad_norm": 0.09669423848390579, + "learning_rate": 2.150066334576585e-05, + "loss": 9.4471, + "step": 57090 + }, + { + "epoch": 0.2851506904042548, + "grad_norm": 0.09193644672632217, + "learning_rate": 2.1499161430824304e-05, + "loss": 9.4458, + "step": 57100 + }, + { + "epoch": 0.28520062922919426, + "grad_norm": 0.10340452194213867, + "learning_rate": 2.149765951588275e-05, + "loss": 9.4497, + "step": 57110 + }, + { + "epoch": 0.2852505680541337, + "grad_norm": 0.09467227011919022, + "learning_rate": 2.14961576009412e-05, + "loss": 9.4555, + "step": 57120 + }, + { + "epoch": 0.28530050687907316, + "grad_norm": 0.09359690546989441, + "learning_rate": 2.149465568599965e-05, + "loss": 9.4468, + "step": 57130 + }, + { + "epoch": 0.2853504457040126, + "grad_norm": 0.08810552209615707, + "learning_rate": 2.1493153771058098e-05, + "loss": 9.4406, + "step": 57140 + }, + { + "epoch": 0.28540038452895206, + "grad_norm": 0.09267257899045944, + "learning_rate": 2.149165185611655e-05, + "loss": 9.4496, + "step": 57150 + }, + { + "epoch": 0.2854503233538915, + "grad_norm": 0.09369373321533203, + "learning_rate": 2.1490149941174998e-05, + "loss": 9.4524, + "step": 57160 + }, + { + "epoch": 0.28550026217883095, + "grad_norm": 0.09611758589744568, + "learning_rate": 2.1488648026233448e-05, + "loss": 9.446, + "step": 57170 + }, + { + "epoch": 0.2855502010037704, + "grad_norm": 0.09398515522480011, + "learning_rate": 2.1487146111291898e-05, + "loss": 9.4492, + "step": 57180 + }, + { + "epoch": 0.28560013982870985, + "grad_norm": 0.08984824270009995, + "learning_rate": 2.1485644196350345e-05, + "loss": 9.4438, + "step": 57190 + }, + { + "epoch": 0.2856500786536493, + "grad_norm": 0.09207414090633392, + "learning_rate": 2.14841422814088e-05, + "loss": 9.4522, + "step": 57200 + }, + { + "epoch": 0.28570001747858875, + "grad_norm": 0.09611576050519943, + "learning_rate": 2.1482640366467245e-05, + "loss": 9.4511, + "step": 57210 + }, + { + "epoch": 0.2857499563035282, + "grad_norm": 0.09785609692335129, + "learning_rate": 2.1481138451525696e-05, + "loss": 9.4475, + "step": 57220 + }, + { + "epoch": 0.28579989512846765, + "grad_norm": 0.09928670525550842, + "learning_rate": 2.1479636536584146e-05, + "loss": 9.4436, + "step": 57230 + }, + { + "epoch": 0.2858498339534071, + "grad_norm": 0.09629528224468231, + "learning_rate": 2.1478134621642593e-05, + "loss": 9.4383, + "step": 57240 + }, + { + "epoch": 0.28589977277834655, + "grad_norm": 0.09739939123392105, + "learning_rate": 2.1476632706701046e-05, + "loss": 9.447, + "step": 57250 + }, + { + "epoch": 0.28594971160328597, + "grad_norm": 0.09917205572128296, + "learning_rate": 2.1475130791759493e-05, + "loss": 9.4335, + "step": 57260 + }, + { + "epoch": 0.28599965042822545, + "grad_norm": 0.09072194248437881, + "learning_rate": 2.1473628876817943e-05, + "loss": 9.4544, + "step": 57270 + }, + { + "epoch": 0.28604958925316487, + "grad_norm": 0.09361054748296738, + "learning_rate": 2.1472126961876393e-05, + "loss": 9.4429, + "step": 57280 + }, + { + "epoch": 0.28609952807810435, + "grad_norm": 0.09945287555456161, + "learning_rate": 2.147062504693484e-05, + "loss": 9.4512, + "step": 57290 + }, + { + "epoch": 0.28614946690304377, + "grad_norm": 0.09822694212198257, + "learning_rate": 2.1469123131993294e-05, + "loss": 9.434, + "step": 57300 + }, + { + "epoch": 0.28619940572798325, + "grad_norm": 0.09281609952449799, + "learning_rate": 2.146762121705174e-05, + "loss": 9.4456, + "step": 57310 + }, + { + "epoch": 0.28624934455292267, + "grad_norm": 0.09337762743234634, + "learning_rate": 2.146611930211019e-05, + "loss": 9.4507, + "step": 57320 + }, + { + "epoch": 0.28629928337786215, + "grad_norm": 0.09160379320383072, + "learning_rate": 2.146461738716864e-05, + "loss": 9.4441, + "step": 57330 + }, + { + "epoch": 0.28634922220280157, + "grad_norm": 0.09638303518295288, + "learning_rate": 2.146311547222709e-05, + "loss": 9.45, + "step": 57340 + }, + { + "epoch": 0.28639916102774104, + "grad_norm": 0.0943082943558693, + "learning_rate": 2.146161355728554e-05, + "loss": 9.4481, + "step": 57350 + }, + { + "epoch": 0.28644909985268047, + "grad_norm": 0.09442918747663498, + "learning_rate": 2.1460111642343988e-05, + "loss": 9.4396, + "step": 57360 + }, + { + "epoch": 0.2864990386776199, + "grad_norm": 0.10041014105081558, + "learning_rate": 2.1458609727402438e-05, + "loss": 9.4372, + "step": 57370 + }, + { + "epoch": 0.28654897750255937, + "grad_norm": 0.0975830927491188, + "learning_rate": 2.145710781246089e-05, + "loss": 9.4574, + "step": 57380 + }, + { + "epoch": 0.2865989163274988, + "grad_norm": 0.0888514295220375, + "learning_rate": 2.145560589751934e-05, + "loss": 9.4463, + "step": 57390 + }, + { + "epoch": 0.28664885515243826, + "grad_norm": 0.09187906235456467, + "learning_rate": 2.145410398257779e-05, + "loss": 9.4551, + "step": 57400 + }, + { + "epoch": 0.2866987939773777, + "grad_norm": 0.09454265236854553, + "learning_rate": 2.1452602067636235e-05, + "loss": 9.4473, + "step": 57410 + }, + { + "epoch": 0.28674873280231716, + "grad_norm": 0.09518423676490784, + "learning_rate": 2.1451100152694686e-05, + "loss": 9.4421, + "step": 57420 + }, + { + "epoch": 0.2867986716272566, + "grad_norm": 0.09494660049676895, + "learning_rate": 2.1449598237753136e-05, + "loss": 9.4452, + "step": 57430 + }, + { + "epoch": 0.28684861045219606, + "grad_norm": 0.09462330490350723, + "learning_rate": 2.1448096322811586e-05, + "loss": 9.444, + "step": 57440 + }, + { + "epoch": 0.2868985492771355, + "grad_norm": 0.09740941226482391, + "learning_rate": 2.1446594407870036e-05, + "loss": 9.4363, + "step": 57450 + }, + { + "epoch": 0.28694848810207496, + "grad_norm": 0.09297572076320648, + "learning_rate": 2.1445092492928483e-05, + "loss": 9.4484, + "step": 57460 + }, + { + "epoch": 0.2869984269270144, + "grad_norm": 0.09299388527870178, + "learning_rate": 2.1443590577986933e-05, + "loss": 9.4431, + "step": 57470 + }, + { + "epoch": 0.28704836575195386, + "grad_norm": 0.09611527621746063, + "learning_rate": 2.1442088663045383e-05, + "loss": 9.4422, + "step": 57480 + }, + { + "epoch": 0.2870983045768933, + "grad_norm": 0.09383988380432129, + "learning_rate": 2.1440586748103833e-05, + "loss": 9.4434, + "step": 57490 + }, + { + "epoch": 0.28714824340183276, + "grad_norm": 0.09201846271753311, + "learning_rate": 2.1439084833162284e-05, + "loss": 9.4427, + "step": 57500 + }, + { + "epoch": 0.2871981822267722, + "grad_norm": 0.09528767317533493, + "learning_rate": 2.143758291822073e-05, + "loss": 9.4362, + "step": 57510 + }, + { + "epoch": 0.28724812105171166, + "grad_norm": 0.09210474044084549, + "learning_rate": 2.143608100327918e-05, + "loss": 9.4403, + "step": 57520 + }, + { + "epoch": 0.2872980598766511, + "grad_norm": 0.09716343879699707, + "learning_rate": 2.143457908833763e-05, + "loss": 9.4486, + "step": 57530 + }, + { + "epoch": 0.28734799870159056, + "grad_norm": 0.0949615091085434, + "learning_rate": 2.143307717339608e-05, + "loss": 9.4341, + "step": 57540 + }, + { + "epoch": 0.28739793752653, + "grad_norm": 0.09865114837884903, + "learning_rate": 2.143157525845453e-05, + "loss": 9.4345, + "step": 57550 + }, + { + "epoch": 0.28744787635146946, + "grad_norm": 0.09301295876502991, + "learning_rate": 2.1430073343512978e-05, + "loss": 9.4398, + "step": 57560 + }, + { + "epoch": 0.2874978151764089, + "grad_norm": 0.09104243665933609, + "learning_rate": 2.1428571428571428e-05, + "loss": 9.4445, + "step": 57570 + }, + { + "epoch": 0.28754775400134835, + "grad_norm": 0.09278678148984909, + "learning_rate": 2.142706951362988e-05, + "loss": 9.4513, + "step": 57580 + }, + { + "epoch": 0.2875976928262878, + "grad_norm": 0.0937771275639534, + "learning_rate": 2.142556759868833e-05, + "loss": 9.4393, + "step": 57590 + }, + { + "epoch": 0.28764763165122725, + "grad_norm": 0.09479419887065887, + "learning_rate": 2.142406568374678e-05, + "loss": 9.4373, + "step": 57600 + }, + { + "epoch": 0.2876975704761667, + "grad_norm": 0.09332539886236191, + "learning_rate": 2.1422563768805225e-05, + "loss": 9.443, + "step": 57610 + }, + { + "epoch": 0.28774750930110615, + "grad_norm": 0.09496680647134781, + "learning_rate": 2.1421061853863676e-05, + "loss": 9.4405, + "step": 57620 + }, + { + "epoch": 0.2877974481260456, + "grad_norm": 0.09168408811092377, + "learning_rate": 2.1419559938922126e-05, + "loss": 9.4475, + "step": 57630 + }, + { + "epoch": 0.28784738695098505, + "grad_norm": 0.08694005757570267, + "learning_rate": 2.1418058023980576e-05, + "loss": 9.4491, + "step": 57640 + }, + { + "epoch": 0.2878973257759245, + "grad_norm": 0.0956626832485199, + "learning_rate": 2.1416556109039026e-05, + "loss": 9.4481, + "step": 57650 + }, + { + "epoch": 0.28794726460086395, + "grad_norm": 0.09360313415527344, + "learning_rate": 2.1415054194097476e-05, + "loss": 9.438, + "step": 57660 + }, + { + "epoch": 0.28799720342580337, + "grad_norm": 0.09131594747304916, + "learning_rate": 2.1413552279155923e-05, + "loss": 9.4273, + "step": 57670 + }, + { + "epoch": 0.28804714225074285, + "grad_norm": 0.09347652643918991, + "learning_rate": 2.1412050364214373e-05, + "loss": 9.4416, + "step": 57680 + }, + { + "epoch": 0.28809708107568227, + "grad_norm": 0.09475225955247879, + "learning_rate": 2.1410548449272824e-05, + "loss": 9.431, + "step": 57690 + }, + { + "epoch": 0.28814701990062175, + "grad_norm": 0.0879056304693222, + "learning_rate": 2.1409046534331274e-05, + "loss": 9.4484, + "step": 57700 + }, + { + "epoch": 0.28819695872556117, + "grad_norm": 0.090447798371315, + "learning_rate": 2.1407544619389724e-05, + "loss": 9.4465, + "step": 57710 + }, + { + "epoch": 0.28824689755050065, + "grad_norm": 0.09707989543676376, + "learning_rate": 2.140604270444817e-05, + "loss": 9.4368, + "step": 57720 + }, + { + "epoch": 0.28829683637544007, + "grad_norm": 0.09571398794651031, + "learning_rate": 2.140454078950662e-05, + "loss": 9.4407, + "step": 57730 + }, + { + "epoch": 0.28834677520037955, + "grad_norm": 0.09500686079263687, + "learning_rate": 2.140303887456507e-05, + "loss": 9.4291, + "step": 57740 + }, + { + "epoch": 0.28839671402531897, + "grad_norm": 0.09296731650829315, + "learning_rate": 2.140153695962352e-05, + "loss": 9.4408, + "step": 57750 + }, + { + "epoch": 0.28844665285025844, + "grad_norm": 0.0914110615849495, + "learning_rate": 2.140003504468197e-05, + "loss": 9.4334, + "step": 57760 + }, + { + "epoch": 0.28849659167519787, + "grad_norm": 0.09195576608181, + "learning_rate": 2.1398533129740418e-05, + "loss": 9.4371, + "step": 57770 + }, + { + "epoch": 0.28854653050013734, + "grad_norm": 0.09556440263986588, + "learning_rate": 2.139703121479887e-05, + "loss": 9.4361, + "step": 57780 + }, + { + "epoch": 0.28859646932507677, + "grad_norm": 0.09003462642431259, + "learning_rate": 2.139552929985732e-05, + "loss": 9.4242, + "step": 57790 + }, + { + "epoch": 0.28864640815001624, + "grad_norm": 0.09391631186008453, + "learning_rate": 2.139402738491577e-05, + "loss": 9.4349, + "step": 57800 + }, + { + "epoch": 0.28869634697495566, + "grad_norm": 0.09652207046747208, + "learning_rate": 2.139252546997422e-05, + "loss": 9.4379, + "step": 57810 + }, + { + "epoch": 0.28874628579989514, + "grad_norm": 0.09347474575042725, + "learning_rate": 2.1391023555032666e-05, + "loss": 9.4349, + "step": 57820 + }, + { + "epoch": 0.28879622462483456, + "grad_norm": 0.09264619648456573, + "learning_rate": 2.1389521640091116e-05, + "loss": 9.4356, + "step": 57830 + }, + { + "epoch": 0.28884616344977404, + "grad_norm": 0.09425893425941467, + "learning_rate": 2.1388019725149566e-05, + "loss": 9.4268, + "step": 57840 + }, + { + "epoch": 0.28889610227471346, + "grad_norm": 0.09370515495538712, + "learning_rate": 2.1386517810208016e-05, + "loss": 9.4335, + "step": 57850 + }, + { + "epoch": 0.28894604109965294, + "grad_norm": 0.09193460643291473, + "learning_rate": 2.1385015895266466e-05, + "loss": 9.4294, + "step": 57860 + }, + { + "epoch": 0.28899597992459236, + "grad_norm": 0.09023167192935944, + "learning_rate": 2.1383513980324913e-05, + "loss": 9.4308, + "step": 57870 + }, + { + "epoch": 0.28904591874953184, + "grad_norm": 0.09350664168596268, + "learning_rate": 2.1382012065383363e-05, + "loss": 9.4267, + "step": 57880 + }, + { + "epoch": 0.28909585757447126, + "grad_norm": 0.09755279123783112, + "learning_rate": 2.1380510150441814e-05, + "loss": 9.4382, + "step": 57890 + }, + { + "epoch": 0.28914579639941074, + "grad_norm": 0.09248659759759903, + "learning_rate": 2.1379008235500264e-05, + "loss": 9.4328, + "step": 57900 + }, + { + "epoch": 0.28919573522435016, + "grad_norm": 0.09720529615879059, + "learning_rate": 2.1377506320558714e-05, + "loss": 9.4405, + "step": 57910 + }, + { + "epoch": 0.28924567404928964, + "grad_norm": 0.09519968181848526, + "learning_rate": 2.137600440561716e-05, + "loss": 9.4223, + "step": 57920 + }, + { + "epoch": 0.28929561287422906, + "grad_norm": 0.09368513524532318, + "learning_rate": 2.137450249067561e-05, + "loss": 9.4369, + "step": 57930 + }, + { + "epoch": 0.28934555169916854, + "grad_norm": 0.09568150341510773, + "learning_rate": 2.137300057573406e-05, + "loss": 9.4303, + "step": 57940 + }, + { + "epoch": 0.28939549052410796, + "grad_norm": 0.09773694723844528, + "learning_rate": 2.137149866079251e-05, + "loss": 9.4328, + "step": 57950 + }, + { + "epoch": 0.28944542934904743, + "grad_norm": 0.0943894013762474, + "learning_rate": 2.136999674585096e-05, + "loss": 9.4307, + "step": 57960 + }, + { + "epoch": 0.28949536817398686, + "grad_norm": 0.09780600666999817, + "learning_rate": 2.1368494830909408e-05, + "loss": 9.4274, + "step": 57970 + }, + { + "epoch": 0.28954530699892633, + "grad_norm": 0.09167556464672089, + "learning_rate": 2.1366992915967862e-05, + "loss": 9.4372, + "step": 57980 + }, + { + "epoch": 0.28959524582386575, + "grad_norm": 0.08926273137331009, + "learning_rate": 2.136549100102631e-05, + "loss": 9.4382, + "step": 57990 + }, + { + "epoch": 0.28964518464880523, + "grad_norm": 0.09246543794870377, + "learning_rate": 2.136398908608476e-05, + "loss": 9.4295, + "step": 58000 + }, + { + "epoch": 0.28969512347374465, + "grad_norm": 0.10091344267129898, + "learning_rate": 2.136248717114321e-05, + "loss": 9.4334, + "step": 58010 + }, + { + "epoch": 0.28974506229868413, + "grad_norm": 0.10019103437662125, + "learning_rate": 2.1360985256201656e-05, + "loss": 9.443, + "step": 58020 + }, + { + "epoch": 0.28979500112362355, + "grad_norm": 0.09648888558149338, + "learning_rate": 2.135948334126011e-05, + "loss": 9.4307, + "step": 58030 + }, + { + "epoch": 0.28984493994856303, + "grad_norm": 0.09710298478603363, + "learning_rate": 2.1357981426318556e-05, + "loss": 9.4356, + "step": 58040 + }, + { + "epoch": 0.28989487877350245, + "grad_norm": 0.0928901955485344, + "learning_rate": 2.1356479511377006e-05, + "loss": 9.4327, + "step": 58050 + }, + { + "epoch": 0.28994481759844193, + "grad_norm": 0.09765011072158813, + "learning_rate": 2.1354977596435456e-05, + "loss": 9.4317, + "step": 58060 + }, + { + "epoch": 0.28999475642338135, + "grad_norm": 0.08833613991737366, + "learning_rate": 2.1353475681493903e-05, + "loss": 9.4366, + "step": 58070 + }, + { + "epoch": 0.2900446952483208, + "grad_norm": 0.09767667204141617, + "learning_rate": 2.1351973766552357e-05, + "loss": 9.4237, + "step": 58080 + }, + { + "epoch": 0.29009463407326025, + "grad_norm": 0.09400112926959991, + "learning_rate": 2.1350471851610804e-05, + "loss": 9.4363, + "step": 58090 + }, + { + "epoch": 0.2901445728981997, + "grad_norm": 0.09494834393262863, + "learning_rate": 2.1348969936669254e-05, + "loss": 9.4317, + "step": 58100 + }, + { + "epoch": 0.29019451172313915, + "grad_norm": 0.08743976801633835, + "learning_rate": 2.1347468021727704e-05, + "loss": 9.4312, + "step": 58110 + }, + { + "epoch": 0.2902444505480786, + "grad_norm": 0.0986538678407669, + "learning_rate": 2.134596610678615e-05, + "loss": 9.4404, + "step": 58120 + }, + { + "epoch": 0.29029438937301805, + "grad_norm": 0.09177923202514648, + "learning_rate": 2.1344464191844604e-05, + "loss": 9.4274, + "step": 58130 + }, + { + "epoch": 0.2903443281979575, + "grad_norm": 0.09830587357282639, + "learning_rate": 2.134296227690305e-05, + "loss": 9.4314, + "step": 58140 + }, + { + "epoch": 0.29039426702289695, + "grad_norm": 0.09088832139968872, + "learning_rate": 2.13414603619615e-05, + "loss": 9.4367, + "step": 58150 + }, + { + "epoch": 0.2904442058478364, + "grad_norm": 0.0943973958492279, + "learning_rate": 2.133995844701995e-05, + "loss": 9.4372, + "step": 58160 + }, + { + "epoch": 0.29049414467277584, + "grad_norm": 0.09387973695993423, + "learning_rate": 2.1338456532078398e-05, + "loss": 9.4323, + "step": 58170 + }, + { + "epoch": 0.2905440834977153, + "grad_norm": 0.0891706719994545, + "learning_rate": 2.1336954617136852e-05, + "loss": 9.4364, + "step": 58180 + }, + { + "epoch": 0.29059402232265474, + "grad_norm": 0.09315983951091766, + "learning_rate": 2.13354527021953e-05, + "loss": 9.428, + "step": 58190 + }, + { + "epoch": 0.2906439611475942, + "grad_norm": 0.09957679361104965, + "learning_rate": 2.133395078725375e-05, + "loss": 9.4258, + "step": 58200 + }, + { + "epoch": 0.29069389997253364, + "grad_norm": 0.0999840795993805, + "learning_rate": 2.13324488723122e-05, + "loss": 9.4291, + "step": 58210 + }, + { + "epoch": 0.2907438387974731, + "grad_norm": 0.09671057015657425, + "learning_rate": 2.1330946957370646e-05, + "loss": 9.4328, + "step": 58220 + }, + { + "epoch": 0.29079377762241254, + "grad_norm": 0.0913921594619751, + "learning_rate": 2.13294450424291e-05, + "loss": 9.4308, + "step": 58230 + }, + { + "epoch": 0.290843716447352, + "grad_norm": 0.09564314782619476, + "learning_rate": 2.1327943127487546e-05, + "loss": 9.4269, + "step": 58240 + }, + { + "epoch": 0.29089365527229144, + "grad_norm": 0.09441643208265305, + "learning_rate": 2.1326441212545996e-05, + "loss": 9.4385, + "step": 58250 + }, + { + "epoch": 0.2909435940972309, + "grad_norm": 0.0950765311717987, + "learning_rate": 2.1324939297604446e-05, + "loss": 9.4394, + "step": 58260 + }, + { + "epoch": 0.29099353292217034, + "grad_norm": 0.09390682727098465, + "learning_rate": 2.1323437382662893e-05, + "loss": 9.4312, + "step": 58270 + }, + { + "epoch": 0.2910434717471098, + "grad_norm": 0.09104769676923752, + "learning_rate": 2.1321935467721347e-05, + "loss": 9.4247, + "step": 58280 + }, + { + "epoch": 0.29109341057204924, + "grad_norm": 0.10046746581792831, + "learning_rate": 2.1320433552779794e-05, + "loss": 9.4248, + "step": 58290 + }, + { + "epoch": 0.2911433493969887, + "grad_norm": 0.09288429468870163, + "learning_rate": 2.1318931637838247e-05, + "loss": 9.4221, + "step": 58300 + }, + { + "epoch": 0.29119328822192814, + "grad_norm": 0.09124504029750824, + "learning_rate": 2.1317429722896694e-05, + "loss": 9.4278, + "step": 58310 + }, + { + "epoch": 0.2912432270468676, + "grad_norm": 0.09842488169670105, + "learning_rate": 2.131592780795514e-05, + "loss": 9.4219, + "step": 58320 + }, + { + "epoch": 0.29129316587180704, + "grad_norm": 0.09622178226709366, + "learning_rate": 2.1314425893013594e-05, + "loss": 9.4214, + "step": 58330 + }, + { + "epoch": 0.2913431046967465, + "grad_norm": 0.09460314363241196, + "learning_rate": 2.131292397807204e-05, + "loss": 9.4139, + "step": 58340 + }, + { + "epoch": 0.29139304352168593, + "grad_norm": 0.10045454651117325, + "learning_rate": 2.1311422063130495e-05, + "loss": 9.4236, + "step": 58350 + }, + { + "epoch": 0.29144298234662536, + "grad_norm": 0.09059098362922668, + "learning_rate": 2.130992014818894e-05, + "loss": 9.4255, + "step": 58360 + }, + { + "epoch": 0.29149292117156483, + "grad_norm": 0.09203209728002548, + "learning_rate": 2.1308418233247388e-05, + "loss": 9.4297, + "step": 58370 + }, + { + "epoch": 0.29154285999650426, + "grad_norm": 0.08972401171922684, + "learning_rate": 2.1306916318305842e-05, + "loss": 9.4402, + "step": 58380 + }, + { + "epoch": 0.29159279882144373, + "grad_norm": 0.09233536571264267, + "learning_rate": 2.130541440336429e-05, + "loss": 9.4283, + "step": 58390 + }, + { + "epoch": 0.29164273764638315, + "grad_norm": 0.10041224211454391, + "learning_rate": 2.1303912488422742e-05, + "loss": 9.425, + "step": 58400 + }, + { + "epoch": 0.29169267647132263, + "grad_norm": 0.09314385056495667, + "learning_rate": 2.130241057348119e-05, + "loss": 9.4202, + "step": 58410 + }, + { + "epoch": 0.29174261529626205, + "grad_norm": 0.09177546203136444, + "learning_rate": 2.1300908658539636e-05, + "loss": 9.4319, + "step": 58420 + }, + { + "epoch": 0.29179255412120153, + "grad_norm": 0.09403910487890244, + "learning_rate": 2.129940674359809e-05, + "loss": 9.4367, + "step": 58430 + }, + { + "epoch": 0.29184249294614095, + "grad_norm": 0.09180597215890884, + "learning_rate": 2.1297904828656536e-05, + "loss": 9.4304, + "step": 58440 + }, + { + "epoch": 0.29189243177108043, + "grad_norm": 0.09582697600126266, + "learning_rate": 2.129640291371499e-05, + "loss": 9.4268, + "step": 58450 + }, + { + "epoch": 0.29194237059601985, + "grad_norm": 0.09342509508132935, + "learning_rate": 2.1294900998773436e-05, + "loss": 9.4324, + "step": 58460 + }, + { + "epoch": 0.29199230942095933, + "grad_norm": 0.09267037361860275, + "learning_rate": 2.1293399083831883e-05, + "loss": 9.4224, + "step": 58470 + }, + { + "epoch": 0.29204224824589875, + "grad_norm": 0.10199777036905289, + "learning_rate": 2.1291897168890337e-05, + "loss": 9.4253, + "step": 58480 + }, + { + "epoch": 0.2920921870708382, + "grad_norm": 0.08985645323991776, + "learning_rate": 2.1290395253948784e-05, + "loss": 9.4197, + "step": 58490 + }, + { + "epoch": 0.29214212589577765, + "grad_norm": 0.09782855212688446, + "learning_rate": 2.1288893339007237e-05, + "loss": 9.4231, + "step": 58500 + }, + { + "epoch": 0.2921920647207171, + "grad_norm": 0.09603475779294968, + "learning_rate": 2.1287391424065684e-05, + "loss": 9.4237, + "step": 58510 + }, + { + "epoch": 0.29224200354565655, + "grad_norm": 0.0944107323884964, + "learning_rate": 2.128588950912413e-05, + "loss": 9.4173, + "step": 58520 + }, + { + "epoch": 0.292291942370596, + "grad_norm": 0.0947156697511673, + "learning_rate": 2.1284387594182584e-05, + "loss": 9.4352, + "step": 58530 + }, + { + "epoch": 0.29234188119553545, + "grad_norm": 0.093475840985775, + "learning_rate": 2.128288567924103e-05, + "loss": 9.4267, + "step": 58540 + }, + { + "epoch": 0.2923918200204749, + "grad_norm": 0.1016530990600586, + "learning_rate": 2.1281383764299485e-05, + "loss": 9.4312, + "step": 58550 + }, + { + "epoch": 0.29244175884541435, + "grad_norm": 0.09788955748081207, + "learning_rate": 2.127988184935793e-05, + "loss": 9.4174, + "step": 58560 + }, + { + "epoch": 0.2924916976703538, + "grad_norm": 0.09802679717540741, + "learning_rate": 2.1278379934416378e-05, + "loss": 9.428, + "step": 58570 + }, + { + "epoch": 0.29254163649529324, + "grad_norm": 0.09107000380754471, + "learning_rate": 2.1276878019474832e-05, + "loss": 9.4274, + "step": 58580 + }, + { + "epoch": 0.2925915753202327, + "grad_norm": 0.09072667360305786, + "learning_rate": 2.127537610453328e-05, + "loss": 9.4244, + "step": 58590 + }, + { + "epoch": 0.29264151414517214, + "grad_norm": 0.097968690097332, + "learning_rate": 2.1273874189591732e-05, + "loss": 9.4158, + "step": 58600 + }, + { + "epoch": 0.2926914529701116, + "grad_norm": 0.09575551748275757, + "learning_rate": 2.127237227465018e-05, + "loss": 9.4244, + "step": 58610 + }, + { + "epoch": 0.29274139179505104, + "grad_norm": 0.09668948501348495, + "learning_rate": 2.127087035970863e-05, + "loss": 9.4156, + "step": 58620 + }, + { + "epoch": 0.2927913306199905, + "grad_norm": 0.09409730136394501, + "learning_rate": 2.126936844476708e-05, + "loss": 9.4266, + "step": 58630 + }, + { + "epoch": 0.29284126944492994, + "grad_norm": 0.09310100227594376, + "learning_rate": 2.1267866529825526e-05, + "loss": 9.4158, + "step": 58640 + }, + { + "epoch": 0.2928912082698694, + "grad_norm": 0.09272795915603638, + "learning_rate": 2.126636461488398e-05, + "loss": 9.4275, + "step": 58650 + }, + { + "epoch": 0.29294114709480884, + "grad_norm": 0.09329953789710999, + "learning_rate": 2.1264862699942426e-05, + "loss": 9.425, + "step": 58660 + }, + { + "epoch": 0.2929910859197483, + "grad_norm": 0.08982928097248077, + "learning_rate": 2.1263360785000877e-05, + "loss": 9.4224, + "step": 58670 + }, + { + "epoch": 0.29304102474468774, + "grad_norm": 0.09044983983039856, + "learning_rate": 2.1261858870059327e-05, + "loss": 9.4227, + "step": 58680 + }, + { + "epoch": 0.2930909635696272, + "grad_norm": 0.09245163947343826, + "learning_rate": 2.1260356955117774e-05, + "loss": 9.424, + "step": 58690 + }, + { + "epoch": 0.29314090239456664, + "grad_norm": 0.09064756333827972, + "learning_rate": 2.1258855040176227e-05, + "loss": 9.4198, + "step": 58700 + }, + { + "epoch": 0.2931908412195061, + "grad_norm": 0.08946257829666138, + "learning_rate": 2.1257353125234674e-05, + "loss": 9.4247, + "step": 58710 + }, + { + "epoch": 0.29324078004444554, + "grad_norm": 0.09985718131065369, + "learning_rate": 2.1255851210293124e-05, + "loss": 9.4138, + "step": 58720 + }, + { + "epoch": 0.293290718869385, + "grad_norm": 0.0930742546916008, + "learning_rate": 2.1254349295351574e-05, + "loss": 9.4201, + "step": 58730 + }, + { + "epoch": 0.29334065769432444, + "grad_norm": 0.08928178995847702, + "learning_rate": 2.125284738041002e-05, + "loss": 9.4215, + "step": 58740 + }, + { + "epoch": 0.2933905965192639, + "grad_norm": 0.09685919433832169, + "learning_rate": 2.1251345465468475e-05, + "loss": 9.4265, + "step": 58750 + }, + { + "epoch": 0.29344053534420333, + "grad_norm": 0.09459210932254791, + "learning_rate": 2.124984355052692e-05, + "loss": 9.411, + "step": 58760 + }, + { + "epoch": 0.2934904741691428, + "grad_norm": 0.09364203363656998, + "learning_rate": 2.124834163558537e-05, + "loss": 9.425, + "step": 58770 + }, + { + "epoch": 0.29354041299408223, + "grad_norm": 0.09589599817991257, + "learning_rate": 2.1246839720643822e-05, + "loss": 9.4135, + "step": 58780 + }, + { + "epoch": 0.2935903518190217, + "grad_norm": 0.08911266922950745, + "learning_rate": 2.124533780570227e-05, + "loss": 9.4207, + "step": 58790 + }, + { + "epoch": 0.29364029064396113, + "grad_norm": 0.09034483134746552, + "learning_rate": 2.1243835890760722e-05, + "loss": 9.4197, + "step": 58800 + }, + { + "epoch": 0.2936902294689006, + "grad_norm": 0.0975160002708435, + "learning_rate": 2.124233397581917e-05, + "loss": 9.4134, + "step": 58810 + }, + { + "epoch": 0.29374016829384003, + "grad_norm": 0.09520293772220612, + "learning_rate": 2.124083206087762e-05, + "loss": 9.4159, + "step": 58820 + }, + { + "epoch": 0.2937901071187795, + "grad_norm": 0.09131459891796112, + "learning_rate": 2.123933014593607e-05, + "loss": 9.4216, + "step": 58830 + }, + { + "epoch": 0.29384004594371893, + "grad_norm": 0.10248485952615738, + "learning_rate": 2.1237828230994516e-05, + "loss": 9.4095, + "step": 58840 + }, + { + "epoch": 0.2938899847686584, + "grad_norm": 0.09531592577695847, + "learning_rate": 2.123632631605297e-05, + "loss": 9.4285, + "step": 58850 + }, + { + "epoch": 0.29393992359359783, + "grad_norm": 0.09358898550271988, + "learning_rate": 2.1234824401111416e-05, + "loss": 9.4086, + "step": 58860 + }, + { + "epoch": 0.2939898624185373, + "grad_norm": 0.09685607254505157, + "learning_rate": 2.1233322486169867e-05, + "loss": 9.4227, + "step": 58870 + }, + { + "epoch": 0.29403980124347673, + "grad_norm": 0.09395089745521545, + "learning_rate": 2.1231820571228317e-05, + "loss": 9.4146, + "step": 58880 + }, + { + "epoch": 0.2940897400684162, + "grad_norm": 0.0898551344871521, + "learning_rate": 2.1230318656286764e-05, + "loss": 9.4178, + "step": 58890 + }, + { + "epoch": 0.2941396788933556, + "grad_norm": 0.0958523377776146, + "learning_rate": 2.1228816741345217e-05, + "loss": 9.4159, + "step": 58900 + }, + { + "epoch": 0.2941896177182951, + "grad_norm": 0.093168243765831, + "learning_rate": 2.1227314826403664e-05, + "loss": 9.4083, + "step": 58910 + }, + { + "epoch": 0.2942395565432345, + "grad_norm": 0.09986717253923416, + "learning_rate": 2.1225812911462114e-05, + "loss": 9.4089, + "step": 58920 + }, + { + "epoch": 0.294289495368174, + "grad_norm": 0.0952669233083725, + "learning_rate": 2.1224310996520564e-05, + "loss": 9.4216, + "step": 58930 + }, + { + "epoch": 0.2943394341931134, + "grad_norm": 0.0995723307132721, + "learning_rate": 2.122280908157901e-05, + "loss": 9.4183, + "step": 58940 + }, + { + "epoch": 0.2943893730180529, + "grad_norm": 0.09398746490478516, + "learning_rate": 2.1221307166637465e-05, + "loss": 9.4149, + "step": 58950 + }, + { + "epoch": 0.2944393118429923, + "grad_norm": 0.10161332786083221, + "learning_rate": 2.121980525169591e-05, + "loss": 9.4085, + "step": 58960 + }, + { + "epoch": 0.2944892506679318, + "grad_norm": 0.09317755699157715, + "learning_rate": 2.121830333675436e-05, + "loss": 9.4227, + "step": 58970 + }, + { + "epoch": 0.2945391894928712, + "grad_norm": 0.09553561359643936, + "learning_rate": 2.1216801421812812e-05, + "loss": 9.4188, + "step": 58980 + }, + { + "epoch": 0.2945891283178107, + "grad_norm": 0.09423384815454483, + "learning_rate": 2.1215299506871262e-05, + "loss": 9.4171, + "step": 58990 + }, + { + "epoch": 0.2946390671427501, + "grad_norm": 0.09159230440855026, + "learning_rate": 2.1213797591929712e-05, + "loss": 9.4175, + "step": 59000 + }, + { + "epoch": 0.2946890059676896, + "grad_norm": 0.09593381732702255, + "learning_rate": 2.121229567698816e-05, + "loss": 9.4132, + "step": 59010 + }, + { + "epoch": 0.294738944792629, + "grad_norm": 0.09449375420808792, + "learning_rate": 2.121079376204661e-05, + "loss": 9.422, + "step": 59020 + }, + { + "epoch": 0.2947888836175685, + "grad_norm": 0.09151158481836319, + "learning_rate": 2.120929184710506e-05, + "loss": 9.4252, + "step": 59030 + }, + { + "epoch": 0.2948388224425079, + "grad_norm": 0.09154769778251648, + "learning_rate": 2.120778993216351e-05, + "loss": 9.4157, + "step": 59040 + }, + { + "epoch": 0.2948887612674474, + "grad_norm": 0.09065014123916626, + "learning_rate": 2.120628801722196e-05, + "loss": 9.4179, + "step": 59050 + }, + { + "epoch": 0.2949387000923868, + "grad_norm": 0.09222662448883057, + "learning_rate": 2.1204786102280406e-05, + "loss": 9.4056, + "step": 59060 + }, + { + "epoch": 0.2949886389173263, + "grad_norm": 0.09881994873285294, + "learning_rate": 2.1203284187338857e-05, + "loss": 9.4033, + "step": 59070 + }, + { + "epoch": 0.2950385777422657, + "grad_norm": 0.10036629438400269, + "learning_rate": 2.1201782272397307e-05, + "loss": 9.4071, + "step": 59080 + }, + { + "epoch": 0.2950885165672052, + "grad_norm": 0.09214989095926285, + "learning_rate": 2.1200280357455757e-05, + "loss": 9.4131, + "step": 59090 + }, + { + "epoch": 0.2951384553921446, + "grad_norm": 0.09671517461538315, + "learning_rate": 2.1198778442514207e-05, + "loss": 9.407, + "step": 59100 + }, + { + "epoch": 0.2951883942170841, + "grad_norm": 0.09238772839307785, + "learning_rate": 2.1197276527572654e-05, + "loss": 9.4105, + "step": 59110 + }, + { + "epoch": 0.2952383330420235, + "grad_norm": 0.09682830423116684, + "learning_rate": 2.1195774612631104e-05, + "loss": 9.4144, + "step": 59120 + }, + { + "epoch": 0.295288271866963, + "grad_norm": 0.09174001961946487, + "learning_rate": 2.1194272697689554e-05, + "loss": 9.409, + "step": 59130 + }, + { + "epoch": 0.2953382106919024, + "grad_norm": 0.09227842837572098, + "learning_rate": 2.1192770782748005e-05, + "loss": 9.4068, + "step": 59140 + }, + { + "epoch": 0.2953881495168419, + "grad_norm": 0.09464056044816971, + "learning_rate": 2.1191268867806455e-05, + "loss": 9.4129, + "step": 59150 + }, + { + "epoch": 0.2954380883417813, + "grad_norm": 0.09324008971452713, + "learning_rate": 2.11897669528649e-05, + "loss": 9.4128, + "step": 59160 + }, + { + "epoch": 0.2954880271667208, + "grad_norm": 0.093290314078331, + "learning_rate": 2.118826503792335e-05, + "loss": 9.4178, + "step": 59170 + }, + { + "epoch": 0.2955379659916602, + "grad_norm": 0.09760567545890808, + "learning_rate": 2.1186763122981802e-05, + "loss": 9.4109, + "step": 59180 + }, + { + "epoch": 0.2955879048165997, + "grad_norm": 0.09324108064174652, + "learning_rate": 2.1185261208040252e-05, + "loss": 9.4177, + "step": 59190 + }, + { + "epoch": 0.2956378436415391, + "grad_norm": 0.09072353690862656, + "learning_rate": 2.1183759293098702e-05, + "loss": 9.4191, + "step": 59200 + }, + { + "epoch": 0.2956877824664786, + "grad_norm": 0.09248016029596329, + "learning_rate": 2.118225737815715e-05, + "loss": 9.4159, + "step": 59210 + }, + { + "epoch": 0.295737721291418, + "grad_norm": 0.08923139423131943, + "learning_rate": 2.1180755463215603e-05, + "loss": 9.4086, + "step": 59220 + }, + { + "epoch": 0.2957876601163575, + "grad_norm": 0.09490073472261429, + "learning_rate": 2.117925354827405e-05, + "loss": 9.4123, + "step": 59230 + }, + { + "epoch": 0.2958375989412969, + "grad_norm": 0.09313952177762985, + "learning_rate": 2.11777516333325e-05, + "loss": 9.4169, + "step": 59240 + }, + { + "epoch": 0.2958875377662364, + "grad_norm": 0.0937030240893364, + "learning_rate": 2.117624971839095e-05, + "loss": 9.4101, + "step": 59250 + }, + { + "epoch": 0.2959374765911758, + "grad_norm": 0.0976044312119484, + "learning_rate": 2.1174747803449396e-05, + "loss": 9.4097, + "step": 59260 + }, + { + "epoch": 0.2959874154161153, + "grad_norm": 0.09942024946212769, + "learning_rate": 2.117324588850785e-05, + "loss": 9.4033, + "step": 59270 + }, + { + "epoch": 0.2960373542410547, + "grad_norm": 0.09450352191925049, + "learning_rate": 2.1171743973566297e-05, + "loss": 9.4104, + "step": 59280 + }, + { + "epoch": 0.2960872930659942, + "grad_norm": 0.09626131504774094, + "learning_rate": 2.1170242058624747e-05, + "loss": 9.4153, + "step": 59290 + }, + { + "epoch": 0.2961372318909336, + "grad_norm": 0.09533768892288208, + "learning_rate": 2.1168740143683197e-05, + "loss": 9.4257, + "step": 59300 + }, + { + "epoch": 0.2961871707158731, + "grad_norm": 0.09322089701890945, + "learning_rate": 2.1167238228741647e-05, + "loss": 9.4129, + "step": 59310 + }, + { + "epoch": 0.2962371095408125, + "grad_norm": 0.08978445082902908, + "learning_rate": 2.1165736313800098e-05, + "loss": 9.4065, + "step": 59320 + }, + { + "epoch": 0.296287048365752, + "grad_norm": 0.09518822282552719, + "learning_rate": 2.1164234398858544e-05, + "loss": 9.4149, + "step": 59330 + }, + { + "epoch": 0.2963369871906914, + "grad_norm": 0.09288329631090164, + "learning_rate": 2.1162732483916995e-05, + "loss": 9.4026, + "step": 59340 + }, + { + "epoch": 0.2963869260156308, + "grad_norm": 0.0915319174528122, + "learning_rate": 2.1161230568975445e-05, + "loss": 9.4154, + "step": 59350 + }, + { + "epoch": 0.2964368648405703, + "grad_norm": 0.09252842515707016, + "learning_rate": 2.1159728654033895e-05, + "loss": 9.4147, + "step": 59360 + }, + { + "epoch": 0.2964868036655097, + "grad_norm": 0.09411928057670593, + "learning_rate": 2.1158226739092345e-05, + "loss": 9.4116, + "step": 59370 + }, + { + "epoch": 0.2965367424904492, + "grad_norm": 0.09231306612491608, + "learning_rate": 2.1156724824150792e-05, + "loss": 9.4012, + "step": 59380 + }, + { + "epoch": 0.2965866813153886, + "grad_norm": 0.09552402049303055, + "learning_rate": 2.1155222909209242e-05, + "loss": 9.4092, + "step": 59390 + }, + { + "epoch": 0.2966366201403281, + "grad_norm": 0.0954194962978363, + "learning_rate": 2.1153720994267692e-05, + "loss": 9.4077, + "step": 59400 + }, + { + "epoch": 0.2966865589652675, + "grad_norm": 0.09317540377378464, + "learning_rate": 2.1152219079326142e-05, + "loss": 9.4094, + "step": 59410 + }, + { + "epoch": 0.296736497790207, + "grad_norm": 0.09430140256881714, + "learning_rate": 2.1150717164384593e-05, + "loss": 9.4044, + "step": 59420 + }, + { + "epoch": 0.2967864366151464, + "grad_norm": 0.09407494962215424, + "learning_rate": 2.114921524944304e-05, + "loss": 9.4147, + "step": 59430 + }, + { + "epoch": 0.2968363754400859, + "grad_norm": 0.09957774728536606, + "learning_rate": 2.114771333450149e-05, + "loss": 9.4126, + "step": 59440 + }, + { + "epoch": 0.2968863142650253, + "grad_norm": 0.09334376454353333, + "learning_rate": 2.114621141955994e-05, + "loss": 9.4016, + "step": 59450 + }, + { + "epoch": 0.2969362530899648, + "grad_norm": 0.0957721695303917, + "learning_rate": 2.114470950461839e-05, + "loss": 9.4109, + "step": 59460 + }, + { + "epoch": 0.2969861919149042, + "grad_norm": 0.09447581321001053, + "learning_rate": 2.114320758967684e-05, + "loss": 9.4193, + "step": 59470 + }, + { + "epoch": 0.2970361307398437, + "grad_norm": 0.0941816195845604, + "learning_rate": 2.1141705674735287e-05, + "loss": 9.4047, + "step": 59480 + }, + { + "epoch": 0.2970860695647831, + "grad_norm": 0.08960052579641342, + "learning_rate": 2.1140203759793737e-05, + "loss": 9.4161, + "step": 59490 + }, + { + "epoch": 0.2971360083897226, + "grad_norm": 0.09240063279867172, + "learning_rate": 2.1138701844852187e-05, + "loss": 9.4053, + "step": 59500 + }, + { + "epoch": 0.297185947214662, + "grad_norm": 0.08736561983823776, + "learning_rate": 2.1137199929910637e-05, + "loss": 9.4092, + "step": 59510 + }, + { + "epoch": 0.2972358860396015, + "grad_norm": 0.09654782712459564, + "learning_rate": 2.1135698014969088e-05, + "loss": 9.4072, + "step": 59520 + }, + { + "epoch": 0.2972858248645409, + "grad_norm": 0.09687676280736923, + "learning_rate": 2.1134196100027534e-05, + "loss": 9.409, + "step": 59530 + }, + { + "epoch": 0.2973357636894804, + "grad_norm": 0.0934508666396141, + "learning_rate": 2.1132694185085985e-05, + "loss": 9.4048, + "step": 59540 + }, + { + "epoch": 0.2973857025144198, + "grad_norm": 0.09141778200864792, + "learning_rate": 2.1131192270144435e-05, + "loss": 9.4101, + "step": 59550 + }, + { + "epoch": 0.2974356413393593, + "grad_norm": 0.0905558317899704, + "learning_rate": 2.1129690355202885e-05, + "loss": 9.4071, + "step": 59560 + }, + { + "epoch": 0.2974855801642987, + "grad_norm": 0.09105515480041504, + "learning_rate": 2.1128188440261335e-05, + "loss": 9.4076, + "step": 59570 + }, + { + "epoch": 0.2975355189892382, + "grad_norm": 0.09343723952770233, + "learning_rate": 2.1126686525319782e-05, + "loss": 9.4212, + "step": 59580 + }, + { + "epoch": 0.2975854578141776, + "grad_norm": 0.09228665381669998, + "learning_rate": 2.1125184610378232e-05, + "loss": 9.4151, + "step": 59590 + }, + { + "epoch": 0.2976353966391171, + "grad_norm": 0.09571309387683868, + "learning_rate": 2.1123682695436682e-05, + "loss": 9.3969, + "step": 59600 + }, + { + "epoch": 0.2976853354640565, + "grad_norm": 0.09124372899532318, + "learning_rate": 2.1122180780495132e-05, + "loss": 9.4104, + "step": 59610 + }, + { + "epoch": 0.297735274288996, + "grad_norm": 0.09096134454011917, + "learning_rate": 2.1120678865553583e-05, + "loss": 9.4108, + "step": 59620 + }, + { + "epoch": 0.2977852131139354, + "grad_norm": 0.09474842250347137, + "learning_rate": 2.1119176950612033e-05, + "loss": 9.4011, + "step": 59630 + }, + { + "epoch": 0.2978351519388749, + "grad_norm": 0.09214451164007187, + "learning_rate": 2.111767503567048e-05, + "loss": 9.4138, + "step": 59640 + }, + { + "epoch": 0.2978850907638143, + "grad_norm": 0.09250292927026749, + "learning_rate": 2.111617312072893e-05, + "loss": 9.4054, + "step": 59650 + }, + { + "epoch": 0.2979350295887538, + "grad_norm": 0.09642202407121658, + "learning_rate": 2.111467120578738e-05, + "loss": 9.4088, + "step": 59660 + }, + { + "epoch": 0.2979849684136932, + "grad_norm": 0.09199708700180054, + "learning_rate": 2.111316929084583e-05, + "loss": 9.4239, + "step": 59670 + }, + { + "epoch": 0.2980349072386327, + "grad_norm": 0.09029726684093475, + "learning_rate": 2.111166737590428e-05, + "loss": 9.4053, + "step": 59680 + }, + { + "epoch": 0.2980848460635721, + "grad_norm": 0.09503061324357986, + "learning_rate": 2.1110165460962727e-05, + "loss": 9.4035, + "step": 59690 + }, + { + "epoch": 0.2981347848885116, + "grad_norm": 0.09824039041996002, + "learning_rate": 2.1108663546021177e-05, + "loss": 9.4013, + "step": 59700 + }, + { + "epoch": 0.298184723713451, + "grad_norm": 0.09801938384771347, + "learning_rate": 2.1107161631079627e-05, + "loss": 9.3948, + "step": 59710 + }, + { + "epoch": 0.2982346625383905, + "grad_norm": 0.10737218707799911, + "learning_rate": 2.1105659716138078e-05, + "loss": 9.3953, + "step": 59720 + }, + { + "epoch": 0.2982846013633299, + "grad_norm": 0.09229201078414917, + "learning_rate": 2.1104157801196528e-05, + "loss": 9.4062, + "step": 59730 + }, + { + "epoch": 0.2983345401882694, + "grad_norm": 0.08875761926174164, + "learning_rate": 2.1102655886254975e-05, + "loss": 9.4129, + "step": 59740 + }, + { + "epoch": 0.2983844790132088, + "grad_norm": 0.09321201592683792, + "learning_rate": 2.1101153971313425e-05, + "loss": 9.412, + "step": 59750 + }, + { + "epoch": 0.2984344178381483, + "grad_norm": 0.0906151756644249, + "learning_rate": 2.1099652056371875e-05, + "loss": 9.3997, + "step": 59760 + }, + { + "epoch": 0.2984843566630877, + "grad_norm": 0.10253120213747025, + "learning_rate": 2.1098150141430325e-05, + "loss": 9.397, + "step": 59770 + }, + { + "epoch": 0.2985342954880272, + "grad_norm": 0.08926044404506683, + "learning_rate": 2.1096648226488775e-05, + "loss": 9.4117, + "step": 59780 + }, + { + "epoch": 0.2985842343129666, + "grad_norm": 0.09025093168020248, + "learning_rate": 2.1095146311547222e-05, + "loss": 9.4118, + "step": 59790 + }, + { + "epoch": 0.2986341731379061, + "grad_norm": 0.09714997559785843, + "learning_rate": 2.1093644396605672e-05, + "loss": 9.4087, + "step": 59800 + }, + { + "epoch": 0.2986841119628455, + "grad_norm": 0.09052036702632904, + "learning_rate": 2.1092142481664122e-05, + "loss": 9.4045, + "step": 59810 + }, + { + "epoch": 0.298734050787785, + "grad_norm": 0.09755855053663254, + "learning_rate": 2.1090640566722573e-05, + "loss": 9.398, + "step": 59820 + }, + { + "epoch": 0.2987839896127244, + "grad_norm": 0.09479790180921555, + "learning_rate": 2.1089138651781023e-05, + "loss": 9.404, + "step": 59830 + }, + { + "epoch": 0.2988339284376639, + "grad_norm": 0.09147018939256668, + "learning_rate": 2.108763673683947e-05, + "loss": 9.3966, + "step": 59840 + }, + { + "epoch": 0.2988838672626033, + "grad_norm": 0.0920906662940979, + "learning_rate": 2.108613482189792e-05, + "loss": 9.4109, + "step": 59850 + }, + { + "epoch": 0.2989338060875428, + "grad_norm": 0.0949472039937973, + "learning_rate": 2.108463290695637e-05, + "loss": 9.3958, + "step": 59860 + }, + { + "epoch": 0.2989837449124822, + "grad_norm": 0.0973886027932167, + "learning_rate": 2.108313099201482e-05, + "loss": 9.4021, + "step": 59870 + }, + { + "epoch": 0.2990336837374217, + "grad_norm": 0.08694946020841599, + "learning_rate": 2.108162907707327e-05, + "loss": 9.405, + "step": 59880 + }, + { + "epoch": 0.2990836225623611, + "grad_norm": 0.09538460522890091, + "learning_rate": 2.1080127162131717e-05, + "loss": 9.4057, + "step": 59890 + }, + { + "epoch": 0.2991335613873006, + "grad_norm": 0.0949239507317543, + "learning_rate": 2.1078625247190167e-05, + "loss": 9.4029, + "step": 59900 + }, + { + "epoch": 0.29918350021224, + "grad_norm": 0.09560471773147583, + "learning_rate": 2.1077123332248617e-05, + "loss": 9.3962, + "step": 59910 + }, + { + "epoch": 0.29923343903717947, + "grad_norm": 0.09594585001468658, + "learning_rate": 2.1075621417307068e-05, + "loss": 9.4046, + "step": 59920 + }, + { + "epoch": 0.2992833778621189, + "grad_norm": 0.09318960458040237, + "learning_rate": 2.1074119502365518e-05, + "loss": 9.407, + "step": 59930 + }, + { + "epoch": 0.29933331668705837, + "grad_norm": 0.09141054004430771, + "learning_rate": 2.1072617587423965e-05, + "loss": 9.3944, + "step": 59940 + }, + { + "epoch": 0.2993832555119978, + "grad_norm": 0.09421342611312866, + "learning_rate": 2.1071115672482418e-05, + "loss": 9.4033, + "step": 59950 + }, + { + "epoch": 0.29943319433693727, + "grad_norm": 0.09517619758844376, + "learning_rate": 2.1069613757540865e-05, + "loss": 9.3988, + "step": 59960 + }, + { + "epoch": 0.2994831331618767, + "grad_norm": 0.09366179257631302, + "learning_rate": 2.1068111842599315e-05, + "loss": 9.4144, + "step": 59970 + }, + { + "epoch": 0.29953307198681617, + "grad_norm": 0.09368981420993805, + "learning_rate": 2.1066609927657765e-05, + "loss": 9.3938, + "step": 59980 + }, + { + "epoch": 0.2995830108117556, + "grad_norm": 0.09122604131698608, + "learning_rate": 2.1065108012716212e-05, + "loss": 9.3962, + "step": 59990 + }, + { + "epoch": 0.29963294963669507, + "grad_norm": 0.0922602191567421, + "learning_rate": 2.1063606097774666e-05, + "loss": 9.3997, + "step": 60000 + }, + { + "epoch": 0.2996828884616345, + "grad_norm": 0.1016426831483841, + "learning_rate": 2.1062104182833112e-05, + "loss": 9.3922, + "step": 60010 + }, + { + "epoch": 0.29973282728657397, + "grad_norm": 0.08960018306970596, + "learning_rate": 2.1060602267891563e-05, + "loss": 9.3926, + "step": 60020 + }, + { + "epoch": 0.2997827661115134, + "grad_norm": 0.08921145647764206, + "learning_rate": 2.1059100352950013e-05, + "loss": 9.3912, + "step": 60030 + }, + { + "epoch": 0.29983270493645287, + "grad_norm": 0.09575637429952621, + "learning_rate": 2.105759843800846e-05, + "loss": 9.3966, + "step": 60040 + }, + { + "epoch": 0.2998826437613923, + "grad_norm": 0.09875485301017761, + "learning_rate": 2.1056096523066913e-05, + "loss": 9.3956, + "step": 60050 + }, + { + "epoch": 0.29993258258633176, + "grad_norm": 0.09163234382867813, + "learning_rate": 2.105459460812536e-05, + "loss": 9.3946, + "step": 60060 + }, + { + "epoch": 0.2999825214112712, + "grad_norm": 0.0965883880853653, + "learning_rate": 2.105309269318381e-05, + "loss": 9.3962, + "step": 60070 + }, + { + "epoch": 0.30003246023621066, + "grad_norm": 0.09682179242372513, + "learning_rate": 2.105159077824226e-05, + "loss": 9.4011, + "step": 60080 + }, + { + "epoch": 0.3000823990611501, + "grad_norm": 0.09401979297399521, + "learning_rate": 2.1050088863300707e-05, + "loss": 9.3984, + "step": 60090 + }, + { + "epoch": 0.30013233788608956, + "grad_norm": 0.09467914700508118, + "learning_rate": 2.104858694835916e-05, + "loss": 9.3967, + "step": 60100 + }, + { + "epoch": 0.300182276711029, + "grad_norm": 0.09743199497461319, + "learning_rate": 2.1047085033417607e-05, + "loss": 9.3941, + "step": 60110 + }, + { + "epoch": 0.30023221553596846, + "grad_norm": 0.08682700246572495, + "learning_rate": 2.1045583118476058e-05, + "loss": 9.3914, + "step": 60120 + }, + { + "epoch": 0.3002821543609079, + "grad_norm": 0.0950264036655426, + "learning_rate": 2.1044081203534508e-05, + "loss": 9.395, + "step": 60130 + }, + { + "epoch": 0.30033209318584736, + "grad_norm": 0.09148851037025452, + "learning_rate": 2.1042579288592955e-05, + "loss": 9.3978, + "step": 60140 + }, + { + "epoch": 0.3003820320107868, + "grad_norm": 0.09214703738689423, + "learning_rate": 2.1041077373651408e-05, + "loss": 9.3972, + "step": 60150 + }, + { + "epoch": 0.30043197083572626, + "grad_norm": 0.0951230525970459, + "learning_rate": 2.1039575458709855e-05, + "loss": 9.4042, + "step": 60160 + }, + { + "epoch": 0.3004819096606657, + "grad_norm": 0.09221307933330536, + "learning_rate": 2.1038073543768305e-05, + "loss": 9.4051, + "step": 60170 + }, + { + "epoch": 0.30053184848560516, + "grad_norm": 0.09453621506690979, + "learning_rate": 2.1036571628826755e-05, + "loss": 9.3895, + "step": 60180 + }, + { + "epoch": 0.3005817873105446, + "grad_norm": 0.09616675227880478, + "learning_rate": 2.1035069713885202e-05, + "loss": 9.4036, + "step": 60190 + }, + { + "epoch": 0.30063172613548406, + "grad_norm": 0.0845404714345932, + "learning_rate": 2.1033567798943656e-05, + "loss": 9.4068, + "step": 60200 + }, + { + "epoch": 0.3006816649604235, + "grad_norm": 0.092598095536232, + "learning_rate": 2.1032065884002102e-05, + "loss": 9.396, + "step": 60210 + }, + { + "epoch": 0.30073160378536296, + "grad_norm": 0.09395390748977661, + "learning_rate": 2.1030563969060553e-05, + "loss": 9.4003, + "step": 60220 + }, + { + "epoch": 0.3007815426103024, + "grad_norm": 0.09719889611005783, + "learning_rate": 2.1029062054119003e-05, + "loss": 9.383, + "step": 60230 + }, + { + "epoch": 0.30083148143524185, + "grad_norm": 0.10100460797548294, + "learning_rate": 2.102756013917745e-05, + "loss": 9.398, + "step": 60240 + }, + { + "epoch": 0.3008814202601813, + "grad_norm": 0.09339495748281479, + "learning_rate": 2.1026058224235903e-05, + "loss": 9.3972, + "step": 60250 + }, + { + "epoch": 0.30093135908512075, + "grad_norm": 0.095674529671669, + "learning_rate": 2.102455630929435e-05, + "loss": 9.3979, + "step": 60260 + }, + { + "epoch": 0.3009812979100602, + "grad_norm": 0.09552553296089172, + "learning_rate": 2.1023054394352804e-05, + "loss": 9.4002, + "step": 60270 + }, + { + "epoch": 0.30103123673499965, + "grad_norm": 0.0926952138543129, + "learning_rate": 2.102155247941125e-05, + "loss": 9.4003, + "step": 60280 + }, + { + "epoch": 0.3010811755599391, + "grad_norm": 0.09167633205652237, + "learning_rate": 2.1020050564469697e-05, + "loss": 9.3939, + "step": 60290 + }, + { + "epoch": 0.30113111438487855, + "grad_norm": 0.10082550346851349, + "learning_rate": 2.101854864952815e-05, + "loss": 9.3882, + "step": 60300 + }, + { + "epoch": 0.301181053209818, + "grad_norm": 0.09979651123285294, + "learning_rate": 2.1017046734586597e-05, + "loss": 9.3897, + "step": 60310 + }, + { + "epoch": 0.30123099203475745, + "grad_norm": 0.09624478965997696, + "learning_rate": 2.101554481964505e-05, + "loss": 9.3969, + "step": 60320 + }, + { + "epoch": 0.30128093085969687, + "grad_norm": 0.09281089156866074, + "learning_rate": 2.1014042904703498e-05, + "loss": 9.3837, + "step": 60330 + }, + { + "epoch": 0.3013308696846363, + "grad_norm": 0.08858885616064072, + "learning_rate": 2.1012540989761945e-05, + "loss": 9.3936, + "step": 60340 + }, + { + "epoch": 0.30138080850957577, + "grad_norm": 0.09177447855472565, + "learning_rate": 2.1011039074820398e-05, + "loss": 9.4041, + "step": 60350 + }, + { + "epoch": 0.3014307473345152, + "grad_norm": 0.09817986190319061, + "learning_rate": 2.1009537159878845e-05, + "loss": 9.4026, + "step": 60360 + }, + { + "epoch": 0.30148068615945467, + "grad_norm": 0.09388108551502228, + "learning_rate": 2.10080352449373e-05, + "loss": 9.3916, + "step": 60370 + }, + { + "epoch": 0.3015306249843941, + "grad_norm": 0.09480094164609909, + "learning_rate": 2.1006533329995745e-05, + "loss": 9.3932, + "step": 60380 + }, + { + "epoch": 0.30158056380933357, + "grad_norm": 0.09471967816352844, + "learning_rate": 2.1005031415054192e-05, + "loss": 9.3966, + "step": 60390 + }, + { + "epoch": 0.301630502634273, + "grad_norm": 0.0964706614613533, + "learning_rate": 2.1003529500112646e-05, + "loss": 9.3854, + "step": 60400 + }, + { + "epoch": 0.30168044145921247, + "grad_norm": 0.0946371778845787, + "learning_rate": 2.1002027585171092e-05, + "loss": 9.3927, + "step": 60410 + }, + { + "epoch": 0.3017303802841519, + "grad_norm": 0.09347691386938095, + "learning_rate": 2.1000525670229546e-05, + "loss": 9.4019, + "step": 60420 + }, + { + "epoch": 0.30178031910909137, + "grad_norm": 0.09268350899219513, + "learning_rate": 2.0999023755287993e-05, + "loss": 9.3979, + "step": 60430 + }, + { + "epoch": 0.3018302579340308, + "grad_norm": 0.10059063881635666, + "learning_rate": 2.099752184034644e-05, + "loss": 9.3866, + "step": 60440 + }, + { + "epoch": 0.30188019675897027, + "grad_norm": 0.08915655314922333, + "learning_rate": 2.0996019925404893e-05, + "loss": 9.3971, + "step": 60450 + }, + { + "epoch": 0.3019301355839097, + "grad_norm": 0.09569905698299408, + "learning_rate": 2.099451801046334e-05, + "loss": 9.3943, + "step": 60460 + }, + { + "epoch": 0.30198007440884916, + "grad_norm": 0.10031815618276596, + "learning_rate": 2.0993016095521794e-05, + "loss": 9.3903, + "step": 60470 + }, + { + "epoch": 0.3020300132337886, + "grad_norm": 0.09447156637907028, + "learning_rate": 2.099151418058024e-05, + "loss": 9.3889, + "step": 60480 + }, + { + "epoch": 0.30207995205872806, + "grad_norm": 0.09017782658338547, + "learning_rate": 2.0990012265638687e-05, + "loss": 9.3948, + "step": 60490 + }, + { + "epoch": 0.3021298908836675, + "grad_norm": 0.09461423754692078, + "learning_rate": 2.098851035069714e-05, + "loss": 9.3869, + "step": 60500 + }, + { + "epoch": 0.30217982970860696, + "grad_norm": 0.09656091034412384, + "learning_rate": 2.0987008435755587e-05, + "loss": 9.3937, + "step": 60510 + }, + { + "epoch": 0.3022297685335464, + "grad_norm": 0.08991716802120209, + "learning_rate": 2.098550652081404e-05, + "loss": 9.3899, + "step": 60520 + }, + { + "epoch": 0.30227970735848586, + "grad_norm": 0.09503340721130371, + "learning_rate": 2.0984004605872488e-05, + "loss": 9.3889, + "step": 60530 + }, + { + "epoch": 0.3023296461834253, + "grad_norm": 0.08786161243915558, + "learning_rate": 2.0982502690930935e-05, + "loss": 9.3972, + "step": 60540 + }, + { + "epoch": 0.30237958500836476, + "grad_norm": 0.09755858033895493, + "learning_rate": 2.0981000775989388e-05, + "loss": 9.4025, + "step": 60550 + }, + { + "epoch": 0.3024295238333042, + "grad_norm": 0.09725630283355713, + "learning_rate": 2.0979498861047835e-05, + "loss": 9.3909, + "step": 60560 + }, + { + "epoch": 0.30247946265824366, + "grad_norm": 0.08783844858407974, + "learning_rate": 2.097799694610629e-05, + "loss": 9.3987, + "step": 60570 + }, + { + "epoch": 0.3025294014831831, + "grad_norm": 0.09798564016819, + "learning_rate": 2.0976495031164735e-05, + "loss": 9.4008, + "step": 60580 + }, + { + "epoch": 0.30257934030812256, + "grad_norm": 0.09155137091875076, + "learning_rate": 2.0974993116223185e-05, + "loss": 9.3922, + "step": 60590 + }, + { + "epoch": 0.302629279133062, + "grad_norm": 0.09524587541818619, + "learning_rate": 2.0973491201281636e-05, + "loss": 9.3868, + "step": 60600 + }, + { + "epoch": 0.30267921795800146, + "grad_norm": 0.09223838150501251, + "learning_rate": 2.0971989286340082e-05, + "loss": 9.3943, + "step": 60610 + }, + { + "epoch": 0.3027291567829409, + "grad_norm": 0.09258273243904114, + "learning_rate": 2.0970487371398536e-05, + "loss": 9.382, + "step": 60620 + }, + { + "epoch": 0.30277909560788036, + "grad_norm": 0.09514257311820984, + "learning_rate": 2.0968985456456983e-05, + "loss": 9.3933, + "step": 60630 + }, + { + "epoch": 0.3028290344328198, + "grad_norm": 0.08645520359277725, + "learning_rate": 2.0967483541515433e-05, + "loss": 9.3864, + "step": 60640 + }, + { + "epoch": 0.30287897325775925, + "grad_norm": 0.09346000850200653, + "learning_rate": 2.0965981626573883e-05, + "loss": 9.4062, + "step": 60650 + }, + { + "epoch": 0.3029289120826987, + "grad_norm": 0.09467698633670807, + "learning_rate": 2.096447971163233e-05, + "loss": 9.3819, + "step": 60660 + }, + { + "epoch": 0.30297885090763815, + "grad_norm": 0.09375496953725815, + "learning_rate": 2.0962977796690784e-05, + "loss": 9.3892, + "step": 60670 + }, + { + "epoch": 0.3030287897325776, + "grad_norm": 0.09323824197053909, + "learning_rate": 2.096147588174923e-05, + "loss": 9.3915, + "step": 60680 + }, + { + "epoch": 0.30307872855751705, + "grad_norm": 0.0925745964050293, + "learning_rate": 2.095997396680768e-05, + "loss": 9.3835, + "step": 60690 + }, + { + "epoch": 0.3031286673824565, + "grad_norm": 0.09299085289239883, + "learning_rate": 2.095847205186613e-05, + "loss": 9.3882, + "step": 60700 + }, + { + "epoch": 0.30317860620739595, + "grad_norm": 0.09627874940633774, + "learning_rate": 2.0956970136924577e-05, + "loss": 9.3914, + "step": 60710 + }, + { + "epoch": 0.3032285450323354, + "grad_norm": 0.09084891527891159, + "learning_rate": 2.095546822198303e-05, + "loss": 9.3905, + "step": 60720 + }, + { + "epoch": 0.30327848385727485, + "grad_norm": 0.09070669114589691, + "learning_rate": 2.0953966307041478e-05, + "loss": 9.3851, + "step": 60730 + }, + { + "epoch": 0.30332842268221427, + "grad_norm": 0.09068963676691055, + "learning_rate": 2.0952464392099928e-05, + "loss": 9.3869, + "step": 60740 + }, + { + "epoch": 0.30337836150715375, + "grad_norm": 0.09145542979240417, + "learning_rate": 2.0950962477158378e-05, + "loss": 9.3823, + "step": 60750 + }, + { + "epoch": 0.30342830033209317, + "grad_norm": 0.09390787780284882, + "learning_rate": 2.0949460562216825e-05, + "loss": 9.3873, + "step": 60760 + }, + { + "epoch": 0.30347823915703265, + "grad_norm": 0.09399698674678802, + "learning_rate": 2.094795864727528e-05, + "loss": 9.3884, + "step": 60770 + }, + { + "epoch": 0.30352817798197207, + "grad_norm": 0.09260044991970062, + "learning_rate": 2.0946456732333725e-05, + "loss": 9.3739, + "step": 60780 + }, + { + "epoch": 0.30357811680691155, + "grad_norm": 0.09860619902610779, + "learning_rate": 2.0944954817392176e-05, + "loss": 9.4007, + "step": 60790 + }, + { + "epoch": 0.30362805563185097, + "grad_norm": 0.0905844122171402, + "learning_rate": 2.0943452902450626e-05, + "loss": 9.3961, + "step": 60800 + }, + { + "epoch": 0.30367799445679045, + "grad_norm": 0.09324529021978378, + "learning_rate": 2.0941950987509072e-05, + "loss": 9.3876, + "step": 60810 + }, + { + "epoch": 0.30372793328172987, + "grad_norm": 0.08999308943748474, + "learning_rate": 2.0940449072567526e-05, + "loss": 9.3901, + "step": 60820 + }, + { + "epoch": 0.30377787210666934, + "grad_norm": 0.09419707208871841, + "learning_rate": 2.0938947157625973e-05, + "loss": 9.3841, + "step": 60830 + }, + { + "epoch": 0.30382781093160877, + "grad_norm": 0.09293625503778458, + "learning_rate": 2.0937445242684423e-05, + "loss": 9.3892, + "step": 60840 + }, + { + "epoch": 0.30387774975654824, + "grad_norm": 0.09216861426830292, + "learning_rate": 2.0935943327742873e-05, + "loss": 9.3948, + "step": 60850 + }, + { + "epoch": 0.30392768858148766, + "grad_norm": 0.0980692207813263, + "learning_rate": 2.093444141280132e-05, + "loss": 9.3888, + "step": 60860 + }, + { + "epoch": 0.30397762740642714, + "grad_norm": 0.09464273601770401, + "learning_rate": 2.0932939497859774e-05, + "loss": 9.3793, + "step": 60870 + }, + { + "epoch": 0.30402756623136656, + "grad_norm": 0.0960172563791275, + "learning_rate": 2.093143758291822e-05, + "loss": 9.3839, + "step": 60880 + }, + { + "epoch": 0.30407750505630604, + "grad_norm": 0.09181035310029984, + "learning_rate": 2.092993566797667e-05, + "loss": 9.3835, + "step": 60890 + }, + { + "epoch": 0.30412744388124546, + "grad_norm": 0.09236995130777359, + "learning_rate": 2.092843375303512e-05, + "loss": 9.3809, + "step": 60900 + }, + { + "epoch": 0.30417738270618494, + "grad_norm": 0.08640799671411514, + "learning_rate": 2.092693183809357e-05, + "loss": 9.378, + "step": 60910 + }, + { + "epoch": 0.30422732153112436, + "grad_norm": 0.0906277671456337, + "learning_rate": 2.092542992315202e-05, + "loss": 9.381, + "step": 60920 + }, + { + "epoch": 0.30427726035606384, + "grad_norm": 0.10483743995428085, + "learning_rate": 2.0923928008210468e-05, + "loss": 9.3815, + "step": 60930 + }, + { + "epoch": 0.30432719918100326, + "grad_norm": 0.08990181982517242, + "learning_rate": 2.0922426093268918e-05, + "loss": 9.3943, + "step": 60940 + }, + { + "epoch": 0.30437713800594274, + "grad_norm": 0.09138570725917816, + "learning_rate": 2.0920924178327368e-05, + "loss": 9.3993, + "step": 60950 + }, + { + "epoch": 0.30442707683088216, + "grad_norm": 0.0894051194190979, + "learning_rate": 2.091942226338582e-05, + "loss": 9.3783, + "step": 60960 + }, + { + "epoch": 0.30447701565582164, + "grad_norm": 0.09751047194004059, + "learning_rate": 2.091792034844427e-05, + "loss": 9.3815, + "step": 60970 + }, + { + "epoch": 0.30452695448076106, + "grad_norm": 0.09688021242618561, + "learning_rate": 2.0916418433502715e-05, + "loss": 9.3885, + "step": 60980 + }, + { + "epoch": 0.30457689330570054, + "grad_norm": 0.09227102994918823, + "learning_rate": 2.0914916518561166e-05, + "loss": 9.3901, + "step": 60990 + }, + { + "epoch": 0.30462683213063996, + "grad_norm": 0.09256913512945175, + "learning_rate": 2.0913414603619616e-05, + "loss": 9.3922, + "step": 61000 + }, + { + "epoch": 0.30467677095557943, + "grad_norm": 0.08989210426807404, + "learning_rate": 2.0911912688678066e-05, + "loss": 9.3778, + "step": 61010 + }, + { + "epoch": 0.30472670978051886, + "grad_norm": 0.09482806921005249, + "learning_rate": 2.0910410773736516e-05, + "loss": 9.3865, + "step": 61020 + }, + { + "epoch": 0.30477664860545833, + "grad_norm": 0.09472500532865524, + "learning_rate": 2.0908908858794963e-05, + "loss": 9.3832, + "step": 61030 + }, + { + "epoch": 0.30482658743039776, + "grad_norm": 0.09536511451005936, + "learning_rate": 2.0907406943853413e-05, + "loss": 9.3852, + "step": 61040 + }, + { + "epoch": 0.30487652625533723, + "grad_norm": 0.0961252823472023, + "learning_rate": 2.0905905028911863e-05, + "loss": 9.3877, + "step": 61050 + }, + { + "epoch": 0.30492646508027665, + "grad_norm": 0.0982387587428093, + "learning_rate": 2.0904403113970313e-05, + "loss": 9.3891, + "step": 61060 + }, + { + "epoch": 0.30497640390521613, + "grad_norm": 0.09710310399532318, + "learning_rate": 2.0902901199028764e-05, + "loss": 9.3805, + "step": 61070 + }, + { + "epoch": 0.30502634273015555, + "grad_norm": 0.09833624958992004, + "learning_rate": 2.090139928408721e-05, + "loss": 9.3872, + "step": 61080 + }, + { + "epoch": 0.30507628155509503, + "grad_norm": 0.09045345336198807, + "learning_rate": 2.089989736914566e-05, + "loss": 9.3848, + "step": 61090 + }, + { + "epoch": 0.30512622038003445, + "grad_norm": 0.0948140025138855, + "learning_rate": 2.089839545420411e-05, + "loss": 9.3753, + "step": 61100 + }, + { + "epoch": 0.30517615920497393, + "grad_norm": 0.09809758514165878, + "learning_rate": 2.089689353926256e-05, + "loss": 9.3902, + "step": 61110 + }, + { + "epoch": 0.30522609802991335, + "grad_norm": 0.09343090653419495, + "learning_rate": 2.089539162432101e-05, + "loss": 9.3775, + "step": 61120 + }, + { + "epoch": 0.30527603685485283, + "grad_norm": 0.09006762504577637, + "learning_rate": 2.0893889709379458e-05, + "loss": 9.376, + "step": 61130 + }, + { + "epoch": 0.30532597567979225, + "grad_norm": 0.09377524256706238, + "learning_rate": 2.0892387794437908e-05, + "loss": 9.3931, + "step": 61140 + }, + { + "epoch": 0.3053759145047317, + "grad_norm": 0.09025995433330536, + "learning_rate": 2.0890885879496358e-05, + "loss": 9.3832, + "step": 61150 + }, + { + "epoch": 0.30542585332967115, + "grad_norm": 0.09238019585609436, + "learning_rate": 2.088938396455481e-05, + "loss": 9.3832, + "step": 61160 + }, + { + "epoch": 0.3054757921546106, + "grad_norm": 0.0937105119228363, + "learning_rate": 2.088788204961326e-05, + "loss": 9.3737, + "step": 61170 + }, + { + "epoch": 0.30552573097955005, + "grad_norm": 0.10375673323869705, + "learning_rate": 2.0886380134671705e-05, + "loss": 9.3801, + "step": 61180 + }, + { + "epoch": 0.3055756698044895, + "grad_norm": 0.09446416795253754, + "learning_rate": 2.0884878219730156e-05, + "loss": 9.3759, + "step": 61190 + }, + { + "epoch": 0.30562560862942895, + "grad_norm": 0.10232505947351456, + "learning_rate": 2.0883376304788606e-05, + "loss": 9.3806, + "step": 61200 + }, + { + "epoch": 0.3056755474543684, + "grad_norm": 0.09139604866504669, + "learning_rate": 2.0881874389847056e-05, + "loss": 9.385, + "step": 61210 + }, + { + "epoch": 0.30572548627930785, + "grad_norm": 0.09424486756324768, + "learning_rate": 2.0880372474905506e-05, + "loss": 9.384, + "step": 61220 + }, + { + "epoch": 0.3057754251042473, + "grad_norm": 0.10143927484750748, + "learning_rate": 2.0878870559963956e-05, + "loss": 9.3777, + "step": 61230 + }, + { + "epoch": 0.30582536392918674, + "grad_norm": 0.09673741459846497, + "learning_rate": 2.0877368645022403e-05, + "loss": 9.3755, + "step": 61240 + }, + { + "epoch": 0.3058753027541262, + "grad_norm": 0.08992431312799454, + "learning_rate": 2.0875866730080853e-05, + "loss": 9.3668, + "step": 61250 + }, + { + "epoch": 0.30592524157906564, + "grad_norm": 0.0944616049528122, + "learning_rate": 2.0874364815139303e-05, + "loss": 9.3816, + "step": 61260 + }, + { + "epoch": 0.3059751804040051, + "grad_norm": 0.09414882957935333, + "learning_rate": 2.0872862900197754e-05, + "loss": 9.3888, + "step": 61270 + }, + { + "epoch": 0.30602511922894454, + "grad_norm": 0.0981215313076973, + "learning_rate": 2.0871360985256204e-05, + "loss": 9.3905, + "step": 61280 + }, + { + "epoch": 0.306075058053884, + "grad_norm": 0.0904826819896698, + "learning_rate": 2.086985907031465e-05, + "loss": 9.3693, + "step": 61290 + }, + { + "epoch": 0.30612499687882344, + "grad_norm": 0.09661656618118286, + "learning_rate": 2.08683571553731e-05, + "loss": 9.3803, + "step": 61300 + }, + { + "epoch": 0.30617493570376286, + "grad_norm": 0.09500022977590561, + "learning_rate": 2.086685524043155e-05, + "loss": 9.369, + "step": 61310 + }, + { + "epoch": 0.30622487452870234, + "grad_norm": 0.0918661504983902, + "learning_rate": 2.086535332549e-05, + "loss": 9.3814, + "step": 61320 + }, + { + "epoch": 0.30627481335364176, + "grad_norm": 0.09368791431188583, + "learning_rate": 2.086385141054845e-05, + "loss": 9.3804, + "step": 61330 + }, + { + "epoch": 0.30632475217858124, + "grad_norm": 0.09218349307775497, + "learning_rate": 2.0862349495606898e-05, + "loss": 9.3819, + "step": 61340 + }, + { + "epoch": 0.30637469100352066, + "grad_norm": 0.10055679827928543, + "learning_rate": 2.0860847580665348e-05, + "loss": 9.3934, + "step": 61350 + }, + { + "epoch": 0.30642462982846014, + "grad_norm": 0.09349419176578522, + "learning_rate": 2.08593456657238e-05, + "loss": 9.3864, + "step": 61360 + }, + { + "epoch": 0.30647456865339956, + "grad_norm": 0.09518557041883469, + "learning_rate": 2.085784375078225e-05, + "loss": 9.3871, + "step": 61370 + }, + { + "epoch": 0.30652450747833904, + "grad_norm": 0.09079370647668839, + "learning_rate": 2.08563418358407e-05, + "loss": 9.3785, + "step": 61380 + }, + { + "epoch": 0.30657444630327846, + "grad_norm": 0.0915151834487915, + "learning_rate": 2.0854839920899146e-05, + "loss": 9.3852, + "step": 61390 + }, + { + "epoch": 0.30662438512821794, + "grad_norm": 0.09642606973648071, + "learning_rate": 2.0853338005957596e-05, + "loss": 9.3797, + "step": 61400 + }, + { + "epoch": 0.30667432395315736, + "grad_norm": 0.08891963213682175, + "learning_rate": 2.0851836091016046e-05, + "loss": 9.382, + "step": 61410 + }, + { + "epoch": 0.30672426277809683, + "grad_norm": 0.0949094146490097, + "learning_rate": 2.0850334176074496e-05, + "loss": 9.3788, + "step": 61420 + }, + { + "epoch": 0.30677420160303626, + "grad_norm": 0.0930948406457901, + "learning_rate": 2.0848832261132946e-05, + "loss": 9.3803, + "step": 61430 + }, + { + "epoch": 0.30682414042797573, + "grad_norm": 0.09792865067720413, + "learning_rate": 2.0847330346191393e-05, + "loss": 9.3795, + "step": 61440 + }, + { + "epoch": 0.30687407925291516, + "grad_norm": 0.09721236675977707, + "learning_rate": 2.0845828431249843e-05, + "loss": 9.3869, + "step": 61450 + }, + { + "epoch": 0.30692401807785463, + "grad_norm": 0.095427006483078, + "learning_rate": 2.0844326516308293e-05, + "loss": 9.3857, + "step": 61460 + }, + { + "epoch": 0.30697395690279405, + "grad_norm": 0.093628890812397, + "learning_rate": 2.0842824601366744e-05, + "loss": 9.3756, + "step": 61470 + }, + { + "epoch": 0.30702389572773353, + "grad_norm": 0.09637012332677841, + "learning_rate": 2.0841322686425194e-05, + "loss": 9.3804, + "step": 61480 + }, + { + "epoch": 0.30707383455267295, + "grad_norm": 0.09300060570240021, + "learning_rate": 2.083982077148364e-05, + "loss": 9.3687, + "step": 61490 + }, + { + "epoch": 0.30712377337761243, + "grad_norm": 0.08992978185415268, + "learning_rate": 2.083831885654209e-05, + "loss": 9.382, + "step": 61500 + }, + { + "epoch": 0.30717371220255185, + "grad_norm": 0.09597385674715042, + "learning_rate": 2.083681694160054e-05, + "loss": 9.379, + "step": 61510 + }, + { + "epoch": 0.30722365102749133, + "grad_norm": 0.09319521486759186, + "learning_rate": 2.083531502665899e-05, + "loss": 9.3809, + "step": 61520 + }, + { + "epoch": 0.30727358985243075, + "grad_norm": 0.09977719187736511, + "learning_rate": 2.083381311171744e-05, + "loss": 9.3695, + "step": 61530 + }, + { + "epoch": 0.30732352867737023, + "grad_norm": 0.09011849015951157, + "learning_rate": 2.0832311196775888e-05, + "loss": 9.3751, + "step": 61540 + }, + { + "epoch": 0.30737346750230965, + "grad_norm": 0.09430202841758728, + "learning_rate": 2.083080928183434e-05, + "loss": 9.3763, + "step": 61550 + }, + { + "epoch": 0.3074234063272491, + "grad_norm": 0.09825265407562256, + "learning_rate": 2.082930736689279e-05, + "loss": 9.3643, + "step": 61560 + }, + { + "epoch": 0.30747334515218855, + "grad_norm": 0.09236638993024826, + "learning_rate": 2.082780545195124e-05, + "loss": 9.3792, + "step": 61570 + }, + { + "epoch": 0.307523283977128, + "grad_norm": 0.09510213881731033, + "learning_rate": 2.082630353700969e-05, + "loss": 9.3854, + "step": 61580 + }, + { + "epoch": 0.30757322280206745, + "grad_norm": 0.09195613861083984, + "learning_rate": 2.0824801622068136e-05, + "loss": 9.3655, + "step": 61590 + }, + { + "epoch": 0.3076231616270069, + "grad_norm": 0.09247229248285294, + "learning_rate": 2.082329970712659e-05, + "loss": 9.3756, + "step": 61600 + }, + { + "epoch": 0.30767310045194635, + "grad_norm": 0.08966951817274094, + "learning_rate": 2.0821797792185036e-05, + "loss": 9.379, + "step": 61610 + }, + { + "epoch": 0.3077230392768858, + "grad_norm": 0.10270446538925171, + "learning_rate": 2.0820295877243486e-05, + "loss": 9.3774, + "step": 61620 + }, + { + "epoch": 0.30777297810182525, + "grad_norm": 0.09327306598424911, + "learning_rate": 2.0818793962301936e-05, + "loss": 9.3764, + "step": 61630 + }, + { + "epoch": 0.3078229169267647, + "grad_norm": 0.09807731211185455, + "learning_rate": 2.0817292047360383e-05, + "loss": 9.3884, + "step": 61640 + }, + { + "epoch": 0.30787285575170414, + "grad_norm": 0.09317595511674881, + "learning_rate": 2.0815790132418837e-05, + "loss": 9.3817, + "step": 61650 + }, + { + "epoch": 0.3079227945766436, + "grad_norm": 0.09404472261667252, + "learning_rate": 2.0814288217477283e-05, + "loss": 9.3812, + "step": 61660 + }, + { + "epoch": 0.30797273340158304, + "grad_norm": 0.0946296826004982, + "learning_rate": 2.0812786302535734e-05, + "loss": 9.3784, + "step": 61670 + }, + { + "epoch": 0.3080226722265225, + "grad_norm": 0.09800449013710022, + "learning_rate": 2.0811284387594184e-05, + "loss": 9.3691, + "step": 61680 + }, + { + "epoch": 0.30807261105146194, + "grad_norm": 0.09789472073316574, + "learning_rate": 2.080978247265263e-05, + "loss": 9.3718, + "step": 61690 + }, + { + "epoch": 0.3081225498764014, + "grad_norm": 0.0944046601653099, + "learning_rate": 2.0808280557711084e-05, + "loss": 9.3751, + "step": 61700 + }, + { + "epoch": 0.30817248870134084, + "grad_norm": 0.09153825044631958, + "learning_rate": 2.080677864276953e-05, + "loss": 9.3673, + "step": 61710 + }, + { + "epoch": 0.3082224275262803, + "grad_norm": 0.09950811415910721, + "learning_rate": 2.080527672782798e-05, + "loss": 9.3759, + "step": 61720 + }, + { + "epoch": 0.30827236635121974, + "grad_norm": 0.09401055425405502, + "learning_rate": 2.080377481288643e-05, + "loss": 9.3752, + "step": 61730 + }, + { + "epoch": 0.3083223051761592, + "grad_norm": 0.09673509001731873, + "learning_rate": 2.0802272897944878e-05, + "loss": 9.3674, + "step": 61740 + }, + { + "epoch": 0.30837224400109864, + "grad_norm": 0.09277281165122986, + "learning_rate": 2.080077098300333e-05, + "loss": 9.3649, + "step": 61750 + }, + { + "epoch": 0.3084221828260381, + "grad_norm": 0.09555141627788544, + "learning_rate": 2.079926906806178e-05, + "loss": 9.3666, + "step": 61760 + }, + { + "epoch": 0.30847212165097754, + "grad_norm": 0.09273938834667206, + "learning_rate": 2.079776715312023e-05, + "loss": 9.3702, + "step": 61770 + }, + { + "epoch": 0.308522060475917, + "grad_norm": 0.09557077288627625, + "learning_rate": 2.079626523817868e-05, + "loss": 9.3766, + "step": 61780 + }, + { + "epoch": 0.30857199930085644, + "grad_norm": 0.09353851526975632, + "learning_rate": 2.0794763323237126e-05, + "loss": 9.3753, + "step": 61790 + }, + { + "epoch": 0.3086219381257959, + "grad_norm": 0.09252606332302094, + "learning_rate": 2.079326140829558e-05, + "loss": 9.3661, + "step": 61800 + }, + { + "epoch": 0.30867187695073534, + "grad_norm": 0.09201821684837341, + "learning_rate": 2.0791759493354026e-05, + "loss": 9.3726, + "step": 61810 + }, + { + "epoch": 0.3087218157756748, + "grad_norm": 0.09545756131410599, + "learning_rate": 2.0790257578412476e-05, + "loss": 9.3795, + "step": 61820 + }, + { + "epoch": 0.30877175460061423, + "grad_norm": 0.0946696475148201, + "learning_rate": 2.0788755663470926e-05, + "loss": 9.3637, + "step": 61830 + }, + { + "epoch": 0.3088216934255537, + "grad_norm": 0.09465292096138, + "learning_rate": 2.0787253748529373e-05, + "loss": 9.3803, + "step": 61840 + }, + { + "epoch": 0.30887163225049313, + "grad_norm": 0.09846868366003036, + "learning_rate": 2.0785751833587827e-05, + "loss": 9.3652, + "step": 61850 + }, + { + "epoch": 0.3089215710754326, + "grad_norm": 0.09574449062347412, + "learning_rate": 2.0784249918646273e-05, + "loss": 9.3696, + "step": 61860 + }, + { + "epoch": 0.30897150990037203, + "grad_norm": 0.09601958841085434, + "learning_rate": 2.0782748003704724e-05, + "loss": 9.3682, + "step": 61870 + }, + { + "epoch": 0.3090214487253115, + "grad_norm": 0.0923629179596901, + "learning_rate": 2.0781246088763174e-05, + "loss": 9.3626, + "step": 61880 + }, + { + "epoch": 0.30907138755025093, + "grad_norm": 0.09300635010004044, + "learning_rate": 2.077974417382162e-05, + "loss": 9.3663, + "step": 61890 + }, + { + "epoch": 0.3091213263751904, + "grad_norm": 0.09495260566473007, + "learning_rate": 2.0778242258880074e-05, + "loss": 9.3814, + "step": 61900 + }, + { + "epoch": 0.30917126520012983, + "grad_norm": 0.0935489758849144, + "learning_rate": 2.077674034393852e-05, + "loss": 9.3656, + "step": 61910 + }, + { + "epoch": 0.3092212040250693, + "grad_norm": 0.09819766879081726, + "learning_rate": 2.0775238428996975e-05, + "loss": 9.3711, + "step": 61920 + }, + { + "epoch": 0.30927114285000873, + "grad_norm": 0.09066694229841232, + "learning_rate": 2.077373651405542e-05, + "loss": 9.3818, + "step": 61930 + }, + { + "epoch": 0.3093210816749482, + "grad_norm": 0.09707902371883392, + "learning_rate": 2.0772234599113868e-05, + "loss": 9.3693, + "step": 61940 + }, + { + "epoch": 0.30937102049988763, + "grad_norm": 0.09382162988185883, + "learning_rate": 2.077073268417232e-05, + "loss": 9.3681, + "step": 61950 + }, + { + "epoch": 0.3094209593248271, + "grad_norm": 0.09599854052066803, + "learning_rate": 2.076923076923077e-05, + "loss": 9.3744, + "step": 61960 + }, + { + "epoch": 0.3094708981497665, + "grad_norm": 0.09321361035108566, + "learning_rate": 2.0767728854289222e-05, + "loss": 9.3718, + "step": 61970 + }, + { + "epoch": 0.309520836974706, + "grad_norm": 0.09474252909421921, + "learning_rate": 2.076622693934767e-05, + "loss": 9.3747, + "step": 61980 + }, + { + "epoch": 0.3095707757996454, + "grad_norm": 0.09249020367860794, + "learning_rate": 2.0764725024406116e-05, + "loss": 9.3677, + "step": 61990 + }, + { + "epoch": 0.3096207146245849, + "grad_norm": 0.09276629239320755, + "learning_rate": 2.076322310946457e-05, + "loss": 9.375, + "step": 62000 + }, + { + "epoch": 0.3096706534495243, + "grad_norm": 0.09531158208847046, + "learning_rate": 2.0761721194523016e-05, + "loss": 9.3636, + "step": 62010 + }, + { + "epoch": 0.3097205922744638, + "grad_norm": 0.09786634147167206, + "learning_rate": 2.076021927958147e-05, + "loss": 9.3572, + "step": 62020 + }, + { + "epoch": 0.3097705310994032, + "grad_norm": 0.09066082537174225, + "learning_rate": 2.0758717364639916e-05, + "loss": 9.3621, + "step": 62030 + }, + { + "epoch": 0.3098204699243427, + "grad_norm": 0.09768658876419067, + "learning_rate": 2.0757215449698363e-05, + "loss": 9.3643, + "step": 62040 + }, + { + "epoch": 0.3098704087492821, + "grad_norm": 0.09411395341157913, + "learning_rate": 2.0755713534756817e-05, + "loss": 9.3757, + "step": 62050 + }, + { + "epoch": 0.3099203475742216, + "grad_norm": 0.09201698750257492, + "learning_rate": 2.0754211619815263e-05, + "loss": 9.3652, + "step": 62060 + }, + { + "epoch": 0.309970286399161, + "grad_norm": 0.09616990387439728, + "learning_rate": 2.0752709704873717e-05, + "loss": 9.3675, + "step": 62070 + }, + { + "epoch": 0.3100202252241005, + "grad_norm": 0.09069666266441345, + "learning_rate": 2.0751207789932164e-05, + "loss": 9.3703, + "step": 62080 + }, + { + "epoch": 0.3100701640490399, + "grad_norm": 0.09700020402669907, + "learning_rate": 2.074970587499061e-05, + "loss": 9.3644, + "step": 62090 + }, + { + "epoch": 0.3101201028739794, + "grad_norm": 0.10156318545341492, + "learning_rate": 2.0748203960049064e-05, + "loss": 9.3651, + "step": 62100 + }, + { + "epoch": 0.3101700416989188, + "grad_norm": 0.09886864572763443, + "learning_rate": 2.074670204510751e-05, + "loss": 9.3718, + "step": 62110 + }, + { + "epoch": 0.3102199805238583, + "grad_norm": 0.09852635115385056, + "learning_rate": 2.0745200130165965e-05, + "loss": 9.3617, + "step": 62120 + }, + { + "epoch": 0.3102699193487977, + "grad_norm": 0.09089913219213486, + "learning_rate": 2.074369821522441e-05, + "loss": 9.3574, + "step": 62130 + }, + { + "epoch": 0.3103198581737372, + "grad_norm": 0.08944336324930191, + "learning_rate": 2.0742196300282858e-05, + "loss": 9.3706, + "step": 62140 + }, + { + "epoch": 0.3103697969986766, + "grad_norm": 0.09155122190713882, + "learning_rate": 2.074069438534131e-05, + "loss": 9.3682, + "step": 62150 + }, + { + "epoch": 0.3104197358236161, + "grad_norm": 0.09597411006689072, + "learning_rate": 2.073919247039976e-05, + "loss": 9.3717, + "step": 62160 + }, + { + "epoch": 0.3104696746485555, + "grad_norm": 0.09866214543581009, + "learning_rate": 2.0737690555458212e-05, + "loss": 9.3543, + "step": 62170 + }, + { + "epoch": 0.310519613473495, + "grad_norm": 0.09262767434120178, + "learning_rate": 2.073618864051666e-05, + "loss": 9.3623, + "step": 62180 + }, + { + "epoch": 0.3105695522984344, + "grad_norm": 0.08718739449977875, + "learning_rate": 2.0734686725575106e-05, + "loss": 9.3704, + "step": 62190 + }, + { + "epoch": 0.3106194911233739, + "grad_norm": 0.08802207559347153, + "learning_rate": 2.073318481063356e-05, + "loss": 9.3702, + "step": 62200 + }, + { + "epoch": 0.3106694299483133, + "grad_norm": 0.09451784193515778, + "learning_rate": 2.0731682895692006e-05, + "loss": 9.3645, + "step": 62210 + }, + { + "epoch": 0.3107193687732528, + "grad_norm": 0.09479044377803802, + "learning_rate": 2.073018098075046e-05, + "loss": 9.3502, + "step": 62220 + }, + { + "epoch": 0.3107693075981922, + "grad_norm": 0.09419619292020798, + "learning_rate": 2.0728679065808906e-05, + "loss": 9.3656, + "step": 62230 + }, + { + "epoch": 0.3108192464231317, + "grad_norm": 0.09506448358297348, + "learning_rate": 2.0727177150867357e-05, + "loss": 9.3581, + "step": 62240 + }, + { + "epoch": 0.3108691852480711, + "grad_norm": 0.09851628541946411, + "learning_rate": 2.0725675235925807e-05, + "loss": 9.362, + "step": 62250 + }, + { + "epoch": 0.3109191240730106, + "grad_norm": 0.0938975065946579, + "learning_rate": 2.0724173320984253e-05, + "loss": 9.3594, + "step": 62260 + }, + { + "epoch": 0.31096906289795, + "grad_norm": 0.0952107235789299, + "learning_rate": 2.0722671406042707e-05, + "loss": 9.3598, + "step": 62270 + }, + { + "epoch": 0.3110190017228895, + "grad_norm": 0.09428625553846359, + "learning_rate": 2.0721169491101154e-05, + "loss": 9.3688, + "step": 62280 + }, + { + "epoch": 0.3110689405478289, + "grad_norm": 0.09929768741130829, + "learning_rate": 2.0719667576159604e-05, + "loss": 9.3563, + "step": 62290 + }, + { + "epoch": 0.31111887937276833, + "grad_norm": 0.09252603352069855, + "learning_rate": 2.0718165661218054e-05, + "loss": 9.3555, + "step": 62300 + }, + { + "epoch": 0.3111688181977078, + "grad_norm": 0.0918884426355362, + "learning_rate": 2.07166637462765e-05, + "loss": 9.3604, + "step": 62310 + }, + { + "epoch": 0.31121875702264723, + "grad_norm": 0.09412012249231339, + "learning_rate": 2.0715161831334955e-05, + "loss": 9.36, + "step": 62320 + }, + { + "epoch": 0.3112686958475867, + "grad_norm": 0.08864130824804306, + "learning_rate": 2.07136599163934e-05, + "loss": 9.3741, + "step": 62330 + }, + { + "epoch": 0.31131863467252613, + "grad_norm": 0.09902546554803848, + "learning_rate": 2.071215800145185e-05, + "loss": 9.3707, + "step": 62340 + }, + { + "epoch": 0.3113685734974656, + "grad_norm": 0.0945492535829544, + "learning_rate": 2.07106560865103e-05, + "loss": 9.362, + "step": 62350 + }, + { + "epoch": 0.31141851232240503, + "grad_norm": 0.0934128388762474, + "learning_rate": 2.070915417156875e-05, + "loss": 9.3579, + "step": 62360 + }, + { + "epoch": 0.3114684511473445, + "grad_norm": 0.10103520005941391, + "learning_rate": 2.0707652256627202e-05, + "loss": 9.3705, + "step": 62370 + }, + { + "epoch": 0.3115183899722839, + "grad_norm": 0.09728195518255234, + "learning_rate": 2.070615034168565e-05, + "loss": 9.3674, + "step": 62380 + }, + { + "epoch": 0.3115683287972234, + "grad_norm": 0.0965341180562973, + "learning_rate": 2.07046484267441e-05, + "loss": 9.368, + "step": 62390 + }, + { + "epoch": 0.3116182676221628, + "grad_norm": 0.09560953825712204, + "learning_rate": 2.070314651180255e-05, + "loss": 9.3595, + "step": 62400 + }, + { + "epoch": 0.3116682064471023, + "grad_norm": 0.09266313165426254, + "learning_rate": 2.0701644596860996e-05, + "loss": 9.3655, + "step": 62410 + }, + { + "epoch": 0.3117181452720417, + "grad_norm": 0.09788119792938232, + "learning_rate": 2.070014268191945e-05, + "loss": 9.3602, + "step": 62420 + }, + { + "epoch": 0.3117680840969812, + "grad_norm": 0.09870301932096481, + "learning_rate": 2.0698640766977896e-05, + "loss": 9.3552, + "step": 62430 + }, + { + "epoch": 0.3118180229219206, + "grad_norm": 0.09183664619922638, + "learning_rate": 2.0697138852036347e-05, + "loss": 9.3642, + "step": 62440 + }, + { + "epoch": 0.3118679617468601, + "grad_norm": 0.09062953293323517, + "learning_rate": 2.0695636937094797e-05, + "loss": 9.3599, + "step": 62450 + }, + { + "epoch": 0.3119179005717995, + "grad_norm": 0.08905572444200516, + "learning_rate": 2.0694135022153243e-05, + "loss": 9.3629, + "step": 62460 + }, + { + "epoch": 0.311967839396739, + "grad_norm": 0.08906285464763641, + "learning_rate": 2.0692633107211697e-05, + "loss": 9.3675, + "step": 62470 + }, + { + "epoch": 0.3120177782216784, + "grad_norm": 0.09582462906837463, + "learning_rate": 2.0691131192270144e-05, + "loss": 9.3733, + "step": 62480 + }, + { + "epoch": 0.3120677170466179, + "grad_norm": 0.09110390394926071, + "learning_rate": 2.0689629277328594e-05, + "loss": 9.3662, + "step": 62490 + }, + { + "epoch": 0.3121176558715573, + "grad_norm": 0.09354531019926071, + "learning_rate": 2.0688127362387044e-05, + "loss": 9.3623, + "step": 62500 + }, + { + "epoch": 0.3121675946964968, + "grad_norm": 0.0941261276602745, + "learning_rate": 2.068662544744549e-05, + "loss": 9.3616, + "step": 62510 + }, + { + "epoch": 0.3122175335214362, + "grad_norm": 0.09080076217651367, + "learning_rate": 2.0685123532503945e-05, + "loss": 9.3641, + "step": 62520 + }, + { + "epoch": 0.3122674723463757, + "grad_norm": 0.09199047833681107, + "learning_rate": 2.068362161756239e-05, + "loss": 9.3541, + "step": 62530 + }, + { + "epoch": 0.3123174111713151, + "grad_norm": 0.09623630344867706, + "learning_rate": 2.068211970262084e-05, + "loss": 9.3581, + "step": 62540 + }, + { + "epoch": 0.3123673499962546, + "grad_norm": 0.09023137390613556, + "learning_rate": 2.0680617787679292e-05, + "loss": 9.3669, + "step": 62550 + }, + { + "epoch": 0.312417288821194, + "grad_norm": 0.09632597118616104, + "learning_rate": 2.0679115872737742e-05, + "loss": 9.3582, + "step": 62560 + }, + { + "epoch": 0.3124672276461335, + "grad_norm": 0.09074317663908005, + "learning_rate": 2.0677613957796192e-05, + "loss": 9.3596, + "step": 62570 + }, + { + "epoch": 0.3125171664710729, + "grad_norm": 0.09316986799240112, + "learning_rate": 2.067611204285464e-05, + "loss": 9.3562, + "step": 62580 + }, + { + "epoch": 0.3125671052960124, + "grad_norm": 0.09124487638473511, + "learning_rate": 2.067461012791309e-05, + "loss": 9.3615, + "step": 62590 + }, + { + "epoch": 0.3126170441209518, + "grad_norm": 0.09576891362667084, + "learning_rate": 2.067310821297154e-05, + "loss": 9.3529, + "step": 62600 + }, + { + "epoch": 0.3126669829458913, + "grad_norm": 0.09395425021648407, + "learning_rate": 2.067160629802999e-05, + "loss": 9.3519, + "step": 62610 + }, + { + "epoch": 0.3127169217708307, + "grad_norm": 0.09081248939037323, + "learning_rate": 2.067010438308844e-05, + "loss": 9.3598, + "step": 62620 + }, + { + "epoch": 0.3127668605957702, + "grad_norm": 0.09516331553459167, + "learning_rate": 2.0668602468146886e-05, + "loss": 9.3653, + "step": 62630 + }, + { + "epoch": 0.3128167994207096, + "grad_norm": 0.09487758576869965, + "learning_rate": 2.0667100553205337e-05, + "loss": 9.3517, + "step": 62640 + }, + { + "epoch": 0.3128667382456491, + "grad_norm": 0.0971754714846611, + "learning_rate": 2.0665598638263787e-05, + "loss": 9.3476, + "step": 62650 + }, + { + "epoch": 0.3129166770705885, + "grad_norm": 0.09306435286998749, + "learning_rate": 2.0664096723322237e-05, + "loss": 9.3561, + "step": 62660 + }, + { + "epoch": 0.312966615895528, + "grad_norm": 0.0970197319984436, + "learning_rate": 2.0662594808380687e-05, + "loss": 9.3684, + "step": 62670 + }, + { + "epoch": 0.3130165547204674, + "grad_norm": 0.09373274445533752, + "learning_rate": 2.0661092893439134e-05, + "loss": 9.3663, + "step": 62680 + }, + { + "epoch": 0.3130664935454069, + "grad_norm": 0.09603800624608994, + "learning_rate": 2.0659590978497584e-05, + "loss": 9.3468, + "step": 62690 + }, + { + "epoch": 0.3131164323703463, + "grad_norm": 0.09086347371339798, + "learning_rate": 2.0658089063556034e-05, + "loss": 9.3515, + "step": 62700 + }, + { + "epoch": 0.3131663711952858, + "grad_norm": 0.09210800379514694, + "learning_rate": 2.0656587148614484e-05, + "loss": 9.3627, + "step": 62710 + }, + { + "epoch": 0.3132163100202252, + "grad_norm": 0.09706009924411774, + "learning_rate": 2.0655085233672935e-05, + "loss": 9.3576, + "step": 62720 + }, + { + "epoch": 0.3132662488451647, + "grad_norm": 0.09735792875289917, + "learning_rate": 2.065358331873138e-05, + "loss": 9.3584, + "step": 62730 + }, + { + "epoch": 0.3133161876701041, + "grad_norm": 0.08968986570835114, + "learning_rate": 2.065208140378983e-05, + "loss": 9.3586, + "step": 62740 + }, + { + "epoch": 0.3133661264950436, + "grad_norm": 0.09612874686717987, + "learning_rate": 2.0650579488848282e-05, + "loss": 9.3559, + "step": 62750 + }, + { + "epoch": 0.313416065319983, + "grad_norm": 0.10443656146526337, + "learning_rate": 2.0649077573906732e-05, + "loss": 9.3511, + "step": 62760 + }, + { + "epoch": 0.3134660041449225, + "grad_norm": 0.09903521835803986, + "learning_rate": 2.0647575658965182e-05, + "loss": 9.3614, + "step": 62770 + }, + { + "epoch": 0.3135159429698619, + "grad_norm": 0.09267803281545639, + "learning_rate": 2.064607374402363e-05, + "loss": 9.3444, + "step": 62780 + }, + { + "epoch": 0.3135658817948014, + "grad_norm": 0.09199144691228867, + "learning_rate": 2.064457182908208e-05, + "loss": 9.3585, + "step": 62790 + }, + { + "epoch": 0.3136158206197408, + "grad_norm": 0.09724773466587067, + "learning_rate": 2.064306991414053e-05, + "loss": 9.352, + "step": 62800 + }, + { + "epoch": 0.3136657594446803, + "grad_norm": 0.10160032659769058, + "learning_rate": 2.064156799919898e-05, + "loss": 9.3515, + "step": 62810 + }, + { + "epoch": 0.3137156982696197, + "grad_norm": 0.09469520300626755, + "learning_rate": 2.064006608425743e-05, + "loss": 9.3425, + "step": 62820 + }, + { + "epoch": 0.3137656370945592, + "grad_norm": 0.09688717126846313, + "learning_rate": 2.0638564169315876e-05, + "loss": 9.3486, + "step": 62830 + }, + { + "epoch": 0.3138155759194986, + "grad_norm": 0.09648570418357849, + "learning_rate": 2.0637062254374327e-05, + "loss": 9.365, + "step": 62840 + }, + { + "epoch": 0.3138655147444381, + "grad_norm": 0.09986833482980728, + "learning_rate": 2.0635560339432777e-05, + "loss": 9.3472, + "step": 62850 + }, + { + "epoch": 0.3139154535693775, + "grad_norm": 0.09859945625066757, + "learning_rate": 2.0634058424491227e-05, + "loss": 9.3553, + "step": 62860 + }, + { + "epoch": 0.313965392394317, + "grad_norm": 0.09469161182641983, + "learning_rate": 2.0632556509549677e-05, + "loss": 9.351, + "step": 62870 + }, + { + "epoch": 0.3140153312192564, + "grad_norm": 0.10031906515359879, + "learning_rate": 2.0631054594608127e-05, + "loss": 9.3604, + "step": 62880 + }, + { + "epoch": 0.3140652700441959, + "grad_norm": 0.09583910554647446, + "learning_rate": 2.0629552679666574e-05, + "loss": 9.3615, + "step": 62890 + }, + { + "epoch": 0.3141152088691353, + "grad_norm": 0.09656809270381927, + "learning_rate": 2.0628050764725024e-05, + "loss": 9.3522, + "step": 62900 + }, + { + "epoch": 0.3141651476940748, + "grad_norm": 0.08953146636486053, + "learning_rate": 2.0626548849783474e-05, + "loss": 9.3564, + "step": 62910 + }, + { + "epoch": 0.3142150865190142, + "grad_norm": 0.09325104206800461, + "learning_rate": 2.0625046934841925e-05, + "loss": 9.3426, + "step": 62920 + }, + { + "epoch": 0.3142650253439537, + "grad_norm": 0.09127029776573181, + "learning_rate": 2.0623545019900375e-05, + "loss": 9.3543, + "step": 62930 + }, + { + "epoch": 0.3143149641688931, + "grad_norm": 0.09044724702835083, + "learning_rate": 2.062204310495882e-05, + "loss": 9.3675, + "step": 62940 + }, + { + "epoch": 0.3143649029938326, + "grad_norm": 0.0922994613647461, + "learning_rate": 2.0620541190017272e-05, + "loss": 9.3608, + "step": 62950 + }, + { + "epoch": 0.314414841818772, + "grad_norm": 0.09617674350738525, + "learning_rate": 2.0619039275075722e-05, + "loss": 9.353, + "step": 62960 + }, + { + "epoch": 0.3144647806437115, + "grad_norm": 0.09723023325204849, + "learning_rate": 2.0617537360134172e-05, + "loss": 9.3553, + "step": 62970 + }, + { + "epoch": 0.3145147194686509, + "grad_norm": 0.09201812744140625, + "learning_rate": 2.0616035445192622e-05, + "loss": 9.3563, + "step": 62980 + }, + { + "epoch": 0.31456465829359037, + "grad_norm": 0.09177816659212112, + "learning_rate": 2.061453353025107e-05, + "loss": 9.3471, + "step": 62990 + }, + { + "epoch": 0.3146145971185298, + "grad_norm": 0.09503309428691864, + "learning_rate": 2.061303161530952e-05, + "loss": 9.3563, + "step": 63000 + }, + { + "epoch": 0.31466453594346927, + "grad_norm": 0.0928831547498703, + "learning_rate": 2.061152970036797e-05, + "loss": 9.3518, + "step": 63010 + }, + { + "epoch": 0.3147144747684087, + "grad_norm": 0.09431839734315872, + "learning_rate": 2.061002778542642e-05, + "loss": 9.3627, + "step": 63020 + }, + { + "epoch": 0.31476441359334817, + "grad_norm": 0.08945141732692719, + "learning_rate": 2.060852587048487e-05, + "loss": 9.3505, + "step": 63030 + }, + { + "epoch": 0.3148143524182876, + "grad_norm": 0.09035779535770416, + "learning_rate": 2.0607023955543317e-05, + "loss": 9.3552, + "step": 63040 + }, + { + "epoch": 0.31486429124322707, + "grad_norm": 0.09948616474866867, + "learning_rate": 2.0605522040601767e-05, + "loss": 9.3376, + "step": 63050 + }, + { + "epoch": 0.3149142300681665, + "grad_norm": 0.09741248935461044, + "learning_rate": 2.0604020125660217e-05, + "loss": 9.343, + "step": 63060 + }, + { + "epoch": 0.31496416889310597, + "grad_norm": 0.0959203913807869, + "learning_rate": 2.0602518210718667e-05, + "loss": 9.3452, + "step": 63070 + }, + { + "epoch": 0.3150141077180454, + "grad_norm": 0.09166992455720901, + "learning_rate": 2.0601016295777117e-05, + "loss": 9.3536, + "step": 63080 + }, + { + "epoch": 0.31506404654298487, + "grad_norm": 0.09764059633016586, + "learning_rate": 2.0599514380835564e-05, + "loss": 9.3498, + "step": 63090 + }, + { + "epoch": 0.3151139853679243, + "grad_norm": 0.09447203576564789, + "learning_rate": 2.0598012465894014e-05, + "loss": 9.3514, + "step": 63100 + }, + { + "epoch": 0.31516392419286376, + "grad_norm": 0.09235192835330963, + "learning_rate": 2.0596510550952464e-05, + "loss": 9.3443, + "step": 63110 + }, + { + "epoch": 0.3152138630178032, + "grad_norm": 0.09150667488574982, + "learning_rate": 2.0595008636010915e-05, + "loss": 9.3494, + "step": 63120 + }, + { + "epoch": 0.31526380184274266, + "grad_norm": 0.09310675412416458, + "learning_rate": 2.0593506721069365e-05, + "loss": 9.3463, + "step": 63130 + }, + { + "epoch": 0.3153137406676821, + "grad_norm": 0.09127092361450195, + "learning_rate": 2.059200480612781e-05, + "loss": 9.3564, + "step": 63140 + }, + { + "epoch": 0.31536367949262156, + "grad_norm": 0.09105300903320312, + "learning_rate": 2.0590502891186262e-05, + "loss": 9.3598, + "step": 63150 + }, + { + "epoch": 0.315413618317561, + "grad_norm": 0.09039835631847382, + "learning_rate": 2.0589000976244712e-05, + "loss": 9.3697, + "step": 63160 + }, + { + "epoch": 0.31546355714250046, + "grad_norm": 0.09602472931146622, + "learning_rate": 2.0587499061303162e-05, + "loss": 9.3542, + "step": 63170 + }, + { + "epoch": 0.3155134959674399, + "grad_norm": 0.09388317167758942, + "learning_rate": 2.0585997146361612e-05, + "loss": 9.3465, + "step": 63180 + }, + { + "epoch": 0.31556343479237936, + "grad_norm": 0.092608243227005, + "learning_rate": 2.058449523142006e-05, + "loss": 9.3552, + "step": 63190 + }, + { + "epoch": 0.3156133736173188, + "grad_norm": 0.09442637860774994, + "learning_rate": 2.0582993316478513e-05, + "loss": 9.3467, + "step": 63200 + }, + { + "epoch": 0.31566331244225826, + "grad_norm": 0.09088288247585297, + "learning_rate": 2.058149140153696e-05, + "loss": 9.3525, + "step": 63210 + }, + { + "epoch": 0.3157132512671977, + "grad_norm": 0.09530657529830933, + "learning_rate": 2.057998948659541e-05, + "loss": 9.3462, + "step": 63220 + }, + { + "epoch": 0.31576319009213716, + "grad_norm": 0.09368572384119034, + "learning_rate": 2.057848757165386e-05, + "loss": 9.3582, + "step": 63230 + }, + { + "epoch": 0.3158131289170766, + "grad_norm": 0.09014841169118881, + "learning_rate": 2.0576985656712307e-05, + "loss": 9.3449, + "step": 63240 + }, + { + "epoch": 0.31586306774201606, + "grad_norm": 0.09297845512628555, + "learning_rate": 2.057548374177076e-05, + "loss": 9.3541, + "step": 63250 + }, + { + "epoch": 0.3159130065669555, + "grad_norm": 0.09388581663370132, + "learning_rate": 2.0573981826829207e-05, + "loss": 9.3532, + "step": 63260 + }, + { + "epoch": 0.31596294539189496, + "grad_norm": 0.09536658972501755, + "learning_rate": 2.0572479911887657e-05, + "loss": 9.3481, + "step": 63270 + }, + { + "epoch": 0.3160128842168344, + "grad_norm": 0.09043373167514801, + "learning_rate": 2.0570977996946107e-05, + "loss": 9.3532, + "step": 63280 + }, + { + "epoch": 0.3160628230417738, + "grad_norm": 0.09627131372690201, + "learning_rate": 2.0569476082004554e-05, + "loss": 9.3527, + "step": 63290 + }, + { + "epoch": 0.3161127618667133, + "grad_norm": 0.09719482809305191, + "learning_rate": 2.0567974167063008e-05, + "loss": 9.348, + "step": 63300 + }, + { + "epoch": 0.3161627006916527, + "grad_norm": 0.09590967744588852, + "learning_rate": 2.0566472252121454e-05, + "loss": 9.3409, + "step": 63310 + }, + { + "epoch": 0.3162126395165922, + "grad_norm": 0.09232126921415329, + "learning_rate": 2.0564970337179905e-05, + "loss": 9.3467, + "step": 63320 + }, + { + "epoch": 0.3162625783415316, + "grad_norm": 0.09349498897790909, + "learning_rate": 2.0563468422238355e-05, + "loss": 9.3443, + "step": 63330 + }, + { + "epoch": 0.3163125171664711, + "grad_norm": 0.09130989015102386, + "learning_rate": 2.05619665072968e-05, + "loss": 9.3482, + "step": 63340 + }, + { + "epoch": 0.3163624559914105, + "grad_norm": 0.09377502650022507, + "learning_rate": 2.0560464592355255e-05, + "loss": 9.3469, + "step": 63350 + }, + { + "epoch": 0.31641239481635, + "grad_norm": 0.08837824314832687, + "learning_rate": 2.0558962677413702e-05, + "loss": 9.3549, + "step": 63360 + }, + { + "epoch": 0.3164623336412894, + "grad_norm": 0.08911017328500748, + "learning_rate": 2.0557460762472152e-05, + "loss": 9.3441, + "step": 63370 + }, + { + "epoch": 0.31651227246622887, + "grad_norm": 0.09551316499710083, + "learning_rate": 2.0555958847530602e-05, + "loss": 9.3406, + "step": 63380 + }, + { + "epoch": 0.3165622112911683, + "grad_norm": 0.098643459379673, + "learning_rate": 2.0554456932589052e-05, + "loss": 9.3425, + "step": 63390 + }, + { + "epoch": 0.31661215011610777, + "grad_norm": 0.09688541293144226, + "learning_rate": 2.0552955017647503e-05, + "loss": 9.3504, + "step": 63400 + }, + { + "epoch": 0.3166620889410472, + "grad_norm": 0.0948943942785263, + "learning_rate": 2.055145310270595e-05, + "loss": 9.3492, + "step": 63410 + }, + { + "epoch": 0.31671202776598667, + "grad_norm": 0.09211491793394089, + "learning_rate": 2.05499511877644e-05, + "loss": 9.3537, + "step": 63420 + }, + { + "epoch": 0.3167619665909261, + "grad_norm": 0.0964820459485054, + "learning_rate": 2.054844927282285e-05, + "loss": 9.339, + "step": 63430 + }, + { + "epoch": 0.31681190541586557, + "grad_norm": 0.09166949987411499, + "learning_rate": 2.05469473578813e-05, + "loss": 9.3545, + "step": 63440 + }, + { + "epoch": 0.316861844240805, + "grad_norm": 0.10011352598667145, + "learning_rate": 2.054544544293975e-05, + "loss": 9.3521, + "step": 63450 + }, + { + "epoch": 0.31691178306574447, + "grad_norm": 0.0967571884393692, + "learning_rate": 2.0543943527998197e-05, + "loss": 9.3413, + "step": 63460 + }, + { + "epoch": 0.3169617218906839, + "grad_norm": 0.09536176174879074, + "learning_rate": 2.0542441613056647e-05, + "loss": 9.3398, + "step": 63470 + }, + { + "epoch": 0.31701166071562337, + "grad_norm": 0.09382636100053787, + "learning_rate": 2.0540939698115097e-05, + "loss": 9.3471, + "step": 63480 + }, + { + "epoch": 0.3170615995405628, + "grad_norm": 0.09492380917072296, + "learning_rate": 2.0539437783173547e-05, + "loss": 9.3293, + "step": 63490 + }, + { + "epoch": 0.31711153836550227, + "grad_norm": 0.09516463428735733, + "learning_rate": 2.0537935868231998e-05, + "loss": 9.3534, + "step": 63500 + }, + { + "epoch": 0.3171614771904417, + "grad_norm": 0.09900394827127457, + "learning_rate": 2.0536433953290444e-05, + "loss": 9.3535, + "step": 63510 + }, + { + "epoch": 0.31721141601538116, + "grad_norm": 0.09245557337999344, + "learning_rate": 2.0534932038348898e-05, + "loss": 9.3483, + "step": 63520 + }, + { + "epoch": 0.3172613548403206, + "grad_norm": 0.09061167389154434, + "learning_rate": 2.0533430123407345e-05, + "loss": 9.348, + "step": 63530 + }, + { + "epoch": 0.31731129366526006, + "grad_norm": 0.09653984010219574, + "learning_rate": 2.0531928208465795e-05, + "loss": 9.344, + "step": 63540 + }, + { + "epoch": 0.3173612324901995, + "grad_norm": 0.09344928711652756, + "learning_rate": 2.0530426293524245e-05, + "loss": 9.355, + "step": 63550 + }, + { + "epoch": 0.31741117131513896, + "grad_norm": 0.09327616542577744, + "learning_rate": 2.0528924378582692e-05, + "loss": 9.3527, + "step": 63560 + }, + { + "epoch": 0.3174611101400784, + "grad_norm": 0.088762067258358, + "learning_rate": 2.0527422463641146e-05, + "loss": 9.3329, + "step": 63570 + }, + { + "epoch": 0.31751104896501786, + "grad_norm": 0.09158555418252945, + "learning_rate": 2.0525920548699592e-05, + "loss": 9.3442, + "step": 63580 + }, + { + "epoch": 0.3175609877899573, + "grad_norm": 0.0912177637219429, + "learning_rate": 2.0524418633758042e-05, + "loss": 9.3425, + "step": 63590 + }, + { + "epoch": 0.31761092661489676, + "grad_norm": 0.08927998691797256, + "learning_rate": 2.0522916718816493e-05, + "loss": 9.3505, + "step": 63600 + }, + { + "epoch": 0.3176608654398362, + "grad_norm": 0.09646981954574585, + "learning_rate": 2.052141480387494e-05, + "loss": 9.338, + "step": 63610 + }, + { + "epoch": 0.31771080426477566, + "grad_norm": 0.10072862356901169, + "learning_rate": 2.0519912888933393e-05, + "loss": 9.3302, + "step": 63620 + }, + { + "epoch": 0.3177607430897151, + "grad_norm": 0.09051260352134705, + "learning_rate": 2.051841097399184e-05, + "loss": 9.342, + "step": 63630 + }, + { + "epoch": 0.31781068191465456, + "grad_norm": 0.09302885830402374, + "learning_rate": 2.051690905905029e-05, + "loss": 9.338, + "step": 63640 + }, + { + "epoch": 0.317860620739594, + "grad_norm": 0.0928134098649025, + "learning_rate": 2.051540714410874e-05, + "loss": 9.3399, + "step": 63650 + }, + { + "epoch": 0.31791055956453346, + "grad_norm": 0.09679362177848816, + "learning_rate": 2.0513905229167187e-05, + "loss": 9.3486, + "step": 63660 + }, + { + "epoch": 0.3179604983894729, + "grad_norm": 0.09691763669252396, + "learning_rate": 2.051240331422564e-05, + "loss": 9.3392, + "step": 63670 + }, + { + "epoch": 0.31801043721441236, + "grad_norm": 0.09337911009788513, + "learning_rate": 2.0510901399284087e-05, + "loss": 9.3413, + "step": 63680 + }, + { + "epoch": 0.3180603760393518, + "grad_norm": 0.09015453606843948, + "learning_rate": 2.0509399484342538e-05, + "loss": 9.3384, + "step": 63690 + }, + { + "epoch": 0.31811031486429125, + "grad_norm": 0.09561469405889511, + "learning_rate": 2.0507897569400988e-05, + "loss": 9.3374, + "step": 63700 + }, + { + "epoch": 0.3181602536892307, + "grad_norm": 0.09089536219835281, + "learning_rate": 2.0506395654459434e-05, + "loss": 9.3469, + "step": 63710 + }, + { + "epoch": 0.31821019251417015, + "grad_norm": 0.09029261767864227, + "learning_rate": 2.0504893739517888e-05, + "loss": 9.3534, + "step": 63720 + }, + { + "epoch": 0.3182601313391096, + "grad_norm": 0.092425636947155, + "learning_rate": 2.0503391824576335e-05, + "loss": 9.3394, + "step": 63730 + }, + { + "epoch": 0.31831007016404905, + "grad_norm": 0.09339354932308197, + "learning_rate": 2.0501889909634785e-05, + "loss": 9.3421, + "step": 63740 + }, + { + "epoch": 0.3183600089889885, + "grad_norm": 0.09930525720119476, + "learning_rate": 2.0500387994693235e-05, + "loss": 9.3387, + "step": 63750 + }, + { + "epoch": 0.31840994781392795, + "grad_norm": 0.09377606213092804, + "learning_rate": 2.0498886079751682e-05, + "loss": 9.3461, + "step": 63760 + }, + { + "epoch": 0.3184598866388674, + "grad_norm": 0.09875767678022385, + "learning_rate": 2.0497384164810136e-05, + "loss": 9.3302, + "step": 63770 + }, + { + "epoch": 0.31850982546380685, + "grad_norm": 0.09149225801229477, + "learning_rate": 2.0495882249868582e-05, + "loss": 9.342, + "step": 63780 + }, + { + "epoch": 0.31855976428874627, + "grad_norm": 0.0938776284456253, + "learning_rate": 2.0494380334927033e-05, + "loss": 9.3441, + "step": 63790 + }, + { + "epoch": 0.31860970311368575, + "grad_norm": 0.0972360149025917, + "learning_rate": 2.0492878419985483e-05, + "loss": 9.3438, + "step": 63800 + }, + { + "epoch": 0.31865964193862517, + "grad_norm": 0.09202911704778671, + "learning_rate": 2.049137650504393e-05, + "loss": 9.3384, + "step": 63810 + }, + { + "epoch": 0.31870958076356465, + "grad_norm": 0.09489790350198746, + "learning_rate": 2.0489874590102383e-05, + "loss": 9.3465, + "step": 63820 + }, + { + "epoch": 0.31875951958850407, + "grad_norm": 0.09030698239803314, + "learning_rate": 2.048837267516083e-05, + "loss": 9.3487, + "step": 63830 + }, + { + "epoch": 0.31880945841344355, + "grad_norm": 0.09348948299884796, + "learning_rate": 2.0486870760219283e-05, + "loss": 9.3306, + "step": 63840 + }, + { + "epoch": 0.31885939723838297, + "grad_norm": 0.09794354438781738, + "learning_rate": 2.048536884527773e-05, + "loss": 9.3349, + "step": 63850 + }, + { + "epoch": 0.31890933606332245, + "grad_norm": 0.09906934201717377, + "learning_rate": 2.0483866930336177e-05, + "loss": 9.3347, + "step": 63860 + }, + { + "epoch": 0.31895927488826187, + "grad_norm": 0.09116369485855103, + "learning_rate": 2.048236501539463e-05, + "loss": 9.3346, + "step": 63870 + }, + { + "epoch": 0.31900921371320135, + "grad_norm": 0.0981554165482521, + "learning_rate": 2.0480863100453077e-05, + "loss": 9.3393, + "step": 63880 + }, + { + "epoch": 0.31905915253814077, + "grad_norm": 0.09284525364637375, + "learning_rate": 2.047936118551153e-05, + "loss": 9.3378, + "step": 63890 + }, + { + "epoch": 0.31910909136308024, + "grad_norm": 0.08612337708473206, + "learning_rate": 2.0477859270569978e-05, + "loss": 9.3374, + "step": 63900 + }, + { + "epoch": 0.31915903018801967, + "grad_norm": 0.0932999700307846, + "learning_rate": 2.0476357355628424e-05, + "loss": 9.3239, + "step": 63910 + }, + { + "epoch": 0.31920896901295914, + "grad_norm": 0.09308277070522308, + "learning_rate": 2.0474855440686878e-05, + "loss": 9.3398, + "step": 63920 + }, + { + "epoch": 0.31925890783789856, + "grad_norm": 0.09518188238143921, + "learning_rate": 2.0473353525745325e-05, + "loss": 9.343, + "step": 63930 + }, + { + "epoch": 0.31930884666283804, + "grad_norm": 0.09198715537786484, + "learning_rate": 2.047185161080378e-05, + "loss": 9.333, + "step": 63940 + }, + { + "epoch": 0.31935878548777746, + "grad_norm": 0.09532855451107025, + "learning_rate": 2.0470349695862225e-05, + "loss": 9.3383, + "step": 63950 + }, + { + "epoch": 0.31940872431271694, + "grad_norm": 0.09205342829227448, + "learning_rate": 2.0468847780920672e-05, + "loss": 9.3471, + "step": 63960 + }, + { + "epoch": 0.31945866313765636, + "grad_norm": 0.09050062298774719, + "learning_rate": 2.0467345865979126e-05, + "loss": 9.3426, + "step": 63970 + }, + { + "epoch": 0.31950860196259584, + "grad_norm": 0.0922810360789299, + "learning_rate": 2.0465843951037572e-05, + "loss": 9.3293, + "step": 63980 + }, + { + "epoch": 0.31955854078753526, + "grad_norm": 0.09575898200273514, + "learning_rate": 2.0464342036096026e-05, + "loss": 9.3398, + "step": 63990 + }, + { + "epoch": 0.31960847961247474, + "grad_norm": 0.09558529406785965, + "learning_rate": 2.0462840121154473e-05, + "loss": 9.3395, + "step": 64000 + }, + { + "epoch": 0.31965841843741416, + "grad_norm": 0.09229427576065063, + "learning_rate": 2.046133820621292e-05, + "loss": 9.3204, + "step": 64010 + }, + { + "epoch": 0.31970835726235364, + "grad_norm": 0.10348726063966751, + "learning_rate": 2.0459836291271373e-05, + "loss": 9.3323, + "step": 64020 + }, + { + "epoch": 0.31975829608729306, + "grad_norm": 0.09877219051122665, + "learning_rate": 2.045833437632982e-05, + "loss": 9.3436, + "step": 64030 + }, + { + "epoch": 0.31980823491223254, + "grad_norm": 0.09454214572906494, + "learning_rate": 2.0456832461388273e-05, + "loss": 9.3461, + "step": 64040 + }, + { + "epoch": 0.31985817373717196, + "grad_norm": 0.09850972890853882, + "learning_rate": 2.045533054644672e-05, + "loss": 9.3352, + "step": 64050 + }, + { + "epoch": 0.31990811256211144, + "grad_norm": 0.0938955768942833, + "learning_rate": 2.0453828631505167e-05, + "loss": 9.3256, + "step": 64060 + }, + { + "epoch": 0.31995805138705086, + "grad_norm": 0.08874368667602539, + "learning_rate": 2.045232671656362e-05, + "loss": 9.3407, + "step": 64070 + }, + { + "epoch": 0.32000799021199033, + "grad_norm": 0.09031280875205994, + "learning_rate": 2.0450824801622067e-05, + "loss": 9.3334, + "step": 64080 + }, + { + "epoch": 0.32005792903692976, + "grad_norm": 0.09989767521619797, + "learning_rate": 2.044932288668052e-05, + "loss": 9.3453, + "step": 64090 + }, + { + "epoch": 0.32010786786186923, + "grad_norm": 0.09756146371364594, + "learning_rate": 2.0447820971738968e-05, + "loss": 9.335, + "step": 64100 + }, + { + "epoch": 0.32015780668680865, + "grad_norm": 0.09458436816930771, + "learning_rate": 2.0446319056797414e-05, + "loss": 9.3411, + "step": 64110 + }, + { + "epoch": 0.32020774551174813, + "grad_norm": 0.0925472229719162, + "learning_rate": 2.0444817141855868e-05, + "loss": 9.3411, + "step": 64120 + }, + { + "epoch": 0.32025768433668755, + "grad_norm": 0.09772704541683197, + "learning_rate": 2.0443315226914315e-05, + "loss": 9.3346, + "step": 64130 + }, + { + "epoch": 0.32030762316162703, + "grad_norm": 0.09279755502939224, + "learning_rate": 2.044181331197277e-05, + "loss": 9.3347, + "step": 64140 + }, + { + "epoch": 0.32035756198656645, + "grad_norm": 0.09363307803869247, + "learning_rate": 2.0440311397031215e-05, + "loss": 9.3403, + "step": 64150 + }, + { + "epoch": 0.32040750081150593, + "grad_norm": 0.09853540360927582, + "learning_rate": 2.0438809482089665e-05, + "loss": 9.3262, + "step": 64160 + }, + { + "epoch": 0.32045743963644535, + "grad_norm": 0.09178748726844788, + "learning_rate": 2.0437307567148116e-05, + "loss": 9.3342, + "step": 64170 + }, + { + "epoch": 0.32050737846138483, + "grad_norm": 0.09160963445901871, + "learning_rate": 2.0435805652206562e-05, + "loss": 9.3396, + "step": 64180 + }, + { + "epoch": 0.32055731728632425, + "grad_norm": 0.09168528020381927, + "learning_rate": 2.0434303737265016e-05, + "loss": 9.3464, + "step": 64190 + }, + { + "epoch": 0.3206072561112637, + "grad_norm": 0.09851713478565216, + "learning_rate": 2.0432801822323463e-05, + "loss": 9.3393, + "step": 64200 + }, + { + "epoch": 0.32065719493620315, + "grad_norm": 0.09476348012685776, + "learning_rate": 2.0431299907381913e-05, + "loss": 9.3281, + "step": 64210 + }, + { + "epoch": 0.3207071337611426, + "grad_norm": 0.08930637687444687, + "learning_rate": 2.0429797992440363e-05, + "loss": 9.3371, + "step": 64220 + }, + { + "epoch": 0.32075707258608205, + "grad_norm": 0.09610996395349503, + "learning_rate": 2.042829607749881e-05, + "loss": 9.3344, + "step": 64230 + }, + { + "epoch": 0.3208070114110215, + "grad_norm": 0.09887643158435822, + "learning_rate": 2.0426794162557263e-05, + "loss": 9.3348, + "step": 64240 + }, + { + "epoch": 0.32085695023596095, + "grad_norm": 0.09950941801071167, + "learning_rate": 2.042529224761571e-05, + "loss": 9.3368, + "step": 64250 + }, + { + "epoch": 0.3209068890609004, + "grad_norm": 0.09124087542295456, + "learning_rate": 2.042379033267416e-05, + "loss": 9.3366, + "step": 64260 + }, + { + "epoch": 0.32095682788583985, + "grad_norm": 0.08983422070741653, + "learning_rate": 2.042228841773261e-05, + "loss": 9.3281, + "step": 64270 + }, + { + "epoch": 0.32100676671077927, + "grad_norm": 0.09503147006034851, + "learning_rate": 2.0420786502791057e-05, + "loss": 9.336, + "step": 64280 + }, + { + "epoch": 0.32105670553571874, + "grad_norm": 0.09027642011642456, + "learning_rate": 2.041928458784951e-05, + "loss": 9.3261, + "step": 64290 + }, + { + "epoch": 0.32110664436065817, + "grad_norm": 0.09734740853309631, + "learning_rate": 2.0417782672907958e-05, + "loss": 9.3386, + "step": 64300 + }, + { + "epoch": 0.32115658318559764, + "grad_norm": 0.09774748980998993, + "learning_rate": 2.0416280757966408e-05, + "loss": 9.335, + "step": 64310 + }, + { + "epoch": 0.32120652201053707, + "grad_norm": 0.09811116009950638, + "learning_rate": 2.0414778843024858e-05, + "loss": 9.3404, + "step": 64320 + }, + { + "epoch": 0.32125646083547654, + "grad_norm": 0.09968292713165283, + "learning_rate": 2.0413276928083305e-05, + "loss": 9.3334, + "step": 64330 + }, + { + "epoch": 0.32130639966041596, + "grad_norm": 0.09214014559984207, + "learning_rate": 2.041177501314176e-05, + "loss": 9.3353, + "step": 64340 + }, + { + "epoch": 0.32135633848535544, + "grad_norm": 0.09302917867898941, + "learning_rate": 2.0410273098200205e-05, + "loss": 9.3377, + "step": 64350 + }, + { + "epoch": 0.32140627731029486, + "grad_norm": 0.09487059712409973, + "learning_rate": 2.0408771183258655e-05, + "loss": 9.3373, + "step": 64360 + }, + { + "epoch": 0.32145621613523434, + "grad_norm": 0.09410389512777328, + "learning_rate": 2.0407269268317106e-05, + "loss": 9.3376, + "step": 64370 + }, + { + "epoch": 0.32150615496017376, + "grad_norm": 0.09394783526659012, + "learning_rate": 2.0405767353375552e-05, + "loss": 9.3304, + "step": 64380 + }, + { + "epoch": 0.32155609378511324, + "grad_norm": 0.10009703040122986, + "learning_rate": 2.0404265438434006e-05, + "loss": 9.3396, + "step": 64390 + }, + { + "epoch": 0.32160603261005266, + "grad_norm": 0.088577039539814, + "learning_rate": 2.0402763523492453e-05, + "loss": 9.3351, + "step": 64400 + }, + { + "epoch": 0.32165597143499214, + "grad_norm": 0.09477560222148895, + "learning_rate": 2.0401261608550903e-05, + "loss": 9.3235, + "step": 64410 + }, + { + "epoch": 0.32170591025993156, + "grad_norm": 0.09325342625379562, + "learning_rate": 2.0399759693609353e-05, + "loss": 9.344, + "step": 64420 + }, + { + "epoch": 0.32175584908487104, + "grad_norm": 0.09685123711824417, + "learning_rate": 2.03982577786678e-05, + "loss": 9.3409, + "step": 64430 + }, + { + "epoch": 0.32180578790981046, + "grad_norm": 0.09679123014211655, + "learning_rate": 2.0396755863726253e-05, + "loss": 9.3162, + "step": 64440 + }, + { + "epoch": 0.32185572673474994, + "grad_norm": 0.09780653566122055, + "learning_rate": 2.03952539487847e-05, + "loss": 9.3269, + "step": 64450 + }, + { + "epoch": 0.32190566555968936, + "grad_norm": 0.09367544203996658, + "learning_rate": 2.039375203384315e-05, + "loss": 9.3264, + "step": 64460 + }, + { + "epoch": 0.32195560438462884, + "grad_norm": 0.0960097685456276, + "learning_rate": 2.03922501189016e-05, + "loss": 9.334, + "step": 64470 + }, + { + "epoch": 0.32200554320956826, + "grad_norm": 0.09054725617170334, + "learning_rate": 2.039074820396005e-05, + "loss": 9.3354, + "step": 64480 + }, + { + "epoch": 0.32205548203450773, + "grad_norm": 0.09367144107818604, + "learning_rate": 2.03892462890185e-05, + "loss": 9.3318, + "step": 64490 + }, + { + "epoch": 0.32210542085944716, + "grad_norm": 0.09095479547977448, + "learning_rate": 2.0387744374076948e-05, + "loss": 9.3297, + "step": 64500 + }, + { + "epoch": 0.32215535968438663, + "grad_norm": 0.09100858122110367, + "learning_rate": 2.0386242459135398e-05, + "loss": 9.3333, + "step": 64510 + }, + { + "epoch": 0.32220529850932605, + "grad_norm": 0.09426578879356384, + "learning_rate": 2.0384740544193848e-05, + "loss": 9.3289, + "step": 64520 + }, + { + "epoch": 0.32225523733426553, + "grad_norm": 0.08965863287448883, + "learning_rate": 2.0383238629252298e-05, + "loss": 9.3264, + "step": 64530 + }, + { + "epoch": 0.32230517615920495, + "grad_norm": 0.092529296875, + "learning_rate": 2.038173671431075e-05, + "loss": 9.337, + "step": 64540 + }, + { + "epoch": 0.32235511498414443, + "grad_norm": 0.09353473037481308, + "learning_rate": 2.0380234799369195e-05, + "loss": 9.3338, + "step": 64550 + }, + { + "epoch": 0.32240505380908385, + "grad_norm": 0.09257248789072037, + "learning_rate": 2.0378732884427645e-05, + "loss": 9.3232, + "step": 64560 + }, + { + "epoch": 0.32245499263402333, + "grad_norm": 0.09056426584720612, + "learning_rate": 2.0377230969486096e-05, + "loss": 9.3363, + "step": 64570 + }, + { + "epoch": 0.32250493145896275, + "grad_norm": 0.09313210844993591, + "learning_rate": 2.0375729054544546e-05, + "loss": 9.3281, + "step": 64580 + }, + { + "epoch": 0.32255487028390223, + "grad_norm": 0.09300418943166733, + "learning_rate": 2.0374227139602996e-05, + "loss": 9.3343, + "step": 64590 + }, + { + "epoch": 0.32260480910884165, + "grad_norm": 0.09289450198411942, + "learning_rate": 2.0372725224661443e-05, + "loss": 9.3244, + "step": 64600 + }, + { + "epoch": 0.3226547479337811, + "grad_norm": 0.09735582768917084, + "learning_rate": 2.0371223309719893e-05, + "loss": 9.3233, + "step": 64610 + }, + { + "epoch": 0.32270468675872055, + "grad_norm": 0.09457716345787048, + "learning_rate": 2.0369721394778343e-05, + "loss": 9.3306, + "step": 64620 + }, + { + "epoch": 0.32275462558366, + "grad_norm": 0.10149849951267242, + "learning_rate": 2.0368219479836793e-05, + "loss": 9.339, + "step": 64630 + }, + { + "epoch": 0.32280456440859945, + "grad_norm": 0.094261035323143, + "learning_rate": 2.0366717564895243e-05, + "loss": 9.3271, + "step": 64640 + }, + { + "epoch": 0.3228545032335389, + "grad_norm": 0.09714456647634506, + "learning_rate": 2.036521564995369e-05, + "loss": 9.3295, + "step": 64650 + }, + { + "epoch": 0.32290444205847835, + "grad_norm": 0.09245141595602036, + "learning_rate": 2.036371373501214e-05, + "loss": 9.3433, + "step": 64660 + }, + { + "epoch": 0.3229543808834178, + "grad_norm": 0.09247272461652756, + "learning_rate": 2.036221182007059e-05, + "loss": 9.3184, + "step": 64670 + }, + { + "epoch": 0.32300431970835725, + "grad_norm": 0.08987551927566528, + "learning_rate": 2.036070990512904e-05, + "loss": 9.3249, + "step": 64680 + }, + { + "epoch": 0.3230542585332967, + "grad_norm": 0.09094039350748062, + "learning_rate": 2.035920799018749e-05, + "loss": 9.341, + "step": 64690 + }, + { + "epoch": 0.32310419735823614, + "grad_norm": 0.09806418418884277, + "learning_rate": 2.0357706075245938e-05, + "loss": 9.3295, + "step": 64700 + }, + { + "epoch": 0.3231541361831756, + "grad_norm": 0.09336648136377335, + "learning_rate": 2.0356204160304388e-05, + "loss": 9.3318, + "step": 64710 + }, + { + "epoch": 0.32320407500811504, + "grad_norm": 0.09484011679887772, + "learning_rate": 2.0354702245362838e-05, + "loss": 9.3308, + "step": 64720 + }, + { + "epoch": 0.3232540138330545, + "grad_norm": 0.09836547821760178, + "learning_rate": 2.0353200330421288e-05, + "loss": 9.3284, + "step": 64730 + }, + { + "epoch": 0.32330395265799394, + "grad_norm": 0.09364361315965652, + "learning_rate": 2.035169841547974e-05, + "loss": 9.322, + "step": 64740 + }, + { + "epoch": 0.3233538914829334, + "grad_norm": 0.09323278069496155, + "learning_rate": 2.0350196500538185e-05, + "loss": 9.3337, + "step": 64750 + }, + { + "epoch": 0.32340383030787284, + "grad_norm": 0.09357263892889023, + "learning_rate": 2.0348694585596635e-05, + "loss": 9.3295, + "step": 64760 + }, + { + "epoch": 0.3234537691328123, + "grad_norm": 0.09743960201740265, + "learning_rate": 2.0347192670655086e-05, + "loss": 9.3393, + "step": 64770 + }, + { + "epoch": 0.32350370795775174, + "grad_norm": 0.09175863862037659, + "learning_rate": 2.0345690755713536e-05, + "loss": 9.3253, + "step": 64780 + }, + { + "epoch": 0.3235536467826912, + "grad_norm": 0.08689554035663605, + "learning_rate": 2.0344188840771986e-05, + "loss": 9.3292, + "step": 64790 + }, + { + "epoch": 0.32360358560763064, + "grad_norm": 0.09737929701805115, + "learning_rate": 2.0342686925830433e-05, + "loss": 9.3275, + "step": 64800 + }, + { + "epoch": 0.3236535244325701, + "grad_norm": 0.09190335869789124, + "learning_rate": 2.0341185010888883e-05, + "loss": 9.3309, + "step": 64810 + }, + { + "epoch": 0.32370346325750954, + "grad_norm": 0.09484836459159851, + "learning_rate": 2.0339683095947333e-05, + "loss": 9.3211, + "step": 64820 + }, + { + "epoch": 0.323753402082449, + "grad_norm": 0.09082531183958054, + "learning_rate": 2.0338181181005783e-05, + "loss": 9.324, + "step": 64830 + }, + { + "epoch": 0.32380334090738844, + "grad_norm": 0.09505081921815872, + "learning_rate": 2.0336679266064233e-05, + "loss": 9.3322, + "step": 64840 + }, + { + "epoch": 0.3238532797323279, + "grad_norm": 0.09214816242456436, + "learning_rate": 2.0335177351122684e-05, + "loss": 9.3252, + "step": 64850 + }, + { + "epoch": 0.32390321855726734, + "grad_norm": 0.09512375295162201, + "learning_rate": 2.033367543618113e-05, + "loss": 9.3216, + "step": 64860 + }, + { + "epoch": 0.3239531573822068, + "grad_norm": 0.09365373104810715, + "learning_rate": 2.033217352123958e-05, + "loss": 9.3385, + "step": 64870 + }, + { + "epoch": 0.32400309620714624, + "grad_norm": 0.09065309166908264, + "learning_rate": 2.033067160629803e-05, + "loss": 9.3191, + "step": 64880 + }, + { + "epoch": 0.3240530350320857, + "grad_norm": 0.09178464859724045, + "learning_rate": 2.032916969135648e-05, + "loss": 9.3268, + "step": 64890 + }, + { + "epoch": 0.32410297385702513, + "grad_norm": 0.09575120359659195, + "learning_rate": 2.032766777641493e-05, + "loss": 9.3209, + "step": 64900 + }, + { + "epoch": 0.3241529126819646, + "grad_norm": 0.09635672718286514, + "learning_rate": 2.0326165861473378e-05, + "loss": 9.3288, + "step": 64910 + }, + { + "epoch": 0.32420285150690403, + "grad_norm": 0.09563815593719482, + "learning_rate": 2.0324663946531828e-05, + "loss": 9.3158, + "step": 64920 + }, + { + "epoch": 0.3242527903318435, + "grad_norm": 0.09533455222845078, + "learning_rate": 2.0323162031590278e-05, + "loss": 9.325, + "step": 64930 + }, + { + "epoch": 0.32430272915678293, + "grad_norm": 0.09778301417827606, + "learning_rate": 2.032166011664873e-05, + "loss": 9.3241, + "step": 64940 + }, + { + "epoch": 0.3243526679817224, + "grad_norm": 0.09181251376867294, + "learning_rate": 2.032015820170718e-05, + "loss": 9.3237, + "step": 64950 + }, + { + "epoch": 0.32440260680666183, + "grad_norm": 0.09311333298683167, + "learning_rate": 2.0318656286765625e-05, + "loss": 9.3285, + "step": 64960 + }, + { + "epoch": 0.3244525456316013, + "grad_norm": 0.0935024842619896, + "learning_rate": 2.0317154371824076e-05, + "loss": 9.324, + "step": 64970 + }, + { + "epoch": 0.32450248445654073, + "grad_norm": 0.10296680778265, + "learning_rate": 2.0315652456882526e-05, + "loss": 9.3188, + "step": 64980 + }, + { + "epoch": 0.3245524232814802, + "grad_norm": 0.0903451070189476, + "learning_rate": 2.0314150541940976e-05, + "loss": 9.3293, + "step": 64990 + }, + { + "epoch": 0.32460236210641963, + "grad_norm": 0.09111391007900238, + "learning_rate": 2.0312648626999426e-05, + "loss": 9.3187, + "step": 65000 + }, + { + "epoch": 0.3246523009313591, + "grad_norm": 0.09491260349750519, + "learning_rate": 2.0311146712057873e-05, + "loss": 9.3262, + "step": 65010 + }, + { + "epoch": 0.3247022397562985, + "grad_norm": 0.09257805347442627, + "learning_rate": 2.0309644797116323e-05, + "loss": 9.3281, + "step": 65020 + }, + { + "epoch": 0.324752178581238, + "grad_norm": 0.09723227471113205, + "learning_rate": 2.0308142882174773e-05, + "loss": 9.3198, + "step": 65030 + }, + { + "epoch": 0.3248021174061774, + "grad_norm": 0.09565103054046631, + "learning_rate": 2.0306640967233223e-05, + "loss": 9.3241, + "step": 65040 + }, + { + "epoch": 0.3248520562311169, + "grad_norm": 0.09280645102262497, + "learning_rate": 2.0305139052291674e-05, + "loss": 9.3315, + "step": 65050 + }, + { + "epoch": 0.3249019950560563, + "grad_norm": 0.09414808452129364, + "learning_rate": 2.030363713735012e-05, + "loss": 9.318, + "step": 65060 + }, + { + "epoch": 0.3249519338809958, + "grad_norm": 0.09369461238384247, + "learning_rate": 2.030213522240857e-05, + "loss": 9.3343, + "step": 65070 + }, + { + "epoch": 0.3250018727059352, + "grad_norm": 0.09321442246437073, + "learning_rate": 2.030063330746702e-05, + "loss": 9.3193, + "step": 65080 + }, + { + "epoch": 0.3250518115308747, + "grad_norm": 0.0960422232747078, + "learning_rate": 2.029913139252547e-05, + "loss": 9.3226, + "step": 65090 + }, + { + "epoch": 0.3251017503558141, + "grad_norm": 0.0959312692284584, + "learning_rate": 2.029762947758392e-05, + "loss": 9.3237, + "step": 65100 + }, + { + "epoch": 0.3251516891807536, + "grad_norm": 0.09452484548091888, + "learning_rate": 2.0296127562642368e-05, + "loss": 9.3171, + "step": 65110 + }, + { + "epoch": 0.325201628005693, + "grad_norm": 0.09644035995006561, + "learning_rate": 2.0294625647700818e-05, + "loss": 9.3132, + "step": 65120 + }, + { + "epoch": 0.3252515668306325, + "grad_norm": 0.09128149598836899, + "learning_rate": 2.029312373275927e-05, + "loss": 9.3258, + "step": 65130 + }, + { + "epoch": 0.3253015056555719, + "grad_norm": 0.08971234411001205, + "learning_rate": 2.029162181781772e-05, + "loss": 9.3171, + "step": 65140 + }, + { + "epoch": 0.3253514444805114, + "grad_norm": 0.09548742324113846, + "learning_rate": 2.029011990287617e-05, + "loss": 9.3202, + "step": 65150 + }, + { + "epoch": 0.3254013833054508, + "grad_norm": 0.0900539830327034, + "learning_rate": 2.0288617987934615e-05, + "loss": 9.3234, + "step": 65160 + }, + { + "epoch": 0.3254513221303903, + "grad_norm": 0.08711142092943192, + "learning_rate": 2.028711607299307e-05, + "loss": 9.3191, + "step": 65170 + }, + { + "epoch": 0.3255012609553297, + "grad_norm": 0.09119576215744019, + "learning_rate": 2.0285614158051516e-05, + "loss": 9.3174, + "step": 65180 + }, + { + "epoch": 0.3255511997802692, + "grad_norm": 0.09764738380908966, + "learning_rate": 2.0284112243109966e-05, + "loss": 9.3225, + "step": 65190 + }, + { + "epoch": 0.3256011386052086, + "grad_norm": 0.09541153162717819, + "learning_rate": 2.0282610328168416e-05, + "loss": 9.3139, + "step": 65200 + }, + { + "epoch": 0.3256510774301481, + "grad_norm": 0.09369537979364395, + "learning_rate": 2.0281108413226863e-05, + "loss": 9.325, + "step": 65210 + }, + { + "epoch": 0.3257010162550875, + "grad_norm": 0.09462661296129227, + "learning_rate": 2.0279606498285317e-05, + "loss": 9.3226, + "step": 65220 + }, + { + "epoch": 0.325750955080027, + "grad_norm": 0.09155022352933884, + "learning_rate": 2.0278104583343763e-05, + "loss": 9.3225, + "step": 65230 + }, + { + "epoch": 0.3258008939049664, + "grad_norm": 0.09792565554380417, + "learning_rate": 2.0276602668402214e-05, + "loss": 9.3181, + "step": 65240 + }, + { + "epoch": 0.3258508327299059, + "grad_norm": 0.09263359010219574, + "learning_rate": 2.0275100753460664e-05, + "loss": 9.3194, + "step": 65250 + }, + { + "epoch": 0.3259007715548453, + "grad_norm": 0.09698522835969925, + "learning_rate": 2.027359883851911e-05, + "loss": 9.3212, + "step": 65260 + }, + { + "epoch": 0.32595071037978474, + "grad_norm": 0.09113046526908875, + "learning_rate": 2.0272096923577564e-05, + "loss": 9.3182, + "step": 65270 + }, + { + "epoch": 0.3260006492047242, + "grad_norm": 0.10038909316062927, + "learning_rate": 2.027059500863601e-05, + "loss": 9.3206, + "step": 65280 + }, + { + "epoch": 0.32605058802966363, + "grad_norm": 0.09228735417127609, + "learning_rate": 2.026909309369446e-05, + "loss": 9.3112, + "step": 65290 + }, + { + "epoch": 0.3261005268546031, + "grad_norm": 0.09285099059343338, + "learning_rate": 2.026759117875291e-05, + "loss": 9.319, + "step": 65300 + }, + { + "epoch": 0.32615046567954253, + "grad_norm": 0.09246840327978134, + "learning_rate": 2.0266089263811358e-05, + "loss": 9.3158, + "step": 65310 + }, + { + "epoch": 0.326200404504482, + "grad_norm": 0.09260924160480499, + "learning_rate": 2.026458734886981e-05, + "loss": 9.3126, + "step": 65320 + }, + { + "epoch": 0.32625034332942143, + "grad_norm": 0.09276574850082397, + "learning_rate": 2.026308543392826e-05, + "loss": 9.3142, + "step": 65330 + }, + { + "epoch": 0.3263002821543609, + "grad_norm": 0.09407460689544678, + "learning_rate": 2.026158351898671e-05, + "loss": 9.3089, + "step": 65340 + }, + { + "epoch": 0.32635022097930033, + "grad_norm": 0.09039918333292007, + "learning_rate": 2.026008160404516e-05, + "loss": 9.3121, + "step": 65350 + }, + { + "epoch": 0.3264001598042398, + "grad_norm": 0.09433960914611816, + "learning_rate": 2.0258579689103605e-05, + "loss": 9.3153, + "step": 65360 + }, + { + "epoch": 0.32645009862917923, + "grad_norm": 0.0907716453075409, + "learning_rate": 2.025707777416206e-05, + "loss": 9.313, + "step": 65370 + }, + { + "epoch": 0.3265000374541187, + "grad_norm": 0.097490094602108, + "learning_rate": 2.0255575859220506e-05, + "loss": 9.3096, + "step": 65380 + }, + { + "epoch": 0.32654997627905813, + "grad_norm": 0.0970582440495491, + "learning_rate": 2.0254073944278956e-05, + "loss": 9.3117, + "step": 65390 + }, + { + "epoch": 0.3265999151039976, + "grad_norm": 0.09796217083930969, + "learning_rate": 2.0252572029337406e-05, + "loss": 9.3095, + "step": 65400 + }, + { + "epoch": 0.32664985392893703, + "grad_norm": 0.09917185455560684, + "learning_rate": 2.0251070114395853e-05, + "loss": 9.315, + "step": 65410 + }, + { + "epoch": 0.3266997927538765, + "grad_norm": 0.0976761057972908, + "learning_rate": 2.0249568199454307e-05, + "loss": 9.3177, + "step": 65420 + }, + { + "epoch": 0.3267497315788159, + "grad_norm": 0.09032870829105377, + "learning_rate": 2.0248066284512753e-05, + "loss": 9.3253, + "step": 65430 + }, + { + "epoch": 0.3267996704037554, + "grad_norm": 0.0941029042005539, + "learning_rate": 2.0246564369571204e-05, + "loss": 9.3135, + "step": 65440 + }, + { + "epoch": 0.3268496092286948, + "grad_norm": 0.09661585092544556, + "learning_rate": 2.0245062454629654e-05, + "loss": 9.3097, + "step": 65450 + }, + { + "epoch": 0.3268995480536343, + "grad_norm": 0.09369435906410217, + "learning_rate": 2.02435605396881e-05, + "loss": 9.3211, + "step": 65460 + }, + { + "epoch": 0.3269494868785737, + "grad_norm": 0.0993766039609909, + "learning_rate": 2.0242058624746554e-05, + "loss": 9.3172, + "step": 65470 + }, + { + "epoch": 0.3269994257035132, + "grad_norm": 0.0999893844127655, + "learning_rate": 2.0240556709805e-05, + "loss": 9.3048, + "step": 65480 + }, + { + "epoch": 0.3270493645284526, + "grad_norm": 0.09266514331102371, + "learning_rate": 2.0239054794863454e-05, + "loss": 9.3177, + "step": 65490 + }, + { + "epoch": 0.3270993033533921, + "grad_norm": 0.09590865671634674, + "learning_rate": 2.02375528799219e-05, + "loss": 9.3148, + "step": 65500 + }, + { + "epoch": 0.3271492421783315, + "grad_norm": 0.10051383823156357, + "learning_rate": 2.0236050964980348e-05, + "loss": 9.3207, + "step": 65510 + }, + { + "epoch": 0.327199181003271, + "grad_norm": 0.09786202013492584, + "learning_rate": 2.02345490500388e-05, + "loss": 9.3105, + "step": 65520 + }, + { + "epoch": 0.3272491198282104, + "grad_norm": 0.09516331553459167, + "learning_rate": 2.023304713509725e-05, + "loss": 9.3199, + "step": 65530 + }, + { + "epoch": 0.3272990586531499, + "grad_norm": 0.09292834252119064, + "learning_rate": 2.0231545220155702e-05, + "loss": 9.3057, + "step": 65540 + }, + { + "epoch": 0.3273489974780893, + "grad_norm": 0.0924135223031044, + "learning_rate": 2.023004330521415e-05, + "loss": 9.3168, + "step": 65550 + }, + { + "epoch": 0.3273989363030288, + "grad_norm": 0.08832070231437683, + "learning_rate": 2.0228541390272595e-05, + "loss": 9.3189, + "step": 65560 + }, + { + "epoch": 0.3274488751279682, + "grad_norm": 0.09260696917772293, + "learning_rate": 2.022703947533105e-05, + "loss": 9.3099, + "step": 65570 + }, + { + "epoch": 0.3274988139529077, + "grad_norm": 0.09234783053398132, + "learning_rate": 2.0225537560389496e-05, + "loss": 9.3106, + "step": 65580 + }, + { + "epoch": 0.3275487527778471, + "grad_norm": 0.09439610689878464, + "learning_rate": 2.022403564544795e-05, + "loss": 9.309, + "step": 65590 + }, + { + "epoch": 0.3275986916027866, + "grad_norm": 0.09166769683361053, + "learning_rate": 2.0222533730506396e-05, + "loss": 9.3154, + "step": 65600 + }, + { + "epoch": 0.327648630427726, + "grad_norm": 0.09410682320594788, + "learning_rate": 2.0221031815564843e-05, + "loss": 9.3146, + "step": 65610 + }, + { + "epoch": 0.3276985692526655, + "grad_norm": 0.09807700663805008, + "learning_rate": 2.0219529900623297e-05, + "loss": 9.3173, + "step": 65620 + }, + { + "epoch": 0.3277485080776049, + "grad_norm": 0.10019030421972275, + "learning_rate": 2.0218027985681743e-05, + "loss": 9.3117, + "step": 65630 + }, + { + "epoch": 0.3277984469025444, + "grad_norm": 0.09734559059143066, + "learning_rate": 2.0216526070740197e-05, + "loss": 9.3175, + "step": 65640 + }, + { + "epoch": 0.3278483857274838, + "grad_norm": 0.09119731932878494, + "learning_rate": 2.0215024155798644e-05, + "loss": 9.3123, + "step": 65650 + }, + { + "epoch": 0.3278983245524233, + "grad_norm": 0.09404603391885757, + "learning_rate": 2.021352224085709e-05, + "loss": 9.3134, + "step": 65660 + }, + { + "epoch": 0.3279482633773627, + "grad_norm": 0.09394799917936325, + "learning_rate": 2.0212020325915544e-05, + "loss": 9.3197, + "step": 65670 + }, + { + "epoch": 0.3279982022023022, + "grad_norm": 0.0975813940167427, + "learning_rate": 2.021051841097399e-05, + "loss": 9.3185, + "step": 65680 + }, + { + "epoch": 0.3280481410272416, + "grad_norm": 0.09289105236530304, + "learning_rate": 2.0209016496032444e-05, + "loss": 9.317, + "step": 65690 + }, + { + "epoch": 0.3280980798521811, + "grad_norm": 0.08842571079730988, + "learning_rate": 2.020751458109089e-05, + "loss": 9.3099, + "step": 65700 + }, + { + "epoch": 0.3281480186771205, + "grad_norm": 0.09984628111124039, + "learning_rate": 2.0206012666149338e-05, + "loss": 9.3071, + "step": 65710 + }, + { + "epoch": 0.32819795750206, + "grad_norm": 0.09205308556556702, + "learning_rate": 2.020451075120779e-05, + "loss": 9.3137, + "step": 65720 + }, + { + "epoch": 0.3282478963269994, + "grad_norm": 0.09461898356676102, + "learning_rate": 2.020300883626624e-05, + "loss": 9.3055, + "step": 65730 + }, + { + "epoch": 0.3282978351519389, + "grad_norm": 0.09863221645355225, + "learning_rate": 2.0201506921324692e-05, + "loss": 9.3214, + "step": 65740 + }, + { + "epoch": 0.3283477739768783, + "grad_norm": 0.09489452093839645, + "learning_rate": 2.020000500638314e-05, + "loss": 9.3222, + "step": 65750 + }, + { + "epoch": 0.3283977128018178, + "grad_norm": 0.09285447001457214, + "learning_rate": 2.0198503091441586e-05, + "loss": 9.3073, + "step": 65760 + }, + { + "epoch": 0.3284476516267572, + "grad_norm": 0.09722167253494263, + "learning_rate": 2.019700117650004e-05, + "loss": 9.3122, + "step": 65770 + }, + { + "epoch": 0.3284975904516967, + "grad_norm": 0.09583593159914017, + "learning_rate": 2.0195499261558486e-05, + "loss": 9.3235, + "step": 65780 + }, + { + "epoch": 0.3285475292766361, + "grad_norm": 0.09201577305793762, + "learning_rate": 2.019399734661694e-05, + "loss": 9.3062, + "step": 65790 + }, + { + "epoch": 0.3285974681015756, + "grad_norm": 0.09347929805517197, + "learning_rate": 2.0192495431675386e-05, + "loss": 9.3122, + "step": 65800 + }, + { + "epoch": 0.328647406926515, + "grad_norm": 0.0961994007229805, + "learning_rate": 2.0190993516733836e-05, + "loss": 9.3093, + "step": 65810 + }, + { + "epoch": 0.3286973457514545, + "grad_norm": 0.0859488919377327, + "learning_rate": 2.0189491601792287e-05, + "loss": 9.3174, + "step": 65820 + }, + { + "epoch": 0.3287472845763939, + "grad_norm": 0.09572838991880417, + "learning_rate": 2.0187989686850733e-05, + "loss": 9.3193, + "step": 65830 + }, + { + "epoch": 0.3287972234013334, + "grad_norm": 0.09346995502710342, + "learning_rate": 2.0186487771909187e-05, + "loss": 9.3144, + "step": 65840 + }, + { + "epoch": 0.3288471622262728, + "grad_norm": 0.10546184331178665, + "learning_rate": 2.0184985856967634e-05, + "loss": 9.3125, + "step": 65850 + }, + { + "epoch": 0.3288971010512123, + "grad_norm": 0.08991089463233948, + "learning_rate": 2.0183483942026084e-05, + "loss": 9.3123, + "step": 65860 + }, + { + "epoch": 0.3289470398761517, + "grad_norm": 0.09135644137859344, + "learning_rate": 2.0181982027084534e-05, + "loss": 9.3112, + "step": 65870 + }, + { + "epoch": 0.3289969787010912, + "grad_norm": 0.0967700332403183, + "learning_rate": 2.018048011214298e-05, + "loss": 9.3096, + "step": 65880 + }, + { + "epoch": 0.3290469175260306, + "grad_norm": 0.09798181802034378, + "learning_rate": 2.0178978197201434e-05, + "loss": 9.3139, + "step": 65890 + }, + { + "epoch": 0.3290968563509701, + "grad_norm": 0.09936074167490005, + "learning_rate": 2.017747628225988e-05, + "loss": 9.3105, + "step": 65900 + }, + { + "epoch": 0.3291467951759095, + "grad_norm": 0.094666488468647, + "learning_rate": 2.017597436731833e-05, + "loss": 9.3142, + "step": 65910 + }, + { + "epoch": 0.329196734000849, + "grad_norm": 0.09142427891492844, + "learning_rate": 2.017447245237678e-05, + "loss": 9.3042, + "step": 65920 + }, + { + "epoch": 0.3292466728257884, + "grad_norm": 0.09408102929592133, + "learning_rate": 2.017297053743523e-05, + "loss": 9.3053, + "step": 65930 + }, + { + "epoch": 0.3292966116507279, + "grad_norm": 0.09405004233121872, + "learning_rate": 2.0171468622493682e-05, + "loss": 9.3083, + "step": 65940 + }, + { + "epoch": 0.3293465504756673, + "grad_norm": 0.09288471937179565, + "learning_rate": 2.016996670755213e-05, + "loss": 9.3006, + "step": 65950 + }, + { + "epoch": 0.3293964893006068, + "grad_norm": 0.09375298768281937, + "learning_rate": 2.016846479261058e-05, + "loss": 9.3127, + "step": 65960 + }, + { + "epoch": 0.3294464281255462, + "grad_norm": 0.0929841622710228, + "learning_rate": 2.016696287766903e-05, + "loss": 9.3043, + "step": 65970 + }, + { + "epoch": 0.3294963669504857, + "grad_norm": 0.09631754457950592, + "learning_rate": 2.0165460962727476e-05, + "loss": 9.3143, + "step": 65980 + }, + { + "epoch": 0.3295463057754251, + "grad_norm": 0.09023533016443253, + "learning_rate": 2.016395904778593e-05, + "loss": 9.3028, + "step": 65990 + }, + { + "epoch": 0.3295962446003646, + "grad_norm": 0.09742230176925659, + "learning_rate": 2.0162457132844376e-05, + "loss": 9.308, + "step": 66000 + }, + { + "epoch": 0.329646183425304, + "grad_norm": 0.09984151273965836, + "learning_rate": 2.0160955217902826e-05, + "loss": 9.309, + "step": 66010 + }, + { + "epoch": 0.3296961222502435, + "grad_norm": 0.09294534474611282, + "learning_rate": 2.0159453302961277e-05, + "loss": 9.3217, + "step": 66020 + }, + { + "epoch": 0.3297460610751829, + "grad_norm": 0.09389375895261765, + "learning_rate": 2.0157951388019723e-05, + "loss": 9.3111, + "step": 66030 + }, + { + "epoch": 0.32979599990012237, + "grad_norm": 0.09610763192176819, + "learning_rate": 2.0156449473078177e-05, + "loss": 9.2965, + "step": 66040 + }, + { + "epoch": 0.3298459387250618, + "grad_norm": 0.1011497899889946, + "learning_rate": 2.0154947558136624e-05, + "loss": 9.3115, + "step": 66050 + }, + { + "epoch": 0.32989587755000127, + "grad_norm": 0.09551998227834702, + "learning_rate": 2.0153445643195074e-05, + "loss": 9.312, + "step": 66060 + }, + { + "epoch": 0.3299458163749407, + "grad_norm": 0.09386122971773148, + "learning_rate": 2.0151943728253524e-05, + "loss": 9.3002, + "step": 66070 + }, + { + "epoch": 0.32999575519988017, + "grad_norm": 0.08605486899614334, + "learning_rate": 2.015044181331197e-05, + "loss": 9.3053, + "step": 66080 + }, + { + "epoch": 0.3300456940248196, + "grad_norm": 0.09588326513767242, + "learning_rate": 2.0148939898370424e-05, + "loss": 9.3046, + "step": 66090 + }, + { + "epoch": 0.33009563284975907, + "grad_norm": 0.09192916750907898, + "learning_rate": 2.014743798342887e-05, + "loss": 9.3079, + "step": 66100 + }, + { + "epoch": 0.3301455716746985, + "grad_norm": 0.0940568745136261, + "learning_rate": 2.014593606848732e-05, + "loss": 9.2998, + "step": 66110 + }, + { + "epoch": 0.33019551049963797, + "grad_norm": 0.09915883094072342, + "learning_rate": 2.014443415354577e-05, + "loss": 9.2972, + "step": 66120 + }, + { + "epoch": 0.3302454493245774, + "grad_norm": 0.09629667550325394, + "learning_rate": 2.0142932238604222e-05, + "loss": 9.3091, + "step": 66130 + }, + { + "epoch": 0.33029538814951687, + "grad_norm": 0.08921325206756592, + "learning_rate": 2.0141430323662672e-05, + "loss": 9.3056, + "step": 66140 + }, + { + "epoch": 0.3303453269744563, + "grad_norm": 0.09300815314054489, + "learning_rate": 2.013992840872112e-05, + "loss": 9.298, + "step": 66150 + }, + { + "epoch": 0.33039526579939577, + "grad_norm": 0.09541340917348862, + "learning_rate": 2.013842649377957e-05, + "loss": 9.323, + "step": 66160 + }, + { + "epoch": 0.3304452046243352, + "grad_norm": 0.09386754035949707, + "learning_rate": 2.013692457883802e-05, + "loss": 9.3023, + "step": 66170 + }, + { + "epoch": 0.33049514344927466, + "grad_norm": 0.09818427264690399, + "learning_rate": 2.013542266389647e-05, + "loss": 9.2981, + "step": 66180 + }, + { + "epoch": 0.3305450822742141, + "grad_norm": 0.09862353652715683, + "learning_rate": 2.013392074895492e-05, + "loss": 9.3156, + "step": 66190 + }, + { + "epoch": 0.33059502109915356, + "grad_norm": 0.10540249943733215, + "learning_rate": 2.0132418834013366e-05, + "loss": 9.3077, + "step": 66200 + }, + { + "epoch": 0.330644959924093, + "grad_norm": 0.09350631386041641, + "learning_rate": 2.0130916919071816e-05, + "loss": 9.309, + "step": 66210 + }, + { + "epoch": 0.33069489874903246, + "grad_norm": 0.09390132129192352, + "learning_rate": 2.0129415004130267e-05, + "loss": 9.2976, + "step": 66220 + }, + { + "epoch": 0.3307448375739719, + "grad_norm": 0.09093783795833588, + "learning_rate": 2.0127913089188717e-05, + "loss": 9.3009, + "step": 66230 + }, + { + "epoch": 0.33079477639891136, + "grad_norm": 0.09060053527355194, + "learning_rate": 2.0126411174247167e-05, + "loss": 9.2951, + "step": 66240 + }, + { + "epoch": 0.3308447152238508, + "grad_norm": 0.09574903547763824, + "learning_rate": 2.0124909259305614e-05, + "loss": 9.3095, + "step": 66250 + }, + { + "epoch": 0.3308946540487902, + "grad_norm": 0.09056057035923004, + "learning_rate": 2.0123407344364064e-05, + "loss": 9.3067, + "step": 66260 + }, + { + "epoch": 0.3309445928737297, + "grad_norm": 0.09261029213666916, + "learning_rate": 2.0121905429422514e-05, + "loss": 9.3051, + "step": 66270 + }, + { + "epoch": 0.3309945316986691, + "grad_norm": 0.0922454297542572, + "learning_rate": 2.0120403514480964e-05, + "loss": 9.3012, + "step": 66280 + }, + { + "epoch": 0.3310444705236086, + "grad_norm": 0.0930676981806755, + "learning_rate": 2.0118901599539414e-05, + "loss": 9.2958, + "step": 66290 + }, + { + "epoch": 0.331094409348548, + "grad_norm": 0.0963793620467186, + "learning_rate": 2.011739968459786e-05, + "loss": 9.2974, + "step": 66300 + }, + { + "epoch": 0.3311443481734875, + "grad_norm": 0.09505847096443176, + "learning_rate": 2.011589776965631e-05, + "loss": 9.3042, + "step": 66310 + }, + { + "epoch": 0.3311942869984269, + "grad_norm": 0.09292503446340561, + "learning_rate": 2.011439585471476e-05, + "loss": 9.3068, + "step": 66320 + }, + { + "epoch": 0.3312442258233664, + "grad_norm": 0.09125857800245285, + "learning_rate": 2.0112893939773212e-05, + "loss": 9.3085, + "step": 66330 + }, + { + "epoch": 0.3312941646483058, + "grad_norm": 0.09520285576581955, + "learning_rate": 2.0111392024831662e-05, + "loss": 9.3046, + "step": 66340 + }, + { + "epoch": 0.3313441034732453, + "grad_norm": 0.0909276232123375, + "learning_rate": 2.010989010989011e-05, + "loss": 9.3044, + "step": 66350 + }, + { + "epoch": 0.3313940422981847, + "grad_norm": 0.0963204950094223, + "learning_rate": 2.010838819494856e-05, + "loss": 9.3014, + "step": 66360 + }, + { + "epoch": 0.3314439811231242, + "grad_norm": 0.09370825439691544, + "learning_rate": 2.010688628000701e-05, + "loss": 9.3011, + "step": 66370 + }, + { + "epoch": 0.3314939199480636, + "grad_norm": 0.09406444430351257, + "learning_rate": 2.010538436506546e-05, + "loss": 9.3059, + "step": 66380 + }, + { + "epoch": 0.3315438587730031, + "grad_norm": 0.0930875912308693, + "learning_rate": 2.010388245012391e-05, + "loss": 9.2974, + "step": 66390 + }, + { + "epoch": 0.3315937975979425, + "grad_norm": 0.09899143129587173, + "learning_rate": 2.0102380535182356e-05, + "loss": 9.2958, + "step": 66400 + }, + { + "epoch": 0.331643736422882, + "grad_norm": 0.09795042127370834, + "learning_rate": 2.0100878620240806e-05, + "loss": 9.2952, + "step": 66410 + }, + { + "epoch": 0.3316936752478214, + "grad_norm": 0.09661424905061722, + "learning_rate": 2.0099376705299257e-05, + "loss": 9.3051, + "step": 66420 + }, + { + "epoch": 0.3317436140727609, + "grad_norm": 0.09605047106742859, + "learning_rate": 2.0097874790357707e-05, + "loss": 9.2961, + "step": 66430 + }, + { + "epoch": 0.3317935528977003, + "grad_norm": 0.09522906690835953, + "learning_rate": 2.0096372875416157e-05, + "loss": 9.3023, + "step": 66440 + }, + { + "epoch": 0.33184349172263977, + "grad_norm": 0.09131506085395813, + "learning_rate": 2.0094870960474607e-05, + "loss": 9.2996, + "step": 66450 + }, + { + "epoch": 0.3318934305475792, + "grad_norm": 0.09322021901607513, + "learning_rate": 2.0093369045533054e-05, + "loss": 9.3029, + "step": 66460 + }, + { + "epoch": 0.33194336937251867, + "grad_norm": 0.09292212128639221, + "learning_rate": 2.0091867130591504e-05, + "loss": 9.3136, + "step": 66470 + }, + { + "epoch": 0.3319933081974581, + "grad_norm": 0.09753609448671341, + "learning_rate": 2.0090365215649954e-05, + "loss": 9.2932, + "step": 66480 + }, + { + "epoch": 0.33204324702239757, + "grad_norm": 0.09411279857158661, + "learning_rate": 2.0088863300708404e-05, + "loss": 9.3017, + "step": 66490 + }, + { + "epoch": 0.332093185847337, + "grad_norm": 0.09396139532327652, + "learning_rate": 2.0087361385766855e-05, + "loss": 9.296, + "step": 66500 + }, + { + "epoch": 0.33214312467227647, + "grad_norm": 0.089934341609478, + "learning_rate": 2.00858594708253e-05, + "loss": 9.2877, + "step": 66510 + }, + { + "epoch": 0.3321930634972159, + "grad_norm": 0.09298644214868546, + "learning_rate": 2.008435755588375e-05, + "loss": 9.2991, + "step": 66520 + }, + { + "epoch": 0.33224300232215537, + "grad_norm": 0.09051031619310379, + "learning_rate": 2.0082855640942202e-05, + "loss": 9.2918, + "step": 66530 + }, + { + "epoch": 0.3322929411470948, + "grad_norm": 0.0935496836900711, + "learning_rate": 2.0081353726000652e-05, + "loss": 9.3055, + "step": 66540 + }, + { + "epoch": 0.33234287997203427, + "grad_norm": 0.0968925952911377, + "learning_rate": 2.0079851811059102e-05, + "loss": 9.2846, + "step": 66550 + }, + { + "epoch": 0.3323928187969737, + "grad_norm": 0.09733214229345322, + "learning_rate": 2.007834989611755e-05, + "loss": 9.2999, + "step": 66560 + }, + { + "epoch": 0.33244275762191317, + "grad_norm": 0.08981287479400635, + "learning_rate": 2.0076847981176e-05, + "loss": 9.2988, + "step": 66570 + }, + { + "epoch": 0.3324926964468526, + "grad_norm": 0.09502604603767395, + "learning_rate": 2.007534606623445e-05, + "loss": 9.3041, + "step": 66580 + }, + { + "epoch": 0.33254263527179206, + "grad_norm": 0.0965120941400528, + "learning_rate": 2.00738441512929e-05, + "loss": 9.3026, + "step": 66590 + }, + { + "epoch": 0.3325925740967315, + "grad_norm": 0.09275338053703308, + "learning_rate": 2.007234223635135e-05, + "loss": 9.3102, + "step": 66600 + }, + { + "epoch": 0.33264251292167096, + "grad_norm": 0.09039387851953506, + "learning_rate": 2.0070840321409796e-05, + "loss": 9.3041, + "step": 66610 + }, + { + "epoch": 0.3326924517466104, + "grad_norm": 0.09301149100065231, + "learning_rate": 2.0069338406468247e-05, + "loss": 9.295, + "step": 66620 + }, + { + "epoch": 0.33274239057154986, + "grad_norm": 0.09034721553325653, + "learning_rate": 2.0067836491526697e-05, + "loss": 9.3058, + "step": 66630 + }, + { + "epoch": 0.3327923293964893, + "grad_norm": 0.09412179887294769, + "learning_rate": 2.0066334576585147e-05, + "loss": 9.2925, + "step": 66640 + }, + { + "epoch": 0.33284226822142876, + "grad_norm": 0.09281489998102188, + "learning_rate": 2.0064832661643597e-05, + "loss": 9.303, + "step": 66650 + }, + { + "epoch": 0.3328922070463682, + "grad_norm": 0.09423484653234482, + "learning_rate": 2.0063330746702044e-05, + "loss": 9.3017, + "step": 66660 + }, + { + "epoch": 0.33294214587130766, + "grad_norm": 0.0937783271074295, + "learning_rate": 2.0061828831760494e-05, + "loss": 9.2948, + "step": 66670 + }, + { + "epoch": 0.3329920846962471, + "grad_norm": 0.09942499548196793, + "learning_rate": 2.0060326916818944e-05, + "loss": 9.3057, + "step": 66680 + }, + { + "epoch": 0.33304202352118656, + "grad_norm": 0.09352242201566696, + "learning_rate": 2.0058825001877394e-05, + "loss": 9.3025, + "step": 66690 + }, + { + "epoch": 0.333091962346126, + "grad_norm": 0.09306678175926208, + "learning_rate": 2.0057323086935845e-05, + "loss": 9.2967, + "step": 66700 + }, + { + "epoch": 0.33314190117106546, + "grad_norm": 0.09103065729141235, + "learning_rate": 2.005582117199429e-05, + "loss": 9.2991, + "step": 66710 + }, + { + "epoch": 0.3331918399960049, + "grad_norm": 0.0972660630941391, + "learning_rate": 2.005431925705274e-05, + "loss": 9.304, + "step": 66720 + }, + { + "epoch": 0.33324177882094436, + "grad_norm": 0.09878892451524734, + "learning_rate": 2.0052817342111192e-05, + "loss": 9.2933, + "step": 66730 + }, + { + "epoch": 0.3332917176458838, + "grad_norm": 0.08945970237255096, + "learning_rate": 2.0051315427169642e-05, + "loss": 9.296, + "step": 66740 + }, + { + "epoch": 0.33334165647082326, + "grad_norm": 0.0968952402472496, + "learning_rate": 2.0049813512228092e-05, + "loss": 9.304, + "step": 66750 + }, + { + "epoch": 0.3333915952957627, + "grad_norm": 0.09566505998373032, + "learning_rate": 2.004831159728654e-05, + "loss": 9.2936, + "step": 66760 + }, + { + "epoch": 0.33344153412070215, + "grad_norm": 0.09034927189350128, + "learning_rate": 2.0046809682344993e-05, + "loss": 9.2968, + "step": 66770 + }, + { + "epoch": 0.3334914729456416, + "grad_norm": 0.09364010393619537, + "learning_rate": 2.004530776740344e-05, + "loss": 9.3066, + "step": 66780 + }, + { + "epoch": 0.33354141177058105, + "grad_norm": 0.09636852890253067, + "learning_rate": 2.004380585246189e-05, + "loss": 9.3017, + "step": 66790 + }, + { + "epoch": 0.3335913505955205, + "grad_norm": 0.09206737577915192, + "learning_rate": 2.004230393752034e-05, + "loss": 9.3021, + "step": 66800 + }, + { + "epoch": 0.33364128942045995, + "grad_norm": 0.09280619025230408, + "learning_rate": 2.0040802022578786e-05, + "loss": 9.2885, + "step": 66810 + }, + { + "epoch": 0.3336912282453994, + "grad_norm": 0.09028727561235428, + "learning_rate": 2.003930010763724e-05, + "loss": 9.2951, + "step": 66820 + }, + { + "epoch": 0.33374116707033885, + "grad_norm": 0.0951092317700386, + "learning_rate": 2.0037798192695687e-05, + "loss": 9.305, + "step": 66830 + }, + { + "epoch": 0.3337911058952783, + "grad_norm": 0.09845414012670517, + "learning_rate": 2.0036296277754137e-05, + "loss": 9.2961, + "step": 66840 + }, + { + "epoch": 0.33384104472021775, + "grad_norm": 0.09636221081018448, + "learning_rate": 2.0034794362812587e-05, + "loss": 9.2949, + "step": 66850 + }, + { + "epoch": 0.33389098354515717, + "grad_norm": 0.09213186800479889, + "learning_rate": 2.0033292447871034e-05, + "loss": 9.2933, + "step": 66860 + }, + { + "epoch": 0.33394092237009665, + "grad_norm": 0.09357234835624695, + "learning_rate": 2.0031790532929488e-05, + "loss": 9.3005, + "step": 66870 + }, + { + "epoch": 0.33399086119503607, + "grad_norm": 0.0960192084312439, + "learning_rate": 2.0030288617987934e-05, + "loss": 9.2875, + "step": 66880 + }, + { + "epoch": 0.33404080001997555, + "grad_norm": 0.09275079518556595, + "learning_rate": 2.0028786703046385e-05, + "loss": 9.2984, + "step": 66890 + }, + { + "epoch": 0.33409073884491497, + "grad_norm": 0.08964371681213379, + "learning_rate": 2.0027284788104835e-05, + "loss": 9.301, + "step": 66900 + }, + { + "epoch": 0.33414067766985445, + "grad_norm": 0.096300408244133, + "learning_rate": 2.002578287316328e-05, + "loss": 9.3032, + "step": 66910 + }, + { + "epoch": 0.33419061649479387, + "grad_norm": 0.09406279772520065, + "learning_rate": 2.0024280958221735e-05, + "loss": 9.29, + "step": 66920 + }, + { + "epoch": 0.33424055531973335, + "grad_norm": 0.09408601373434067, + "learning_rate": 2.0022779043280182e-05, + "loss": 9.2842, + "step": 66930 + }, + { + "epoch": 0.33429049414467277, + "grad_norm": 0.10076167434453964, + "learning_rate": 2.0021277128338632e-05, + "loss": 9.2893, + "step": 66940 + }, + { + "epoch": 0.33434043296961224, + "grad_norm": 0.09452462196350098, + "learning_rate": 2.0019775213397082e-05, + "loss": 9.3011, + "step": 66950 + }, + { + "epoch": 0.33439037179455167, + "grad_norm": 0.1001209020614624, + "learning_rate": 2.001827329845553e-05, + "loss": 9.2893, + "step": 66960 + }, + { + "epoch": 0.33444031061949114, + "grad_norm": 0.09850946813821793, + "learning_rate": 2.0016771383513983e-05, + "loss": 9.2799, + "step": 66970 + }, + { + "epoch": 0.33449024944443057, + "grad_norm": 0.09696486592292786, + "learning_rate": 2.001526946857243e-05, + "loss": 9.2929, + "step": 66980 + }, + { + "epoch": 0.33454018826937004, + "grad_norm": 0.09150009602308273, + "learning_rate": 2.001376755363088e-05, + "loss": 9.2956, + "step": 66990 + }, + { + "epoch": 0.33459012709430946, + "grad_norm": 0.09880898147821426, + "learning_rate": 2.001226563868933e-05, + "loss": 9.2894, + "step": 67000 + }, + { + "epoch": 0.33464006591924894, + "grad_norm": 0.1000562235713005, + "learning_rate": 2.0010763723747776e-05, + "loss": 9.2878, + "step": 67010 + }, + { + "epoch": 0.33469000474418836, + "grad_norm": 0.09261028468608856, + "learning_rate": 2.000926180880623e-05, + "loss": 9.3001, + "step": 67020 + }, + { + "epoch": 0.33473994356912784, + "grad_norm": 0.09857013821601868, + "learning_rate": 2.0007759893864677e-05, + "loss": 9.2877, + "step": 67030 + }, + { + "epoch": 0.33478988239406726, + "grad_norm": 0.09461692720651627, + "learning_rate": 2.0006257978923127e-05, + "loss": 9.2916, + "step": 67040 + }, + { + "epoch": 0.33483982121900674, + "grad_norm": 0.09334218502044678, + "learning_rate": 2.0004756063981577e-05, + "loss": 9.2941, + "step": 67050 + }, + { + "epoch": 0.33488976004394616, + "grad_norm": 0.09218782186508179, + "learning_rate": 2.0003254149040024e-05, + "loss": 9.2912, + "step": 67060 + }, + { + "epoch": 0.33493969886888564, + "grad_norm": 0.09374010562896729, + "learning_rate": 2.0001752234098478e-05, + "loss": 9.3061, + "step": 67070 + }, + { + "epoch": 0.33498963769382506, + "grad_norm": 0.09382879734039307, + "learning_rate": 2.0000250319156924e-05, + "loss": 9.2875, + "step": 67080 + }, + { + "epoch": 0.33503957651876454, + "grad_norm": 0.0962020605802536, + "learning_rate": 1.9998748404215378e-05, + "loss": 9.2897, + "step": 67090 + }, + { + "epoch": 0.33508951534370396, + "grad_norm": 0.08903557062149048, + "learning_rate": 1.9997246489273825e-05, + "loss": 9.2978, + "step": 67100 + }, + { + "epoch": 0.33513945416864344, + "grad_norm": 0.09756463766098022, + "learning_rate": 1.999574457433227e-05, + "loss": 9.2837, + "step": 67110 + }, + { + "epoch": 0.33518939299358286, + "grad_norm": 0.09464458376169205, + "learning_rate": 1.9994242659390725e-05, + "loss": 9.2818, + "step": 67120 + }, + { + "epoch": 0.33523933181852233, + "grad_norm": 0.09560305625200272, + "learning_rate": 1.9992740744449172e-05, + "loss": 9.299, + "step": 67130 + }, + { + "epoch": 0.33528927064346176, + "grad_norm": 0.09456129372119904, + "learning_rate": 1.9991238829507625e-05, + "loss": 9.2885, + "step": 67140 + }, + { + "epoch": 0.33533920946840123, + "grad_norm": 0.09367375075817108, + "learning_rate": 1.9989736914566072e-05, + "loss": 9.2979, + "step": 67150 + }, + { + "epoch": 0.33538914829334066, + "grad_norm": 0.09220660477876663, + "learning_rate": 1.998823499962452e-05, + "loss": 9.2871, + "step": 67160 + }, + { + "epoch": 0.33543908711828013, + "grad_norm": 0.09274411201477051, + "learning_rate": 1.9986733084682973e-05, + "loss": 9.2825, + "step": 67170 + }, + { + "epoch": 0.33548902594321955, + "grad_norm": 0.09277375042438507, + "learning_rate": 1.998523116974142e-05, + "loss": 9.2908, + "step": 67180 + }, + { + "epoch": 0.33553896476815903, + "grad_norm": 0.09955313801765442, + "learning_rate": 1.9983729254799873e-05, + "loss": 9.2862, + "step": 67190 + }, + { + "epoch": 0.33558890359309845, + "grad_norm": 0.09518168866634369, + "learning_rate": 1.998222733985832e-05, + "loss": 9.2919, + "step": 67200 + }, + { + "epoch": 0.33563884241803793, + "grad_norm": 0.09314264357089996, + "learning_rate": 1.9980725424916767e-05, + "loss": 9.2893, + "step": 67210 + }, + { + "epoch": 0.33568878124297735, + "grad_norm": 0.09429054707288742, + "learning_rate": 1.997922350997522e-05, + "loss": 9.3004, + "step": 67220 + }, + { + "epoch": 0.33573872006791683, + "grad_norm": 0.09523293375968933, + "learning_rate": 1.9977721595033667e-05, + "loss": 9.3031, + "step": 67230 + }, + { + "epoch": 0.33578865889285625, + "grad_norm": 0.09158944338560104, + "learning_rate": 1.997621968009212e-05, + "loss": 9.2918, + "step": 67240 + }, + { + "epoch": 0.3358385977177957, + "grad_norm": 0.09265114367008209, + "learning_rate": 1.9974717765150567e-05, + "loss": 9.2749, + "step": 67250 + }, + { + "epoch": 0.33588853654273515, + "grad_norm": 0.09522463381290436, + "learning_rate": 1.9973215850209014e-05, + "loss": 9.288, + "step": 67260 + }, + { + "epoch": 0.33593847536767457, + "grad_norm": 0.08956436067819595, + "learning_rate": 1.9971713935267468e-05, + "loss": 9.2902, + "step": 67270 + }, + { + "epoch": 0.33598841419261405, + "grad_norm": 0.09145728498697281, + "learning_rate": 1.9970212020325914e-05, + "loss": 9.2872, + "step": 67280 + }, + { + "epoch": 0.33603835301755347, + "grad_norm": 0.093343086540699, + "learning_rate": 1.9968710105384368e-05, + "loss": 9.2793, + "step": 67290 + }, + { + "epoch": 0.33608829184249295, + "grad_norm": 0.08741866797208786, + "learning_rate": 1.9967208190442815e-05, + "loss": 9.2963, + "step": 67300 + }, + { + "epoch": 0.33613823066743237, + "grad_norm": 0.09440305083990097, + "learning_rate": 1.996570627550126e-05, + "loss": 9.2873, + "step": 67310 + }, + { + "epoch": 0.33618816949237185, + "grad_norm": 0.09325043857097626, + "learning_rate": 1.9964204360559715e-05, + "loss": 9.2893, + "step": 67320 + }, + { + "epoch": 0.33623810831731127, + "grad_norm": 0.09446980804204941, + "learning_rate": 1.9962702445618162e-05, + "loss": 9.2772, + "step": 67330 + }, + { + "epoch": 0.33628804714225075, + "grad_norm": 0.09616648405790329, + "learning_rate": 1.9961200530676615e-05, + "loss": 9.2826, + "step": 67340 + }, + { + "epoch": 0.33633798596719017, + "grad_norm": 0.09551723301410675, + "learning_rate": 1.9959698615735062e-05, + "loss": 9.279, + "step": 67350 + }, + { + "epoch": 0.33638792479212964, + "grad_norm": 0.09174014627933502, + "learning_rate": 1.995819670079351e-05, + "loss": 9.2902, + "step": 67360 + }, + { + "epoch": 0.33643786361706907, + "grad_norm": 0.09369547665119171, + "learning_rate": 1.9956694785851963e-05, + "loss": 9.297, + "step": 67370 + }, + { + "epoch": 0.33648780244200854, + "grad_norm": 0.09878925234079361, + "learning_rate": 1.995519287091041e-05, + "loss": 9.283, + "step": 67380 + }, + { + "epoch": 0.33653774126694797, + "grad_norm": 0.09697102010250092, + "learning_rate": 1.9953690955968863e-05, + "loss": 9.2967, + "step": 67390 + }, + { + "epoch": 0.33658768009188744, + "grad_norm": 0.10312024503946304, + "learning_rate": 1.995218904102731e-05, + "loss": 9.2857, + "step": 67400 + }, + { + "epoch": 0.33663761891682686, + "grad_norm": 0.10092073678970337, + "learning_rate": 1.995068712608576e-05, + "loss": 9.2912, + "step": 67410 + }, + { + "epoch": 0.33668755774176634, + "grad_norm": 0.08944955468177795, + "learning_rate": 1.994918521114421e-05, + "loss": 9.2866, + "step": 67420 + }, + { + "epoch": 0.33673749656670576, + "grad_norm": 0.09892292320728302, + "learning_rate": 1.9947683296202657e-05, + "loss": 9.2795, + "step": 67430 + }, + { + "epoch": 0.33678743539164524, + "grad_norm": 0.09553686529397964, + "learning_rate": 1.994618138126111e-05, + "loss": 9.2821, + "step": 67440 + }, + { + "epoch": 0.33683737421658466, + "grad_norm": 0.09259875863790512, + "learning_rate": 1.9944679466319557e-05, + "loss": 9.2918, + "step": 67450 + }, + { + "epoch": 0.33688731304152414, + "grad_norm": 0.09672240912914276, + "learning_rate": 1.9943177551378007e-05, + "loss": 9.2893, + "step": 67460 + }, + { + "epoch": 0.33693725186646356, + "grad_norm": 0.09132887423038483, + "learning_rate": 1.9941675636436458e-05, + "loss": 9.2791, + "step": 67470 + }, + { + "epoch": 0.33698719069140304, + "grad_norm": 0.09487363696098328, + "learning_rate": 1.9940173721494904e-05, + "loss": 9.2924, + "step": 67480 + }, + { + "epoch": 0.33703712951634246, + "grad_norm": 0.09358032792806625, + "learning_rate": 1.9938671806553358e-05, + "loss": 9.2942, + "step": 67490 + }, + { + "epoch": 0.33708706834128194, + "grad_norm": 0.09767814725637436, + "learning_rate": 1.9937169891611805e-05, + "loss": 9.2936, + "step": 67500 + }, + { + "epoch": 0.33713700716622136, + "grad_norm": 0.0938803032040596, + "learning_rate": 1.9935667976670255e-05, + "loss": 9.2903, + "step": 67510 + }, + { + "epoch": 0.33718694599116084, + "grad_norm": 0.09350651502609253, + "learning_rate": 1.9934166061728705e-05, + "loss": 9.2828, + "step": 67520 + }, + { + "epoch": 0.33723688481610026, + "grad_norm": 0.09044313430786133, + "learning_rate": 1.9932664146787152e-05, + "loss": 9.2797, + "step": 67530 + }, + { + "epoch": 0.33728682364103973, + "grad_norm": 0.09183132648468018, + "learning_rate": 1.9931162231845605e-05, + "loss": 9.2823, + "step": 67540 + }, + { + "epoch": 0.33733676246597916, + "grad_norm": 0.09145781397819519, + "learning_rate": 1.9929660316904052e-05, + "loss": 9.2747, + "step": 67550 + }, + { + "epoch": 0.33738670129091863, + "grad_norm": 0.09186349809169769, + "learning_rate": 1.9928158401962502e-05, + "loss": 9.283, + "step": 67560 + }, + { + "epoch": 0.33743664011585806, + "grad_norm": 0.09399545937776566, + "learning_rate": 1.9926656487020953e-05, + "loss": 9.2948, + "step": 67570 + }, + { + "epoch": 0.33748657894079753, + "grad_norm": 0.09273233264684677, + "learning_rate": 1.99251545720794e-05, + "loss": 9.2942, + "step": 67580 + }, + { + "epoch": 0.33753651776573695, + "grad_norm": 0.09469340741634369, + "learning_rate": 1.9923652657137853e-05, + "loss": 9.2731, + "step": 67590 + }, + { + "epoch": 0.33758645659067643, + "grad_norm": 0.0966138169169426, + "learning_rate": 1.99221507421963e-05, + "loss": 9.2785, + "step": 67600 + }, + { + "epoch": 0.33763639541561585, + "grad_norm": 0.08690140396356583, + "learning_rate": 1.992064882725475e-05, + "loss": 9.2847, + "step": 67610 + }, + { + "epoch": 0.33768633424055533, + "grad_norm": 0.09711094200611115, + "learning_rate": 1.99191469123132e-05, + "loss": 9.2989, + "step": 67620 + }, + { + "epoch": 0.33773627306549475, + "grad_norm": 0.09582893550395966, + "learning_rate": 1.9917644997371647e-05, + "loss": 9.2828, + "step": 67630 + }, + { + "epoch": 0.33778621189043423, + "grad_norm": 0.09695571660995483, + "learning_rate": 1.99161430824301e-05, + "loss": 9.2946, + "step": 67640 + }, + { + "epoch": 0.33783615071537365, + "grad_norm": 0.09266763180494308, + "learning_rate": 1.9914641167488547e-05, + "loss": 9.2909, + "step": 67650 + }, + { + "epoch": 0.33788608954031313, + "grad_norm": 0.09419077634811401, + "learning_rate": 1.9913139252546997e-05, + "loss": 9.2872, + "step": 67660 + }, + { + "epoch": 0.33793602836525255, + "grad_norm": 0.09436140954494476, + "learning_rate": 1.9911637337605448e-05, + "loss": 9.2898, + "step": 67670 + }, + { + "epoch": 0.337985967190192, + "grad_norm": 0.09518197923898697, + "learning_rate": 1.9910135422663894e-05, + "loss": 9.2801, + "step": 67680 + }, + { + "epoch": 0.33803590601513145, + "grad_norm": 0.09597449004650116, + "learning_rate": 1.9908633507722348e-05, + "loss": 9.2772, + "step": 67690 + }, + { + "epoch": 0.3380858448400709, + "grad_norm": 0.09348492324352264, + "learning_rate": 1.9907131592780795e-05, + "loss": 9.2983, + "step": 67700 + }, + { + "epoch": 0.33813578366501035, + "grad_norm": 0.09242329746484756, + "learning_rate": 1.9905629677839245e-05, + "loss": 9.2903, + "step": 67710 + }, + { + "epoch": 0.3381857224899498, + "grad_norm": 0.09632088989019394, + "learning_rate": 1.9904127762897695e-05, + "loss": 9.2947, + "step": 67720 + }, + { + "epoch": 0.33823566131488925, + "grad_norm": 0.09371468424797058, + "learning_rate": 1.9902625847956142e-05, + "loss": 9.2904, + "step": 67730 + }, + { + "epoch": 0.3382856001398287, + "grad_norm": 0.09897583723068237, + "learning_rate": 1.9901123933014595e-05, + "loss": 9.2772, + "step": 67740 + }, + { + "epoch": 0.33833553896476815, + "grad_norm": 0.0937107503414154, + "learning_rate": 1.9899622018073042e-05, + "loss": 9.2801, + "step": 67750 + }, + { + "epoch": 0.3383854777897076, + "grad_norm": 0.09183280169963837, + "learning_rate": 1.9898120103131492e-05, + "loss": 9.2962, + "step": 67760 + }, + { + "epoch": 0.33843541661464704, + "grad_norm": 0.09664995968341827, + "learning_rate": 1.9896618188189943e-05, + "loss": 9.2849, + "step": 67770 + }, + { + "epoch": 0.3384853554395865, + "grad_norm": 0.09200311452150345, + "learning_rate": 1.9895116273248393e-05, + "loss": 9.2824, + "step": 67780 + }, + { + "epoch": 0.33853529426452594, + "grad_norm": 0.09540259838104248, + "learning_rate": 1.9893614358306843e-05, + "loss": 9.2852, + "step": 67790 + }, + { + "epoch": 0.3385852330894654, + "grad_norm": 0.09478534013032913, + "learning_rate": 1.989211244336529e-05, + "loss": 9.2823, + "step": 67800 + }, + { + "epoch": 0.33863517191440484, + "grad_norm": 0.09047430753707886, + "learning_rate": 1.989061052842374e-05, + "loss": 9.2842, + "step": 67810 + }, + { + "epoch": 0.3386851107393443, + "grad_norm": 0.09374905377626419, + "learning_rate": 1.988910861348219e-05, + "loss": 9.2843, + "step": 67820 + }, + { + "epoch": 0.33873504956428374, + "grad_norm": 0.08902385830879211, + "learning_rate": 1.988760669854064e-05, + "loss": 9.2835, + "step": 67830 + }, + { + "epoch": 0.3387849883892232, + "grad_norm": 0.09766287356615067, + "learning_rate": 1.988610478359909e-05, + "loss": 9.2869, + "step": 67840 + }, + { + "epoch": 0.33883492721416264, + "grad_norm": 0.0938125029206276, + "learning_rate": 1.9884602868657537e-05, + "loss": 9.2816, + "step": 67850 + }, + { + "epoch": 0.3388848660391021, + "grad_norm": 0.09323128312826157, + "learning_rate": 1.9883100953715987e-05, + "loss": 9.268, + "step": 67860 + }, + { + "epoch": 0.33893480486404154, + "grad_norm": 0.092827707529068, + "learning_rate": 1.9881599038774438e-05, + "loss": 9.2823, + "step": 67870 + }, + { + "epoch": 0.338984743688981, + "grad_norm": 0.09353949874639511, + "learning_rate": 1.9880097123832888e-05, + "loss": 9.2872, + "step": 67880 + }, + { + "epoch": 0.33903468251392044, + "grad_norm": 0.09071552753448486, + "learning_rate": 1.9878595208891338e-05, + "loss": 9.2822, + "step": 67890 + }, + { + "epoch": 0.3390846213388599, + "grad_norm": 0.09243173152208328, + "learning_rate": 1.9877093293949785e-05, + "loss": 9.2805, + "step": 67900 + }, + { + "epoch": 0.33913456016379934, + "grad_norm": 0.09218911081552505, + "learning_rate": 1.987559137900824e-05, + "loss": 9.2797, + "step": 67910 + }, + { + "epoch": 0.3391844989887388, + "grad_norm": 0.09476479142904282, + "learning_rate": 1.9874089464066685e-05, + "loss": 9.2845, + "step": 67920 + }, + { + "epoch": 0.33923443781367824, + "grad_norm": 0.10015514492988586, + "learning_rate": 1.9872587549125135e-05, + "loss": 9.2723, + "step": 67930 + }, + { + "epoch": 0.3392843766386177, + "grad_norm": 0.09330648928880692, + "learning_rate": 1.9871085634183585e-05, + "loss": 9.2797, + "step": 67940 + }, + { + "epoch": 0.33933431546355713, + "grad_norm": 0.0925719290971756, + "learning_rate": 1.9869583719242032e-05, + "loss": 9.2761, + "step": 67950 + }, + { + "epoch": 0.3393842542884966, + "grad_norm": 0.0938037782907486, + "learning_rate": 1.9868081804300486e-05, + "loss": 9.2795, + "step": 67960 + }, + { + "epoch": 0.33943419311343603, + "grad_norm": 0.09363068640232086, + "learning_rate": 1.9866579889358933e-05, + "loss": 9.2788, + "step": 67970 + }, + { + "epoch": 0.3394841319383755, + "grad_norm": 0.09175083041191101, + "learning_rate": 1.9865077974417383e-05, + "loss": 9.2829, + "step": 67980 + }, + { + "epoch": 0.33953407076331493, + "grad_norm": 0.09043523669242859, + "learning_rate": 1.9863576059475833e-05, + "loss": 9.2802, + "step": 67990 + }, + { + "epoch": 0.3395840095882544, + "grad_norm": 0.09502432495355606, + "learning_rate": 1.986207414453428e-05, + "loss": 9.2757, + "step": 68000 + }, + { + "epoch": 0.33963394841319383, + "grad_norm": 0.09515243023633957, + "learning_rate": 1.9860572229592733e-05, + "loss": 9.2656, + "step": 68010 + }, + { + "epoch": 0.3396838872381333, + "grad_norm": 0.0931011438369751, + "learning_rate": 1.985907031465118e-05, + "loss": 9.271, + "step": 68020 + }, + { + "epoch": 0.33973382606307273, + "grad_norm": 0.09343037009239197, + "learning_rate": 1.985756839970963e-05, + "loss": 9.2821, + "step": 68030 + }, + { + "epoch": 0.3397837648880122, + "grad_norm": 0.08982733637094498, + "learning_rate": 1.985606648476808e-05, + "loss": 9.2779, + "step": 68040 + }, + { + "epoch": 0.33983370371295163, + "grad_norm": 0.09892088919878006, + "learning_rate": 1.9854564569826527e-05, + "loss": 9.2804, + "step": 68050 + }, + { + "epoch": 0.3398836425378911, + "grad_norm": 0.09419706463813782, + "learning_rate": 1.985306265488498e-05, + "loss": 9.2811, + "step": 68060 + }, + { + "epoch": 0.33993358136283053, + "grad_norm": 0.09958070516586304, + "learning_rate": 1.9851560739943428e-05, + "loss": 9.2808, + "step": 68070 + }, + { + "epoch": 0.33998352018777, + "grad_norm": 0.09093392640352249, + "learning_rate": 1.9850058825001878e-05, + "loss": 9.2843, + "step": 68080 + }, + { + "epoch": 0.3400334590127094, + "grad_norm": 0.0960288792848587, + "learning_rate": 1.9848556910060328e-05, + "loss": 9.2756, + "step": 68090 + }, + { + "epoch": 0.3400833978376489, + "grad_norm": 0.0980386883020401, + "learning_rate": 1.9847054995118778e-05, + "loss": 9.2687, + "step": 68100 + }, + { + "epoch": 0.3401333366625883, + "grad_norm": 0.09633202850818634, + "learning_rate": 1.984555308017723e-05, + "loss": 9.2726, + "step": 68110 + }, + { + "epoch": 0.3401832754875278, + "grad_norm": 0.09432492405176163, + "learning_rate": 1.9844051165235675e-05, + "loss": 9.2645, + "step": 68120 + }, + { + "epoch": 0.3402332143124672, + "grad_norm": 0.09382835030555725, + "learning_rate": 1.9842549250294125e-05, + "loss": 9.2777, + "step": 68130 + }, + { + "epoch": 0.3402831531374067, + "grad_norm": 0.09988310188055038, + "learning_rate": 1.9841047335352575e-05, + "loss": 9.2738, + "step": 68140 + }, + { + "epoch": 0.3403330919623461, + "grad_norm": 0.09288916736841202, + "learning_rate": 1.9839545420411026e-05, + "loss": 9.2727, + "step": 68150 + }, + { + "epoch": 0.3403830307872856, + "grad_norm": 0.08969771862030029, + "learning_rate": 1.9838043505469476e-05, + "loss": 9.2757, + "step": 68160 + }, + { + "epoch": 0.340432969612225, + "grad_norm": 0.09481577575206757, + "learning_rate": 1.9836541590527923e-05, + "loss": 9.2677, + "step": 68170 + }, + { + "epoch": 0.3404829084371645, + "grad_norm": 0.09572725743055344, + "learning_rate": 1.9835039675586373e-05, + "loss": 9.2771, + "step": 68180 + }, + { + "epoch": 0.3405328472621039, + "grad_norm": 0.09492908418178558, + "learning_rate": 1.9833537760644823e-05, + "loss": 9.2752, + "step": 68190 + }, + { + "epoch": 0.3405827860870434, + "grad_norm": 0.09422598034143448, + "learning_rate": 1.9832035845703273e-05, + "loss": 9.2728, + "step": 68200 + }, + { + "epoch": 0.3406327249119828, + "grad_norm": 0.08828997611999512, + "learning_rate": 1.9830533930761723e-05, + "loss": 9.2755, + "step": 68210 + }, + { + "epoch": 0.3406826637369223, + "grad_norm": 0.09550122916698456, + "learning_rate": 1.982903201582017e-05, + "loss": 9.2761, + "step": 68220 + }, + { + "epoch": 0.3407326025618617, + "grad_norm": 0.09217823296785355, + "learning_rate": 1.982753010087862e-05, + "loss": 9.28, + "step": 68230 + }, + { + "epoch": 0.34078254138680114, + "grad_norm": 0.09574399888515472, + "learning_rate": 1.982602818593707e-05, + "loss": 9.2811, + "step": 68240 + }, + { + "epoch": 0.3408324802117406, + "grad_norm": 0.09653940051794052, + "learning_rate": 1.982452627099552e-05, + "loss": 9.2771, + "step": 68250 + }, + { + "epoch": 0.34088241903668004, + "grad_norm": 0.09815537184476852, + "learning_rate": 1.982302435605397e-05, + "loss": 9.2731, + "step": 68260 + }, + { + "epoch": 0.3409323578616195, + "grad_norm": 0.09170655906200409, + "learning_rate": 1.9821522441112418e-05, + "loss": 9.2808, + "step": 68270 + }, + { + "epoch": 0.34098229668655894, + "grad_norm": 0.09200761467218399, + "learning_rate": 1.9820020526170868e-05, + "loss": 9.2747, + "step": 68280 + }, + { + "epoch": 0.3410322355114984, + "grad_norm": 0.08978271484375, + "learning_rate": 1.9818518611229318e-05, + "loss": 9.2733, + "step": 68290 + }, + { + "epoch": 0.34108217433643784, + "grad_norm": 0.09526646882295609, + "learning_rate": 1.9817016696287768e-05, + "loss": 9.2776, + "step": 68300 + }, + { + "epoch": 0.3411321131613773, + "grad_norm": 0.0968102440237999, + "learning_rate": 1.981551478134622e-05, + "loss": 9.267, + "step": 68310 + }, + { + "epoch": 0.34118205198631674, + "grad_norm": 0.09626856446266174, + "learning_rate": 1.9814012866404665e-05, + "loss": 9.2769, + "step": 68320 + }, + { + "epoch": 0.3412319908112562, + "grad_norm": 0.09758766740560532, + "learning_rate": 1.9812510951463115e-05, + "loss": 9.2728, + "step": 68330 + }, + { + "epoch": 0.34128192963619564, + "grad_norm": 0.09370464831590652, + "learning_rate": 1.9811009036521566e-05, + "loss": 9.2702, + "step": 68340 + }, + { + "epoch": 0.3413318684611351, + "grad_norm": 0.0944569855928421, + "learning_rate": 1.9809507121580016e-05, + "loss": 9.282, + "step": 68350 + }, + { + "epoch": 0.34138180728607453, + "grad_norm": 0.09909389168024063, + "learning_rate": 1.9808005206638466e-05, + "loss": 9.2794, + "step": 68360 + }, + { + "epoch": 0.341431746111014, + "grad_norm": 0.097555011510849, + "learning_rate": 1.9806503291696913e-05, + "loss": 9.2815, + "step": 68370 + }, + { + "epoch": 0.34148168493595343, + "grad_norm": 0.0935775637626648, + "learning_rate": 1.9805001376755363e-05, + "loss": 9.2732, + "step": 68380 + }, + { + "epoch": 0.3415316237608929, + "grad_norm": 0.09548195451498032, + "learning_rate": 1.9803499461813813e-05, + "loss": 9.2617, + "step": 68390 + }, + { + "epoch": 0.34158156258583233, + "grad_norm": 0.09691401571035385, + "learning_rate": 1.9801997546872263e-05, + "loss": 9.2762, + "step": 68400 + }, + { + "epoch": 0.3416315014107718, + "grad_norm": 0.09655098617076874, + "learning_rate": 1.9800495631930713e-05, + "loss": 9.2838, + "step": 68410 + }, + { + "epoch": 0.34168144023571123, + "grad_norm": 0.09758975356817245, + "learning_rate": 1.9798993716989164e-05, + "loss": 9.2779, + "step": 68420 + }, + { + "epoch": 0.3417313790606507, + "grad_norm": 0.0990617647767067, + "learning_rate": 1.979749180204761e-05, + "loss": 9.2683, + "step": 68430 + }, + { + "epoch": 0.34178131788559013, + "grad_norm": 0.09439375251531601, + "learning_rate": 1.979598988710606e-05, + "loss": 9.2771, + "step": 68440 + }, + { + "epoch": 0.3418312567105296, + "grad_norm": 0.09546418488025665, + "learning_rate": 1.979448797216451e-05, + "loss": 9.2703, + "step": 68450 + }, + { + "epoch": 0.34188119553546903, + "grad_norm": 0.09475396573543549, + "learning_rate": 1.979298605722296e-05, + "loss": 9.2702, + "step": 68460 + }, + { + "epoch": 0.3419311343604085, + "grad_norm": 0.08913654834032059, + "learning_rate": 1.979148414228141e-05, + "loss": 9.2771, + "step": 68470 + }, + { + "epoch": 0.34198107318534793, + "grad_norm": 0.09099783003330231, + "learning_rate": 1.9789982227339858e-05, + "loss": 9.2692, + "step": 68480 + }, + { + "epoch": 0.3420310120102874, + "grad_norm": 0.09147406369447708, + "learning_rate": 1.9788480312398308e-05, + "loss": 9.2736, + "step": 68490 + }, + { + "epoch": 0.3420809508352268, + "grad_norm": 0.09418332576751709, + "learning_rate": 1.9786978397456758e-05, + "loss": 9.2729, + "step": 68500 + }, + { + "epoch": 0.3421308896601663, + "grad_norm": 0.09262189269065857, + "learning_rate": 1.978547648251521e-05, + "loss": 9.2679, + "step": 68510 + }, + { + "epoch": 0.3421808284851057, + "grad_norm": 0.10010916739702225, + "learning_rate": 1.978397456757366e-05, + "loss": 9.2779, + "step": 68520 + }, + { + "epoch": 0.3422307673100452, + "grad_norm": 0.09533961862325668, + "learning_rate": 1.9782472652632105e-05, + "loss": 9.2709, + "step": 68530 + }, + { + "epoch": 0.3422807061349846, + "grad_norm": 0.09138842672109604, + "learning_rate": 1.9780970737690556e-05, + "loss": 9.2689, + "step": 68540 + }, + { + "epoch": 0.3423306449599241, + "grad_norm": 0.09413190186023712, + "learning_rate": 1.9779468822749006e-05, + "loss": 9.2706, + "step": 68550 + }, + { + "epoch": 0.3423805837848635, + "grad_norm": 0.09632963687181473, + "learning_rate": 1.9777966907807456e-05, + "loss": 9.2716, + "step": 68560 + }, + { + "epoch": 0.342430522609803, + "grad_norm": 0.09684199839830399, + "learning_rate": 1.9776464992865906e-05, + "loss": 9.2731, + "step": 68570 + }, + { + "epoch": 0.3424804614347424, + "grad_norm": 0.09297710657119751, + "learning_rate": 1.9774963077924353e-05, + "loss": 9.2758, + "step": 68580 + }, + { + "epoch": 0.3425304002596819, + "grad_norm": 0.09406734257936478, + "learning_rate": 1.9773461162982803e-05, + "loss": 9.2581, + "step": 68590 + }, + { + "epoch": 0.3425803390846213, + "grad_norm": 0.09379177540540695, + "learning_rate": 1.9771959248041253e-05, + "loss": 9.2652, + "step": 68600 + }, + { + "epoch": 0.3426302779095608, + "grad_norm": 0.0957665890455246, + "learning_rate": 1.9770457333099703e-05, + "loss": 9.2713, + "step": 68610 + }, + { + "epoch": 0.3426802167345002, + "grad_norm": 0.0952882319688797, + "learning_rate": 1.9768955418158154e-05, + "loss": 9.2763, + "step": 68620 + }, + { + "epoch": 0.3427301555594397, + "grad_norm": 0.09209991991519928, + "learning_rate": 1.97674535032166e-05, + "loss": 9.2651, + "step": 68630 + }, + { + "epoch": 0.3427800943843791, + "grad_norm": 0.09648279845714569, + "learning_rate": 1.976595158827505e-05, + "loss": 9.2669, + "step": 68640 + }, + { + "epoch": 0.3428300332093186, + "grad_norm": 0.09398400038480759, + "learning_rate": 1.97644496733335e-05, + "loss": 9.2713, + "step": 68650 + }, + { + "epoch": 0.342879972034258, + "grad_norm": 0.0925755649805069, + "learning_rate": 1.976294775839195e-05, + "loss": 9.2797, + "step": 68660 + }, + { + "epoch": 0.3429299108591975, + "grad_norm": 0.0941460132598877, + "learning_rate": 1.97614458434504e-05, + "loss": 9.2715, + "step": 68670 + }, + { + "epoch": 0.3429798496841369, + "grad_norm": 0.0912114679813385, + "learning_rate": 1.9759943928508848e-05, + "loss": 9.2772, + "step": 68680 + }, + { + "epoch": 0.3430297885090764, + "grad_norm": 0.09493990242481232, + "learning_rate": 1.9758442013567298e-05, + "loss": 9.26, + "step": 68690 + }, + { + "epoch": 0.3430797273340158, + "grad_norm": 0.09542454034090042, + "learning_rate": 1.9756940098625748e-05, + "loss": 9.2713, + "step": 68700 + }, + { + "epoch": 0.3431296661589553, + "grad_norm": 0.09154530614614487, + "learning_rate": 1.97554381836842e-05, + "loss": 9.2737, + "step": 68710 + }, + { + "epoch": 0.3431796049838947, + "grad_norm": 0.09703396260738373, + "learning_rate": 1.975393626874265e-05, + "loss": 9.2639, + "step": 68720 + }, + { + "epoch": 0.3432295438088342, + "grad_norm": 0.09249304234981537, + "learning_rate": 1.9752434353801095e-05, + "loss": 9.265, + "step": 68730 + }, + { + "epoch": 0.3432794826337736, + "grad_norm": 0.10178221017122269, + "learning_rate": 1.975093243885955e-05, + "loss": 9.2766, + "step": 68740 + }, + { + "epoch": 0.3433294214587131, + "grad_norm": 0.09313027560710907, + "learning_rate": 1.9749430523917996e-05, + "loss": 9.2733, + "step": 68750 + }, + { + "epoch": 0.3433793602836525, + "grad_norm": 0.09608368575572968, + "learning_rate": 1.9747928608976446e-05, + "loss": 9.2663, + "step": 68760 + }, + { + "epoch": 0.343429299108592, + "grad_norm": 0.09877718985080719, + "learning_rate": 1.9746426694034896e-05, + "loss": 9.2556, + "step": 68770 + }, + { + "epoch": 0.3434792379335314, + "grad_norm": 0.09784068167209625, + "learning_rate": 1.9744924779093343e-05, + "loss": 9.2615, + "step": 68780 + }, + { + "epoch": 0.3435291767584709, + "grad_norm": 0.09086261689662933, + "learning_rate": 1.9743422864151796e-05, + "loss": 9.2652, + "step": 68790 + }, + { + "epoch": 0.3435791155834103, + "grad_norm": 0.09742986410856247, + "learning_rate": 1.9741920949210243e-05, + "loss": 9.2638, + "step": 68800 + }, + { + "epoch": 0.3436290544083498, + "grad_norm": 0.09637963771820068, + "learning_rate": 1.9740419034268693e-05, + "loss": 9.2678, + "step": 68810 + }, + { + "epoch": 0.3436789932332892, + "grad_norm": 0.09257320314645767, + "learning_rate": 1.9738917119327144e-05, + "loss": 9.2625, + "step": 68820 + }, + { + "epoch": 0.3437289320582287, + "grad_norm": 0.09262960404157639, + "learning_rate": 1.973741520438559e-05, + "loss": 9.2747, + "step": 68830 + }, + { + "epoch": 0.3437788708831681, + "grad_norm": 0.09434715658426285, + "learning_rate": 1.9735913289444044e-05, + "loss": 9.2595, + "step": 68840 + }, + { + "epoch": 0.3438288097081076, + "grad_norm": 0.09280122816562653, + "learning_rate": 1.973441137450249e-05, + "loss": 9.2605, + "step": 68850 + }, + { + "epoch": 0.343878748533047, + "grad_norm": 0.09355524182319641, + "learning_rate": 1.973290945956094e-05, + "loss": 9.2708, + "step": 68860 + }, + { + "epoch": 0.3439286873579865, + "grad_norm": 0.09285325556993484, + "learning_rate": 1.973140754461939e-05, + "loss": 9.277, + "step": 68870 + }, + { + "epoch": 0.3439786261829259, + "grad_norm": 0.09656701236963272, + "learning_rate": 1.9729905629677838e-05, + "loss": 9.2666, + "step": 68880 + }, + { + "epoch": 0.3440285650078654, + "grad_norm": 0.10322730988264084, + "learning_rate": 1.972840371473629e-05, + "loss": 9.2636, + "step": 68890 + }, + { + "epoch": 0.3440785038328048, + "grad_norm": 0.09647629410028458, + "learning_rate": 1.9726901799794738e-05, + "loss": 9.276, + "step": 68900 + }, + { + "epoch": 0.3441284426577443, + "grad_norm": 0.09318669140338898, + "learning_rate": 1.972539988485319e-05, + "loss": 9.2602, + "step": 68910 + }, + { + "epoch": 0.3441783814826837, + "grad_norm": 0.09435082972049713, + "learning_rate": 1.972389796991164e-05, + "loss": 9.2701, + "step": 68920 + }, + { + "epoch": 0.3442283203076232, + "grad_norm": 0.09164106100797653, + "learning_rate": 1.9722396054970085e-05, + "loss": 9.2785, + "step": 68930 + }, + { + "epoch": 0.3442782591325626, + "grad_norm": 0.08812560886144638, + "learning_rate": 1.972089414002854e-05, + "loss": 9.2758, + "step": 68940 + }, + { + "epoch": 0.3443281979575021, + "grad_norm": 0.0958331823348999, + "learning_rate": 1.9719392225086986e-05, + "loss": 9.2655, + "step": 68950 + }, + { + "epoch": 0.3443781367824415, + "grad_norm": 0.0955391451716423, + "learning_rate": 1.9717890310145436e-05, + "loss": 9.2702, + "step": 68960 + }, + { + "epoch": 0.344428075607381, + "grad_norm": 0.09317716956138611, + "learning_rate": 1.9716388395203886e-05, + "loss": 9.2806, + "step": 68970 + }, + { + "epoch": 0.3444780144323204, + "grad_norm": 0.09601125121116638, + "learning_rate": 1.9714886480262333e-05, + "loss": 9.2552, + "step": 68980 + }, + { + "epoch": 0.3445279532572599, + "grad_norm": 0.08980926871299744, + "learning_rate": 1.9713384565320786e-05, + "loss": 9.2754, + "step": 68990 + }, + { + "epoch": 0.3445778920821993, + "grad_norm": 0.09495431184768677, + "learning_rate": 1.9711882650379233e-05, + "loss": 9.2638, + "step": 69000 + }, + { + "epoch": 0.3446278309071388, + "grad_norm": 0.09350882470607758, + "learning_rate": 1.9710380735437683e-05, + "loss": 9.2606, + "step": 69010 + }, + { + "epoch": 0.3446777697320782, + "grad_norm": 0.08809029310941696, + "learning_rate": 1.9708878820496134e-05, + "loss": 9.262, + "step": 69020 + }, + { + "epoch": 0.3447277085570177, + "grad_norm": 0.09742359071969986, + "learning_rate": 1.970737690555458e-05, + "loss": 9.2644, + "step": 69030 + }, + { + "epoch": 0.3447776473819571, + "grad_norm": 0.09343401342630386, + "learning_rate": 1.9705874990613034e-05, + "loss": 9.2674, + "step": 69040 + }, + { + "epoch": 0.3448275862068966, + "grad_norm": 0.09134664386510849, + "learning_rate": 1.970437307567148e-05, + "loss": 9.2566, + "step": 69050 + }, + { + "epoch": 0.344877525031836, + "grad_norm": 0.09671717882156372, + "learning_rate": 1.9702871160729934e-05, + "loss": 9.2612, + "step": 69060 + }, + { + "epoch": 0.3449274638567755, + "grad_norm": 0.09348636120557785, + "learning_rate": 1.970136924578838e-05, + "loss": 9.2665, + "step": 69070 + }, + { + "epoch": 0.3449774026817149, + "grad_norm": 0.0987660363316536, + "learning_rate": 1.9699867330846828e-05, + "loss": 9.2633, + "step": 69080 + }, + { + "epoch": 0.3450273415066544, + "grad_norm": 0.09916608035564423, + "learning_rate": 1.969836541590528e-05, + "loss": 9.2581, + "step": 69090 + }, + { + "epoch": 0.3450772803315938, + "grad_norm": 0.0879778116941452, + "learning_rate": 1.9696863500963728e-05, + "loss": 9.258, + "step": 69100 + }, + { + "epoch": 0.34512721915653327, + "grad_norm": 0.09248411655426025, + "learning_rate": 1.9695361586022182e-05, + "loss": 9.262, + "step": 69110 + }, + { + "epoch": 0.3451771579814727, + "grad_norm": 0.09013424068689346, + "learning_rate": 1.969385967108063e-05, + "loss": 9.2611, + "step": 69120 + }, + { + "epoch": 0.34522709680641217, + "grad_norm": 0.09179334342479706, + "learning_rate": 1.9692357756139075e-05, + "loss": 9.2494, + "step": 69130 + }, + { + "epoch": 0.3452770356313516, + "grad_norm": 0.09357766062021255, + "learning_rate": 1.969085584119753e-05, + "loss": 9.2625, + "step": 69140 + }, + { + "epoch": 0.34532697445629107, + "grad_norm": 0.09096767753362656, + "learning_rate": 1.9689353926255976e-05, + "loss": 9.2662, + "step": 69150 + }, + { + "epoch": 0.3453769132812305, + "grad_norm": 0.0954696536064148, + "learning_rate": 1.968785201131443e-05, + "loss": 9.2555, + "step": 69160 + }, + { + "epoch": 0.34542685210616997, + "grad_norm": 0.09414954483509064, + "learning_rate": 1.9686350096372876e-05, + "loss": 9.2556, + "step": 69170 + }, + { + "epoch": 0.3454767909311094, + "grad_norm": 0.09444237500429153, + "learning_rate": 1.9684848181431323e-05, + "loss": 9.2622, + "step": 69180 + }, + { + "epoch": 0.34552672975604887, + "grad_norm": 0.09886594861745834, + "learning_rate": 1.9683346266489776e-05, + "loss": 9.2541, + "step": 69190 + }, + { + "epoch": 0.3455766685809883, + "grad_norm": 0.09522037953138351, + "learning_rate": 1.9681844351548223e-05, + "loss": 9.2655, + "step": 69200 + }, + { + "epoch": 0.34562660740592777, + "grad_norm": 0.09733402729034424, + "learning_rate": 1.9680342436606677e-05, + "loss": 9.2584, + "step": 69210 + }, + { + "epoch": 0.3456765462308672, + "grad_norm": 0.09290558099746704, + "learning_rate": 1.9678840521665124e-05, + "loss": 9.2615, + "step": 69220 + }, + { + "epoch": 0.3457264850558066, + "grad_norm": 0.09087418764829636, + "learning_rate": 1.967733860672357e-05, + "loss": 9.2717, + "step": 69230 + }, + { + "epoch": 0.3457764238807461, + "grad_norm": 0.09687705338001251, + "learning_rate": 1.9675836691782024e-05, + "loss": 9.2599, + "step": 69240 + }, + { + "epoch": 0.3458263627056855, + "grad_norm": 0.09234501421451569, + "learning_rate": 1.967433477684047e-05, + "loss": 9.2586, + "step": 69250 + }, + { + "epoch": 0.345876301530625, + "grad_norm": 0.09087526053190231, + "learning_rate": 1.9672832861898924e-05, + "loss": 9.2582, + "step": 69260 + }, + { + "epoch": 0.3459262403555644, + "grad_norm": 0.0971473827958107, + "learning_rate": 1.967133094695737e-05, + "loss": 9.2627, + "step": 69270 + }, + { + "epoch": 0.3459761791805039, + "grad_norm": 0.0920579731464386, + "learning_rate": 1.9669829032015818e-05, + "loss": 9.2558, + "step": 69280 + }, + { + "epoch": 0.3460261180054433, + "grad_norm": 0.09076216071844101, + "learning_rate": 1.966832711707427e-05, + "loss": 9.2622, + "step": 69290 + }, + { + "epoch": 0.3460760568303828, + "grad_norm": 0.09444985538721085, + "learning_rate": 1.9666825202132718e-05, + "loss": 9.2538, + "step": 69300 + }, + { + "epoch": 0.3461259956553222, + "grad_norm": 0.08982237428426743, + "learning_rate": 1.9665323287191172e-05, + "loss": 9.257, + "step": 69310 + }, + { + "epoch": 0.3461759344802617, + "grad_norm": 0.09677401930093765, + "learning_rate": 1.966382137224962e-05, + "loss": 9.2609, + "step": 69320 + }, + { + "epoch": 0.3462258733052011, + "grad_norm": 0.09712191671133041, + "learning_rate": 1.9662319457308065e-05, + "loss": 9.2516, + "step": 69330 + }, + { + "epoch": 0.3462758121301406, + "grad_norm": 0.09230844676494598, + "learning_rate": 1.966081754236652e-05, + "loss": 9.2593, + "step": 69340 + }, + { + "epoch": 0.34632575095508, + "grad_norm": 0.09202716499567032, + "learning_rate": 1.9659315627424966e-05, + "loss": 9.2567, + "step": 69350 + }, + { + "epoch": 0.3463756897800195, + "grad_norm": 0.09389092773199081, + "learning_rate": 1.965781371248342e-05, + "loss": 9.2622, + "step": 69360 + }, + { + "epoch": 0.3464256286049589, + "grad_norm": 0.09576646238565445, + "learning_rate": 1.9656311797541866e-05, + "loss": 9.267, + "step": 69370 + }, + { + "epoch": 0.3464755674298984, + "grad_norm": 0.09165678918361664, + "learning_rate": 1.9654809882600316e-05, + "loss": 9.2661, + "step": 69380 + }, + { + "epoch": 0.3465255062548378, + "grad_norm": 0.09849521517753601, + "learning_rate": 1.9653307967658766e-05, + "loss": 9.2615, + "step": 69390 + }, + { + "epoch": 0.3465754450797773, + "grad_norm": 0.09328168630599976, + "learning_rate": 1.9651806052717213e-05, + "loss": 9.2594, + "step": 69400 + }, + { + "epoch": 0.3466253839047167, + "grad_norm": 0.09389086067676544, + "learning_rate": 1.9650304137775667e-05, + "loss": 9.2545, + "step": 69410 + }, + { + "epoch": 0.3466753227296562, + "grad_norm": 0.09465324133634567, + "learning_rate": 1.9648802222834114e-05, + "loss": 9.2589, + "step": 69420 + }, + { + "epoch": 0.3467252615545956, + "grad_norm": 0.09655381739139557, + "learning_rate": 1.9647300307892564e-05, + "loss": 9.2549, + "step": 69430 + }, + { + "epoch": 0.3467752003795351, + "grad_norm": 0.09776412695646286, + "learning_rate": 1.9645798392951014e-05, + "loss": 9.2511, + "step": 69440 + }, + { + "epoch": 0.3468251392044745, + "grad_norm": 0.09319782257080078, + "learning_rate": 1.964429647800946e-05, + "loss": 9.2605, + "step": 69450 + }, + { + "epoch": 0.346875078029414, + "grad_norm": 0.096680648624897, + "learning_rate": 1.9642794563067914e-05, + "loss": 9.2572, + "step": 69460 + }, + { + "epoch": 0.3469250168543534, + "grad_norm": 0.0916510671377182, + "learning_rate": 1.964129264812636e-05, + "loss": 9.2463, + "step": 69470 + }, + { + "epoch": 0.3469749556792929, + "grad_norm": 0.09772134572267532, + "learning_rate": 1.963979073318481e-05, + "loss": 9.2529, + "step": 69480 + }, + { + "epoch": 0.3470248945042323, + "grad_norm": 0.0930207371711731, + "learning_rate": 1.963828881824326e-05, + "loss": 9.2567, + "step": 69490 + }, + { + "epoch": 0.3470748333291718, + "grad_norm": 0.09252675622701645, + "learning_rate": 1.9636786903301708e-05, + "loss": 9.2588, + "step": 69500 + }, + { + "epoch": 0.3471247721541112, + "grad_norm": 0.09338805079460144, + "learning_rate": 1.9635284988360162e-05, + "loss": 9.2576, + "step": 69510 + }, + { + "epoch": 0.34717471097905067, + "grad_norm": 0.09176600724458694, + "learning_rate": 1.963378307341861e-05, + "loss": 18.6508, + "step": 69520 + }, + { + "epoch": 0.3472246498039901, + "grad_norm": 0.09212823212146759, + "learning_rate": 1.963228115847706e-05, + "loss": 15.6648, + "step": 69530 + }, + { + "epoch": 0.34727458862892957, + "grad_norm": 0.092359758913517, + "learning_rate": 1.963077924353551e-05, + "loss": 9.2493, + "step": 69540 + }, + { + "epoch": 0.347324527453869, + "grad_norm": 0.08757336437702179, + "learning_rate": 1.9629277328593956e-05, + "loss": 9.2656, + "step": 69550 + }, + { + "epoch": 0.34737446627880847, + "grad_norm": 0.08831485360860825, + "learning_rate": 1.962777541365241e-05, + "loss": 9.2628, + "step": 69560 + }, + { + "epoch": 0.3474244051037479, + "grad_norm": 0.09570340067148209, + "learning_rate": 1.9626273498710856e-05, + "loss": 9.2617, + "step": 69570 + }, + { + "epoch": 0.34747434392868737, + "grad_norm": 0.09006953239440918, + "learning_rate": 1.9624771583769306e-05, + "loss": 9.2511, + "step": 69580 + }, + { + "epoch": 0.3475242827536268, + "grad_norm": 0.09478635340929031, + "learning_rate": 1.9623269668827756e-05, + "loss": 9.268, + "step": 69590 + }, + { + "epoch": 0.34757422157856627, + "grad_norm": 0.09337896853685379, + "learning_rate": 1.9621767753886203e-05, + "loss": 9.2629, + "step": 69600 + }, + { + "epoch": 0.3476241604035057, + "grad_norm": 0.09267710894346237, + "learning_rate": 1.9620265838944657e-05, + "loss": 9.2478, + "step": 69610 + }, + { + "epoch": 0.34767409922844517, + "grad_norm": 0.09669796377420425, + "learning_rate": 1.9618763924003104e-05, + "loss": 9.2465, + "step": 69620 + }, + { + "epoch": 0.3477240380533846, + "grad_norm": 0.09430265426635742, + "learning_rate": 1.9617262009061554e-05, + "loss": 9.2617, + "step": 69630 + }, + { + "epoch": 0.34777397687832406, + "grad_norm": 0.09212590754032135, + "learning_rate": 1.9615760094120004e-05, + "loss": 9.2578, + "step": 69640 + }, + { + "epoch": 0.3478239157032635, + "grad_norm": 0.09378217160701752, + "learning_rate": 1.961425817917845e-05, + "loss": 9.2584, + "step": 69650 + }, + { + "epoch": 0.34787385452820296, + "grad_norm": 0.09014896303415298, + "learning_rate": 1.9612756264236904e-05, + "loss": 9.2596, + "step": 69660 + }, + { + "epoch": 0.3479237933531424, + "grad_norm": 0.0964147299528122, + "learning_rate": 1.961125434929535e-05, + "loss": 9.2503, + "step": 69670 + }, + { + "epoch": 0.34797373217808186, + "grad_norm": 0.09427673369646072, + "learning_rate": 1.96097524343538e-05, + "loss": 9.2688, + "step": 69680 + }, + { + "epoch": 0.3480236710030213, + "grad_norm": 0.09578701853752136, + "learning_rate": 1.960825051941225e-05, + "loss": 9.2582, + "step": 69690 + }, + { + "epoch": 0.34807360982796076, + "grad_norm": 0.09862515330314636, + "learning_rate": 1.96067486044707e-05, + "loss": 9.2589, + "step": 69700 + }, + { + "epoch": 0.3481235486529002, + "grad_norm": 0.09679950028657913, + "learning_rate": 1.9605246689529152e-05, + "loss": 9.2639, + "step": 69710 + }, + { + "epoch": 0.34817348747783966, + "grad_norm": 0.09329384565353394, + "learning_rate": 1.96037447745876e-05, + "loss": 9.2534, + "step": 69720 + }, + { + "epoch": 0.3482234263027791, + "grad_norm": 0.09050307422876358, + "learning_rate": 1.960224285964605e-05, + "loss": 9.258, + "step": 69730 + }, + { + "epoch": 0.34827336512771856, + "grad_norm": 0.08931013941764832, + "learning_rate": 1.96007409447045e-05, + "loss": 9.2506, + "step": 69740 + }, + { + "epoch": 0.348323303952658, + "grad_norm": 0.09590967744588852, + "learning_rate": 1.959923902976295e-05, + "loss": 9.2381, + "step": 69750 + }, + { + "epoch": 0.34837324277759746, + "grad_norm": 0.09243065118789673, + "learning_rate": 1.95977371148214e-05, + "loss": 9.2571, + "step": 69760 + }, + { + "epoch": 0.3484231816025369, + "grad_norm": 0.10139338672161102, + "learning_rate": 1.9596235199879846e-05, + "loss": 9.2542, + "step": 69770 + }, + { + "epoch": 0.34847312042747636, + "grad_norm": 0.09506342560052872, + "learning_rate": 1.9594733284938296e-05, + "loss": 9.2574, + "step": 69780 + }, + { + "epoch": 0.3485230592524158, + "grad_norm": 0.09532468020915985, + "learning_rate": 1.9593231369996746e-05, + "loss": 9.2527, + "step": 69790 + }, + { + "epoch": 0.34857299807735526, + "grad_norm": 0.09643257409334183, + "learning_rate": 1.9591729455055197e-05, + "loss": 9.2545, + "step": 69800 + }, + { + "epoch": 0.3486229369022947, + "grad_norm": 0.09859835356473923, + "learning_rate": 1.9590227540113647e-05, + "loss": 9.2565, + "step": 69810 + }, + { + "epoch": 0.34867287572723416, + "grad_norm": 0.09410768747329712, + "learning_rate": 1.9588725625172094e-05, + "loss": 9.2615, + "step": 69820 + }, + { + "epoch": 0.3487228145521736, + "grad_norm": 0.0925586149096489, + "learning_rate": 1.9587223710230544e-05, + "loss": 9.2492, + "step": 69830 + }, + { + "epoch": 0.34877275337711305, + "grad_norm": 0.09222124516963959, + "learning_rate": 1.9585721795288994e-05, + "loss": 9.2527, + "step": 69840 + }, + { + "epoch": 0.3488226922020525, + "grad_norm": 0.08799050003290176, + "learning_rate": 1.9584219880347444e-05, + "loss": 9.2572, + "step": 69850 + }, + { + "epoch": 0.34887263102699195, + "grad_norm": 0.09063519537448883, + "learning_rate": 1.9582717965405894e-05, + "loss": 9.2635, + "step": 69860 + }, + { + "epoch": 0.3489225698519314, + "grad_norm": 0.09138821065425873, + "learning_rate": 1.958121605046434e-05, + "loss": 9.2414, + "step": 69870 + }, + { + "epoch": 0.34897250867687085, + "grad_norm": 0.0935179740190506, + "learning_rate": 1.957971413552279e-05, + "loss": 9.2509, + "step": 69880 + }, + { + "epoch": 0.3490224475018103, + "grad_norm": 0.09351733326911926, + "learning_rate": 1.957821222058124e-05, + "loss": 9.2526, + "step": 69890 + }, + { + "epoch": 0.34907238632674975, + "grad_norm": 0.09050756692886353, + "learning_rate": 1.957671030563969e-05, + "loss": 9.2616, + "step": 69900 + }, + { + "epoch": 0.3491223251516892, + "grad_norm": 0.09149663895368576, + "learning_rate": 1.9575208390698142e-05, + "loss": 9.2517, + "step": 69910 + }, + { + "epoch": 0.34917226397662865, + "grad_norm": 0.09362272918224335, + "learning_rate": 1.957370647575659e-05, + "loss": 9.2577, + "step": 69920 + }, + { + "epoch": 0.34922220280156807, + "grad_norm": 0.09068772941827774, + "learning_rate": 1.957220456081504e-05, + "loss": 9.2577, + "step": 69930 + }, + { + "epoch": 0.34927214162650755, + "grad_norm": 0.08827726542949677, + "learning_rate": 1.957070264587349e-05, + "loss": 9.2646, + "step": 69940 + }, + { + "epoch": 0.34932208045144697, + "grad_norm": 0.09964391589164734, + "learning_rate": 1.956920073093194e-05, + "loss": 9.2494, + "step": 69950 + }, + { + "epoch": 0.34937201927638645, + "grad_norm": 0.09400717914104462, + "learning_rate": 1.956769881599039e-05, + "loss": 9.2437, + "step": 69960 + }, + { + "epoch": 0.34942195810132587, + "grad_norm": 0.0930166095495224, + "learning_rate": 1.9566196901048836e-05, + "loss": 9.2528, + "step": 69970 + }, + { + "epoch": 0.34947189692626535, + "grad_norm": 0.09116925299167633, + "learning_rate": 1.9564694986107286e-05, + "loss": 9.2635, + "step": 69980 + }, + { + "epoch": 0.34952183575120477, + "grad_norm": 0.09678564965724945, + "learning_rate": 1.9563193071165737e-05, + "loss": 9.2448, + "step": 69990 + }, + { + "epoch": 0.34957177457614425, + "grad_norm": 0.09541235119104385, + "learning_rate": 1.9561691156224187e-05, + "loss": 9.2439, + "step": 70000 + }, + { + "epoch": 0.34962171340108367, + "grad_norm": 0.09007764607667923, + "learning_rate": 1.9560189241282637e-05, + "loss": 9.2601, + "step": 70010 + }, + { + "epoch": 0.34967165222602314, + "grad_norm": 0.09365357458591461, + "learning_rate": 1.9558687326341087e-05, + "loss": 9.2501, + "step": 70020 + }, + { + "epoch": 0.34972159105096257, + "grad_norm": 0.09102270007133484, + "learning_rate": 1.9557185411399534e-05, + "loss": 9.2509, + "step": 70030 + }, + { + "epoch": 0.34977152987590204, + "grad_norm": 0.09124330431222916, + "learning_rate": 1.9555683496457984e-05, + "loss": 9.2493, + "step": 70040 + }, + { + "epoch": 0.34982146870084146, + "grad_norm": 0.09593762457370758, + "learning_rate": 1.9554181581516434e-05, + "loss": 9.2587, + "step": 70050 + }, + { + "epoch": 0.34987140752578094, + "grad_norm": 0.09397412836551666, + "learning_rate": 1.9552679666574884e-05, + "loss": 9.2518, + "step": 70060 + }, + { + "epoch": 0.34992134635072036, + "grad_norm": 0.09382015466690063, + "learning_rate": 1.9551177751633335e-05, + "loss": 9.2479, + "step": 70070 + }, + { + "epoch": 0.34997128517565984, + "grad_norm": 0.09169856458902359, + "learning_rate": 1.954967583669178e-05, + "loss": 9.2492, + "step": 70080 + }, + { + "epoch": 0.35002122400059926, + "grad_norm": 0.09524683654308319, + "learning_rate": 1.954817392175023e-05, + "loss": 9.2446, + "step": 70090 + }, + { + "epoch": 0.35007116282553874, + "grad_norm": 0.09151513129472733, + "learning_rate": 1.9546672006808682e-05, + "loss": 9.26, + "step": 70100 + }, + { + "epoch": 0.35012110165047816, + "grad_norm": 0.09831450879573822, + "learning_rate": 1.9545170091867132e-05, + "loss": 9.2467, + "step": 70110 + }, + { + "epoch": 0.35017104047541764, + "grad_norm": 0.08933068811893463, + "learning_rate": 1.9543668176925582e-05, + "loss": 9.2457, + "step": 70120 + }, + { + "epoch": 0.35022097930035706, + "grad_norm": 0.09220155328512192, + "learning_rate": 1.954216626198403e-05, + "loss": 9.2452, + "step": 70130 + }, + { + "epoch": 0.35027091812529654, + "grad_norm": 0.09587565809488297, + "learning_rate": 1.954066434704248e-05, + "loss": 9.2367, + "step": 70140 + }, + { + "epoch": 0.35032085695023596, + "grad_norm": 0.09450395405292511, + "learning_rate": 1.953916243210093e-05, + "loss": 9.2412, + "step": 70150 + }, + { + "epoch": 0.35037079577517544, + "grad_norm": 0.0945536196231842, + "learning_rate": 1.953766051715938e-05, + "loss": 9.2447, + "step": 70160 + }, + { + "epoch": 0.35042073460011486, + "grad_norm": 0.09448517858982086, + "learning_rate": 1.953615860221783e-05, + "loss": 9.2549, + "step": 70170 + }, + { + "epoch": 0.35047067342505434, + "grad_norm": 0.09346494823694229, + "learning_rate": 1.9534656687276276e-05, + "loss": 9.2633, + "step": 70180 + }, + { + "epoch": 0.35052061224999376, + "grad_norm": 0.0924750491976738, + "learning_rate": 1.9533154772334727e-05, + "loss": 9.243, + "step": 70190 + }, + { + "epoch": 0.35057055107493323, + "grad_norm": 0.09105124324560165, + "learning_rate": 1.9531652857393177e-05, + "loss": 9.2572, + "step": 70200 + }, + { + "epoch": 0.35062048989987266, + "grad_norm": 0.09556052833795547, + "learning_rate": 1.9530150942451627e-05, + "loss": 9.2382, + "step": 70210 + }, + { + "epoch": 0.3506704287248121, + "grad_norm": 0.09211760014295578, + "learning_rate": 1.9528649027510077e-05, + "loss": 9.2363, + "step": 70220 + }, + { + "epoch": 0.35072036754975155, + "grad_norm": 0.09628025442361832, + "learning_rate": 1.9527147112568524e-05, + "loss": 9.249, + "step": 70230 + }, + { + "epoch": 0.350770306374691, + "grad_norm": 0.09602992981672287, + "learning_rate": 1.9525645197626974e-05, + "loss": 9.2451, + "step": 70240 + }, + { + "epoch": 0.35082024519963045, + "grad_norm": 0.09380794316530228, + "learning_rate": 1.9524143282685424e-05, + "loss": 9.2573, + "step": 70250 + }, + { + "epoch": 0.3508701840245699, + "grad_norm": 0.09438929706811905, + "learning_rate": 1.9522641367743874e-05, + "loss": 9.2478, + "step": 70260 + }, + { + "epoch": 0.35092012284950935, + "grad_norm": 0.09579429030418396, + "learning_rate": 1.9521139452802325e-05, + "loss": 9.2498, + "step": 70270 + }, + { + "epoch": 0.3509700616744488, + "grad_norm": 0.0962810218334198, + "learning_rate": 1.951963753786077e-05, + "loss": 9.2395, + "step": 70280 + }, + { + "epoch": 0.35102000049938825, + "grad_norm": 0.0956900343298912, + "learning_rate": 1.951813562291922e-05, + "loss": 9.2403, + "step": 70290 + }, + { + "epoch": 0.3510699393243277, + "grad_norm": 0.09246978163719177, + "learning_rate": 1.9516633707977672e-05, + "loss": 9.2375, + "step": 70300 + }, + { + "epoch": 0.35111987814926715, + "grad_norm": 0.09685346484184265, + "learning_rate": 1.9515131793036122e-05, + "loss": 9.2514, + "step": 70310 + }, + { + "epoch": 0.3511698169742066, + "grad_norm": 0.09638645499944687, + "learning_rate": 1.9513629878094572e-05, + "loss": 9.2439, + "step": 70320 + }, + { + "epoch": 0.35121975579914605, + "grad_norm": 0.09618096798658371, + "learning_rate": 1.951212796315302e-05, + "loss": 9.2504, + "step": 70330 + }, + { + "epoch": 0.35126969462408547, + "grad_norm": 0.09635983407497406, + "learning_rate": 1.9510626048211472e-05, + "loss": 9.2449, + "step": 70340 + }, + { + "epoch": 0.35131963344902495, + "grad_norm": 0.09640626609325409, + "learning_rate": 1.950912413326992e-05, + "loss": 9.2455, + "step": 70350 + }, + { + "epoch": 0.35136957227396437, + "grad_norm": 0.08988912403583527, + "learning_rate": 1.950762221832837e-05, + "loss": 9.2467, + "step": 70360 + }, + { + "epoch": 0.35141951109890385, + "grad_norm": 0.09091424196958542, + "learning_rate": 1.950612030338682e-05, + "loss": 9.2542, + "step": 70370 + }, + { + "epoch": 0.35146944992384327, + "grad_norm": 0.09256154298782349, + "learning_rate": 1.9504618388445266e-05, + "loss": 9.2376, + "step": 70380 + }, + { + "epoch": 0.35151938874878275, + "grad_norm": 0.09144950658082962, + "learning_rate": 1.950311647350372e-05, + "loss": 9.2372, + "step": 70390 + }, + { + "epoch": 0.35156932757372217, + "grad_norm": 0.09599640965461731, + "learning_rate": 1.9501614558562167e-05, + "loss": 9.2434, + "step": 70400 + }, + { + "epoch": 0.35161926639866165, + "grad_norm": 0.0928352028131485, + "learning_rate": 1.9500112643620617e-05, + "loss": 9.253, + "step": 70410 + }, + { + "epoch": 0.35166920522360107, + "grad_norm": 0.09690866619348526, + "learning_rate": 1.9498610728679067e-05, + "loss": 9.2466, + "step": 70420 + }, + { + "epoch": 0.35171914404854054, + "grad_norm": 0.09045714139938354, + "learning_rate": 1.9497108813737514e-05, + "loss": 9.2495, + "step": 70430 + }, + { + "epoch": 0.35176908287347997, + "grad_norm": 0.09313403815031052, + "learning_rate": 1.9495606898795967e-05, + "loss": 9.2456, + "step": 70440 + }, + { + "epoch": 0.35181902169841944, + "grad_norm": 0.0966280922293663, + "learning_rate": 1.9494104983854414e-05, + "loss": 9.2418, + "step": 70450 + }, + { + "epoch": 0.35186896052335886, + "grad_norm": 0.09932943433523178, + "learning_rate": 1.9492603068912864e-05, + "loss": 9.24, + "step": 70460 + }, + { + "epoch": 0.35191889934829834, + "grad_norm": 0.10107915848493576, + "learning_rate": 1.9491101153971315e-05, + "loss": 9.2464, + "step": 70470 + }, + { + "epoch": 0.35196883817323776, + "grad_norm": 0.09643235802650452, + "learning_rate": 1.948959923902976e-05, + "loss": 9.2393, + "step": 70480 + }, + { + "epoch": 0.35201877699817724, + "grad_norm": 0.09176507592201233, + "learning_rate": 1.9488097324088215e-05, + "loss": 9.2509, + "step": 70490 + }, + { + "epoch": 0.35206871582311666, + "grad_norm": 0.09726466983556747, + "learning_rate": 1.9486595409146662e-05, + "loss": 9.235, + "step": 70500 + }, + { + "epoch": 0.35211865464805614, + "grad_norm": 0.09814643114805222, + "learning_rate": 1.9485093494205112e-05, + "loss": 9.2476, + "step": 70510 + }, + { + "epoch": 0.35216859347299556, + "grad_norm": 0.09261635690927505, + "learning_rate": 1.9483591579263562e-05, + "loss": 9.2453, + "step": 70520 + }, + { + "epoch": 0.35221853229793504, + "grad_norm": 0.09454991668462753, + "learning_rate": 1.948208966432201e-05, + "loss": 9.2552, + "step": 70530 + }, + { + "epoch": 0.35226847112287446, + "grad_norm": 0.09696483612060547, + "learning_rate": 1.9480587749380462e-05, + "loss": 9.2484, + "step": 70540 + }, + { + "epoch": 0.35231840994781394, + "grad_norm": 0.09152613580226898, + "learning_rate": 1.947908583443891e-05, + "loss": 9.249, + "step": 70550 + }, + { + "epoch": 0.35236834877275336, + "grad_norm": 0.09792204946279526, + "learning_rate": 1.947758391949736e-05, + "loss": 9.2417, + "step": 70560 + }, + { + "epoch": 0.35241828759769284, + "grad_norm": 0.09588468819856644, + "learning_rate": 1.947608200455581e-05, + "loss": 9.2381, + "step": 70570 + }, + { + "epoch": 0.35246822642263226, + "grad_norm": 0.09020346403121948, + "learning_rate": 1.9474580089614256e-05, + "loss": 9.2366, + "step": 70580 + }, + { + "epoch": 0.35251816524757174, + "grad_norm": 0.08940046280622482, + "learning_rate": 1.947307817467271e-05, + "loss": 9.2436, + "step": 70590 + }, + { + "epoch": 0.35256810407251116, + "grad_norm": 0.09213332086801529, + "learning_rate": 1.9471576259731157e-05, + "loss": 9.2367, + "step": 70600 + }, + { + "epoch": 0.35261804289745063, + "grad_norm": 0.09027668833732605, + "learning_rate": 1.9470074344789607e-05, + "loss": 9.242, + "step": 70610 + }, + { + "epoch": 0.35266798172239006, + "grad_norm": 0.0947854295372963, + "learning_rate": 1.9468572429848057e-05, + "loss": 9.2426, + "step": 70620 + }, + { + "epoch": 0.35271792054732953, + "grad_norm": 0.09516201168298721, + "learning_rate": 1.9467070514906504e-05, + "loss": 9.2549, + "step": 70630 + }, + { + "epoch": 0.35276785937226895, + "grad_norm": 0.09099344164133072, + "learning_rate": 1.9465568599964957e-05, + "loss": 9.2451, + "step": 70640 + }, + { + "epoch": 0.35281779819720843, + "grad_norm": 0.09635750204324722, + "learning_rate": 1.9464066685023404e-05, + "loss": 9.2471, + "step": 70650 + }, + { + "epoch": 0.35286773702214785, + "grad_norm": 0.0907939150929451, + "learning_rate": 1.9462564770081854e-05, + "loss": 9.2428, + "step": 70660 + }, + { + "epoch": 0.35291767584708733, + "grad_norm": 0.0959140956401825, + "learning_rate": 1.9461062855140305e-05, + "loss": 9.244, + "step": 70670 + }, + { + "epoch": 0.35296761467202675, + "grad_norm": 0.09400851279497147, + "learning_rate": 1.945956094019875e-05, + "loss": 9.2519, + "step": 70680 + }, + { + "epoch": 0.35301755349696623, + "grad_norm": 0.09195028990507126, + "learning_rate": 1.9458059025257205e-05, + "loss": 9.244, + "step": 70690 + }, + { + "epoch": 0.35306749232190565, + "grad_norm": 0.09665300697088242, + "learning_rate": 1.9456557110315652e-05, + "loss": 9.2462, + "step": 70700 + }, + { + "epoch": 0.35311743114684513, + "grad_norm": 0.09066880494356155, + "learning_rate": 1.9455055195374105e-05, + "loss": 9.2491, + "step": 70710 + }, + { + "epoch": 0.35316736997178455, + "grad_norm": 0.0936349481344223, + "learning_rate": 1.9453553280432552e-05, + "loss": 9.2481, + "step": 70720 + }, + { + "epoch": 0.353217308796724, + "grad_norm": 0.0960550457239151, + "learning_rate": 1.9452051365491e-05, + "loss": 9.2365, + "step": 70730 + }, + { + "epoch": 0.35326724762166345, + "grad_norm": 0.09481688588857651, + "learning_rate": 1.9450549450549452e-05, + "loss": 9.2371, + "step": 70740 + }, + { + "epoch": 0.3533171864466029, + "grad_norm": 0.099585622549057, + "learning_rate": 1.94490475356079e-05, + "loss": 9.241, + "step": 70750 + }, + { + "epoch": 0.35336712527154235, + "grad_norm": 0.09653332084417343, + "learning_rate": 1.9447545620666353e-05, + "loss": 9.2421, + "step": 70760 + }, + { + "epoch": 0.3534170640964818, + "grad_norm": 0.09810932725667953, + "learning_rate": 1.94460437057248e-05, + "loss": 9.2449, + "step": 70770 + }, + { + "epoch": 0.35346700292142125, + "grad_norm": 0.09474793076515198, + "learning_rate": 1.9444541790783246e-05, + "loss": 9.2413, + "step": 70780 + }, + { + "epoch": 0.3535169417463607, + "grad_norm": 0.09423346817493439, + "learning_rate": 1.94430398758417e-05, + "loss": 9.244, + "step": 70790 + }, + { + "epoch": 0.35356688057130015, + "grad_norm": 0.09000688046216965, + "learning_rate": 1.9441537960900147e-05, + "loss": 9.2306, + "step": 70800 + }, + { + "epoch": 0.3536168193962396, + "grad_norm": 0.09328895062208176, + "learning_rate": 1.94400360459586e-05, + "loss": 9.2454, + "step": 70810 + }, + { + "epoch": 0.35366675822117905, + "grad_norm": 0.09733468294143677, + "learning_rate": 1.9438534131017047e-05, + "loss": 9.2466, + "step": 70820 + }, + { + "epoch": 0.3537166970461185, + "grad_norm": 0.09681283682584763, + "learning_rate": 1.9437032216075494e-05, + "loss": 9.2315, + "step": 70830 + }, + { + "epoch": 0.35376663587105794, + "grad_norm": 0.09605410695075989, + "learning_rate": 1.9435530301133947e-05, + "loss": 9.2368, + "step": 70840 + }, + { + "epoch": 0.3538165746959974, + "grad_norm": 0.10018260776996613, + "learning_rate": 1.9434028386192394e-05, + "loss": 9.2381, + "step": 70850 + }, + { + "epoch": 0.35386651352093684, + "grad_norm": 0.09301795065402985, + "learning_rate": 1.9432526471250848e-05, + "loss": 9.2454, + "step": 70860 + }, + { + "epoch": 0.3539164523458763, + "grad_norm": 0.09586118161678314, + "learning_rate": 1.9431024556309295e-05, + "loss": 9.2435, + "step": 70870 + }, + { + "epoch": 0.35396639117081574, + "grad_norm": 0.09101895242929459, + "learning_rate": 1.942952264136774e-05, + "loss": 9.2297, + "step": 70880 + }, + { + "epoch": 0.3540163299957552, + "grad_norm": 0.08979874104261398, + "learning_rate": 1.9428020726426195e-05, + "loss": 9.2355, + "step": 70890 + }, + { + "epoch": 0.35406626882069464, + "grad_norm": 0.09098219126462936, + "learning_rate": 1.9426518811484642e-05, + "loss": 9.2347, + "step": 70900 + }, + { + "epoch": 0.3541162076456341, + "grad_norm": 0.09509828686714172, + "learning_rate": 1.9425016896543095e-05, + "loss": 9.2406, + "step": 70910 + }, + { + "epoch": 0.35416614647057354, + "grad_norm": 0.09496752172708511, + "learning_rate": 1.9423514981601542e-05, + "loss": 9.246, + "step": 70920 + }, + { + "epoch": 0.354216085295513, + "grad_norm": 0.09700390696525574, + "learning_rate": 1.942201306665999e-05, + "loss": 9.2376, + "step": 70930 + }, + { + "epoch": 0.35426602412045244, + "grad_norm": 0.09865306317806244, + "learning_rate": 1.9420511151718442e-05, + "loss": 9.2343, + "step": 70940 + }, + { + "epoch": 0.3543159629453919, + "grad_norm": 0.09420738369226456, + "learning_rate": 1.941900923677689e-05, + "loss": 9.2405, + "step": 70950 + }, + { + "epoch": 0.35436590177033134, + "grad_norm": 0.09997241944074631, + "learning_rate": 1.9417507321835343e-05, + "loss": 9.2398, + "step": 70960 + }, + { + "epoch": 0.3544158405952708, + "grad_norm": 0.09588761627674103, + "learning_rate": 1.941600540689379e-05, + "loss": 9.2359, + "step": 70970 + }, + { + "epoch": 0.35446577942021024, + "grad_norm": 0.09791746735572815, + "learning_rate": 1.9414503491952236e-05, + "loss": 9.2349, + "step": 70980 + }, + { + "epoch": 0.3545157182451497, + "grad_norm": 0.0913577750325203, + "learning_rate": 1.941300157701069e-05, + "loss": 9.238, + "step": 70990 + }, + { + "epoch": 0.35456565707008914, + "grad_norm": 0.08983467519283295, + "learning_rate": 1.9411499662069137e-05, + "loss": 9.2347, + "step": 71000 + }, + { + "epoch": 0.3546155958950286, + "grad_norm": 0.09971988946199417, + "learning_rate": 1.940999774712759e-05, + "loss": 9.2341, + "step": 71010 + }, + { + "epoch": 0.35466553471996803, + "grad_norm": 0.0975378155708313, + "learning_rate": 1.9408495832186037e-05, + "loss": 9.2304, + "step": 71020 + }, + { + "epoch": 0.3547154735449075, + "grad_norm": 0.09106051921844482, + "learning_rate": 1.9406993917244487e-05, + "loss": 9.2377, + "step": 71030 + }, + { + "epoch": 0.35476541236984693, + "grad_norm": 0.09189257025718689, + "learning_rate": 1.9405492002302937e-05, + "loss": 9.2449, + "step": 71040 + }, + { + "epoch": 0.3548153511947864, + "grad_norm": 0.09312552958726883, + "learning_rate": 1.9403990087361384e-05, + "loss": 9.2376, + "step": 71050 + }, + { + "epoch": 0.35486529001972583, + "grad_norm": 0.09094572812318802, + "learning_rate": 1.9402488172419838e-05, + "loss": 9.2444, + "step": 71060 + }, + { + "epoch": 0.3549152288446653, + "grad_norm": 0.09242424368858337, + "learning_rate": 1.9400986257478285e-05, + "loss": 9.2318, + "step": 71070 + }, + { + "epoch": 0.35496516766960473, + "grad_norm": 0.09304199367761612, + "learning_rate": 1.9399484342536735e-05, + "loss": 9.25, + "step": 71080 + }, + { + "epoch": 0.3550151064945442, + "grad_norm": 0.09726741909980774, + "learning_rate": 1.9397982427595185e-05, + "loss": 9.2376, + "step": 71090 + }, + { + "epoch": 0.35506504531948363, + "grad_norm": 0.09090828150510788, + "learning_rate": 1.9396480512653632e-05, + "loss": 9.2332, + "step": 71100 + }, + { + "epoch": 0.3551149841444231, + "grad_norm": 0.09156540036201477, + "learning_rate": 1.9394978597712085e-05, + "loss": 9.2448, + "step": 71110 + }, + { + "epoch": 0.35516492296936253, + "grad_norm": 0.0888853594660759, + "learning_rate": 1.9393476682770532e-05, + "loss": 9.2353, + "step": 71120 + }, + { + "epoch": 0.355214861794302, + "grad_norm": 0.09629367291927338, + "learning_rate": 1.9391974767828982e-05, + "loss": 9.2288, + "step": 71130 + }, + { + "epoch": 0.3552648006192414, + "grad_norm": 0.09092582762241364, + "learning_rate": 1.9390472852887432e-05, + "loss": 9.2368, + "step": 71140 + }, + { + "epoch": 0.3553147394441809, + "grad_norm": 0.09426487237215042, + "learning_rate": 1.938897093794588e-05, + "loss": 9.2381, + "step": 71150 + }, + { + "epoch": 0.3553646782691203, + "grad_norm": 0.09681005775928497, + "learning_rate": 1.9387469023004333e-05, + "loss": 9.2227, + "step": 71160 + }, + { + "epoch": 0.3554146170940598, + "grad_norm": 0.0895727351307869, + "learning_rate": 1.938596710806278e-05, + "loss": 9.2364, + "step": 71170 + }, + { + "epoch": 0.3554645559189992, + "grad_norm": 0.09309788048267365, + "learning_rate": 1.938446519312123e-05, + "loss": 9.2421, + "step": 71180 + }, + { + "epoch": 0.3555144947439387, + "grad_norm": 0.09737984091043472, + "learning_rate": 1.938296327817968e-05, + "loss": 9.236, + "step": 71190 + }, + { + "epoch": 0.3555644335688781, + "grad_norm": 0.09271812438964844, + "learning_rate": 1.9381461363238127e-05, + "loss": 9.23, + "step": 71200 + }, + { + "epoch": 0.35561437239381755, + "grad_norm": 0.09458890557289124, + "learning_rate": 1.937995944829658e-05, + "loss": 9.2267, + "step": 71210 + }, + { + "epoch": 0.355664311218757, + "grad_norm": 0.09335251152515411, + "learning_rate": 1.9378457533355027e-05, + "loss": 9.2298, + "step": 71220 + }, + { + "epoch": 0.35571425004369644, + "grad_norm": 0.09530318528413773, + "learning_rate": 1.9376955618413477e-05, + "loss": 9.2515, + "step": 71230 + }, + { + "epoch": 0.3557641888686359, + "grad_norm": 0.0916392132639885, + "learning_rate": 1.9375453703471927e-05, + "loss": 9.2341, + "step": 71240 + }, + { + "epoch": 0.35581412769357534, + "grad_norm": 0.09264536947011948, + "learning_rate": 1.9373951788530374e-05, + "loss": 9.2404, + "step": 71250 + }, + { + "epoch": 0.3558640665185148, + "grad_norm": 0.09547609835863113, + "learning_rate": 1.9372449873588828e-05, + "loss": 9.2349, + "step": 71260 + }, + { + "epoch": 0.35591400534345424, + "grad_norm": 0.099251389503479, + "learning_rate": 1.9370947958647275e-05, + "loss": 9.234, + "step": 71270 + }, + { + "epoch": 0.3559639441683937, + "grad_norm": 0.08727670460939407, + "learning_rate": 1.9369446043705725e-05, + "loss": 9.2337, + "step": 71280 + }, + { + "epoch": 0.35601388299333314, + "grad_norm": 0.09636933356523514, + "learning_rate": 1.9367944128764175e-05, + "loss": 9.2463, + "step": 71290 + }, + { + "epoch": 0.3560638218182726, + "grad_norm": 0.09374624490737915, + "learning_rate": 1.9366442213822622e-05, + "loss": 9.2331, + "step": 71300 + }, + { + "epoch": 0.35611376064321204, + "grad_norm": 0.08606693148612976, + "learning_rate": 1.9364940298881075e-05, + "loss": 9.22, + "step": 71310 + }, + { + "epoch": 0.3561636994681515, + "grad_norm": 0.09905658662319183, + "learning_rate": 1.9363438383939522e-05, + "loss": 9.2281, + "step": 71320 + }, + { + "epoch": 0.35621363829309094, + "grad_norm": 0.09433192014694214, + "learning_rate": 1.9361936468997972e-05, + "loss": 9.2424, + "step": 71330 + }, + { + "epoch": 0.3562635771180304, + "grad_norm": 0.09173821657896042, + "learning_rate": 1.9360434554056423e-05, + "loss": 9.2255, + "step": 71340 + }, + { + "epoch": 0.35631351594296984, + "grad_norm": 0.09594710916280746, + "learning_rate": 1.9358932639114873e-05, + "loss": 9.2267, + "step": 71350 + }, + { + "epoch": 0.3563634547679093, + "grad_norm": 0.09441184997558594, + "learning_rate": 1.9357430724173323e-05, + "loss": 9.2384, + "step": 71360 + }, + { + "epoch": 0.35641339359284874, + "grad_norm": 0.09391657263040543, + "learning_rate": 1.935592880923177e-05, + "loss": 9.2407, + "step": 71370 + }, + { + "epoch": 0.3564633324177882, + "grad_norm": 0.09173735976219177, + "learning_rate": 1.935442689429022e-05, + "loss": 9.2305, + "step": 71380 + }, + { + "epoch": 0.35651327124272764, + "grad_norm": 0.09266167879104614, + "learning_rate": 1.935292497934867e-05, + "loss": 9.2273, + "step": 71390 + }, + { + "epoch": 0.3565632100676671, + "grad_norm": 0.09733938425779343, + "learning_rate": 1.935142306440712e-05, + "loss": 9.222, + "step": 71400 + }, + { + "epoch": 0.35661314889260654, + "grad_norm": 0.09893626719713211, + "learning_rate": 1.934992114946557e-05, + "loss": 9.2267, + "step": 71410 + }, + { + "epoch": 0.356663087717546, + "grad_norm": 0.08972984552383423, + "learning_rate": 1.9348419234524017e-05, + "loss": 9.2272, + "step": 71420 + }, + { + "epoch": 0.35671302654248543, + "grad_norm": 0.09227081388235092, + "learning_rate": 1.9346917319582467e-05, + "loss": 9.2393, + "step": 71430 + }, + { + "epoch": 0.3567629653674249, + "grad_norm": 0.09158577024936676, + "learning_rate": 1.9345415404640918e-05, + "loss": 9.2323, + "step": 71440 + }, + { + "epoch": 0.35681290419236433, + "grad_norm": 0.09240981191396713, + "learning_rate": 1.9343913489699368e-05, + "loss": 9.2337, + "step": 71450 + }, + { + "epoch": 0.3568628430173038, + "grad_norm": 0.09477624297142029, + "learning_rate": 1.9342411574757818e-05, + "loss": 9.2299, + "step": 71460 + }, + { + "epoch": 0.35691278184224323, + "grad_norm": 0.0945139229297638, + "learning_rate": 1.9340909659816265e-05, + "loss": 9.2225, + "step": 71470 + }, + { + "epoch": 0.3569627206671827, + "grad_norm": 0.09509062021970749, + "learning_rate": 1.9339407744874715e-05, + "loss": 9.2309, + "step": 71480 + }, + { + "epoch": 0.35701265949212213, + "grad_norm": 0.08713449537754059, + "learning_rate": 1.9337905829933165e-05, + "loss": 9.2212, + "step": 71490 + }, + { + "epoch": 0.3570625983170616, + "grad_norm": 0.10225412994623184, + "learning_rate": 1.9336403914991615e-05, + "loss": 9.2294, + "step": 71500 + }, + { + "epoch": 0.35711253714200103, + "grad_norm": 0.09623237699270248, + "learning_rate": 1.9334902000050065e-05, + "loss": 9.2218, + "step": 71510 + }, + { + "epoch": 0.3571624759669405, + "grad_norm": 0.09265895932912827, + "learning_rate": 1.9333400085108512e-05, + "loss": 9.2148, + "step": 71520 + }, + { + "epoch": 0.35721241479187993, + "grad_norm": 0.09351124614477158, + "learning_rate": 1.9331898170166962e-05, + "loss": 9.2253, + "step": 71530 + }, + { + "epoch": 0.3572623536168194, + "grad_norm": 0.09479804337024689, + "learning_rate": 1.9330396255225413e-05, + "loss": 9.2182, + "step": 71540 + }, + { + "epoch": 0.3573122924417588, + "grad_norm": 0.09572158753871918, + "learning_rate": 1.9328894340283863e-05, + "loss": 9.2224, + "step": 71550 + }, + { + "epoch": 0.3573622312666983, + "grad_norm": 0.09059182554483414, + "learning_rate": 1.9327392425342313e-05, + "loss": 9.2344, + "step": 71560 + }, + { + "epoch": 0.3574121700916377, + "grad_norm": 0.09375757724046707, + "learning_rate": 1.932589051040076e-05, + "loss": 9.2307, + "step": 71570 + }, + { + "epoch": 0.3574621089165772, + "grad_norm": 0.08641654253005981, + "learning_rate": 1.932438859545921e-05, + "loss": 9.2391, + "step": 71580 + }, + { + "epoch": 0.3575120477415166, + "grad_norm": 0.09109997749328613, + "learning_rate": 1.932288668051766e-05, + "loss": 9.2417, + "step": 71590 + }, + { + "epoch": 0.3575619865664561, + "grad_norm": 0.1009616032242775, + "learning_rate": 1.932138476557611e-05, + "loss": 9.2308, + "step": 71600 + }, + { + "epoch": 0.3576119253913955, + "grad_norm": 0.09245188534259796, + "learning_rate": 1.931988285063456e-05, + "loss": 9.2251, + "step": 71610 + }, + { + "epoch": 0.357661864216335, + "grad_norm": 0.09447357058525085, + "learning_rate": 1.9318380935693007e-05, + "loss": 9.2347, + "step": 71620 + }, + { + "epoch": 0.3577118030412744, + "grad_norm": 0.0957716852426529, + "learning_rate": 1.9316879020751457e-05, + "loss": 9.2316, + "step": 71630 + }, + { + "epoch": 0.3577617418662139, + "grad_norm": 0.09259764105081558, + "learning_rate": 1.9315377105809908e-05, + "loss": 9.2325, + "step": 71640 + }, + { + "epoch": 0.3578116806911533, + "grad_norm": 0.09912045300006866, + "learning_rate": 1.9313875190868358e-05, + "loss": 9.2225, + "step": 71650 + }, + { + "epoch": 0.3578616195160928, + "grad_norm": 0.09714524447917938, + "learning_rate": 1.9312373275926808e-05, + "loss": 9.2267, + "step": 71660 + }, + { + "epoch": 0.3579115583410322, + "grad_norm": 0.08966837078332901, + "learning_rate": 1.9310871360985258e-05, + "loss": 9.2311, + "step": 71670 + }, + { + "epoch": 0.3579614971659717, + "grad_norm": 0.09709369391202927, + "learning_rate": 1.9309369446043705e-05, + "loss": 9.2262, + "step": 71680 + }, + { + "epoch": 0.3580114359909111, + "grad_norm": 0.0912432074546814, + "learning_rate": 1.9307867531102155e-05, + "loss": 9.2241, + "step": 71690 + }, + { + "epoch": 0.3580613748158506, + "grad_norm": 0.09726667404174805, + "learning_rate": 1.9306365616160605e-05, + "loss": 9.2349, + "step": 71700 + }, + { + "epoch": 0.35811131364079, + "grad_norm": 0.10004935413599014, + "learning_rate": 1.9304863701219055e-05, + "loss": 9.2206, + "step": 71710 + }, + { + "epoch": 0.3581612524657295, + "grad_norm": 0.0955386683344841, + "learning_rate": 1.9303361786277506e-05, + "loss": 9.2124, + "step": 71720 + }, + { + "epoch": 0.3582111912906689, + "grad_norm": 0.09714854508638382, + "learning_rate": 1.9301859871335952e-05, + "loss": 9.2274, + "step": 71730 + }, + { + "epoch": 0.3582611301156084, + "grad_norm": 0.0956362634897232, + "learning_rate": 1.9300357956394403e-05, + "loss": 9.2244, + "step": 71740 + }, + { + "epoch": 0.3583110689405478, + "grad_norm": 0.08816993236541748, + "learning_rate": 1.9298856041452853e-05, + "loss": 9.2184, + "step": 71750 + }, + { + "epoch": 0.3583610077654873, + "grad_norm": 0.09849813580513, + "learning_rate": 1.9297354126511303e-05, + "loss": 9.2217, + "step": 71760 + }, + { + "epoch": 0.3584109465904267, + "grad_norm": 0.0990118458867073, + "learning_rate": 1.9295852211569753e-05, + "loss": 9.2132, + "step": 71770 + }, + { + "epoch": 0.3584608854153662, + "grad_norm": 0.0910298228263855, + "learning_rate": 1.92943502966282e-05, + "loss": 9.2336, + "step": 71780 + }, + { + "epoch": 0.3585108242403056, + "grad_norm": 0.09591134637594223, + "learning_rate": 1.929284838168665e-05, + "loss": 9.2223, + "step": 71790 + }, + { + "epoch": 0.3585607630652451, + "grad_norm": 0.0979471430182457, + "learning_rate": 1.92913464667451e-05, + "loss": 9.2379, + "step": 71800 + }, + { + "epoch": 0.3586107018901845, + "grad_norm": 0.10352909564971924, + "learning_rate": 1.928984455180355e-05, + "loss": 9.2422, + "step": 71810 + }, + { + "epoch": 0.358660640715124, + "grad_norm": 0.09178424626588821, + "learning_rate": 1.9288342636862e-05, + "loss": 9.2233, + "step": 71820 + }, + { + "epoch": 0.3587105795400634, + "grad_norm": 0.09857258945703506, + "learning_rate": 1.9286840721920447e-05, + "loss": 9.2315, + "step": 71830 + }, + { + "epoch": 0.3587605183650029, + "grad_norm": 0.09110614657402039, + "learning_rate": 1.9285338806978898e-05, + "loss": 9.221, + "step": 71840 + }, + { + "epoch": 0.3588104571899423, + "grad_norm": 0.09467658400535583, + "learning_rate": 1.9283836892037348e-05, + "loss": 9.2234, + "step": 71850 + }, + { + "epoch": 0.3588603960148818, + "grad_norm": 0.10426273941993713, + "learning_rate": 1.9282334977095798e-05, + "loss": 9.2211, + "step": 71860 + }, + { + "epoch": 0.3589103348398212, + "grad_norm": 0.09133409708738327, + "learning_rate": 1.9280833062154248e-05, + "loss": 9.2181, + "step": 71870 + }, + { + "epoch": 0.3589602736647607, + "grad_norm": 0.09624101221561432, + "learning_rate": 1.9279331147212695e-05, + "loss": 9.2161, + "step": 71880 + }, + { + "epoch": 0.3590102124897001, + "grad_norm": 0.08982755988836288, + "learning_rate": 1.9277829232271145e-05, + "loss": 9.2249, + "step": 71890 + }, + { + "epoch": 0.3590601513146396, + "grad_norm": 0.09154070168733597, + "learning_rate": 1.9276327317329595e-05, + "loss": 9.2266, + "step": 71900 + }, + { + "epoch": 0.359110090139579, + "grad_norm": 0.09227220714092255, + "learning_rate": 1.9274825402388045e-05, + "loss": 9.2306, + "step": 71910 + }, + { + "epoch": 0.3591600289645185, + "grad_norm": 0.09317903965711594, + "learning_rate": 1.9273323487446496e-05, + "loss": 9.217, + "step": 71920 + }, + { + "epoch": 0.3592099677894579, + "grad_norm": 0.09271214157342911, + "learning_rate": 1.9271821572504942e-05, + "loss": 9.2249, + "step": 71930 + }, + { + "epoch": 0.3592599066143974, + "grad_norm": 0.10173588246107101, + "learning_rate": 1.9270319657563393e-05, + "loss": 9.2237, + "step": 71940 + }, + { + "epoch": 0.3593098454393368, + "grad_norm": 0.09192373603582382, + "learning_rate": 1.9268817742621843e-05, + "loss": 9.2223, + "step": 71950 + }, + { + "epoch": 0.3593597842642763, + "grad_norm": 0.09208924323320389, + "learning_rate": 1.9267315827680293e-05, + "loss": 9.2111, + "step": 71960 + }, + { + "epoch": 0.3594097230892157, + "grad_norm": 0.09781036525964737, + "learning_rate": 1.9265813912738743e-05, + "loss": 9.2236, + "step": 71970 + }, + { + "epoch": 0.3594596619141552, + "grad_norm": 0.09488288313150406, + "learning_rate": 1.926431199779719e-05, + "loss": 9.2249, + "step": 71980 + }, + { + "epoch": 0.3595096007390946, + "grad_norm": 0.08927453309297562, + "learning_rate": 1.9262810082855643e-05, + "loss": 9.2323, + "step": 71990 + }, + { + "epoch": 0.3595595395640341, + "grad_norm": 0.09006758779287338, + "learning_rate": 1.926130816791409e-05, + "loss": 9.2224, + "step": 72000 + }, + { + "epoch": 0.3596094783889735, + "grad_norm": 0.09184703975915909, + "learning_rate": 1.925980625297254e-05, + "loss": 9.2305, + "step": 72010 + }, + { + "epoch": 0.359659417213913, + "grad_norm": 0.0887824296951294, + "learning_rate": 1.925830433803099e-05, + "loss": 9.2437, + "step": 72020 + }, + { + "epoch": 0.3597093560388524, + "grad_norm": 0.09438683092594147, + "learning_rate": 1.9256802423089437e-05, + "loss": 9.2207, + "step": 72030 + }, + { + "epoch": 0.3597592948637919, + "grad_norm": 0.0963621437549591, + "learning_rate": 1.925530050814789e-05, + "loss": 9.2236, + "step": 72040 + }, + { + "epoch": 0.3598092336887313, + "grad_norm": 0.09187499433755875, + "learning_rate": 1.9253798593206338e-05, + "loss": 9.2143, + "step": 72050 + }, + { + "epoch": 0.3598591725136708, + "grad_norm": 0.0932173877954483, + "learning_rate": 1.9252296678264788e-05, + "loss": 9.228, + "step": 72060 + }, + { + "epoch": 0.3599091113386102, + "grad_norm": 0.09906915575265884, + "learning_rate": 1.9250794763323238e-05, + "loss": 9.2218, + "step": 72070 + }, + { + "epoch": 0.3599590501635497, + "grad_norm": 0.09343940764665604, + "learning_rate": 1.9249292848381685e-05, + "loss": 9.2335, + "step": 72080 + }, + { + "epoch": 0.3600089889884891, + "grad_norm": 0.09147544950246811, + "learning_rate": 1.924779093344014e-05, + "loss": 9.2215, + "step": 72090 + }, + { + "epoch": 0.3600589278134286, + "grad_norm": 0.0935380831360817, + "learning_rate": 1.9246289018498585e-05, + "loss": 9.217, + "step": 72100 + }, + { + "epoch": 0.360108866638368, + "grad_norm": 0.09664879739284515, + "learning_rate": 1.9244787103557035e-05, + "loss": 9.2295, + "step": 72110 + }, + { + "epoch": 0.3601588054633075, + "grad_norm": 0.09369748085737228, + "learning_rate": 1.9243285188615486e-05, + "loss": 9.2209, + "step": 72120 + }, + { + "epoch": 0.3602087442882469, + "grad_norm": 0.09158635139465332, + "learning_rate": 1.9241783273673936e-05, + "loss": 9.2208, + "step": 72130 + }, + { + "epoch": 0.3602586831131864, + "grad_norm": 0.08802355080842972, + "learning_rate": 1.9240281358732386e-05, + "loss": 9.2261, + "step": 72140 + }, + { + "epoch": 0.3603086219381258, + "grad_norm": 0.09446180611848831, + "learning_rate": 1.9238779443790833e-05, + "loss": 9.2287, + "step": 72150 + }, + { + "epoch": 0.36035856076306527, + "grad_norm": 0.10025842487812042, + "learning_rate": 1.9237277528849283e-05, + "loss": 9.2188, + "step": 72160 + }, + { + "epoch": 0.3604084995880047, + "grad_norm": 0.09552531689405441, + "learning_rate": 1.9235775613907733e-05, + "loss": 9.213, + "step": 72170 + }, + { + "epoch": 0.36045843841294417, + "grad_norm": 0.09716292470693588, + "learning_rate": 1.9234273698966183e-05, + "loss": 9.2195, + "step": 72180 + }, + { + "epoch": 0.3605083772378836, + "grad_norm": 0.09107545018196106, + "learning_rate": 1.9232771784024633e-05, + "loss": 9.2349, + "step": 72190 + }, + { + "epoch": 0.360558316062823, + "grad_norm": 0.09314329922199249, + "learning_rate": 1.923126986908308e-05, + "loss": 9.2195, + "step": 72200 + }, + { + "epoch": 0.3606082548877625, + "grad_norm": 0.09497959911823273, + "learning_rate": 1.922976795414153e-05, + "loss": 9.216, + "step": 72210 + }, + { + "epoch": 0.3606581937127019, + "grad_norm": 0.09450450539588928, + "learning_rate": 1.922826603919998e-05, + "loss": 9.2196, + "step": 72220 + }, + { + "epoch": 0.3607081325376414, + "grad_norm": 0.09029270708560944, + "learning_rate": 1.922676412425843e-05, + "loss": 9.2221, + "step": 72230 + }, + { + "epoch": 0.3607580713625808, + "grad_norm": 0.09402526915073395, + "learning_rate": 1.922526220931688e-05, + "loss": 9.2112, + "step": 72240 + }, + { + "epoch": 0.3608080101875203, + "grad_norm": 0.09604809433221817, + "learning_rate": 1.9223760294375328e-05, + "loss": 9.2266, + "step": 72250 + }, + { + "epoch": 0.3608579490124597, + "grad_norm": 0.09894632548093796, + "learning_rate": 1.9222258379433778e-05, + "loss": 9.2271, + "step": 72260 + }, + { + "epoch": 0.3609078878373992, + "grad_norm": 0.0926297977566719, + "learning_rate": 1.9220756464492228e-05, + "loss": 9.219, + "step": 72270 + }, + { + "epoch": 0.3609578266623386, + "grad_norm": 0.08637505769729614, + "learning_rate": 1.9219254549550678e-05, + "loss": 9.2202, + "step": 72280 + }, + { + "epoch": 0.3610077654872781, + "grad_norm": 0.09499508887529373, + "learning_rate": 1.921775263460913e-05, + "loss": 9.2172, + "step": 72290 + }, + { + "epoch": 0.3610577043122175, + "grad_norm": 0.09541734308004379, + "learning_rate": 1.9216250719667575e-05, + "loss": 9.2201, + "step": 72300 + }, + { + "epoch": 0.361107643137157, + "grad_norm": 0.09449708461761475, + "learning_rate": 1.921474880472603e-05, + "loss": 9.2271, + "step": 72310 + }, + { + "epoch": 0.3611575819620964, + "grad_norm": 0.1025526151061058, + "learning_rate": 1.9213246889784476e-05, + "loss": 9.2293, + "step": 72320 + }, + { + "epoch": 0.3612075207870359, + "grad_norm": 0.08863754570484161, + "learning_rate": 1.9211744974842926e-05, + "loss": 9.2345, + "step": 72330 + }, + { + "epoch": 0.3612574596119753, + "grad_norm": 0.09427140653133392, + "learning_rate": 1.9210243059901376e-05, + "loss": 9.2335, + "step": 72340 + }, + { + "epoch": 0.3613073984369148, + "grad_norm": 0.09397964179515839, + "learning_rate": 1.9208741144959823e-05, + "loss": 9.2185, + "step": 72350 + }, + { + "epoch": 0.3613573372618542, + "grad_norm": 0.09295367449522018, + "learning_rate": 1.9207239230018276e-05, + "loss": 9.2255, + "step": 72360 + }, + { + "epoch": 0.3614072760867937, + "grad_norm": 0.09041722118854523, + "learning_rate": 1.9205737315076723e-05, + "loss": 9.2303, + "step": 72370 + }, + { + "epoch": 0.3614572149117331, + "grad_norm": 0.08931518346071243, + "learning_rate": 1.9204235400135173e-05, + "loss": 9.2179, + "step": 72380 + }, + { + "epoch": 0.3615071537366726, + "grad_norm": 0.0969778448343277, + "learning_rate": 1.9202733485193623e-05, + "loss": 9.2101, + "step": 72390 + }, + { + "epoch": 0.361557092561612, + "grad_norm": 0.09464286267757416, + "learning_rate": 1.920123157025207e-05, + "loss": 9.2113, + "step": 72400 + }, + { + "epoch": 0.3616070313865515, + "grad_norm": 0.09430237114429474, + "learning_rate": 1.9199729655310524e-05, + "loss": 9.2103, + "step": 72410 + }, + { + "epoch": 0.3616569702114909, + "grad_norm": 0.0944664403796196, + "learning_rate": 1.919822774036897e-05, + "loss": 9.2215, + "step": 72420 + }, + { + "epoch": 0.3617069090364304, + "grad_norm": 0.09468348324298859, + "learning_rate": 1.919672582542742e-05, + "loss": 9.2222, + "step": 72430 + }, + { + "epoch": 0.3617568478613698, + "grad_norm": 0.08907091617584229, + "learning_rate": 1.919522391048587e-05, + "loss": 9.2238, + "step": 72440 + }, + { + "epoch": 0.3618067866863093, + "grad_norm": 0.09387124329805374, + "learning_rate": 1.9193721995544318e-05, + "loss": 9.2185, + "step": 72450 + }, + { + "epoch": 0.3618567255112487, + "grad_norm": 0.09770091623067856, + "learning_rate": 1.919222008060277e-05, + "loss": 9.2177, + "step": 72460 + }, + { + "epoch": 0.3619066643361882, + "grad_norm": 0.09893268346786499, + "learning_rate": 1.9190718165661218e-05, + "loss": 9.2117, + "step": 72470 + }, + { + "epoch": 0.3619566031611276, + "grad_norm": 0.09433605521917343, + "learning_rate": 1.9189216250719668e-05, + "loss": 9.2256, + "step": 72480 + }, + { + "epoch": 0.3620065419860671, + "grad_norm": 0.09570042043924332, + "learning_rate": 1.918771433577812e-05, + "loss": 9.2137, + "step": 72490 + }, + { + "epoch": 0.3620564808110065, + "grad_norm": 0.09163347631692886, + "learning_rate": 1.9186212420836565e-05, + "loss": 9.2188, + "step": 72500 + }, + { + "epoch": 0.362106419635946, + "grad_norm": 0.08918924629688263, + "learning_rate": 1.918471050589502e-05, + "loss": 9.215, + "step": 72510 + }, + { + "epoch": 0.3621563584608854, + "grad_norm": 0.10020863264799118, + "learning_rate": 1.9183208590953466e-05, + "loss": 9.2027, + "step": 72520 + }, + { + "epoch": 0.3622062972858249, + "grad_norm": 0.09962757676839828, + "learning_rate": 1.9181706676011916e-05, + "loss": 9.2011, + "step": 72530 + }, + { + "epoch": 0.3622562361107643, + "grad_norm": 0.09085911512374878, + "learning_rate": 1.9180204761070366e-05, + "loss": 9.2144, + "step": 72540 + }, + { + "epoch": 0.3623061749357038, + "grad_norm": 0.0901787281036377, + "learning_rate": 1.9178702846128813e-05, + "loss": 9.2207, + "step": 72550 + }, + { + "epoch": 0.3623561137606432, + "grad_norm": 0.09482782334089279, + "learning_rate": 1.9177200931187266e-05, + "loss": 9.2262, + "step": 72560 + }, + { + "epoch": 0.36240605258558267, + "grad_norm": 0.09666303545236588, + "learning_rate": 1.9175699016245713e-05, + "loss": 9.2132, + "step": 72570 + }, + { + "epoch": 0.3624559914105221, + "grad_norm": 0.09145186096429825, + "learning_rate": 1.9174197101304163e-05, + "loss": 9.219, + "step": 72580 + }, + { + "epoch": 0.36250593023546157, + "grad_norm": 0.09651076048612595, + "learning_rate": 1.9172695186362613e-05, + "loss": 9.2178, + "step": 72590 + }, + { + "epoch": 0.362555869060401, + "grad_norm": 0.0937245786190033, + "learning_rate": 1.917119327142106e-05, + "loss": 9.2151, + "step": 72600 + }, + { + "epoch": 0.36260580788534047, + "grad_norm": 0.09232796728610992, + "learning_rate": 1.9169691356479514e-05, + "loss": 9.2195, + "step": 72610 + }, + { + "epoch": 0.3626557467102799, + "grad_norm": 0.09451702237129211, + "learning_rate": 1.916818944153796e-05, + "loss": 9.2251, + "step": 72620 + }, + { + "epoch": 0.36270568553521937, + "grad_norm": 0.08963721245527267, + "learning_rate": 1.9166687526596414e-05, + "loss": 9.2175, + "step": 72630 + }, + { + "epoch": 0.3627556243601588, + "grad_norm": 0.09087999910116196, + "learning_rate": 1.916518561165486e-05, + "loss": 9.2081, + "step": 72640 + }, + { + "epoch": 0.36280556318509827, + "grad_norm": 0.09726609289646149, + "learning_rate": 1.9163683696713308e-05, + "loss": 9.2042, + "step": 72650 + }, + { + "epoch": 0.3628555020100377, + "grad_norm": 0.09195300191640854, + "learning_rate": 1.916218178177176e-05, + "loss": 9.2071, + "step": 72660 + }, + { + "epoch": 0.36290544083497717, + "grad_norm": 0.09186240285634995, + "learning_rate": 1.9160679866830208e-05, + "loss": 9.1994, + "step": 72670 + }, + { + "epoch": 0.3629553796599166, + "grad_norm": 0.09509220719337463, + "learning_rate": 1.9159177951888662e-05, + "loss": 9.2143, + "step": 72680 + }, + { + "epoch": 0.36300531848485607, + "grad_norm": 0.10255935788154602, + "learning_rate": 1.915767603694711e-05, + "loss": 9.2106, + "step": 72690 + }, + { + "epoch": 0.3630552573097955, + "grad_norm": 0.09038600325584412, + "learning_rate": 1.9156174122005555e-05, + "loss": 9.2043, + "step": 72700 + }, + { + "epoch": 0.36310519613473496, + "grad_norm": 0.09224878251552582, + "learning_rate": 1.915467220706401e-05, + "loss": 9.201, + "step": 72710 + }, + { + "epoch": 0.3631551349596744, + "grad_norm": 0.0966305211186409, + "learning_rate": 1.9153170292122456e-05, + "loss": 9.2141, + "step": 72720 + }, + { + "epoch": 0.36320507378461386, + "grad_norm": 0.0945696085691452, + "learning_rate": 1.915166837718091e-05, + "loss": 9.2121, + "step": 72730 + }, + { + "epoch": 0.3632550126095533, + "grad_norm": 0.09432712197303772, + "learning_rate": 1.9150166462239356e-05, + "loss": 9.2173, + "step": 72740 + }, + { + "epoch": 0.36330495143449276, + "grad_norm": 0.09079490602016449, + "learning_rate": 1.9148664547297803e-05, + "loss": 9.2089, + "step": 72750 + }, + { + "epoch": 0.3633548902594322, + "grad_norm": 0.09984689205884933, + "learning_rate": 1.9147162632356256e-05, + "loss": 9.2111, + "step": 72760 + }, + { + "epoch": 0.36340482908437166, + "grad_norm": 0.09602593630552292, + "learning_rate": 1.9145660717414703e-05, + "loss": 9.207, + "step": 72770 + }, + { + "epoch": 0.3634547679093111, + "grad_norm": 0.09502044320106506, + "learning_rate": 1.9144158802473157e-05, + "loss": 9.2066, + "step": 72780 + }, + { + "epoch": 0.36350470673425056, + "grad_norm": 0.0925392359495163, + "learning_rate": 1.9142656887531603e-05, + "loss": 9.2145, + "step": 72790 + }, + { + "epoch": 0.36355464555919, + "grad_norm": 0.09208685904741287, + "learning_rate": 1.914115497259005e-05, + "loss": 9.229, + "step": 72800 + }, + { + "epoch": 0.36360458438412946, + "grad_norm": 0.09518954902887344, + "learning_rate": 1.9139653057648504e-05, + "loss": 9.2098, + "step": 72810 + }, + { + "epoch": 0.3636545232090689, + "grad_norm": 0.09336472302675247, + "learning_rate": 1.913815114270695e-05, + "loss": 9.2115, + "step": 72820 + }, + { + "epoch": 0.36370446203400836, + "grad_norm": 0.09499195218086243, + "learning_rate": 1.9136649227765404e-05, + "loss": 9.2051, + "step": 72830 + }, + { + "epoch": 0.3637544008589478, + "grad_norm": 0.09333008527755737, + "learning_rate": 1.913514731282385e-05, + "loss": 9.2088, + "step": 72840 + }, + { + "epoch": 0.36380433968388726, + "grad_norm": 0.0993448942899704, + "learning_rate": 1.9133645397882298e-05, + "loss": 9.2097, + "step": 72850 + }, + { + "epoch": 0.3638542785088267, + "grad_norm": 0.0906200036406517, + "learning_rate": 1.913214348294075e-05, + "loss": 9.2196, + "step": 72860 + }, + { + "epoch": 0.36390421733376616, + "grad_norm": 0.09111246466636658, + "learning_rate": 1.9130641567999198e-05, + "loss": 9.21, + "step": 72870 + }, + { + "epoch": 0.3639541561587056, + "grad_norm": 0.09346730262041092, + "learning_rate": 1.9129139653057652e-05, + "loss": 9.2001, + "step": 72880 + }, + { + "epoch": 0.36400409498364505, + "grad_norm": 0.09085837006568909, + "learning_rate": 1.91276377381161e-05, + "loss": 9.2215, + "step": 72890 + }, + { + "epoch": 0.3640540338085845, + "grad_norm": 0.09417754411697388, + "learning_rate": 1.9126135823174545e-05, + "loss": 9.2197, + "step": 72900 + }, + { + "epoch": 0.36410397263352395, + "grad_norm": 0.09656990319490433, + "learning_rate": 1.9124633908233e-05, + "loss": 9.2044, + "step": 72910 + }, + { + "epoch": 0.3641539114584634, + "grad_norm": 0.0960773453116417, + "learning_rate": 1.9123131993291446e-05, + "loss": 9.2099, + "step": 72920 + }, + { + "epoch": 0.36420385028340285, + "grad_norm": 0.10151112079620361, + "learning_rate": 1.91216300783499e-05, + "loss": 9.2108, + "step": 72930 + }, + { + "epoch": 0.3642537891083423, + "grad_norm": 0.09444685280323029, + "learning_rate": 1.9120128163408346e-05, + "loss": 9.2017, + "step": 72940 + }, + { + "epoch": 0.36430372793328175, + "grad_norm": 0.09510553628206253, + "learning_rate": 1.9118626248466796e-05, + "loss": 9.2096, + "step": 72950 + }, + { + "epoch": 0.3643536667582212, + "grad_norm": 0.09737194329500198, + "learning_rate": 1.9117124333525246e-05, + "loss": 9.2117, + "step": 72960 + }, + { + "epoch": 0.36440360558316065, + "grad_norm": 0.09484457224607468, + "learning_rate": 1.9115622418583693e-05, + "loss": 9.1955, + "step": 72970 + }, + { + "epoch": 0.36445354440810007, + "grad_norm": 0.09121875464916229, + "learning_rate": 1.9114120503642147e-05, + "loss": 9.2148, + "step": 72980 + }, + { + "epoch": 0.36450348323303955, + "grad_norm": 0.09583168476819992, + "learning_rate": 1.9112618588700594e-05, + "loss": 9.2091, + "step": 72990 + }, + { + "epoch": 0.36455342205797897, + "grad_norm": 0.09783457964658737, + "learning_rate": 1.9111116673759044e-05, + "loss": 9.1992, + "step": 73000 + }, + { + "epoch": 0.36460336088291845, + "grad_norm": 0.09332898259162903, + "learning_rate": 1.9109614758817494e-05, + "loss": 9.2091, + "step": 73010 + }, + { + "epoch": 0.36465329970785787, + "grad_norm": 0.09087220579385757, + "learning_rate": 1.910811284387594e-05, + "loss": 9.2208, + "step": 73020 + }, + { + "epoch": 0.36470323853279735, + "grad_norm": 0.09480462223291397, + "learning_rate": 1.9106610928934394e-05, + "loss": 9.2173, + "step": 73030 + }, + { + "epoch": 0.36475317735773677, + "grad_norm": 0.0977669283747673, + "learning_rate": 1.910510901399284e-05, + "loss": 9.2026, + "step": 73040 + }, + { + "epoch": 0.36480311618267625, + "grad_norm": 0.08492804318666458, + "learning_rate": 1.910360709905129e-05, + "loss": 9.2201, + "step": 73050 + }, + { + "epoch": 0.36485305500761567, + "grad_norm": 0.09246183931827545, + "learning_rate": 1.910210518410974e-05, + "loss": 9.2205, + "step": 73060 + }, + { + "epoch": 0.36490299383255514, + "grad_norm": 0.08867323398590088, + "learning_rate": 1.9100603269168188e-05, + "loss": 9.2047, + "step": 73070 + }, + { + "epoch": 0.36495293265749457, + "grad_norm": 0.09597288072109222, + "learning_rate": 1.9099101354226642e-05, + "loss": 9.2167, + "step": 73080 + }, + { + "epoch": 0.36500287148243404, + "grad_norm": 0.0952095165848732, + "learning_rate": 1.909759943928509e-05, + "loss": 9.2064, + "step": 73090 + }, + { + "epoch": 0.36505281030737347, + "grad_norm": 0.08946932852268219, + "learning_rate": 1.909609752434354e-05, + "loss": 9.2084, + "step": 73100 + }, + { + "epoch": 0.36510274913231294, + "grad_norm": 0.09637124091386795, + "learning_rate": 1.909459560940199e-05, + "loss": 9.2031, + "step": 73110 + }, + { + "epoch": 0.36515268795725236, + "grad_norm": 0.09121710807085037, + "learning_rate": 1.9093093694460436e-05, + "loss": 9.2069, + "step": 73120 + }, + { + "epoch": 0.36520262678219184, + "grad_norm": 0.09637744724750519, + "learning_rate": 1.909159177951889e-05, + "loss": 9.2288, + "step": 73130 + }, + { + "epoch": 0.36525256560713126, + "grad_norm": 0.09214238077402115, + "learning_rate": 1.9090089864577336e-05, + "loss": 9.2124, + "step": 73140 + }, + { + "epoch": 0.36530250443207074, + "grad_norm": 0.08832499384880066, + "learning_rate": 1.9088587949635786e-05, + "loss": 9.2153, + "step": 73150 + }, + { + "epoch": 0.36535244325701016, + "grad_norm": 0.09127902239561081, + "learning_rate": 1.9087086034694236e-05, + "loss": 9.2171, + "step": 73160 + }, + { + "epoch": 0.36540238208194964, + "grad_norm": 0.09237400442361832, + "learning_rate": 1.9085584119752683e-05, + "loss": 9.2115, + "step": 73170 + }, + { + "epoch": 0.36545232090688906, + "grad_norm": 0.09848447889089584, + "learning_rate": 1.9084082204811137e-05, + "loss": 9.2101, + "step": 73180 + }, + { + "epoch": 0.3655022597318285, + "grad_norm": 0.09511636197566986, + "learning_rate": 1.9082580289869584e-05, + "loss": 9.2122, + "step": 73190 + }, + { + "epoch": 0.36555219855676796, + "grad_norm": 0.09176871925592422, + "learning_rate": 1.9081078374928034e-05, + "loss": 9.2076, + "step": 73200 + }, + { + "epoch": 0.3656021373817074, + "grad_norm": 0.0967327430844307, + "learning_rate": 1.9079576459986484e-05, + "loss": 9.213, + "step": 73210 + }, + { + "epoch": 0.36565207620664686, + "grad_norm": 0.09490523487329483, + "learning_rate": 1.907807454504493e-05, + "loss": 9.2131, + "step": 73220 + }, + { + "epoch": 0.3657020150315863, + "grad_norm": 0.09480072557926178, + "learning_rate": 1.9076572630103384e-05, + "loss": 9.2003, + "step": 73230 + }, + { + "epoch": 0.36575195385652576, + "grad_norm": 0.08888121694326401, + "learning_rate": 1.907507071516183e-05, + "loss": 9.1996, + "step": 73240 + }, + { + "epoch": 0.3658018926814652, + "grad_norm": 0.09302229434251785, + "learning_rate": 1.907356880022028e-05, + "loss": 9.2104, + "step": 73250 + }, + { + "epoch": 0.36585183150640466, + "grad_norm": 0.09518302977085114, + "learning_rate": 1.907206688527873e-05, + "loss": 9.1888, + "step": 73260 + }, + { + "epoch": 0.3659017703313441, + "grad_norm": 0.09398709237575531, + "learning_rate": 1.907056497033718e-05, + "loss": 9.2083, + "step": 73270 + }, + { + "epoch": 0.36595170915628356, + "grad_norm": 0.09403207898139954, + "learning_rate": 1.9069063055395632e-05, + "loss": 9.2055, + "step": 73280 + }, + { + "epoch": 0.366001647981223, + "grad_norm": 0.09166137129068375, + "learning_rate": 1.906756114045408e-05, + "loss": 9.2105, + "step": 73290 + }, + { + "epoch": 0.36605158680616245, + "grad_norm": 0.08635640889406204, + "learning_rate": 1.906605922551253e-05, + "loss": 9.2103, + "step": 73300 + }, + { + "epoch": 0.3661015256311019, + "grad_norm": 0.09352033585309982, + "learning_rate": 1.906455731057098e-05, + "loss": 9.2081, + "step": 73310 + }, + { + "epoch": 0.36615146445604135, + "grad_norm": 0.09585576504468918, + "learning_rate": 1.906305539562943e-05, + "loss": 9.2063, + "step": 73320 + }, + { + "epoch": 0.3662014032809808, + "grad_norm": 0.10186425596475601, + "learning_rate": 1.906155348068788e-05, + "loss": 9.2024, + "step": 73330 + }, + { + "epoch": 0.36625134210592025, + "grad_norm": 0.08858046680688858, + "learning_rate": 1.9060051565746326e-05, + "loss": 9.2075, + "step": 73340 + }, + { + "epoch": 0.3663012809308597, + "grad_norm": 0.09600647538900375, + "learning_rate": 1.9058549650804776e-05, + "loss": 9.1993, + "step": 73350 + }, + { + "epoch": 0.36635121975579915, + "grad_norm": 0.093485526740551, + "learning_rate": 1.9057047735863226e-05, + "loss": 9.2022, + "step": 73360 + }, + { + "epoch": 0.3664011585807386, + "grad_norm": 0.09988955408334732, + "learning_rate": 1.9055545820921677e-05, + "loss": 9.2141, + "step": 73370 + }, + { + "epoch": 0.36645109740567805, + "grad_norm": 0.10079696774482727, + "learning_rate": 1.9054043905980127e-05, + "loss": 9.2063, + "step": 73380 + }, + { + "epoch": 0.36650103623061747, + "grad_norm": 0.09347499907016754, + "learning_rate": 1.9052541991038574e-05, + "loss": 9.2044, + "step": 73390 + }, + { + "epoch": 0.36655097505555695, + "grad_norm": 0.09521867334842682, + "learning_rate": 1.9051040076097024e-05, + "loss": 9.2019, + "step": 73400 + }, + { + "epoch": 0.36660091388049637, + "grad_norm": 0.09266289323568344, + "learning_rate": 1.9049538161155474e-05, + "loss": 9.2043, + "step": 73410 + }, + { + "epoch": 0.36665085270543585, + "grad_norm": 0.09217309206724167, + "learning_rate": 1.9048036246213924e-05, + "loss": 9.2132, + "step": 73420 + }, + { + "epoch": 0.36670079153037527, + "grad_norm": 0.0919642373919487, + "learning_rate": 1.9046534331272374e-05, + "loss": 9.2002, + "step": 73430 + }, + { + "epoch": 0.36675073035531475, + "grad_norm": 0.0903974175453186, + "learning_rate": 1.904503241633082e-05, + "loss": 9.2031, + "step": 73440 + }, + { + "epoch": 0.36680066918025417, + "grad_norm": 0.09081780910491943, + "learning_rate": 1.904353050138927e-05, + "loss": 9.2017, + "step": 73450 + }, + { + "epoch": 0.36685060800519365, + "grad_norm": 0.09484659135341644, + "learning_rate": 1.904202858644772e-05, + "loss": 9.2111, + "step": 73460 + }, + { + "epoch": 0.36690054683013307, + "grad_norm": 0.09380007535219193, + "learning_rate": 1.904052667150617e-05, + "loss": 9.2005, + "step": 73470 + }, + { + "epoch": 0.36695048565507254, + "grad_norm": 0.09407030791044235, + "learning_rate": 1.9039024756564622e-05, + "loss": 9.2089, + "step": 73480 + }, + { + "epoch": 0.36700042448001197, + "grad_norm": 0.09412670135498047, + "learning_rate": 1.903752284162307e-05, + "loss": 9.2009, + "step": 73490 + }, + { + "epoch": 0.36705036330495144, + "grad_norm": 0.09622817486524582, + "learning_rate": 1.903602092668152e-05, + "loss": 9.2022, + "step": 73500 + }, + { + "epoch": 0.36710030212989087, + "grad_norm": 0.09574905782938004, + "learning_rate": 1.903451901173997e-05, + "loss": 9.2067, + "step": 73510 + }, + { + "epoch": 0.36715024095483034, + "grad_norm": 0.09512118250131607, + "learning_rate": 1.903301709679842e-05, + "loss": 9.2085, + "step": 73520 + }, + { + "epoch": 0.36720017977976976, + "grad_norm": 0.08740708976984024, + "learning_rate": 1.903151518185687e-05, + "loss": 9.2168, + "step": 73530 + }, + { + "epoch": 0.36725011860470924, + "grad_norm": 0.091730996966362, + "learning_rate": 1.9030013266915316e-05, + "loss": 9.2058, + "step": 73540 + }, + { + "epoch": 0.36730005742964866, + "grad_norm": 0.09345594793558121, + "learning_rate": 1.9028511351973766e-05, + "loss": 9.1933, + "step": 73550 + }, + { + "epoch": 0.36734999625458814, + "grad_norm": 0.1008530855178833, + "learning_rate": 1.9027009437032216e-05, + "loss": 9.195, + "step": 73560 + }, + { + "epoch": 0.36739993507952756, + "grad_norm": 0.10389885306358337, + "learning_rate": 1.9025507522090667e-05, + "loss": 9.2012, + "step": 73570 + }, + { + "epoch": 0.36744987390446704, + "grad_norm": 0.09878107160329819, + "learning_rate": 1.9024005607149117e-05, + "loss": 9.2016, + "step": 73580 + }, + { + "epoch": 0.36749981272940646, + "grad_norm": 0.09092985093593597, + "learning_rate": 1.9022503692207564e-05, + "loss": 9.2007, + "step": 73590 + }, + { + "epoch": 0.36754975155434594, + "grad_norm": 0.09142585843801498, + "learning_rate": 1.9021001777266014e-05, + "loss": 9.2079, + "step": 73600 + }, + { + "epoch": 0.36759969037928536, + "grad_norm": 0.09261012822389603, + "learning_rate": 1.9019499862324464e-05, + "loss": 9.2041, + "step": 73610 + }, + { + "epoch": 0.36764962920422484, + "grad_norm": 0.09699272364377975, + "learning_rate": 1.9017997947382914e-05, + "loss": 9.2008, + "step": 73620 + }, + { + "epoch": 0.36769956802916426, + "grad_norm": 0.0889492779970169, + "learning_rate": 1.9016496032441364e-05, + "loss": 9.1979, + "step": 73630 + }, + { + "epoch": 0.36774950685410374, + "grad_norm": 0.09963024407625198, + "learning_rate": 1.9014994117499814e-05, + "loss": 9.2071, + "step": 73640 + }, + { + "epoch": 0.36779944567904316, + "grad_norm": 0.08926968276500702, + "learning_rate": 1.901349220255826e-05, + "loss": 9.2077, + "step": 73650 + }, + { + "epoch": 0.36784938450398263, + "grad_norm": 0.10151121020317078, + "learning_rate": 1.901199028761671e-05, + "loss": 9.1939, + "step": 73660 + }, + { + "epoch": 0.36789932332892206, + "grad_norm": 0.09443957358598709, + "learning_rate": 1.901048837267516e-05, + "loss": 9.2078, + "step": 73670 + }, + { + "epoch": 0.36794926215386153, + "grad_norm": 0.09295155853033066, + "learning_rate": 1.9008986457733612e-05, + "loss": 9.2032, + "step": 73680 + }, + { + "epoch": 0.36799920097880096, + "grad_norm": 0.09365091472864151, + "learning_rate": 1.9007484542792062e-05, + "loss": 9.1929, + "step": 73690 + }, + { + "epoch": 0.36804913980374043, + "grad_norm": 0.09538646787405014, + "learning_rate": 1.900598262785051e-05, + "loss": 9.1853, + "step": 73700 + }, + { + "epoch": 0.36809907862867985, + "grad_norm": 0.09404728561639786, + "learning_rate": 1.900448071290896e-05, + "loss": 9.2012, + "step": 73710 + }, + { + "epoch": 0.36814901745361933, + "grad_norm": 0.09444870799779892, + "learning_rate": 1.900297879796741e-05, + "loss": 9.1991, + "step": 73720 + }, + { + "epoch": 0.36819895627855875, + "grad_norm": 0.09309567511081696, + "learning_rate": 1.900147688302586e-05, + "loss": 9.1996, + "step": 73730 + }, + { + "epoch": 0.36824889510349823, + "grad_norm": 0.09412892907857895, + "learning_rate": 1.899997496808431e-05, + "loss": 9.1956, + "step": 73740 + }, + { + "epoch": 0.36829883392843765, + "grad_norm": 0.09056131541728973, + "learning_rate": 1.8998473053142756e-05, + "loss": 9.2004, + "step": 73750 + }, + { + "epoch": 0.36834877275337713, + "grad_norm": 0.09614895284175873, + "learning_rate": 1.8996971138201206e-05, + "loss": 9.2077, + "step": 73760 + }, + { + "epoch": 0.36839871157831655, + "grad_norm": 0.09568426012992859, + "learning_rate": 1.8995469223259657e-05, + "loss": 9.196, + "step": 73770 + }, + { + "epoch": 0.36844865040325603, + "grad_norm": 0.09588686376810074, + "learning_rate": 1.8993967308318107e-05, + "loss": 9.1894, + "step": 73780 + }, + { + "epoch": 0.36849858922819545, + "grad_norm": 0.09887325763702393, + "learning_rate": 1.8992465393376557e-05, + "loss": 9.1864, + "step": 73790 + }, + { + "epoch": 0.3685485280531349, + "grad_norm": 0.09332797676324844, + "learning_rate": 1.8990963478435004e-05, + "loss": 9.1968, + "step": 73800 + }, + { + "epoch": 0.36859846687807435, + "grad_norm": 0.10205885022878647, + "learning_rate": 1.8989461563493454e-05, + "loss": 9.1965, + "step": 73810 + }, + { + "epoch": 0.3686484057030138, + "grad_norm": 0.09981803596019745, + "learning_rate": 1.8987959648551904e-05, + "loss": 9.195, + "step": 73820 + }, + { + "epoch": 0.36869834452795325, + "grad_norm": 0.09201083332300186, + "learning_rate": 1.8986457733610354e-05, + "loss": 9.2003, + "step": 73830 + }, + { + "epoch": 0.3687482833528927, + "grad_norm": 0.09341354668140411, + "learning_rate": 1.8984955818668804e-05, + "loss": 9.2033, + "step": 73840 + }, + { + "epoch": 0.36879822217783215, + "grad_norm": 0.0915553867816925, + "learning_rate": 1.898345390372725e-05, + "loss": 9.2004, + "step": 73850 + }, + { + "epoch": 0.3688481610027716, + "grad_norm": 0.09335311502218246, + "learning_rate": 1.89819519887857e-05, + "loss": 9.2035, + "step": 73860 + }, + { + "epoch": 0.36889809982771105, + "grad_norm": 0.10016678273677826, + "learning_rate": 1.898045007384415e-05, + "loss": 9.196, + "step": 73870 + }, + { + "epoch": 0.3689480386526505, + "grad_norm": 0.09652851521968842, + "learning_rate": 1.8978948158902602e-05, + "loss": 9.202, + "step": 73880 + }, + { + "epoch": 0.36899797747758994, + "grad_norm": 0.09326925128698349, + "learning_rate": 1.8977446243961052e-05, + "loss": 9.1939, + "step": 73890 + }, + { + "epoch": 0.3690479163025294, + "grad_norm": 0.09441187977790833, + "learning_rate": 1.89759443290195e-05, + "loss": 9.1978, + "step": 73900 + }, + { + "epoch": 0.36909785512746884, + "grad_norm": 0.09244664758443832, + "learning_rate": 1.897444241407795e-05, + "loss": 9.1851, + "step": 73910 + }, + { + "epoch": 0.3691477939524083, + "grad_norm": 0.09576987475156784, + "learning_rate": 1.89729404991364e-05, + "loss": 9.1974, + "step": 73920 + }, + { + "epoch": 0.36919773277734774, + "grad_norm": 0.10163576900959015, + "learning_rate": 1.897143858419485e-05, + "loss": 9.1942, + "step": 73930 + }, + { + "epoch": 0.3692476716022872, + "grad_norm": 0.09293472766876221, + "learning_rate": 1.89699366692533e-05, + "loss": 9.2004, + "step": 73940 + }, + { + "epoch": 0.36929761042722664, + "grad_norm": 0.09498841315507889, + "learning_rate": 1.8968434754311746e-05, + "loss": 9.2015, + "step": 73950 + }, + { + "epoch": 0.3693475492521661, + "grad_norm": 0.0949832871556282, + "learning_rate": 1.89669328393702e-05, + "loss": 9.1872, + "step": 73960 + }, + { + "epoch": 0.36939748807710554, + "grad_norm": 0.09256528317928314, + "learning_rate": 1.8965430924428647e-05, + "loss": 9.1972, + "step": 73970 + }, + { + "epoch": 0.369447426902045, + "grad_norm": 0.0952831581234932, + "learning_rate": 1.8963929009487097e-05, + "loss": 9.1852, + "step": 73980 + }, + { + "epoch": 0.36949736572698444, + "grad_norm": 0.10047879070043564, + "learning_rate": 1.8962427094545547e-05, + "loss": 9.2014, + "step": 73990 + }, + { + "epoch": 0.3695473045519239, + "grad_norm": 0.09470636397600174, + "learning_rate": 1.8960925179603994e-05, + "loss": 9.1897, + "step": 74000 + }, + { + "epoch": 0.36959724337686334, + "grad_norm": 0.08998063206672668, + "learning_rate": 1.8959423264662447e-05, + "loss": 9.2008, + "step": 74010 + }, + { + "epoch": 0.3696471822018028, + "grad_norm": 0.09682891517877579, + "learning_rate": 1.8957921349720894e-05, + "loss": 9.1968, + "step": 74020 + }, + { + "epoch": 0.36969712102674224, + "grad_norm": 0.093925341963768, + "learning_rate": 1.8956419434779344e-05, + "loss": 9.2174, + "step": 74030 + }, + { + "epoch": 0.3697470598516817, + "grad_norm": 0.09512005746364594, + "learning_rate": 1.8954917519837794e-05, + "loss": 9.194, + "step": 74040 + }, + { + "epoch": 0.36979699867662114, + "grad_norm": 0.09474792331457138, + "learning_rate": 1.895341560489624e-05, + "loss": 9.1955, + "step": 74050 + }, + { + "epoch": 0.3698469375015606, + "grad_norm": 0.08794379979372025, + "learning_rate": 1.8951913689954695e-05, + "loss": 9.1912, + "step": 74060 + }, + { + "epoch": 0.36989687632650003, + "grad_norm": 0.09021158516407013, + "learning_rate": 1.895041177501314e-05, + "loss": 9.1967, + "step": 74070 + }, + { + "epoch": 0.3699468151514395, + "grad_norm": 0.09125207364559174, + "learning_rate": 1.8948909860071592e-05, + "loss": 9.1879, + "step": 74080 + }, + { + "epoch": 0.36999675397637893, + "grad_norm": 0.09770797193050385, + "learning_rate": 1.8947407945130042e-05, + "loss": 9.1984, + "step": 74090 + }, + { + "epoch": 0.3700466928013184, + "grad_norm": 0.09228277951478958, + "learning_rate": 1.894590603018849e-05, + "loss": 9.1979, + "step": 74100 + }, + { + "epoch": 0.37009663162625783, + "grad_norm": 0.09251270443201065, + "learning_rate": 1.8944404115246942e-05, + "loss": 9.199, + "step": 74110 + }, + { + "epoch": 0.3701465704511973, + "grad_norm": 0.09587238729000092, + "learning_rate": 1.894290220030539e-05, + "loss": 9.1933, + "step": 74120 + }, + { + "epoch": 0.37019650927613673, + "grad_norm": 0.08899569511413574, + "learning_rate": 1.894140028536384e-05, + "loss": 9.1968, + "step": 74130 + }, + { + "epoch": 0.3702464481010762, + "grad_norm": 0.09335777163505554, + "learning_rate": 1.893989837042229e-05, + "loss": 9.1918, + "step": 74140 + }, + { + "epoch": 0.37029638692601563, + "grad_norm": 0.09356404095888138, + "learning_rate": 1.8938396455480736e-05, + "loss": 9.1845, + "step": 74150 + }, + { + "epoch": 0.3703463257509551, + "grad_norm": 0.09852045774459839, + "learning_rate": 1.893689454053919e-05, + "loss": 9.1924, + "step": 74160 + }, + { + "epoch": 0.37039626457589453, + "grad_norm": 0.09660044312477112, + "learning_rate": 1.8935392625597637e-05, + "loss": 9.2063, + "step": 74170 + }, + { + "epoch": 0.37044620340083395, + "grad_norm": 0.09085432440042496, + "learning_rate": 1.8933890710656087e-05, + "loss": 9.1791, + "step": 74180 + }, + { + "epoch": 0.37049614222577343, + "grad_norm": 0.09493508189916611, + "learning_rate": 1.8932388795714537e-05, + "loss": 9.1994, + "step": 74190 + }, + { + "epoch": 0.37054608105071285, + "grad_norm": 0.09557243436574936, + "learning_rate": 1.8930886880772984e-05, + "loss": 9.1895, + "step": 74200 + }, + { + "epoch": 0.3705960198756523, + "grad_norm": 0.09346624463796616, + "learning_rate": 1.8929384965831437e-05, + "loss": 9.1871, + "step": 74210 + }, + { + "epoch": 0.37064595870059175, + "grad_norm": 0.09579519182443619, + "learning_rate": 1.8927883050889884e-05, + "loss": 9.1834, + "step": 74220 + }, + { + "epoch": 0.3706958975255312, + "grad_norm": 0.10192973166704178, + "learning_rate": 1.8926381135948334e-05, + "loss": 9.1982, + "step": 74230 + }, + { + "epoch": 0.37074583635047065, + "grad_norm": 0.09080871194601059, + "learning_rate": 1.8924879221006784e-05, + "loss": 9.2006, + "step": 74240 + }, + { + "epoch": 0.3707957751754101, + "grad_norm": 0.09512729942798615, + "learning_rate": 1.892337730606523e-05, + "loss": 9.191, + "step": 74250 + }, + { + "epoch": 0.37084571400034955, + "grad_norm": 0.09607906639575958, + "learning_rate": 1.8921875391123685e-05, + "loss": 9.1898, + "step": 74260 + }, + { + "epoch": 0.370895652825289, + "grad_norm": 0.09581431746482849, + "learning_rate": 1.892037347618213e-05, + "loss": 9.1822, + "step": 74270 + }, + { + "epoch": 0.37094559165022845, + "grad_norm": 0.09435351938009262, + "learning_rate": 1.8918871561240585e-05, + "loss": 9.1853, + "step": 74280 + }, + { + "epoch": 0.3709955304751679, + "grad_norm": 0.09405918419361115, + "learning_rate": 1.8917369646299032e-05, + "loss": 9.1877, + "step": 74290 + }, + { + "epoch": 0.37104546930010734, + "grad_norm": 0.09423495829105377, + "learning_rate": 1.891586773135748e-05, + "loss": 9.2003, + "step": 74300 + }, + { + "epoch": 0.3710954081250468, + "grad_norm": 0.0974857285618782, + "learning_rate": 1.8914365816415932e-05, + "loss": 9.1888, + "step": 74310 + }, + { + "epoch": 0.37114534694998624, + "grad_norm": 0.09585097432136536, + "learning_rate": 1.891286390147438e-05, + "loss": 9.2001, + "step": 74320 + }, + { + "epoch": 0.3711952857749257, + "grad_norm": 0.09716727584600449, + "learning_rate": 1.8911361986532833e-05, + "loss": 9.1998, + "step": 74330 + }, + { + "epoch": 0.37124522459986514, + "grad_norm": 0.09428082406520844, + "learning_rate": 1.890986007159128e-05, + "loss": 9.1946, + "step": 74340 + }, + { + "epoch": 0.3712951634248046, + "grad_norm": 0.09271190315485, + "learning_rate": 1.8908358156649726e-05, + "loss": 9.1989, + "step": 74350 + }, + { + "epoch": 0.37134510224974404, + "grad_norm": 0.09324190765619278, + "learning_rate": 1.890685624170818e-05, + "loss": 9.1995, + "step": 74360 + }, + { + "epoch": 0.3713950410746835, + "grad_norm": 0.09561111032962799, + "learning_rate": 1.8905354326766627e-05, + "loss": 9.201, + "step": 74370 + }, + { + "epoch": 0.37144497989962294, + "grad_norm": 0.09302332252264023, + "learning_rate": 1.890385241182508e-05, + "loss": 9.1906, + "step": 74380 + }, + { + "epoch": 0.3714949187245624, + "grad_norm": 0.09225195646286011, + "learning_rate": 1.8902350496883527e-05, + "loss": 9.1941, + "step": 74390 + }, + { + "epoch": 0.37154485754950184, + "grad_norm": 0.09414453059434891, + "learning_rate": 1.8900848581941974e-05, + "loss": 9.1922, + "step": 74400 + }, + { + "epoch": 0.3715947963744413, + "grad_norm": 0.1040930524468422, + "learning_rate": 1.8899346667000427e-05, + "loss": 9.1835, + "step": 74410 + }, + { + "epoch": 0.37164473519938074, + "grad_norm": 0.10101134330034256, + "learning_rate": 1.8897844752058874e-05, + "loss": 9.1919, + "step": 74420 + }, + { + "epoch": 0.3716946740243202, + "grad_norm": 0.09623407572507858, + "learning_rate": 1.8896342837117328e-05, + "loss": 9.1749, + "step": 74430 + }, + { + "epoch": 0.37174461284925964, + "grad_norm": 0.0893046110868454, + "learning_rate": 1.8894840922175775e-05, + "loss": 9.191, + "step": 74440 + }, + { + "epoch": 0.3717945516741991, + "grad_norm": 0.09399503469467163, + "learning_rate": 1.889333900723422e-05, + "loss": 9.1985, + "step": 74450 + }, + { + "epoch": 0.37184449049913854, + "grad_norm": 0.09701071679592133, + "learning_rate": 1.8891837092292675e-05, + "loss": 9.1864, + "step": 74460 + }, + { + "epoch": 0.371894429324078, + "grad_norm": 0.09602676331996918, + "learning_rate": 1.889033517735112e-05, + "loss": 9.1936, + "step": 74470 + }, + { + "epoch": 0.37194436814901743, + "grad_norm": 0.09337245672941208, + "learning_rate": 1.8888833262409575e-05, + "loss": 9.1728, + "step": 74480 + }, + { + "epoch": 0.3719943069739569, + "grad_norm": 0.09136126190423965, + "learning_rate": 1.8887331347468022e-05, + "loss": 9.1816, + "step": 74490 + }, + { + "epoch": 0.37204424579889633, + "grad_norm": 0.09118318557739258, + "learning_rate": 1.888582943252647e-05, + "loss": 9.1867, + "step": 74500 + }, + { + "epoch": 0.3720941846238358, + "grad_norm": 0.09339746087789536, + "learning_rate": 1.8884327517584922e-05, + "loss": 9.1911, + "step": 74510 + }, + { + "epoch": 0.37214412344877523, + "grad_norm": 0.09358755499124527, + "learning_rate": 1.888282560264337e-05, + "loss": 9.1926, + "step": 74520 + }, + { + "epoch": 0.3721940622737147, + "grad_norm": 0.09610449522733688, + "learning_rate": 1.8881323687701823e-05, + "loss": 9.188, + "step": 74530 + }, + { + "epoch": 0.37224400109865413, + "grad_norm": 0.09586481004953384, + "learning_rate": 1.887982177276027e-05, + "loss": 9.1889, + "step": 74540 + }, + { + "epoch": 0.3722939399235936, + "grad_norm": 0.0982070192694664, + "learning_rate": 1.8878319857818716e-05, + "loss": 9.1799, + "step": 74550 + }, + { + "epoch": 0.37234387874853303, + "grad_norm": 0.09355320036411285, + "learning_rate": 1.887681794287717e-05, + "loss": 9.191, + "step": 74560 + }, + { + "epoch": 0.3723938175734725, + "grad_norm": 0.09481959044933319, + "learning_rate": 1.8875316027935617e-05, + "loss": 9.186, + "step": 74570 + }, + { + "epoch": 0.37244375639841193, + "grad_norm": 0.09273957461118698, + "learning_rate": 1.887381411299407e-05, + "loss": 9.1892, + "step": 74580 + }, + { + "epoch": 0.3724936952233514, + "grad_norm": 0.09700888395309448, + "learning_rate": 1.8872312198052517e-05, + "loss": 9.1935, + "step": 74590 + }, + { + "epoch": 0.37254363404829083, + "grad_norm": 0.09876874834299088, + "learning_rate": 1.8870810283110967e-05, + "loss": 9.1894, + "step": 74600 + }, + { + "epoch": 0.3725935728732303, + "grad_norm": 0.09617974609136581, + "learning_rate": 1.8869308368169417e-05, + "loss": 9.1823, + "step": 74610 + }, + { + "epoch": 0.3726435116981697, + "grad_norm": 0.09874873608350754, + "learning_rate": 1.8867806453227864e-05, + "loss": 9.198, + "step": 74620 + }, + { + "epoch": 0.3726934505231092, + "grad_norm": 0.0900963768362999, + "learning_rate": 1.8866304538286318e-05, + "loss": 9.1833, + "step": 74630 + }, + { + "epoch": 0.3727433893480486, + "grad_norm": 0.09320524334907532, + "learning_rate": 1.8864802623344765e-05, + "loss": 9.1847, + "step": 74640 + }, + { + "epoch": 0.3727933281729881, + "grad_norm": 0.09384310245513916, + "learning_rate": 1.8863300708403215e-05, + "loss": 9.182, + "step": 74650 + }, + { + "epoch": 0.3728432669979275, + "grad_norm": 0.09499943256378174, + "learning_rate": 1.8861798793461665e-05, + "loss": 9.1918, + "step": 74660 + }, + { + "epoch": 0.372893205822867, + "grad_norm": 0.09199872612953186, + "learning_rate": 1.886029687852011e-05, + "loss": 9.181, + "step": 74670 + }, + { + "epoch": 0.3729431446478064, + "grad_norm": 0.0918089747428894, + "learning_rate": 1.8858794963578565e-05, + "loss": 9.1903, + "step": 74680 + }, + { + "epoch": 0.3729930834727459, + "grad_norm": 0.09253137558698654, + "learning_rate": 1.8857293048637012e-05, + "loss": 9.1935, + "step": 74690 + }, + { + "epoch": 0.3730430222976853, + "grad_norm": 0.09675570577383041, + "learning_rate": 1.8855791133695462e-05, + "loss": 9.1777, + "step": 74700 + }, + { + "epoch": 0.3730929611226248, + "grad_norm": 0.09218452125787735, + "learning_rate": 1.8854289218753912e-05, + "loss": 9.1843, + "step": 74710 + }, + { + "epoch": 0.3731428999475642, + "grad_norm": 0.09730592370033264, + "learning_rate": 1.885278730381236e-05, + "loss": 9.1803, + "step": 74720 + }, + { + "epoch": 0.3731928387725037, + "grad_norm": 0.09571095556020737, + "learning_rate": 1.8851285388870813e-05, + "loss": 9.1813, + "step": 74730 + }, + { + "epoch": 0.3732427775974431, + "grad_norm": 0.09244304150342941, + "learning_rate": 1.884978347392926e-05, + "loss": 9.1753, + "step": 74740 + }, + { + "epoch": 0.3732927164223826, + "grad_norm": 0.08950365334749222, + "learning_rate": 1.884828155898771e-05, + "loss": 9.1876, + "step": 74750 + }, + { + "epoch": 0.373342655247322, + "grad_norm": 0.0952899232506752, + "learning_rate": 1.884677964404616e-05, + "loss": 9.1857, + "step": 74760 + }, + { + "epoch": 0.3733925940722615, + "grad_norm": 0.0960029885172844, + "learning_rate": 1.8845277729104607e-05, + "loss": 9.1913, + "step": 74770 + }, + { + "epoch": 0.3734425328972009, + "grad_norm": 0.08860934525728226, + "learning_rate": 1.884377581416306e-05, + "loss": 9.1803, + "step": 74780 + }, + { + "epoch": 0.3734924717221404, + "grad_norm": 0.09400645643472672, + "learning_rate": 1.8842273899221507e-05, + "loss": 9.1803, + "step": 74790 + }, + { + "epoch": 0.3735424105470798, + "grad_norm": 0.09583377838134766, + "learning_rate": 1.8840771984279957e-05, + "loss": 9.1885, + "step": 74800 + }, + { + "epoch": 0.3735923493720193, + "grad_norm": 0.09530255943536758, + "learning_rate": 1.8839270069338407e-05, + "loss": 9.1873, + "step": 74810 + }, + { + "epoch": 0.3736422881969587, + "grad_norm": 0.09696003049612045, + "learning_rate": 1.8837768154396854e-05, + "loss": 9.1831, + "step": 74820 + }, + { + "epoch": 0.3736922270218982, + "grad_norm": 0.09576781839132309, + "learning_rate": 1.8836266239455308e-05, + "loss": 9.1916, + "step": 74830 + }, + { + "epoch": 0.3737421658468376, + "grad_norm": 0.08903126418590546, + "learning_rate": 1.8834764324513755e-05, + "loss": 9.1917, + "step": 74840 + }, + { + "epoch": 0.3737921046717771, + "grad_norm": 0.09006597101688385, + "learning_rate": 1.8833262409572205e-05, + "loss": 9.174, + "step": 74850 + }, + { + "epoch": 0.3738420434967165, + "grad_norm": 0.09339811652898788, + "learning_rate": 1.8831760494630655e-05, + "loss": 9.1918, + "step": 74860 + }, + { + "epoch": 0.373891982321656, + "grad_norm": 0.09909702837467194, + "learning_rate": 1.88302585796891e-05, + "loss": 9.1877, + "step": 74870 + }, + { + "epoch": 0.3739419211465954, + "grad_norm": 0.09441126883029938, + "learning_rate": 1.8828756664747555e-05, + "loss": 9.1836, + "step": 74880 + }, + { + "epoch": 0.3739918599715349, + "grad_norm": 0.09800399839878082, + "learning_rate": 1.8827254749806002e-05, + "loss": 9.1844, + "step": 74890 + }, + { + "epoch": 0.3740417987964743, + "grad_norm": 0.10018418729305267, + "learning_rate": 1.8825752834864452e-05, + "loss": 9.1881, + "step": 74900 + }, + { + "epoch": 0.3740917376214138, + "grad_norm": 0.0955013558268547, + "learning_rate": 1.8824250919922902e-05, + "loss": 9.1919, + "step": 74910 + }, + { + "epoch": 0.3741416764463532, + "grad_norm": 0.09473446756601334, + "learning_rate": 1.8822749004981353e-05, + "loss": 9.191, + "step": 74920 + }, + { + "epoch": 0.3741916152712927, + "grad_norm": 0.09358023852109909, + "learning_rate": 1.8821247090039803e-05, + "loss": 9.1891, + "step": 74930 + }, + { + "epoch": 0.3742415540962321, + "grad_norm": 0.09420783072710037, + "learning_rate": 1.881974517509825e-05, + "loss": 9.1874, + "step": 74940 + }, + { + "epoch": 0.3742914929211716, + "grad_norm": 0.09367396682500839, + "learning_rate": 1.88182432601567e-05, + "loss": 9.1976, + "step": 74950 + }, + { + "epoch": 0.374341431746111, + "grad_norm": 0.09339455515146255, + "learning_rate": 1.881674134521515e-05, + "loss": 9.1843, + "step": 74960 + }, + { + "epoch": 0.3743913705710505, + "grad_norm": 0.09701774269342422, + "learning_rate": 1.88152394302736e-05, + "loss": 9.1973, + "step": 74970 + }, + { + "epoch": 0.3744413093959899, + "grad_norm": 0.09740804880857468, + "learning_rate": 1.881373751533205e-05, + "loss": 9.1823, + "step": 74980 + }, + { + "epoch": 0.3744912482209294, + "grad_norm": 0.09477312117815018, + "learning_rate": 1.8812235600390497e-05, + "loss": 9.1887, + "step": 74990 + }, + { + "epoch": 0.3745411870458688, + "grad_norm": 0.09397511929273605, + "learning_rate": 1.8810733685448947e-05, + "loss": 9.178, + "step": 75000 + }, + { + "epoch": 0.3745911258708083, + "grad_norm": 0.093665711581707, + "learning_rate": 1.8809231770507397e-05, + "loss": 9.1876, + "step": 75010 + }, + { + "epoch": 0.3746410646957477, + "grad_norm": 0.09372160583734512, + "learning_rate": 1.8807729855565848e-05, + "loss": 9.1882, + "step": 75020 + }, + { + "epoch": 0.3746910035206872, + "grad_norm": 0.09285315126180649, + "learning_rate": 1.8806227940624298e-05, + "loss": 9.1869, + "step": 75030 + }, + { + "epoch": 0.3747409423456266, + "grad_norm": 0.10530349612236023, + "learning_rate": 1.8804726025682745e-05, + "loss": 9.1785, + "step": 75040 + }, + { + "epoch": 0.3747908811705661, + "grad_norm": 0.09486869722604752, + "learning_rate": 1.8803224110741195e-05, + "loss": 9.1772, + "step": 75050 + }, + { + "epoch": 0.3748408199955055, + "grad_norm": 0.09095942974090576, + "learning_rate": 1.8801722195799645e-05, + "loss": 9.1886, + "step": 75060 + }, + { + "epoch": 0.374890758820445, + "grad_norm": 0.09552841633558273, + "learning_rate": 1.8800220280858095e-05, + "loss": 9.1746, + "step": 75070 + }, + { + "epoch": 0.3749406976453844, + "grad_norm": 0.09439145773649216, + "learning_rate": 1.8798718365916545e-05, + "loss": 9.1689, + "step": 75080 + }, + { + "epoch": 0.3749906364703239, + "grad_norm": 0.0905107706785202, + "learning_rate": 1.8797216450974992e-05, + "loss": 9.1867, + "step": 75090 + }, + { + "epoch": 0.3750405752952633, + "grad_norm": 0.0866839811205864, + "learning_rate": 1.8795714536033442e-05, + "loss": 9.1908, + "step": 75100 + }, + { + "epoch": 0.3750905141202028, + "grad_norm": 0.09543773531913757, + "learning_rate": 1.8794212621091892e-05, + "loss": 9.1941, + "step": 75110 + }, + { + "epoch": 0.3751404529451422, + "grad_norm": 0.09861119091510773, + "learning_rate": 1.8792710706150343e-05, + "loss": 9.186, + "step": 75120 + }, + { + "epoch": 0.3751903917700817, + "grad_norm": 0.09284248948097229, + "learning_rate": 1.8791208791208793e-05, + "loss": 9.1845, + "step": 75130 + }, + { + "epoch": 0.3752403305950211, + "grad_norm": 0.0985037088394165, + "learning_rate": 1.878970687626724e-05, + "loss": 9.172, + "step": 75140 + }, + { + "epoch": 0.3752902694199606, + "grad_norm": 0.09096460044384003, + "learning_rate": 1.878820496132569e-05, + "loss": 9.1894, + "step": 75150 + }, + { + "epoch": 0.3753402082449, + "grad_norm": 0.09545445442199707, + "learning_rate": 1.878670304638414e-05, + "loss": 9.1824, + "step": 75160 + }, + { + "epoch": 0.3753901470698394, + "grad_norm": 0.08448511362075806, + "learning_rate": 1.878520113144259e-05, + "loss": 9.1783, + "step": 75170 + }, + { + "epoch": 0.3754400858947789, + "grad_norm": 0.09297330677509308, + "learning_rate": 1.878369921650104e-05, + "loss": 9.1779, + "step": 75180 + }, + { + "epoch": 0.3754900247197183, + "grad_norm": 0.09360940754413605, + "learning_rate": 1.8782197301559487e-05, + "loss": 9.1796, + "step": 75190 + }, + { + "epoch": 0.3755399635446578, + "grad_norm": 0.09942979365587234, + "learning_rate": 1.8780695386617937e-05, + "loss": 9.1824, + "step": 75200 + }, + { + "epoch": 0.3755899023695972, + "grad_norm": 0.08949796110391617, + "learning_rate": 1.8779193471676387e-05, + "loss": 9.1709, + "step": 75210 + }, + { + "epoch": 0.3756398411945367, + "grad_norm": 0.09671638906002045, + "learning_rate": 1.8777691556734838e-05, + "loss": 9.1738, + "step": 75220 + }, + { + "epoch": 0.3756897800194761, + "grad_norm": 0.09702275693416595, + "learning_rate": 1.8776189641793288e-05, + "loss": 9.19, + "step": 75230 + }, + { + "epoch": 0.3757397188444156, + "grad_norm": 0.09984516352415085, + "learning_rate": 1.8774687726851738e-05, + "loss": 9.1752, + "step": 75240 + }, + { + "epoch": 0.375789657669355, + "grad_norm": 0.08986981213092804, + "learning_rate": 1.8773185811910185e-05, + "loss": 9.1787, + "step": 75250 + }, + { + "epoch": 0.3758395964942945, + "grad_norm": 0.09705835580825806, + "learning_rate": 1.8771683896968635e-05, + "loss": 9.1834, + "step": 75260 + }, + { + "epoch": 0.3758895353192339, + "grad_norm": 0.09416690468788147, + "learning_rate": 1.8770181982027085e-05, + "loss": 9.1762, + "step": 75270 + }, + { + "epoch": 0.3759394741441734, + "grad_norm": 0.09269827604293823, + "learning_rate": 1.8768680067085535e-05, + "loss": 9.173, + "step": 75280 + }, + { + "epoch": 0.3759894129691128, + "grad_norm": 0.08903074264526367, + "learning_rate": 1.8767178152143985e-05, + "loss": 9.183, + "step": 75290 + }, + { + "epoch": 0.3760393517940523, + "grad_norm": 0.09470517188310623, + "learning_rate": 1.8765676237202432e-05, + "loss": 9.1779, + "step": 75300 + }, + { + "epoch": 0.3760892906189917, + "grad_norm": 0.09774582087993622, + "learning_rate": 1.8764174322260882e-05, + "loss": 9.1639, + "step": 75310 + }, + { + "epoch": 0.3761392294439312, + "grad_norm": 0.0924445167183876, + "learning_rate": 1.8762672407319333e-05, + "loss": 9.1775, + "step": 75320 + }, + { + "epoch": 0.3761891682688706, + "grad_norm": 0.09741353988647461, + "learning_rate": 1.8761170492377783e-05, + "loss": 9.1726, + "step": 75330 + }, + { + "epoch": 0.3762391070938101, + "grad_norm": 0.0985332578420639, + "learning_rate": 1.8759668577436233e-05, + "loss": 9.1825, + "step": 75340 + }, + { + "epoch": 0.3762890459187495, + "grad_norm": 0.09728459268808365, + "learning_rate": 1.875816666249468e-05, + "loss": 9.1818, + "step": 75350 + }, + { + "epoch": 0.376338984743689, + "grad_norm": 0.09466975927352905, + "learning_rate": 1.875666474755313e-05, + "loss": 9.1808, + "step": 75360 + }, + { + "epoch": 0.3763889235686284, + "grad_norm": 0.09836606681346893, + "learning_rate": 1.875516283261158e-05, + "loss": 9.1755, + "step": 75370 + }, + { + "epoch": 0.3764388623935679, + "grad_norm": 0.09210119396448135, + "learning_rate": 1.875366091767003e-05, + "loss": 9.1742, + "step": 75380 + }, + { + "epoch": 0.3764888012185073, + "grad_norm": 0.09598752856254578, + "learning_rate": 1.875215900272848e-05, + "loss": 9.1785, + "step": 75390 + }, + { + "epoch": 0.3765387400434468, + "grad_norm": 0.09594704955816269, + "learning_rate": 1.8750657087786927e-05, + "loss": 9.17, + "step": 75400 + }, + { + "epoch": 0.3765886788683862, + "grad_norm": 0.09366414695978165, + "learning_rate": 1.8749155172845377e-05, + "loss": 9.1825, + "step": 75410 + }, + { + "epoch": 0.3766386176933257, + "grad_norm": 0.09399878233671188, + "learning_rate": 1.8747653257903828e-05, + "loss": 9.1663, + "step": 75420 + }, + { + "epoch": 0.3766885565182651, + "grad_norm": 0.09145677834749222, + "learning_rate": 1.8746151342962278e-05, + "loss": 9.1777, + "step": 75430 + }, + { + "epoch": 0.3767384953432046, + "grad_norm": 0.0970328152179718, + "learning_rate": 1.8744649428020728e-05, + "loss": 9.1692, + "step": 75440 + }, + { + "epoch": 0.376788434168144, + "grad_norm": 0.09038615226745605, + "learning_rate": 1.8743147513079175e-05, + "loss": 9.1891, + "step": 75450 + }, + { + "epoch": 0.3768383729930835, + "grad_norm": 0.09302138537168503, + "learning_rate": 1.8741645598137625e-05, + "loss": 9.1833, + "step": 75460 + }, + { + "epoch": 0.3768883118180229, + "grad_norm": 0.09453801065683365, + "learning_rate": 1.8740143683196075e-05, + "loss": 9.177, + "step": 75470 + }, + { + "epoch": 0.3769382506429624, + "grad_norm": 0.09908052533864975, + "learning_rate": 1.8738641768254525e-05, + "loss": 9.182, + "step": 75480 + }, + { + "epoch": 0.3769881894679018, + "grad_norm": 0.08896263688802719, + "learning_rate": 1.8737139853312975e-05, + "loss": 9.1729, + "step": 75490 + }, + { + "epoch": 0.3770381282928413, + "grad_norm": 0.09187068045139313, + "learning_rate": 1.8735637938371422e-05, + "loss": 9.1645, + "step": 75500 + }, + { + "epoch": 0.3770880671177807, + "grad_norm": 0.09145993739366531, + "learning_rate": 1.8734136023429872e-05, + "loss": 9.1855, + "step": 75510 + }, + { + "epoch": 0.3771380059427202, + "grad_norm": 0.09123537689447403, + "learning_rate": 1.8732634108488323e-05, + "loss": 9.1817, + "step": 75520 + }, + { + "epoch": 0.3771879447676596, + "grad_norm": 0.09334246069192886, + "learning_rate": 1.8731132193546773e-05, + "loss": 9.1796, + "step": 75530 + }, + { + "epoch": 0.3772378835925991, + "grad_norm": 0.09891509264707565, + "learning_rate": 1.8729630278605223e-05, + "loss": 9.1878, + "step": 75540 + }, + { + "epoch": 0.3772878224175385, + "grad_norm": 0.0893426239490509, + "learning_rate": 1.872812836366367e-05, + "loss": 9.1732, + "step": 75550 + }, + { + "epoch": 0.377337761242478, + "grad_norm": 0.09921654313802719, + "learning_rate": 1.8726626448722123e-05, + "loss": 9.1717, + "step": 75560 + }, + { + "epoch": 0.3773877000674174, + "grad_norm": 0.0956064984202385, + "learning_rate": 1.872512453378057e-05, + "loss": 9.1776, + "step": 75570 + }, + { + "epoch": 0.3774376388923569, + "grad_norm": 0.09236710518598557, + "learning_rate": 1.872362261883902e-05, + "loss": 9.1823, + "step": 75580 + }, + { + "epoch": 0.3774875777172963, + "grad_norm": 0.09194951504468918, + "learning_rate": 1.872212070389747e-05, + "loss": 9.1675, + "step": 75590 + }, + { + "epoch": 0.3775375165422358, + "grad_norm": 0.09250642359256744, + "learning_rate": 1.8720618788955917e-05, + "loss": 9.1711, + "step": 75600 + }, + { + "epoch": 0.3775874553671752, + "grad_norm": 0.09264052659273148, + "learning_rate": 1.871911687401437e-05, + "loss": 9.1799, + "step": 75610 + }, + { + "epoch": 0.3776373941921147, + "grad_norm": 0.09335634112358093, + "learning_rate": 1.8717614959072818e-05, + "loss": 9.1752, + "step": 75620 + }, + { + "epoch": 0.3776873330170541, + "grad_norm": 0.10500754415988922, + "learning_rate": 1.8716113044131268e-05, + "loss": 9.1719, + "step": 75630 + }, + { + "epoch": 0.37773727184199357, + "grad_norm": 0.09396055340766907, + "learning_rate": 1.8714611129189718e-05, + "loss": 9.1832, + "step": 75640 + }, + { + "epoch": 0.377787210666933, + "grad_norm": 0.09786514937877655, + "learning_rate": 1.8713109214248165e-05, + "loss": 9.1689, + "step": 75650 + }, + { + "epoch": 0.37783714949187247, + "grad_norm": 0.09014048427343369, + "learning_rate": 1.871160729930662e-05, + "loss": 9.1699, + "step": 75660 + }, + { + "epoch": 0.3778870883168119, + "grad_norm": 0.09452909231185913, + "learning_rate": 1.8710105384365065e-05, + "loss": 9.1733, + "step": 75670 + }, + { + "epoch": 0.37793702714175137, + "grad_norm": 0.09486046433448792, + "learning_rate": 1.8708603469423515e-05, + "loss": 9.1663, + "step": 75680 + }, + { + "epoch": 0.3779869659666908, + "grad_norm": 0.09308832138776779, + "learning_rate": 1.8707101554481965e-05, + "loss": 9.1795, + "step": 75690 + }, + { + "epoch": 0.37803690479163027, + "grad_norm": 0.09890872985124588, + "learning_rate": 1.8705599639540412e-05, + "loss": 9.1685, + "step": 75700 + }, + { + "epoch": 0.3780868436165697, + "grad_norm": 0.09047377854585648, + "learning_rate": 1.8704097724598866e-05, + "loss": 9.1856, + "step": 75710 + }, + { + "epoch": 0.37813678244150917, + "grad_norm": 0.09121832996606827, + "learning_rate": 1.8702595809657313e-05, + "loss": 9.1825, + "step": 75720 + }, + { + "epoch": 0.3781867212664486, + "grad_norm": 0.09841986000537872, + "learning_rate": 1.8701093894715763e-05, + "loss": 9.1776, + "step": 75730 + }, + { + "epoch": 0.37823666009138807, + "grad_norm": 0.09331878274679184, + "learning_rate": 1.8699591979774213e-05, + "loss": 9.1805, + "step": 75740 + }, + { + "epoch": 0.3782865989163275, + "grad_norm": 0.09422630816698074, + "learning_rate": 1.869809006483266e-05, + "loss": 9.1732, + "step": 75750 + }, + { + "epoch": 0.37833653774126697, + "grad_norm": 0.09663139283657074, + "learning_rate": 1.8696588149891113e-05, + "loss": 9.1744, + "step": 75760 + }, + { + "epoch": 0.3783864765662064, + "grad_norm": 0.0949576273560524, + "learning_rate": 1.869508623494956e-05, + "loss": 9.1771, + "step": 75770 + }, + { + "epoch": 0.37843641539114586, + "grad_norm": 0.09695305675268173, + "learning_rate": 1.869358432000801e-05, + "loss": 9.1587, + "step": 75780 + }, + { + "epoch": 0.3784863542160853, + "grad_norm": 0.09372842311859131, + "learning_rate": 1.869208240506646e-05, + "loss": 9.1616, + "step": 75790 + }, + { + "epoch": 0.37853629304102476, + "grad_norm": 0.09371675550937653, + "learning_rate": 1.8690580490124907e-05, + "loss": 9.1689, + "step": 75800 + }, + { + "epoch": 0.3785862318659642, + "grad_norm": 0.09579101204872131, + "learning_rate": 1.868907857518336e-05, + "loss": 9.1754, + "step": 75810 + }, + { + "epoch": 0.37863617069090366, + "grad_norm": 0.09259112924337387, + "learning_rate": 1.8687576660241808e-05, + "loss": 9.1816, + "step": 75820 + }, + { + "epoch": 0.3786861095158431, + "grad_norm": 0.09288639575242996, + "learning_rate": 1.8686074745300258e-05, + "loss": 9.1795, + "step": 75830 + }, + { + "epoch": 0.37873604834078256, + "grad_norm": 0.09235890954732895, + "learning_rate": 1.8684572830358708e-05, + "loss": 9.1722, + "step": 75840 + }, + { + "epoch": 0.378785987165722, + "grad_norm": 0.0942944809794426, + "learning_rate": 1.8683070915417155e-05, + "loss": 9.1728, + "step": 75850 + }, + { + "epoch": 0.37883592599066146, + "grad_norm": 0.09688469022512436, + "learning_rate": 1.868156900047561e-05, + "loss": 9.1777, + "step": 75860 + }, + { + "epoch": 0.3788858648156009, + "grad_norm": 0.08886689692735672, + "learning_rate": 1.8680067085534055e-05, + "loss": 9.1681, + "step": 75870 + }, + { + "epoch": 0.37893580364054036, + "grad_norm": 0.09597429633140564, + "learning_rate": 1.867856517059251e-05, + "loss": 9.1737, + "step": 75880 + }, + { + "epoch": 0.3789857424654798, + "grad_norm": 0.08873099833726883, + "learning_rate": 1.8677063255650955e-05, + "loss": 9.1717, + "step": 75890 + }, + { + "epoch": 0.37903568129041926, + "grad_norm": 0.08912356197834015, + "learning_rate": 1.8675561340709402e-05, + "loss": 9.1843, + "step": 75900 + }, + { + "epoch": 0.3790856201153587, + "grad_norm": 0.09434056282043457, + "learning_rate": 1.8674059425767856e-05, + "loss": 9.1582, + "step": 75910 + }, + { + "epoch": 0.37913555894029816, + "grad_norm": 0.08778839558362961, + "learning_rate": 1.8672557510826303e-05, + "loss": 9.1786, + "step": 75920 + }, + { + "epoch": 0.3791854977652376, + "grad_norm": 0.09374192357063293, + "learning_rate": 1.8671055595884756e-05, + "loss": 9.1732, + "step": 75930 + }, + { + "epoch": 0.37923543659017706, + "grad_norm": 0.0933762714266777, + "learning_rate": 1.8669553680943203e-05, + "loss": 9.1647, + "step": 75940 + }, + { + "epoch": 0.3792853754151165, + "grad_norm": 0.09484566748142242, + "learning_rate": 1.866805176600165e-05, + "loss": 9.1594, + "step": 75950 + }, + { + "epoch": 0.37933531424005595, + "grad_norm": 0.09006204456090927, + "learning_rate": 1.8666549851060103e-05, + "loss": 9.1784, + "step": 75960 + }, + { + "epoch": 0.3793852530649954, + "grad_norm": 0.09235017746686935, + "learning_rate": 1.866504793611855e-05, + "loss": 9.164, + "step": 75970 + }, + { + "epoch": 0.37943519188993485, + "grad_norm": 0.09987694025039673, + "learning_rate": 1.8663546021177004e-05, + "loss": 9.1695, + "step": 75980 + }, + { + "epoch": 0.3794851307148743, + "grad_norm": 0.08617845177650452, + "learning_rate": 1.866204410623545e-05, + "loss": 9.182, + "step": 75990 + }, + { + "epoch": 0.37953506953981375, + "grad_norm": 0.08854833245277405, + "learning_rate": 1.8660542191293897e-05, + "loss": 9.1611, + "step": 76000 + }, + { + "epoch": 0.3795850083647532, + "grad_norm": 0.09249111264944077, + "learning_rate": 1.865904027635235e-05, + "loss": 9.1808, + "step": 76010 + }, + { + "epoch": 0.37963494718969265, + "grad_norm": 0.09673107415437698, + "learning_rate": 1.8657538361410798e-05, + "loss": 9.1706, + "step": 76020 + }, + { + "epoch": 0.3796848860146321, + "grad_norm": 0.09875845909118652, + "learning_rate": 1.865603644646925e-05, + "loss": 9.1723, + "step": 76030 + }, + { + "epoch": 0.37973482483957155, + "grad_norm": 0.09128276258707047, + "learning_rate": 1.8654534531527698e-05, + "loss": 9.1666, + "step": 76040 + }, + { + "epoch": 0.37978476366451097, + "grad_norm": 0.09420790523290634, + "learning_rate": 1.8653032616586145e-05, + "loss": 9.163, + "step": 76050 + }, + { + "epoch": 0.37983470248945045, + "grad_norm": 0.09466836601495743, + "learning_rate": 1.86515307016446e-05, + "loss": 9.1646, + "step": 76060 + }, + { + "epoch": 0.37988464131438987, + "grad_norm": 0.10341818630695343, + "learning_rate": 1.8650028786703045e-05, + "loss": 9.1677, + "step": 76070 + }, + { + "epoch": 0.37993458013932935, + "grad_norm": 0.0988142341375351, + "learning_rate": 1.86485268717615e-05, + "loss": 9.1715, + "step": 76080 + }, + { + "epoch": 0.37998451896426877, + "grad_norm": 0.09664300084114075, + "learning_rate": 1.8647024956819946e-05, + "loss": 9.1588, + "step": 76090 + }, + { + "epoch": 0.38003445778920825, + "grad_norm": 0.09185276925563812, + "learning_rate": 1.8645523041878392e-05, + "loss": 9.1747, + "step": 76100 + }, + { + "epoch": 0.38008439661414767, + "grad_norm": 0.09593552350997925, + "learning_rate": 1.8644021126936846e-05, + "loss": 9.1682, + "step": 76110 + }, + { + "epoch": 0.38013433543908715, + "grad_norm": 0.09431128203868866, + "learning_rate": 1.8642519211995293e-05, + "loss": 9.1772, + "step": 76120 + }, + { + "epoch": 0.38018427426402657, + "grad_norm": 0.09099570661783218, + "learning_rate": 1.8641017297053746e-05, + "loss": 9.1571, + "step": 76130 + }, + { + "epoch": 0.38023421308896604, + "grad_norm": 0.08938246220350266, + "learning_rate": 1.8639515382112193e-05, + "loss": 9.1721, + "step": 76140 + }, + { + "epoch": 0.38028415191390547, + "grad_norm": 0.09171757102012634, + "learning_rate": 1.863801346717064e-05, + "loss": 9.1665, + "step": 76150 + }, + { + "epoch": 0.3803340907388449, + "grad_norm": 0.09394662827253342, + "learning_rate": 1.8636511552229093e-05, + "loss": 9.1786, + "step": 76160 + }, + { + "epoch": 0.38038402956378436, + "grad_norm": 0.0892520397901535, + "learning_rate": 1.863500963728754e-05, + "loss": 9.1615, + "step": 76170 + }, + { + "epoch": 0.3804339683887238, + "grad_norm": 0.09431727975606918, + "learning_rate": 1.8633507722345994e-05, + "loss": 9.1807, + "step": 76180 + }, + { + "epoch": 0.38048390721366326, + "grad_norm": 0.09187456965446472, + "learning_rate": 1.863200580740444e-05, + "loss": 9.1699, + "step": 76190 + }, + { + "epoch": 0.3805338460386027, + "grad_norm": 0.09355001896619797, + "learning_rate": 1.863050389246289e-05, + "loss": 9.1677, + "step": 76200 + }, + { + "epoch": 0.38058378486354216, + "grad_norm": 0.09874553978443146, + "learning_rate": 1.862900197752134e-05, + "loss": 9.1619, + "step": 76210 + }, + { + "epoch": 0.3806337236884816, + "grad_norm": 0.0971277505159378, + "learning_rate": 1.8627500062579788e-05, + "loss": 9.157, + "step": 76220 + }, + { + "epoch": 0.38068366251342106, + "grad_norm": 0.09368085116147995, + "learning_rate": 1.862599814763824e-05, + "loss": 9.1691, + "step": 76230 + }, + { + "epoch": 0.3807336013383605, + "grad_norm": 0.08936905115842819, + "learning_rate": 1.8624496232696688e-05, + "loss": 9.1715, + "step": 76240 + }, + { + "epoch": 0.38078354016329996, + "grad_norm": 0.09351339191198349, + "learning_rate": 1.8622994317755138e-05, + "loss": 9.1657, + "step": 76250 + }, + { + "epoch": 0.3808334789882394, + "grad_norm": 0.09461808949708939, + "learning_rate": 1.862149240281359e-05, + "loss": 9.1624, + "step": 76260 + }, + { + "epoch": 0.38088341781317886, + "grad_norm": 0.08949335664510727, + "learning_rate": 1.8619990487872035e-05, + "loss": 9.1684, + "step": 76270 + }, + { + "epoch": 0.3809333566381183, + "grad_norm": 0.08932676911354065, + "learning_rate": 1.861848857293049e-05, + "loss": 9.1631, + "step": 76280 + }, + { + "epoch": 0.38098329546305776, + "grad_norm": 0.09498759359121323, + "learning_rate": 1.8616986657988936e-05, + "loss": 9.163, + "step": 76290 + }, + { + "epoch": 0.3810332342879972, + "grad_norm": 0.09363845735788345, + "learning_rate": 1.8615484743047386e-05, + "loss": 9.174, + "step": 76300 + }, + { + "epoch": 0.38108317311293666, + "grad_norm": 0.08573130518198013, + "learning_rate": 1.8613982828105836e-05, + "loss": 9.1665, + "step": 76310 + }, + { + "epoch": 0.3811331119378761, + "grad_norm": 0.09437863528728485, + "learning_rate": 1.8612480913164283e-05, + "loss": 9.1553, + "step": 76320 + }, + { + "epoch": 0.38118305076281556, + "grad_norm": 0.09514939785003662, + "learning_rate": 1.8610978998222736e-05, + "loss": 9.161, + "step": 76330 + }, + { + "epoch": 0.381232989587755, + "grad_norm": 0.0890861302614212, + "learning_rate": 1.8609477083281183e-05, + "loss": 9.1585, + "step": 76340 + }, + { + "epoch": 0.38128292841269446, + "grad_norm": 0.09901072829961777, + "learning_rate": 1.8607975168339633e-05, + "loss": 9.1656, + "step": 76350 + }, + { + "epoch": 0.3813328672376339, + "grad_norm": 0.09135191142559052, + "learning_rate": 1.8606473253398083e-05, + "loss": 9.1679, + "step": 76360 + }, + { + "epoch": 0.38138280606257335, + "grad_norm": 0.08994290977716446, + "learning_rate": 1.860497133845653e-05, + "loss": 9.1553, + "step": 76370 + }, + { + "epoch": 0.3814327448875128, + "grad_norm": 0.09007209539413452, + "learning_rate": 1.8603469423514984e-05, + "loss": 9.1617, + "step": 76380 + }, + { + "epoch": 0.38148268371245225, + "grad_norm": 0.09258635342121124, + "learning_rate": 1.860196750857343e-05, + "loss": 9.1664, + "step": 76390 + }, + { + "epoch": 0.3815326225373917, + "grad_norm": 0.09930216521024704, + "learning_rate": 1.860046559363188e-05, + "loss": 9.1606, + "step": 76400 + }, + { + "epoch": 0.38158256136233115, + "grad_norm": 0.10146905481815338, + "learning_rate": 1.859896367869033e-05, + "loss": 9.1603, + "step": 76410 + }, + { + "epoch": 0.3816325001872706, + "grad_norm": 0.09039684385061264, + "learning_rate": 1.8597461763748778e-05, + "loss": 9.1433, + "step": 76420 + }, + { + "epoch": 0.38168243901221005, + "grad_norm": 0.09020677208900452, + "learning_rate": 1.859595984880723e-05, + "loss": 9.1615, + "step": 76430 + }, + { + "epoch": 0.3817323778371495, + "grad_norm": 0.0926944836974144, + "learning_rate": 1.8594457933865678e-05, + "loss": 9.1614, + "step": 76440 + }, + { + "epoch": 0.38178231666208895, + "grad_norm": 0.08853311091661453, + "learning_rate": 1.8592956018924128e-05, + "loss": 9.1708, + "step": 76450 + }, + { + "epoch": 0.38183225548702837, + "grad_norm": 0.09793388843536377, + "learning_rate": 1.859145410398258e-05, + "loss": 9.1545, + "step": 76460 + }, + { + "epoch": 0.38188219431196785, + "grad_norm": 0.08943363279104233, + "learning_rate": 1.8589952189041025e-05, + "loss": 9.1679, + "step": 76470 + }, + { + "epoch": 0.38193213313690727, + "grad_norm": 0.09909660369157791, + "learning_rate": 1.858845027409948e-05, + "loss": 9.1526, + "step": 76480 + }, + { + "epoch": 0.38198207196184675, + "grad_norm": 0.09491763263940811, + "learning_rate": 1.8586948359157926e-05, + "loss": 9.1674, + "step": 76490 + }, + { + "epoch": 0.38203201078678617, + "grad_norm": 0.0981326624751091, + "learning_rate": 1.8585446444216376e-05, + "loss": 9.1596, + "step": 76500 + }, + { + "epoch": 0.38208194961172565, + "grad_norm": 0.09292647987604141, + "learning_rate": 1.8583944529274826e-05, + "loss": 9.1512, + "step": 76510 + }, + { + "epoch": 0.38213188843666507, + "grad_norm": 0.0934506356716156, + "learning_rate": 1.8582442614333273e-05, + "loss": 9.1633, + "step": 76520 + }, + { + "epoch": 0.38218182726160455, + "grad_norm": 0.09270615875720978, + "learning_rate": 1.8580940699391726e-05, + "loss": 9.155, + "step": 76530 + }, + { + "epoch": 0.38223176608654397, + "grad_norm": 0.09436081349849701, + "learning_rate": 1.8579438784450173e-05, + "loss": 9.1638, + "step": 76540 + }, + { + "epoch": 0.38228170491148344, + "grad_norm": 0.09928996115922928, + "learning_rate": 1.8577936869508623e-05, + "loss": 9.1611, + "step": 76550 + }, + { + "epoch": 0.38233164373642287, + "grad_norm": 0.09387561678886414, + "learning_rate": 1.8576434954567073e-05, + "loss": 9.1742, + "step": 76560 + }, + { + "epoch": 0.38238158256136234, + "grad_norm": 0.09134583920240402, + "learning_rate": 1.8574933039625524e-05, + "loss": 9.1692, + "step": 76570 + }, + { + "epoch": 0.38243152138630176, + "grad_norm": 0.09804858267307281, + "learning_rate": 1.8573431124683974e-05, + "loss": 9.1597, + "step": 76580 + }, + { + "epoch": 0.38248146021124124, + "grad_norm": 0.09265462309122086, + "learning_rate": 1.857192920974242e-05, + "loss": 9.1541, + "step": 76590 + }, + { + "epoch": 0.38253139903618066, + "grad_norm": 0.0962829664349556, + "learning_rate": 1.8570427294800874e-05, + "loss": 9.1548, + "step": 76600 + }, + { + "epoch": 0.38258133786112014, + "grad_norm": 0.10263549536466599, + "learning_rate": 1.856892537985932e-05, + "loss": 9.1655, + "step": 76610 + }, + { + "epoch": 0.38263127668605956, + "grad_norm": 0.0963212177157402, + "learning_rate": 1.856742346491777e-05, + "loss": 9.161, + "step": 76620 + }, + { + "epoch": 0.38268121551099904, + "grad_norm": 0.08831122517585754, + "learning_rate": 1.856592154997622e-05, + "loss": 9.1554, + "step": 76630 + }, + { + "epoch": 0.38273115433593846, + "grad_norm": 0.0948970839381218, + "learning_rate": 1.8564419635034668e-05, + "loss": 9.1628, + "step": 76640 + }, + { + "epoch": 0.38278109316087794, + "grad_norm": 0.09297934174537659, + "learning_rate": 1.856291772009312e-05, + "loss": 9.1581, + "step": 76650 + }, + { + "epoch": 0.38283103198581736, + "grad_norm": 0.09483528882265091, + "learning_rate": 1.856141580515157e-05, + "loss": 9.1561, + "step": 76660 + }, + { + "epoch": 0.38288097081075684, + "grad_norm": 0.09543635696172714, + "learning_rate": 1.855991389021002e-05, + "loss": 9.1531, + "step": 76670 + }, + { + "epoch": 0.38293090963569626, + "grad_norm": 0.09006334096193314, + "learning_rate": 1.855841197526847e-05, + "loss": 9.157, + "step": 76680 + }, + { + "epoch": 0.38298084846063574, + "grad_norm": 0.09074097126722336, + "learning_rate": 1.8556910060326916e-05, + "loss": 9.1644, + "step": 76690 + }, + { + "epoch": 0.38303078728557516, + "grad_norm": 0.09905076771974564, + "learning_rate": 1.855540814538537e-05, + "loss": 9.1571, + "step": 76700 + }, + { + "epoch": 0.38308072611051464, + "grad_norm": 0.09774630516767502, + "learning_rate": 1.8553906230443816e-05, + "loss": 9.1457, + "step": 76710 + }, + { + "epoch": 0.38313066493545406, + "grad_norm": 0.09291476756334305, + "learning_rate": 1.8552404315502266e-05, + "loss": 9.144, + "step": 76720 + }, + { + "epoch": 0.38318060376039353, + "grad_norm": 0.09428045153617859, + "learning_rate": 1.8550902400560716e-05, + "loss": 9.1604, + "step": 76730 + }, + { + "epoch": 0.38323054258533296, + "grad_norm": 0.09279389679431915, + "learning_rate": 1.8549400485619163e-05, + "loss": 9.1636, + "step": 76740 + }, + { + "epoch": 0.38328048141027243, + "grad_norm": 0.10000916570425034, + "learning_rate": 1.8547898570677617e-05, + "loss": 9.1772, + "step": 76750 + }, + { + "epoch": 0.38333042023521186, + "grad_norm": 0.09311681985855103, + "learning_rate": 1.8546396655736063e-05, + "loss": 9.1693, + "step": 76760 + }, + { + "epoch": 0.38338035906015133, + "grad_norm": 0.09120059013366699, + "learning_rate": 1.8544894740794514e-05, + "loss": 9.1541, + "step": 76770 + }, + { + "epoch": 0.38343029788509075, + "grad_norm": 0.08788907527923584, + "learning_rate": 1.8543392825852964e-05, + "loss": 9.1603, + "step": 76780 + }, + { + "epoch": 0.38348023671003023, + "grad_norm": 0.09764920175075531, + "learning_rate": 1.854189091091141e-05, + "loss": 9.1641, + "step": 76790 + }, + { + "epoch": 0.38353017553496965, + "grad_norm": 0.098087377846241, + "learning_rate": 1.8540388995969864e-05, + "loss": 9.1472, + "step": 76800 + }, + { + "epoch": 0.38358011435990913, + "grad_norm": 0.09712155908346176, + "learning_rate": 1.853888708102831e-05, + "loss": 9.1688, + "step": 76810 + }, + { + "epoch": 0.38363005318484855, + "grad_norm": 0.09204491227865219, + "learning_rate": 1.853738516608676e-05, + "loss": 9.1757, + "step": 76820 + }, + { + "epoch": 0.38367999200978803, + "grad_norm": 0.09970824420452118, + "learning_rate": 1.853588325114521e-05, + "loss": 9.171, + "step": 76830 + }, + { + "epoch": 0.38372993083472745, + "grad_norm": 0.09648123383522034, + "learning_rate": 1.8534381336203658e-05, + "loss": 9.1693, + "step": 76840 + }, + { + "epoch": 0.38377986965966693, + "grad_norm": 0.09382256120443344, + "learning_rate": 1.853287942126211e-05, + "loss": 9.1641, + "step": 76850 + }, + { + "epoch": 0.38382980848460635, + "grad_norm": 0.09341836720705032, + "learning_rate": 1.853137750632056e-05, + "loss": 9.1648, + "step": 76860 + }, + { + "epoch": 0.3838797473095458, + "grad_norm": 0.09189660847187042, + "learning_rate": 1.852987559137901e-05, + "loss": 9.1553, + "step": 76870 + }, + { + "epoch": 0.38392968613448525, + "grad_norm": 0.09186588227748871, + "learning_rate": 1.852837367643746e-05, + "loss": 9.1624, + "step": 76880 + }, + { + "epoch": 0.3839796249594247, + "grad_norm": 0.09395985305309296, + "learning_rate": 1.852687176149591e-05, + "loss": 9.1741, + "step": 76890 + }, + { + "epoch": 0.38402956378436415, + "grad_norm": 0.09236618131399155, + "learning_rate": 1.852536984655436e-05, + "loss": 9.1473, + "step": 76900 + }, + { + "epoch": 0.3840795026093036, + "grad_norm": 0.1052381843328476, + "learning_rate": 1.8523867931612806e-05, + "loss": 9.1679, + "step": 76910 + }, + { + "epoch": 0.38412944143424305, + "grad_norm": 0.09067463874816895, + "learning_rate": 1.8522366016671256e-05, + "loss": 9.1509, + "step": 76920 + }, + { + "epoch": 0.3841793802591825, + "grad_norm": 0.09203337877988815, + "learning_rate": 1.8520864101729706e-05, + "loss": 9.1688, + "step": 76930 + }, + { + "epoch": 0.38422931908412195, + "grad_norm": 0.09451109915971756, + "learning_rate": 1.8519362186788156e-05, + "loss": 9.1508, + "step": 76940 + }, + { + "epoch": 0.3842792579090614, + "grad_norm": 0.09311412274837494, + "learning_rate": 1.8517860271846607e-05, + "loss": 9.1549, + "step": 76950 + }, + { + "epoch": 0.38432919673400084, + "grad_norm": 0.08877348154783249, + "learning_rate": 1.8516358356905053e-05, + "loss": 9.1582, + "step": 76960 + }, + { + "epoch": 0.3843791355589403, + "grad_norm": 0.09304699301719666, + "learning_rate": 1.8514856441963504e-05, + "loss": 9.1632, + "step": 76970 + }, + { + "epoch": 0.38442907438387974, + "grad_norm": 0.09285631775856018, + "learning_rate": 1.8513354527021954e-05, + "loss": 9.1592, + "step": 76980 + }, + { + "epoch": 0.3844790132088192, + "grad_norm": 0.09747754037380219, + "learning_rate": 1.8511852612080404e-05, + "loss": 9.1499, + "step": 76990 + }, + { + "epoch": 0.38452895203375864, + "grad_norm": 0.09608352929353714, + "learning_rate": 1.8510350697138854e-05, + "loss": 9.1616, + "step": 77000 + }, + { + "epoch": 0.3845788908586981, + "grad_norm": 0.09752684831619263, + "learning_rate": 1.85088487821973e-05, + "loss": 9.1647, + "step": 77010 + }, + { + "epoch": 0.38462882968363754, + "grad_norm": 0.09097377210855484, + "learning_rate": 1.850734686725575e-05, + "loss": 9.1597, + "step": 77020 + }, + { + "epoch": 0.384678768508577, + "grad_norm": 0.09708648175001144, + "learning_rate": 1.85058449523142e-05, + "loss": 9.158, + "step": 77030 + }, + { + "epoch": 0.38472870733351644, + "grad_norm": 0.09771420806646347, + "learning_rate": 1.850434303737265e-05, + "loss": 9.1618, + "step": 77040 + }, + { + "epoch": 0.3847786461584559, + "grad_norm": 0.09642086923122406, + "learning_rate": 1.85028411224311e-05, + "loss": 9.1574, + "step": 77050 + }, + { + "epoch": 0.38482858498339534, + "grad_norm": 0.09355588257312775, + "learning_rate": 1.850133920748955e-05, + "loss": 9.1562, + "step": 77060 + }, + { + "epoch": 0.3848785238083348, + "grad_norm": 0.09392425417900085, + "learning_rate": 1.8499837292548e-05, + "loss": 9.1579, + "step": 77070 + }, + { + "epoch": 0.38492846263327424, + "grad_norm": 0.09295708686113358, + "learning_rate": 1.849833537760645e-05, + "loss": 9.1519, + "step": 77080 + }, + { + "epoch": 0.3849784014582137, + "grad_norm": 0.09548468142747879, + "learning_rate": 1.84968334626649e-05, + "loss": 9.1605, + "step": 77090 + }, + { + "epoch": 0.38502834028315314, + "grad_norm": 0.09481420367956161, + "learning_rate": 1.849533154772335e-05, + "loss": 9.1501, + "step": 77100 + }, + { + "epoch": 0.3850782791080926, + "grad_norm": 0.10212825238704681, + "learning_rate": 1.8493829632781796e-05, + "loss": 9.1495, + "step": 77110 + }, + { + "epoch": 0.38512821793303204, + "grad_norm": 0.09605475515127182, + "learning_rate": 1.8492327717840246e-05, + "loss": 9.1723, + "step": 77120 + }, + { + "epoch": 0.3851781567579715, + "grad_norm": 0.09045825153589249, + "learning_rate": 1.8490825802898696e-05, + "loss": 9.1645, + "step": 77130 + }, + { + "epoch": 0.38522809558291093, + "grad_norm": 0.0943480134010315, + "learning_rate": 1.8489323887957146e-05, + "loss": 9.1471, + "step": 77140 + }, + { + "epoch": 0.38527803440785036, + "grad_norm": 0.09214089065790176, + "learning_rate": 1.8487821973015597e-05, + "loss": 9.1521, + "step": 77150 + }, + { + "epoch": 0.38532797323278983, + "grad_norm": 0.09132665395736694, + "learning_rate": 1.8486320058074043e-05, + "loss": 9.1588, + "step": 77160 + }, + { + "epoch": 0.38537791205772925, + "grad_norm": 0.09081696718931198, + "learning_rate": 1.8484818143132494e-05, + "loss": 9.1676, + "step": 77170 + }, + { + "epoch": 0.38542785088266873, + "grad_norm": 0.09519261121749878, + "learning_rate": 1.8483316228190944e-05, + "loss": 9.1709, + "step": 77180 + }, + { + "epoch": 0.38547778970760815, + "grad_norm": 0.090855672955513, + "learning_rate": 1.8481814313249394e-05, + "loss": 9.1543, + "step": 77190 + }, + { + "epoch": 0.38552772853254763, + "grad_norm": 0.09267590194940567, + "learning_rate": 1.8480312398307844e-05, + "loss": 9.1494, + "step": 77200 + }, + { + "epoch": 0.38557766735748705, + "grad_norm": 0.1009821891784668, + "learning_rate": 1.8478810483366294e-05, + "loss": 9.1579, + "step": 77210 + }, + { + "epoch": 0.38562760618242653, + "grad_norm": 0.09545056521892548, + "learning_rate": 1.847730856842474e-05, + "loss": 9.1533, + "step": 77220 + }, + { + "epoch": 0.38567754500736595, + "grad_norm": 0.09436902403831482, + "learning_rate": 1.847580665348319e-05, + "loss": 9.1525, + "step": 77230 + }, + { + "epoch": 0.38572748383230543, + "grad_norm": 0.09197907149791718, + "learning_rate": 1.847430473854164e-05, + "loss": 9.1414, + "step": 77240 + }, + { + "epoch": 0.38577742265724485, + "grad_norm": 0.08811882883310318, + "learning_rate": 1.847280282360009e-05, + "loss": 9.1626, + "step": 77250 + }, + { + "epoch": 0.38582736148218433, + "grad_norm": 0.09159471839666367, + "learning_rate": 1.8471300908658542e-05, + "loss": 9.1453, + "step": 77260 + }, + { + "epoch": 0.38587730030712375, + "grad_norm": 0.08988473564386368, + "learning_rate": 1.846979899371699e-05, + "loss": 9.1385, + "step": 77270 + }, + { + "epoch": 0.3859272391320632, + "grad_norm": 0.09255439788103104, + "learning_rate": 1.846829707877544e-05, + "loss": 9.1462, + "step": 77280 + }, + { + "epoch": 0.38597717795700265, + "grad_norm": 0.09517369419336319, + "learning_rate": 1.846679516383389e-05, + "loss": 9.1512, + "step": 77290 + }, + { + "epoch": 0.3860271167819421, + "grad_norm": 0.09043008089065552, + "learning_rate": 1.846529324889234e-05, + "loss": 9.1543, + "step": 77300 + }, + { + "epoch": 0.38607705560688155, + "grad_norm": 0.0920136496424675, + "learning_rate": 1.846379133395079e-05, + "loss": 9.1581, + "step": 77310 + }, + { + "epoch": 0.386126994431821, + "grad_norm": 0.09350711852312088, + "learning_rate": 1.8462289419009236e-05, + "loss": 9.1577, + "step": 77320 + }, + { + "epoch": 0.38617693325676045, + "grad_norm": 0.09980452060699463, + "learning_rate": 1.8460787504067686e-05, + "loss": 9.1488, + "step": 77330 + }, + { + "epoch": 0.3862268720816999, + "grad_norm": 0.09318140149116516, + "learning_rate": 1.8459285589126136e-05, + "loss": 9.1622, + "step": 77340 + }, + { + "epoch": 0.38627681090663935, + "grad_norm": 0.08900347352027893, + "learning_rate": 1.8457783674184587e-05, + "loss": 9.1495, + "step": 77350 + }, + { + "epoch": 0.3863267497315788, + "grad_norm": 0.09255148470401764, + "learning_rate": 1.8456281759243037e-05, + "loss": 9.1642, + "step": 77360 + }, + { + "epoch": 0.38637668855651824, + "grad_norm": 0.09274660795927048, + "learning_rate": 1.8454779844301484e-05, + "loss": 9.1568, + "step": 77370 + }, + { + "epoch": 0.3864266273814577, + "grad_norm": 0.09435738623142242, + "learning_rate": 1.8453277929359934e-05, + "loss": 9.1571, + "step": 77380 + }, + { + "epoch": 0.38647656620639714, + "grad_norm": 0.09425579011440277, + "learning_rate": 1.8451776014418384e-05, + "loss": 9.1503, + "step": 77390 + }, + { + "epoch": 0.3865265050313366, + "grad_norm": 0.09888068586587906, + "learning_rate": 1.8450274099476834e-05, + "loss": 9.1483, + "step": 77400 + }, + { + "epoch": 0.38657644385627604, + "grad_norm": 0.0868912860751152, + "learning_rate": 1.8448772184535284e-05, + "loss": 9.1477, + "step": 77410 + }, + { + "epoch": 0.3866263826812155, + "grad_norm": 0.09297952800989151, + "learning_rate": 1.844727026959373e-05, + "loss": 9.1485, + "step": 77420 + }, + { + "epoch": 0.38667632150615494, + "grad_norm": 0.09065989404916763, + "learning_rate": 1.844576835465218e-05, + "loss": 9.1491, + "step": 77430 + }, + { + "epoch": 0.3867262603310944, + "grad_norm": 0.0999770238995552, + "learning_rate": 1.844426643971063e-05, + "loss": 9.1413, + "step": 77440 + }, + { + "epoch": 0.38677619915603384, + "grad_norm": 0.09342008084058762, + "learning_rate": 1.844276452476908e-05, + "loss": 9.1529, + "step": 77450 + }, + { + "epoch": 0.3868261379809733, + "grad_norm": 0.09146399050951004, + "learning_rate": 1.8441262609827532e-05, + "loss": 9.1423, + "step": 77460 + }, + { + "epoch": 0.38687607680591274, + "grad_norm": 0.09426107257604599, + "learning_rate": 1.843976069488598e-05, + "loss": 9.1583, + "step": 77470 + }, + { + "epoch": 0.3869260156308522, + "grad_norm": 0.0957350954413414, + "learning_rate": 1.843825877994443e-05, + "loss": 9.1493, + "step": 77480 + }, + { + "epoch": 0.38697595445579164, + "grad_norm": 0.09453484416007996, + "learning_rate": 1.843675686500288e-05, + "loss": 9.1603, + "step": 77490 + }, + { + "epoch": 0.3870258932807311, + "grad_norm": 0.09307164698839188, + "learning_rate": 1.843525495006133e-05, + "loss": 9.1468, + "step": 77500 + }, + { + "epoch": 0.38707583210567054, + "grad_norm": 0.09427579492330551, + "learning_rate": 1.843375303511978e-05, + "loss": 9.1512, + "step": 77510 + }, + { + "epoch": 0.38712577093061, + "grad_norm": 0.09382929652929306, + "learning_rate": 1.8432251120178226e-05, + "loss": 9.154, + "step": 77520 + }, + { + "epoch": 0.38717570975554944, + "grad_norm": 0.0964818224310875, + "learning_rate": 1.843074920523668e-05, + "loss": 9.1498, + "step": 77530 + }, + { + "epoch": 0.3872256485804889, + "grad_norm": 0.09739819914102554, + "learning_rate": 1.8429247290295127e-05, + "loss": 9.1422, + "step": 77540 + }, + { + "epoch": 0.38727558740542833, + "grad_norm": 0.0981798991560936, + "learning_rate": 1.8427745375353577e-05, + "loss": 9.1577, + "step": 77550 + }, + { + "epoch": 0.3873255262303678, + "grad_norm": 0.09459348767995834, + "learning_rate": 1.8426243460412027e-05, + "loss": 9.1525, + "step": 77560 + }, + { + "epoch": 0.38737546505530723, + "grad_norm": 0.08874860405921936, + "learning_rate": 1.8424741545470474e-05, + "loss": 9.1339, + "step": 77570 + }, + { + "epoch": 0.3874254038802467, + "grad_norm": 0.0875902771949768, + "learning_rate": 1.8423239630528927e-05, + "loss": 9.1537, + "step": 77580 + }, + { + "epoch": 0.38747534270518613, + "grad_norm": 0.09034288674592972, + "learning_rate": 1.8421737715587374e-05, + "loss": 9.1553, + "step": 77590 + }, + { + "epoch": 0.3875252815301256, + "grad_norm": 0.09590810537338257, + "learning_rate": 1.8420235800645824e-05, + "loss": 9.162, + "step": 77600 + }, + { + "epoch": 0.38757522035506503, + "grad_norm": 0.0925174131989479, + "learning_rate": 1.8418733885704274e-05, + "loss": 9.1575, + "step": 77610 + }, + { + "epoch": 0.3876251591800045, + "grad_norm": 0.09790638089179993, + "learning_rate": 1.841723197076272e-05, + "loss": 9.1518, + "step": 77620 + }, + { + "epoch": 0.38767509800494393, + "grad_norm": 0.09194985032081604, + "learning_rate": 1.8415730055821175e-05, + "loss": 9.1374, + "step": 77630 + }, + { + "epoch": 0.3877250368298834, + "grad_norm": 0.09064792096614838, + "learning_rate": 1.841422814087962e-05, + "loss": 9.1472, + "step": 77640 + }, + { + "epoch": 0.38777497565482283, + "grad_norm": 0.09329324215650558, + "learning_rate": 1.841272622593807e-05, + "loss": 9.1595, + "step": 77650 + }, + { + "epoch": 0.3878249144797623, + "grad_norm": 0.09056134521961212, + "learning_rate": 1.8411224310996522e-05, + "loss": 9.1453, + "step": 77660 + }, + { + "epoch": 0.3878748533047017, + "grad_norm": 0.08973592519760132, + "learning_rate": 1.840972239605497e-05, + "loss": 9.1536, + "step": 77670 + }, + { + "epoch": 0.3879247921296412, + "grad_norm": 0.09086141735315323, + "learning_rate": 1.8408220481113422e-05, + "loss": 9.1471, + "step": 77680 + }, + { + "epoch": 0.3879747309545806, + "grad_norm": 0.09189939498901367, + "learning_rate": 1.840671856617187e-05, + "loss": 9.1613, + "step": 77690 + }, + { + "epoch": 0.3880246697795201, + "grad_norm": 0.09157737344503403, + "learning_rate": 1.840521665123032e-05, + "loss": 9.1504, + "step": 77700 + }, + { + "epoch": 0.3880746086044595, + "grad_norm": 0.09373574703931808, + "learning_rate": 1.840371473628877e-05, + "loss": 9.1432, + "step": 77710 + }, + { + "epoch": 0.388124547429399, + "grad_norm": 0.09311201423406601, + "learning_rate": 1.8402212821347216e-05, + "loss": 9.1433, + "step": 77720 + }, + { + "epoch": 0.3881744862543384, + "grad_norm": 0.09637656062841415, + "learning_rate": 1.840071090640567e-05, + "loss": 9.1406, + "step": 77730 + }, + { + "epoch": 0.3882244250792779, + "grad_norm": 0.09410546720027924, + "learning_rate": 1.8399208991464117e-05, + "loss": 9.1557, + "step": 77740 + }, + { + "epoch": 0.3882743639042173, + "grad_norm": 0.09236341714859009, + "learning_rate": 1.8397707076522567e-05, + "loss": 9.151, + "step": 77750 + }, + { + "epoch": 0.3883243027291568, + "grad_norm": 0.10012514889240265, + "learning_rate": 1.8396205161581017e-05, + "loss": 9.1482, + "step": 77760 + }, + { + "epoch": 0.3883742415540962, + "grad_norm": 0.09507282823324203, + "learning_rate": 1.8394703246639464e-05, + "loss": 9.1616, + "step": 77770 + }, + { + "epoch": 0.3884241803790357, + "grad_norm": 0.09531018882989883, + "learning_rate": 1.8393201331697917e-05, + "loss": 9.1508, + "step": 77780 + }, + { + "epoch": 0.3884741192039751, + "grad_norm": 0.09432236105203629, + "learning_rate": 1.8391699416756364e-05, + "loss": 9.1536, + "step": 77790 + }, + { + "epoch": 0.3885240580289146, + "grad_norm": 0.09611290693283081, + "learning_rate": 1.8390197501814814e-05, + "loss": 9.1479, + "step": 77800 + }, + { + "epoch": 0.388573996853854, + "grad_norm": 0.09352581202983856, + "learning_rate": 1.8388695586873264e-05, + "loss": 9.157, + "step": 77810 + }, + { + "epoch": 0.3886239356787935, + "grad_norm": 0.09698393195867538, + "learning_rate": 1.838719367193171e-05, + "loss": 9.1378, + "step": 77820 + }, + { + "epoch": 0.3886738745037329, + "grad_norm": 0.09182106703519821, + "learning_rate": 1.8385691756990165e-05, + "loss": 9.1501, + "step": 77830 + }, + { + "epoch": 0.3887238133286724, + "grad_norm": 0.09386096894741058, + "learning_rate": 1.838418984204861e-05, + "loss": 9.1381, + "step": 77840 + }, + { + "epoch": 0.3887737521536118, + "grad_norm": 0.09277167916297913, + "learning_rate": 1.8382687927107065e-05, + "loss": 9.1395, + "step": 77850 + }, + { + "epoch": 0.3888236909785513, + "grad_norm": 0.09407127648591995, + "learning_rate": 1.8381186012165512e-05, + "loss": 9.1482, + "step": 77860 + }, + { + "epoch": 0.3888736298034907, + "grad_norm": 0.09299039095640182, + "learning_rate": 1.837968409722396e-05, + "loss": 9.1526, + "step": 77870 + }, + { + "epoch": 0.3889235686284302, + "grad_norm": 0.09196474403142929, + "learning_rate": 1.8378182182282412e-05, + "loss": 9.1458, + "step": 77880 + }, + { + "epoch": 0.3889735074533696, + "grad_norm": 0.09697432816028595, + "learning_rate": 1.837668026734086e-05, + "loss": 9.1328, + "step": 77890 + }, + { + "epoch": 0.3890234462783091, + "grad_norm": 0.09332502633333206, + "learning_rate": 1.8375178352399313e-05, + "loss": 9.1421, + "step": 77900 + }, + { + "epoch": 0.3890733851032485, + "grad_norm": 0.09647480398416519, + "learning_rate": 1.837367643745776e-05, + "loss": 9.1494, + "step": 77910 + }, + { + "epoch": 0.389123323928188, + "grad_norm": 0.08914009481668472, + "learning_rate": 1.8372174522516206e-05, + "loss": 9.1537, + "step": 77920 + }, + { + "epoch": 0.3891732627531274, + "grad_norm": 0.09153462946414948, + "learning_rate": 1.837067260757466e-05, + "loss": 9.1461, + "step": 77930 + }, + { + "epoch": 0.3892232015780669, + "grad_norm": 0.09255599975585938, + "learning_rate": 1.8369170692633107e-05, + "loss": 9.1481, + "step": 77940 + }, + { + "epoch": 0.3892731404030063, + "grad_norm": 0.10034976899623871, + "learning_rate": 1.836766877769156e-05, + "loss": 9.1427, + "step": 77950 + }, + { + "epoch": 0.3893230792279458, + "grad_norm": 0.09407934546470642, + "learning_rate": 1.8366166862750007e-05, + "loss": 9.1505, + "step": 77960 + }, + { + "epoch": 0.3893730180528852, + "grad_norm": 0.09382222592830658, + "learning_rate": 1.8364664947808454e-05, + "loss": 9.1392, + "step": 77970 + }, + { + "epoch": 0.3894229568778247, + "grad_norm": 0.09767767041921616, + "learning_rate": 1.8363163032866907e-05, + "loss": 9.1463, + "step": 77980 + }, + { + "epoch": 0.3894728957027641, + "grad_norm": 0.0940956324338913, + "learning_rate": 1.8361661117925354e-05, + "loss": 9.144, + "step": 77990 + }, + { + "epoch": 0.3895228345277036, + "grad_norm": 0.09381180256605148, + "learning_rate": 1.8360159202983808e-05, + "loss": 9.1565, + "step": 78000 + }, + { + "epoch": 0.389572773352643, + "grad_norm": 0.09209650754928589, + "learning_rate": 1.8358657288042254e-05, + "loss": 9.149, + "step": 78010 + }, + { + "epoch": 0.3896227121775825, + "grad_norm": 0.09117661416530609, + "learning_rate": 1.83571553731007e-05, + "loss": 9.1426, + "step": 78020 + }, + { + "epoch": 0.3896726510025219, + "grad_norm": 0.0906655564904213, + "learning_rate": 1.8355653458159155e-05, + "loss": 9.1527, + "step": 78030 + }, + { + "epoch": 0.3897225898274614, + "grad_norm": 0.09310529381036758, + "learning_rate": 1.83541515432176e-05, + "loss": 9.1374, + "step": 78040 + }, + { + "epoch": 0.3897725286524008, + "grad_norm": 0.08677937090396881, + "learning_rate": 1.8352649628276055e-05, + "loss": 9.1483, + "step": 78050 + }, + { + "epoch": 0.3898224674773403, + "grad_norm": 0.09317050874233246, + "learning_rate": 1.8351147713334502e-05, + "loss": 9.1481, + "step": 78060 + }, + { + "epoch": 0.3898724063022797, + "grad_norm": 0.09428145736455917, + "learning_rate": 1.834964579839295e-05, + "loss": 9.1525, + "step": 78070 + }, + { + "epoch": 0.3899223451272192, + "grad_norm": 0.09383665770292282, + "learning_rate": 1.8348143883451402e-05, + "loss": 9.1374, + "step": 78080 + }, + { + "epoch": 0.3899722839521586, + "grad_norm": 0.08776134252548218, + "learning_rate": 1.834664196850985e-05, + "loss": 9.1481, + "step": 78090 + }, + { + "epoch": 0.3900222227770981, + "grad_norm": 0.09563107043504715, + "learning_rate": 1.8345140053568303e-05, + "loss": 9.1403, + "step": 78100 + }, + { + "epoch": 0.3900721616020375, + "grad_norm": 0.09589152038097382, + "learning_rate": 1.834363813862675e-05, + "loss": 9.1492, + "step": 78110 + }, + { + "epoch": 0.390122100426977, + "grad_norm": 0.09651998430490494, + "learning_rate": 1.8342136223685196e-05, + "loss": 9.1421, + "step": 78120 + }, + { + "epoch": 0.3901720392519164, + "grad_norm": 0.0984501987695694, + "learning_rate": 1.834063430874365e-05, + "loss": 9.1399, + "step": 78130 + }, + { + "epoch": 0.3902219780768558, + "grad_norm": 0.09631562978029251, + "learning_rate": 1.8339132393802097e-05, + "loss": 9.1386, + "step": 78140 + }, + { + "epoch": 0.3902719169017953, + "grad_norm": 0.09599178284406662, + "learning_rate": 1.833763047886055e-05, + "loss": 9.1481, + "step": 78150 + }, + { + "epoch": 0.3903218557267347, + "grad_norm": 0.0964631512761116, + "learning_rate": 1.8336128563918997e-05, + "loss": 9.14, + "step": 78160 + }, + { + "epoch": 0.3903717945516742, + "grad_norm": 0.09247509390115738, + "learning_rate": 1.8334626648977447e-05, + "loss": 9.1432, + "step": 78170 + }, + { + "epoch": 0.3904217333766136, + "grad_norm": 0.09320325404405594, + "learning_rate": 1.8333124734035897e-05, + "loss": 9.1439, + "step": 78180 + }, + { + "epoch": 0.3904716722015531, + "grad_norm": 0.09139348566532135, + "learning_rate": 1.8331622819094344e-05, + "loss": 9.156, + "step": 78190 + }, + { + "epoch": 0.3905216110264925, + "grad_norm": 0.09385591000318527, + "learning_rate": 1.8330120904152798e-05, + "loss": 9.1441, + "step": 78200 + }, + { + "epoch": 0.390571549851432, + "grad_norm": 0.09762391448020935, + "learning_rate": 1.8328618989211244e-05, + "loss": 9.1494, + "step": 78210 + }, + { + "epoch": 0.3906214886763714, + "grad_norm": 0.09472424536943436, + "learning_rate": 1.8327117074269695e-05, + "loss": 9.1339, + "step": 78220 + }, + { + "epoch": 0.3906714275013109, + "grad_norm": 0.09121805429458618, + "learning_rate": 1.8325615159328145e-05, + "loss": 9.1461, + "step": 78230 + }, + { + "epoch": 0.3907213663262503, + "grad_norm": 0.09944367408752441, + "learning_rate": 1.832411324438659e-05, + "loss": 9.1434, + "step": 78240 + }, + { + "epoch": 0.3907713051511898, + "grad_norm": 0.08815374970436096, + "learning_rate": 1.8322611329445045e-05, + "loss": 9.138, + "step": 78250 + }, + { + "epoch": 0.3908212439761292, + "grad_norm": 0.09055221825838089, + "learning_rate": 1.8321109414503492e-05, + "loss": 9.1388, + "step": 78260 + }, + { + "epoch": 0.3908711828010687, + "grad_norm": 0.09463029354810715, + "learning_rate": 1.8319607499561942e-05, + "loss": 9.1477, + "step": 78270 + }, + { + "epoch": 0.3909211216260081, + "grad_norm": 0.09334297478199005, + "learning_rate": 1.8318105584620392e-05, + "loss": 9.1358, + "step": 78280 + }, + { + "epoch": 0.3909710604509476, + "grad_norm": 0.09212377667427063, + "learning_rate": 1.831660366967884e-05, + "loss": 9.1363, + "step": 78290 + }, + { + "epoch": 0.391020999275887, + "grad_norm": 0.09191081672906876, + "learning_rate": 1.8315101754737293e-05, + "loss": 9.1443, + "step": 78300 + }, + { + "epoch": 0.3910709381008265, + "grad_norm": 0.0968773290514946, + "learning_rate": 1.831359983979574e-05, + "loss": 9.1507, + "step": 78310 + }, + { + "epoch": 0.3911208769257659, + "grad_norm": 0.09789808839559555, + "learning_rate": 1.831209792485419e-05, + "loss": 9.1365, + "step": 78320 + }, + { + "epoch": 0.3911708157507054, + "grad_norm": 0.09304534643888474, + "learning_rate": 1.831059600991264e-05, + "loss": 9.1417, + "step": 78330 + }, + { + "epoch": 0.3912207545756448, + "grad_norm": 0.09302885830402374, + "learning_rate": 1.8309094094971087e-05, + "loss": 9.1402, + "step": 78340 + }, + { + "epoch": 0.3912706934005843, + "grad_norm": 0.10683131963014603, + "learning_rate": 1.830759218002954e-05, + "loss": 9.1388, + "step": 78350 + }, + { + "epoch": 0.3913206322255237, + "grad_norm": 0.09414584934711456, + "learning_rate": 1.8306090265087987e-05, + "loss": 9.1488, + "step": 78360 + }, + { + "epoch": 0.3913705710504632, + "grad_norm": 0.10074611753225327, + "learning_rate": 1.8304588350146437e-05, + "loss": 9.1423, + "step": 78370 + }, + { + "epoch": 0.3914205098754026, + "grad_norm": 0.0921291634440422, + "learning_rate": 1.8303086435204887e-05, + "loss": 9.1431, + "step": 78380 + }, + { + "epoch": 0.3914704487003421, + "grad_norm": 0.09323565661907196, + "learning_rate": 1.8301584520263334e-05, + "loss": 9.1458, + "step": 78390 + }, + { + "epoch": 0.3915203875252815, + "grad_norm": 0.09994424134492874, + "learning_rate": 1.8300082605321788e-05, + "loss": 9.1407, + "step": 78400 + }, + { + "epoch": 0.391570326350221, + "grad_norm": 0.09224946796894073, + "learning_rate": 1.8298580690380234e-05, + "loss": 9.1374, + "step": 78410 + }, + { + "epoch": 0.3916202651751604, + "grad_norm": 0.09452834725379944, + "learning_rate": 1.8297078775438685e-05, + "loss": 9.145, + "step": 78420 + }, + { + "epoch": 0.3916702040000999, + "grad_norm": 0.10009356588125229, + "learning_rate": 1.8295576860497135e-05, + "loss": 9.1396, + "step": 78430 + }, + { + "epoch": 0.3917201428250393, + "grad_norm": 0.0899641364812851, + "learning_rate": 1.829407494555558e-05, + "loss": 9.1334, + "step": 78440 + }, + { + "epoch": 0.3917700816499788, + "grad_norm": 0.09872201830148697, + "learning_rate": 1.8292573030614035e-05, + "loss": 9.1392, + "step": 78450 + }, + { + "epoch": 0.3918200204749182, + "grad_norm": 0.09648354351520538, + "learning_rate": 1.8291071115672482e-05, + "loss": 9.1291, + "step": 78460 + }, + { + "epoch": 0.3918699592998577, + "grad_norm": 0.09189938753843307, + "learning_rate": 1.8289569200730932e-05, + "loss": 9.1376, + "step": 78470 + }, + { + "epoch": 0.3919198981247971, + "grad_norm": 0.0942487046122551, + "learning_rate": 1.8288067285789382e-05, + "loss": 9.1554, + "step": 78480 + }, + { + "epoch": 0.3919698369497366, + "grad_norm": 0.0920250341296196, + "learning_rate": 1.8286565370847832e-05, + "loss": 9.1335, + "step": 78490 + }, + { + "epoch": 0.392019775774676, + "grad_norm": 0.09978582710027695, + "learning_rate": 1.8285063455906283e-05, + "loss": 9.1339, + "step": 78500 + }, + { + "epoch": 0.3920697145996155, + "grad_norm": 0.09699604660272598, + "learning_rate": 1.828356154096473e-05, + "loss": 9.146, + "step": 78510 + }, + { + "epoch": 0.3921196534245549, + "grad_norm": 0.09093842655420303, + "learning_rate": 1.828205962602318e-05, + "loss": 9.1379, + "step": 78520 + }, + { + "epoch": 0.3921695922494944, + "grad_norm": 0.09174724668264389, + "learning_rate": 1.828055771108163e-05, + "loss": 9.1321, + "step": 78530 + }, + { + "epoch": 0.3922195310744338, + "grad_norm": 0.10035578906536102, + "learning_rate": 1.827905579614008e-05, + "loss": 9.1408, + "step": 78540 + }, + { + "epoch": 0.3922694698993733, + "grad_norm": 0.09467300027608871, + "learning_rate": 1.827755388119853e-05, + "loss": 9.1563, + "step": 78550 + }, + { + "epoch": 0.3923194087243127, + "grad_norm": 0.09329366683959961, + "learning_rate": 1.8276051966256977e-05, + "loss": 9.1288, + "step": 78560 + }, + { + "epoch": 0.3923693475492522, + "grad_norm": 0.09315571188926697, + "learning_rate": 1.8274550051315427e-05, + "loss": 9.1358, + "step": 78570 + }, + { + "epoch": 0.3924192863741916, + "grad_norm": 0.09450618177652359, + "learning_rate": 1.8273048136373877e-05, + "loss": 9.1401, + "step": 78580 + }, + { + "epoch": 0.3924692251991311, + "grad_norm": 0.09535399079322815, + "learning_rate": 1.8271546221432327e-05, + "loss": 9.1415, + "step": 78590 + }, + { + "epoch": 0.3925191640240705, + "grad_norm": 0.09569433331489563, + "learning_rate": 1.8270044306490778e-05, + "loss": 9.145, + "step": 78600 + }, + { + "epoch": 0.39256910284901, + "grad_norm": 0.09955570846796036, + "learning_rate": 1.8268542391549224e-05, + "loss": 9.1322, + "step": 78610 + }, + { + "epoch": 0.3926190416739494, + "grad_norm": 0.10329762101173401, + "learning_rate": 1.8267040476607675e-05, + "loss": 9.127, + "step": 78620 + }, + { + "epoch": 0.3926689804988889, + "grad_norm": 0.09589072316884995, + "learning_rate": 1.8265538561666125e-05, + "loss": 9.1384, + "step": 78630 + }, + { + "epoch": 0.3927189193238283, + "grad_norm": 0.09281908720731735, + "learning_rate": 1.8264036646724575e-05, + "loss": 9.143, + "step": 78640 + }, + { + "epoch": 0.3927688581487678, + "grad_norm": 0.08548375964164734, + "learning_rate": 1.8262534731783025e-05, + "loss": 9.1432, + "step": 78650 + }, + { + "epoch": 0.3928187969737072, + "grad_norm": 0.10105335712432861, + "learning_rate": 1.8261032816841472e-05, + "loss": 9.1397, + "step": 78660 + }, + { + "epoch": 0.3928687357986467, + "grad_norm": 0.10008376091718674, + "learning_rate": 1.8259530901899922e-05, + "loss": 9.1335, + "step": 78670 + }, + { + "epoch": 0.3929186746235861, + "grad_norm": 0.09541326016187668, + "learning_rate": 1.8258028986958372e-05, + "loss": 9.1324, + "step": 78680 + }, + { + "epoch": 0.39296861344852557, + "grad_norm": 0.0945446565747261, + "learning_rate": 1.8256527072016822e-05, + "loss": 9.1329, + "step": 78690 + }, + { + "epoch": 0.393018552273465, + "grad_norm": 0.09741361439228058, + "learning_rate": 1.8255025157075273e-05, + "loss": 9.1452, + "step": 78700 + }, + { + "epoch": 0.39306849109840447, + "grad_norm": 0.09333933889865875, + "learning_rate": 1.825352324213372e-05, + "loss": 9.1364, + "step": 78710 + }, + { + "epoch": 0.3931184299233439, + "grad_norm": 0.09559603035449982, + "learning_rate": 1.825202132719217e-05, + "loss": 9.1332, + "step": 78720 + }, + { + "epoch": 0.39316836874828337, + "grad_norm": 0.09066354483366013, + "learning_rate": 1.825051941225062e-05, + "loss": 9.1269, + "step": 78730 + }, + { + "epoch": 0.3932183075732228, + "grad_norm": 0.09495797008275986, + "learning_rate": 1.824901749730907e-05, + "loss": 9.1402, + "step": 78740 + }, + { + "epoch": 0.39326824639816227, + "grad_norm": 0.08994494378566742, + "learning_rate": 1.824751558236752e-05, + "loss": 9.145, + "step": 78750 + }, + { + "epoch": 0.3933181852231017, + "grad_norm": 0.08942119777202606, + "learning_rate": 1.8246013667425967e-05, + "loss": 9.1393, + "step": 78760 + }, + { + "epoch": 0.39336812404804117, + "grad_norm": 0.09385786950588226, + "learning_rate": 1.8244511752484417e-05, + "loss": 9.141, + "step": 78770 + }, + { + "epoch": 0.3934180628729806, + "grad_norm": 0.09488862007856369, + "learning_rate": 1.8243009837542867e-05, + "loss": 9.1458, + "step": 78780 + }, + { + "epoch": 0.39346800169792007, + "grad_norm": 0.0987120270729065, + "learning_rate": 1.8241507922601317e-05, + "loss": 9.1272, + "step": 78790 + }, + { + "epoch": 0.3935179405228595, + "grad_norm": 0.09095241129398346, + "learning_rate": 1.8240006007659768e-05, + "loss": 9.13, + "step": 78800 + }, + { + "epoch": 0.39356787934779897, + "grad_norm": 0.08821319043636322, + "learning_rate": 1.8238504092718218e-05, + "loss": 9.1371, + "step": 78810 + }, + { + "epoch": 0.3936178181727384, + "grad_norm": 0.09783562272787094, + "learning_rate": 1.8237002177776665e-05, + "loss": 9.1444, + "step": 78820 + }, + { + "epoch": 0.39366775699767786, + "grad_norm": 0.09780313819646835, + "learning_rate": 1.8235500262835115e-05, + "loss": 9.1244, + "step": 78830 + }, + { + "epoch": 0.3937176958226173, + "grad_norm": 0.0926724523305893, + "learning_rate": 1.8233998347893565e-05, + "loss": 9.134, + "step": 78840 + }, + { + "epoch": 0.39376763464755676, + "grad_norm": 0.09907516092061996, + "learning_rate": 1.8232496432952015e-05, + "loss": 9.137, + "step": 78850 + }, + { + "epoch": 0.3938175734724962, + "grad_norm": 0.09416226297616959, + "learning_rate": 1.8230994518010465e-05, + "loss": 9.1347, + "step": 78860 + }, + { + "epoch": 0.39386751229743566, + "grad_norm": 0.09291341155767441, + "learning_rate": 1.8229492603068912e-05, + "loss": 9.1378, + "step": 78870 + }, + { + "epoch": 0.3939174511223751, + "grad_norm": 0.09237024933099747, + "learning_rate": 1.8227990688127362e-05, + "loss": 9.135, + "step": 78880 + }, + { + "epoch": 0.39396738994731456, + "grad_norm": 0.09598385542631149, + "learning_rate": 1.8226488773185812e-05, + "loss": 9.1253, + "step": 78890 + }, + { + "epoch": 0.394017328772254, + "grad_norm": 0.09644147008657455, + "learning_rate": 1.8224986858244263e-05, + "loss": 9.1379, + "step": 78900 + }, + { + "epoch": 0.39406726759719346, + "grad_norm": 0.09208100289106369, + "learning_rate": 1.8223484943302713e-05, + "loss": 9.1481, + "step": 78910 + }, + { + "epoch": 0.3941172064221329, + "grad_norm": 0.091837577521801, + "learning_rate": 1.822198302836116e-05, + "loss": 9.142, + "step": 78920 + }, + { + "epoch": 0.39416714524707236, + "grad_norm": 0.09068531543016434, + "learning_rate": 1.822048111341961e-05, + "loss": 9.1359, + "step": 78930 + }, + { + "epoch": 0.3942170840720118, + "grad_norm": 0.09340940415859222, + "learning_rate": 1.821897919847806e-05, + "loss": 9.1407, + "step": 78940 + }, + { + "epoch": 0.39426702289695126, + "grad_norm": 0.09838276356458664, + "learning_rate": 1.821747728353651e-05, + "loss": 9.1273, + "step": 78950 + }, + { + "epoch": 0.3943169617218907, + "grad_norm": 0.09257405251264572, + "learning_rate": 1.821597536859496e-05, + "loss": 9.1309, + "step": 78960 + }, + { + "epoch": 0.39436690054683016, + "grad_norm": 0.09464617073535919, + "learning_rate": 1.8214473453653407e-05, + "loss": 9.125, + "step": 78970 + }, + { + "epoch": 0.3944168393717696, + "grad_norm": 0.09580957889556885, + "learning_rate": 1.8212971538711857e-05, + "loss": 9.1253, + "step": 78980 + }, + { + "epoch": 0.39446677819670906, + "grad_norm": 0.09709136933088303, + "learning_rate": 1.8211469623770308e-05, + "loss": 9.1327, + "step": 78990 + }, + { + "epoch": 0.3945167170216485, + "grad_norm": 0.10155075043439865, + "learning_rate": 1.8209967708828758e-05, + "loss": 9.1258, + "step": 79000 + }, + { + "epoch": 0.39456665584658795, + "grad_norm": 0.08959164470434189, + "learning_rate": 1.8208465793887208e-05, + "loss": 9.1309, + "step": 79010 + }, + { + "epoch": 0.3946165946715274, + "grad_norm": 0.10539384931325912, + "learning_rate": 1.8206963878945655e-05, + "loss": 9.1265, + "step": 79020 + }, + { + "epoch": 0.39466653349646685, + "grad_norm": 0.09576883167028427, + "learning_rate": 1.8205461964004105e-05, + "loss": 9.1324, + "step": 79030 + }, + { + "epoch": 0.3947164723214063, + "grad_norm": 0.09333497285842896, + "learning_rate": 1.8203960049062555e-05, + "loss": 9.1188, + "step": 79040 + }, + { + "epoch": 0.39476641114634575, + "grad_norm": 0.10127103328704834, + "learning_rate": 1.8202458134121005e-05, + "loss": 9.1255, + "step": 79050 + }, + { + "epoch": 0.3948163499712852, + "grad_norm": 0.09161249548196793, + "learning_rate": 1.8200956219179455e-05, + "loss": 9.1357, + "step": 79060 + }, + { + "epoch": 0.39486628879622465, + "grad_norm": 0.08713789284229279, + "learning_rate": 1.8199454304237902e-05, + "loss": 9.1448, + "step": 79070 + }, + { + "epoch": 0.3949162276211641, + "grad_norm": 0.09979274123907089, + "learning_rate": 1.8197952389296352e-05, + "loss": 9.1245, + "step": 79080 + }, + { + "epoch": 0.39496616644610355, + "grad_norm": 0.09479266405105591, + "learning_rate": 1.8196450474354803e-05, + "loss": 9.1359, + "step": 79090 + }, + { + "epoch": 0.39501610527104297, + "grad_norm": 0.0914844274520874, + "learning_rate": 1.8194948559413253e-05, + "loss": 9.1356, + "step": 79100 + }, + { + "epoch": 0.3950660440959824, + "grad_norm": 0.09772706776857376, + "learning_rate": 1.8193446644471703e-05, + "loss": 9.1253, + "step": 79110 + }, + { + "epoch": 0.39511598292092187, + "grad_norm": 0.08755208551883698, + "learning_rate": 1.819194472953015e-05, + "loss": 9.1353, + "step": 79120 + }, + { + "epoch": 0.3951659217458613, + "grad_norm": 0.09900221973657608, + "learning_rate": 1.8190442814588603e-05, + "loss": 9.1268, + "step": 79130 + }, + { + "epoch": 0.39521586057080077, + "grad_norm": 0.09705707430839539, + "learning_rate": 1.818894089964705e-05, + "loss": 9.1355, + "step": 79140 + }, + { + "epoch": 0.3952657993957402, + "grad_norm": 0.09519544243812561, + "learning_rate": 1.81874389847055e-05, + "loss": 9.1456, + "step": 79150 + }, + { + "epoch": 0.39531573822067967, + "grad_norm": 0.09430651366710663, + "learning_rate": 1.818593706976395e-05, + "loss": 9.1446, + "step": 79160 + }, + { + "epoch": 0.3953656770456191, + "grad_norm": 0.0924086719751358, + "learning_rate": 1.8184435154822397e-05, + "loss": 9.1273, + "step": 79170 + }, + { + "epoch": 0.39541561587055857, + "grad_norm": 0.09312140196561813, + "learning_rate": 1.818293323988085e-05, + "loss": 9.1378, + "step": 79180 + }, + { + "epoch": 0.395465554695498, + "grad_norm": 0.08896911144256592, + "learning_rate": 1.8181431324939298e-05, + "loss": 9.1462, + "step": 79190 + }, + { + "epoch": 0.39551549352043747, + "grad_norm": 0.09490480273962021, + "learning_rate": 1.8179929409997748e-05, + "loss": 9.1304, + "step": 79200 + }, + { + "epoch": 0.3955654323453769, + "grad_norm": 0.09386398643255234, + "learning_rate": 1.8178427495056198e-05, + "loss": 9.1264, + "step": 79210 + }, + { + "epoch": 0.39561537117031637, + "grad_norm": 0.09263758361339569, + "learning_rate": 1.8176925580114645e-05, + "loss": 9.1253, + "step": 79220 + }, + { + "epoch": 0.3956653099952558, + "grad_norm": 0.09182369709014893, + "learning_rate": 1.8175423665173098e-05, + "loss": 9.1266, + "step": 79230 + }, + { + "epoch": 0.39571524882019526, + "grad_norm": 0.09554483741521835, + "learning_rate": 1.8173921750231545e-05, + "loss": 9.1378, + "step": 79240 + }, + { + "epoch": 0.3957651876451347, + "grad_norm": 0.09417613595724106, + "learning_rate": 1.8172419835289995e-05, + "loss": 9.1335, + "step": 79250 + }, + { + "epoch": 0.39581512647007416, + "grad_norm": 0.09561773389577866, + "learning_rate": 1.8170917920348445e-05, + "loss": 9.1292, + "step": 79260 + }, + { + "epoch": 0.3958650652950136, + "grad_norm": 0.09833815693855286, + "learning_rate": 1.8169416005406892e-05, + "loss": 9.1259, + "step": 79270 + }, + { + "epoch": 0.39591500411995306, + "grad_norm": 0.09367582947015762, + "learning_rate": 1.8167914090465346e-05, + "loss": 9.1323, + "step": 79280 + }, + { + "epoch": 0.3959649429448925, + "grad_norm": 0.09364238381385803, + "learning_rate": 1.8166412175523793e-05, + "loss": 9.1206, + "step": 79290 + }, + { + "epoch": 0.39601488176983196, + "grad_norm": 0.09396982192993164, + "learning_rate": 1.8164910260582243e-05, + "loss": 9.1248, + "step": 79300 + }, + { + "epoch": 0.3960648205947714, + "grad_norm": 0.09186158329248428, + "learning_rate": 1.8163408345640693e-05, + "loss": 9.1398, + "step": 79310 + }, + { + "epoch": 0.39611475941971086, + "grad_norm": 0.09493670612573624, + "learning_rate": 1.816190643069914e-05, + "loss": 9.129, + "step": 79320 + }, + { + "epoch": 0.3961646982446503, + "grad_norm": 0.0939958244562149, + "learning_rate": 1.8160404515757593e-05, + "loss": 9.1374, + "step": 79330 + }, + { + "epoch": 0.39621463706958976, + "grad_norm": 0.09256591647863388, + "learning_rate": 1.815890260081604e-05, + "loss": 9.1247, + "step": 79340 + }, + { + "epoch": 0.3962645758945292, + "grad_norm": 0.0958244577050209, + "learning_rate": 1.815740068587449e-05, + "loss": 9.1239, + "step": 79350 + }, + { + "epoch": 0.39631451471946866, + "grad_norm": 0.09303276985883713, + "learning_rate": 1.815589877093294e-05, + "loss": 9.1319, + "step": 79360 + }, + { + "epoch": 0.3963644535444081, + "grad_norm": 0.10332775115966797, + "learning_rate": 1.8154396855991387e-05, + "loss": 9.1269, + "step": 79370 + }, + { + "epoch": 0.39641439236934756, + "grad_norm": 0.09653986990451813, + "learning_rate": 1.815289494104984e-05, + "loss": 9.1214, + "step": 79380 + }, + { + "epoch": 0.396464331194287, + "grad_norm": 0.09338979423046112, + "learning_rate": 1.8151393026108288e-05, + "loss": 9.1155, + "step": 79390 + }, + { + "epoch": 0.39651427001922646, + "grad_norm": 0.09556922316551208, + "learning_rate": 1.8149891111166738e-05, + "loss": 9.1303, + "step": 79400 + }, + { + "epoch": 0.3965642088441659, + "grad_norm": 0.09039589762687683, + "learning_rate": 1.8148389196225188e-05, + "loss": 9.1323, + "step": 79410 + }, + { + "epoch": 0.39661414766910535, + "grad_norm": 0.09440034627914429, + "learning_rate": 1.8146887281283635e-05, + "loss": 9.131, + "step": 79420 + }, + { + "epoch": 0.3966640864940448, + "grad_norm": 0.0989178940653801, + "learning_rate": 1.8145385366342088e-05, + "loss": 9.1244, + "step": 79430 + }, + { + "epoch": 0.39671402531898425, + "grad_norm": 0.0976061150431633, + "learning_rate": 1.8143883451400535e-05, + "loss": 9.1148, + "step": 79440 + }, + { + "epoch": 0.3967639641439237, + "grad_norm": 0.09087694436311722, + "learning_rate": 1.8142381536458985e-05, + "loss": 9.1231, + "step": 79450 + }, + { + "epoch": 0.39681390296886315, + "grad_norm": 0.08781003206968307, + "learning_rate": 1.8140879621517435e-05, + "loss": 9.1166, + "step": 79460 + }, + { + "epoch": 0.3968638417938026, + "grad_norm": 0.08944723010063171, + "learning_rate": 1.8139377706575882e-05, + "loss": 9.1332, + "step": 79470 + }, + { + "epoch": 0.39691378061874205, + "grad_norm": 0.09698821604251862, + "learning_rate": 1.8137875791634336e-05, + "loss": 9.123, + "step": 79480 + }, + { + "epoch": 0.3969637194436815, + "grad_norm": 0.09338833391666412, + "learning_rate": 1.8136373876692783e-05, + "loss": 9.1365, + "step": 79490 + }, + { + "epoch": 0.39701365826862095, + "grad_norm": 0.09354853630065918, + "learning_rate": 1.8134871961751236e-05, + "loss": 9.1218, + "step": 79500 + }, + { + "epoch": 0.39706359709356037, + "grad_norm": 0.09762848913669586, + "learning_rate": 1.8133370046809683e-05, + "loss": 9.1297, + "step": 79510 + }, + { + "epoch": 0.39711353591849985, + "grad_norm": 0.0980941578745842, + "learning_rate": 1.813186813186813e-05, + "loss": 9.1269, + "step": 79520 + }, + { + "epoch": 0.39716347474343927, + "grad_norm": 0.09508627653121948, + "learning_rate": 1.8130366216926583e-05, + "loss": 9.1146, + "step": 79530 + }, + { + "epoch": 0.39721341356837875, + "grad_norm": 0.09081006050109863, + "learning_rate": 1.812886430198503e-05, + "loss": 9.1403, + "step": 79540 + }, + { + "epoch": 0.39726335239331817, + "grad_norm": 0.09213867783546448, + "learning_rate": 1.8127362387043484e-05, + "loss": 9.116, + "step": 79550 + }, + { + "epoch": 0.39731329121825765, + "grad_norm": 0.09143271297216415, + "learning_rate": 1.812586047210193e-05, + "loss": 9.1232, + "step": 79560 + }, + { + "epoch": 0.39736323004319707, + "grad_norm": 0.09485948085784912, + "learning_rate": 1.8124358557160377e-05, + "loss": 9.1199, + "step": 79570 + }, + { + "epoch": 0.39741316886813655, + "grad_norm": 0.09463505446910858, + "learning_rate": 1.812285664221883e-05, + "loss": 9.1312, + "step": 79580 + }, + { + "epoch": 0.39746310769307597, + "grad_norm": 0.09464465081691742, + "learning_rate": 1.8121354727277278e-05, + "loss": 9.1267, + "step": 79590 + }, + { + "epoch": 0.39751304651801544, + "grad_norm": 0.09754239022731781, + "learning_rate": 1.811985281233573e-05, + "loss": 9.1344, + "step": 79600 + }, + { + "epoch": 0.39756298534295487, + "grad_norm": 0.09760978072881699, + "learning_rate": 1.8118350897394178e-05, + "loss": 9.1294, + "step": 79610 + }, + { + "epoch": 0.39761292416789434, + "grad_norm": 0.08997610211372375, + "learning_rate": 1.8116848982452625e-05, + "loss": 9.1341, + "step": 79620 + }, + { + "epoch": 0.39766286299283377, + "grad_norm": 0.09305060654878616, + "learning_rate": 1.8115347067511078e-05, + "loss": 9.1429, + "step": 79630 + }, + { + "epoch": 0.39771280181777324, + "grad_norm": 0.0855415090918541, + "learning_rate": 1.8113845152569525e-05, + "loss": 9.1285, + "step": 79640 + }, + { + "epoch": 0.39776274064271266, + "grad_norm": 0.09722486883401871, + "learning_rate": 1.811234323762798e-05, + "loss": 9.1218, + "step": 79650 + }, + { + "epoch": 0.39781267946765214, + "grad_norm": 0.09455563873052597, + "learning_rate": 1.8110841322686425e-05, + "loss": 9.1177, + "step": 79660 + }, + { + "epoch": 0.39786261829259156, + "grad_norm": 0.09245886653661728, + "learning_rate": 1.8109339407744872e-05, + "loss": 9.1312, + "step": 79670 + }, + { + "epoch": 0.39791255711753104, + "grad_norm": 0.09259022772312164, + "learning_rate": 1.8107837492803326e-05, + "loss": 9.1313, + "step": 79680 + }, + { + "epoch": 0.39796249594247046, + "grad_norm": 0.08969958871603012, + "learning_rate": 1.8106335577861773e-05, + "loss": 9.1278, + "step": 79690 + }, + { + "epoch": 0.39801243476740994, + "grad_norm": 0.08772096782922745, + "learning_rate": 1.8104833662920226e-05, + "loss": 9.132, + "step": 79700 + }, + { + "epoch": 0.39806237359234936, + "grad_norm": 0.0951986312866211, + "learning_rate": 1.8103331747978673e-05, + "loss": 9.1208, + "step": 79710 + }, + { + "epoch": 0.39811231241728884, + "grad_norm": 0.09764955192804337, + "learning_rate": 1.810182983303712e-05, + "loss": 9.1099, + "step": 79720 + }, + { + "epoch": 0.39816225124222826, + "grad_norm": 0.09765150398015976, + "learning_rate": 1.8100327918095573e-05, + "loss": 9.1259, + "step": 79730 + }, + { + "epoch": 0.39821219006716774, + "grad_norm": 0.09225156158208847, + "learning_rate": 1.809882600315402e-05, + "loss": 9.1319, + "step": 79740 + }, + { + "epoch": 0.39826212889210716, + "grad_norm": 0.09323499351739883, + "learning_rate": 1.8097324088212474e-05, + "loss": 9.1295, + "step": 79750 + }, + { + "epoch": 0.39831206771704664, + "grad_norm": 0.0989677682518959, + "learning_rate": 1.809582217327092e-05, + "loss": 9.1328, + "step": 79760 + }, + { + "epoch": 0.39836200654198606, + "grad_norm": 0.09290597587823868, + "learning_rate": 1.8094320258329367e-05, + "loss": 9.1173, + "step": 79770 + }, + { + "epoch": 0.39841194536692554, + "grad_norm": 0.09229425340890884, + "learning_rate": 1.809281834338782e-05, + "loss": 9.1144, + "step": 79780 + }, + { + "epoch": 0.39846188419186496, + "grad_norm": 0.09045591205358505, + "learning_rate": 1.8091316428446268e-05, + "loss": 9.1248, + "step": 79790 + }, + { + "epoch": 0.39851182301680443, + "grad_norm": 0.0939536839723587, + "learning_rate": 1.808981451350472e-05, + "loss": 9.1254, + "step": 79800 + }, + { + "epoch": 0.39856176184174386, + "grad_norm": 0.09491851180791855, + "learning_rate": 1.8088312598563168e-05, + "loss": 9.123, + "step": 79810 + }, + { + "epoch": 0.39861170066668333, + "grad_norm": 0.08969860523939133, + "learning_rate": 1.8086810683621618e-05, + "loss": 9.1183, + "step": 79820 + }, + { + "epoch": 0.39866163949162275, + "grad_norm": 0.10173804312944412, + "learning_rate": 1.8085308768680068e-05, + "loss": 9.1313, + "step": 79830 + }, + { + "epoch": 0.39871157831656223, + "grad_norm": 0.0922439694404602, + "learning_rate": 1.8083806853738515e-05, + "loss": 9.1186, + "step": 79840 + }, + { + "epoch": 0.39876151714150165, + "grad_norm": 0.08746393769979477, + "learning_rate": 1.808230493879697e-05, + "loss": 9.1344, + "step": 79850 + }, + { + "epoch": 0.39881145596644113, + "grad_norm": 0.095425546169281, + "learning_rate": 1.8080803023855415e-05, + "loss": 9.1175, + "step": 79860 + }, + { + "epoch": 0.39886139479138055, + "grad_norm": 0.09297501295804977, + "learning_rate": 1.8079301108913866e-05, + "loss": 9.1252, + "step": 79870 + }, + { + "epoch": 0.39891133361632003, + "grad_norm": 0.09445119649171829, + "learning_rate": 1.8077799193972316e-05, + "loss": 9.1152, + "step": 79880 + }, + { + "epoch": 0.39896127244125945, + "grad_norm": 0.08981538563966751, + "learning_rate": 1.8076297279030763e-05, + "loss": 9.1123, + "step": 79890 + }, + { + "epoch": 0.39901121126619893, + "grad_norm": 0.09903477132320404, + "learning_rate": 1.8074795364089216e-05, + "loss": 9.1247, + "step": 79900 + }, + { + "epoch": 0.39906115009113835, + "grad_norm": 0.09373074769973755, + "learning_rate": 1.8073293449147663e-05, + "loss": 9.1176, + "step": 79910 + }, + { + "epoch": 0.3991110889160778, + "grad_norm": 0.0910414308309555, + "learning_rate": 1.8071791534206113e-05, + "loss": 9.124, + "step": 79920 + }, + { + "epoch": 0.39916102774101725, + "grad_norm": 0.09311582893133163, + "learning_rate": 1.8070289619264563e-05, + "loss": 9.111, + "step": 79930 + }, + { + "epoch": 0.3992109665659567, + "grad_norm": 0.09247269481420517, + "learning_rate": 1.806878770432301e-05, + "loss": 9.1145, + "step": 79940 + }, + { + "epoch": 0.39926090539089615, + "grad_norm": 0.09804600477218628, + "learning_rate": 1.8067285789381464e-05, + "loss": 9.1214, + "step": 79950 + }, + { + "epoch": 0.3993108442158356, + "grad_norm": 0.09990818053483963, + "learning_rate": 1.806578387443991e-05, + "loss": 9.1349, + "step": 79960 + }, + { + "epoch": 0.39936078304077505, + "grad_norm": 0.09712336957454681, + "learning_rate": 1.806428195949836e-05, + "loss": 9.1245, + "step": 79970 + }, + { + "epoch": 0.3994107218657145, + "grad_norm": 0.09431684017181396, + "learning_rate": 1.806278004455681e-05, + "loss": 9.1178, + "step": 79980 + }, + { + "epoch": 0.39946066069065395, + "grad_norm": 0.10282929986715317, + "learning_rate": 1.8061278129615258e-05, + "loss": 9.1093, + "step": 79990 + }, + { + "epoch": 0.3995105995155934, + "grad_norm": 0.09497708082199097, + "learning_rate": 1.805977621467371e-05, + "loss": 9.1184, + "step": 80000 + }, + { + "epoch": 0.39956053834053284, + "grad_norm": 0.09546716511249542, + "learning_rate": 1.8058274299732158e-05, + "loss": 9.1205, + "step": 80010 + }, + { + "epoch": 0.3996104771654723, + "grad_norm": 0.0929088369011879, + "learning_rate": 1.8056772384790608e-05, + "loss": 9.1212, + "step": 80020 + }, + { + "epoch": 0.39966041599041174, + "grad_norm": 0.09245330840349197, + "learning_rate": 1.8055270469849058e-05, + "loss": 9.1023, + "step": 80030 + }, + { + "epoch": 0.3997103548153512, + "grad_norm": 0.09106311202049255, + "learning_rate": 1.8053768554907505e-05, + "loss": 9.1202, + "step": 80040 + }, + { + "epoch": 0.39976029364029064, + "grad_norm": 0.10017859935760498, + "learning_rate": 1.805226663996596e-05, + "loss": 9.126, + "step": 80050 + }, + { + "epoch": 0.3998102324652301, + "grad_norm": 0.09317342936992645, + "learning_rate": 1.8050764725024405e-05, + "loss": 9.1289, + "step": 80060 + }, + { + "epoch": 0.39986017129016954, + "grad_norm": 0.0955187976360321, + "learning_rate": 1.8049262810082856e-05, + "loss": 9.1126, + "step": 80070 + }, + { + "epoch": 0.399910110115109, + "grad_norm": 0.09439694136381149, + "learning_rate": 1.8047760895141306e-05, + "loss": 9.1324, + "step": 80080 + }, + { + "epoch": 0.39996004894004844, + "grad_norm": 0.09123414754867554, + "learning_rate": 1.8046258980199753e-05, + "loss": 9.1342, + "step": 80090 + }, + { + "epoch": 0.40000998776498786, + "grad_norm": 0.09292822331190109, + "learning_rate": 1.8044757065258206e-05, + "loss": 9.111, + "step": 80100 + }, + { + "epoch": 0.40005992658992734, + "grad_norm": 0.09875541925430298, + "learning_rate": 1.8043255150316653e-05, + "loss": 9.1114, + "step": 80110 + }, + { + "epoch": 0.40010986541486676, + "grad_norm": 0.09288553893566132, + "learning_rate": 1.8041753235375103e-05, + "loss": 9.1085, + "step": 80120 + }, + { + "epoch": 0.40015980423980624, + "grad_norm": 0.09572578966617584, + "learning_rate": 1.8040251320433553e-05, + "loss": 9.1151, + "step": 80130 + }, + { + "epoch": 0.40020974306474566, + "grad_norm": 0.09946201741695404, + "learning_rate": 1.8038749405492003e-05, + "loss": 9.1163, + "step": 80140 + }, + { + "epoch": 0.40025968188968514, + "grad_norm": 0.09416273236274719, + "learning_rate": 1.8037247490550454e-05, + "loss": 9.1196, + "step": 80150 + }, + { + "epoch": 0.40030962071462456, + "grad_norm": 0.09236134588718414, + "learning_rate": 1.80357455756089e-05, + "loss": 9.1084, + "step": 80160 + }, + { + "epoch": 0.40035955953956404, + "grad_norm": 0.09351241588592529, + "learning_rate": 1.803424366066735e-05, + "loss": 9.1082, + "step": 80170 + }, + { + "epoch": 0.40040949836450346, + "grad_norm": 0.09657840430736542, + "learning_rate": 1.80327417457258e-05, + "loss": 9.1077, + "step": 80180 + }, + { + "epoch": 0.40045943718944293, + "grad_norm": 0.09256424009799957, + "learning_rate": 1.803123983078425e-05, + "loss": 9.1178, + "step": 80190 + }, + { + "epoch": 0.40050937601438236, + "grad_norm": 0.09746617078781128, + "learning_rate": 1.80297379158427e-05, + "loss": 9.1223, + "step": 80200 + }, + { + "epoch": 0.40055931483932183, + "grad_norm": 0.09331115335226059, + "learning_rate": 1.8028236000901148e-05, + "loss": 9.1215, + "step": 80210 + }, + { + "epoch": 0.40060925366426126, + "grad_norm": 0.09966427087783813, + "learning_rate": 1.8026734085959598e-05, + "loss": 9.1028, + "step": 80220 + }, + { + "epoch": 0.40065919248920073, + "grad_norm": 0.09679210931062698, + "learning_rate": 1.8025232171018048e-05, + "loss": 9.1186, + "step": 80230 + }, + { + "epoch": 0.40070913131414015, + "grad_norm": 0.09198209643363953, + "learning_rate": 1.80237302560765e-05, + "loss": 9.1265, + "step": 80240 + }, + { + "epoch": 0.40075907013907963, + "grad_norm": 0.09457174688577652, + "learning_rate": 1.802222834113495e-05, + "loss": 9.1212, + "step": 80250 + }, + { + "epoch": 0.40080900896401905, + "grad_norm": 0.09352682530879974, + "learning_rate": 1.8020726426193395e-05, + "loss": 9.1148, + "step": 80260 + }, + { + "epoch": 0.40085894778895853, + "grad_norm": 0.0890619084239006, + "learning_rate": 1.8019224511251846e-05, + "loss": 9.126, + "step": 80270 + }, + { + "epoch": 0.40090888661389795, + "grad_norm": 0.09067501127719879, + "learning_rate": 1.8017722596310296e-05, + "loss": 9.1201, + "step": 80280 + }, + { + "epoch": 0.40095882543883743, + "grad_norm": 0.09653503447771072, + "learning_rate": 1.8016220681368746e-05, + "loss": 9.1192, + "step": 80290 + }, + { + "epoch": 0.40100876426377685, + "grad_norm": 0.09168026596307755, + "learning_rate": 1.8014718766427196e-05, + "loss": 9.1156, + "step": 80300 + }, + { + "epoch": 0.40105870308871633, + "grad_norm": 0.09609059989452362, + "learning_rate": 1.8013216851485643e-05, + "loss": 9.1139, + "step": 80310 + }, + { + "epoch": 0.40110864191365575, + "grad_norm": 0.09476880729198456, + "learning_rate": 1.8011714936544093e-05, + "loss": 9.1134, + "step": 80320 + }, + { + "epoch": 0.4011585807385952, + "grad_norm": 0.10273495316505432, + "learning_rate": 1.8010213021602543e-05, + "loss": 9.1195, + "step": 80330 + }, + { + "epoch": 0.40120851956353465, + "grad_norm": 0.09645021706819534, + "learning_rate": 1.8008711106660993e-05, + "loss": 9.1238, + "step": 80340 + }, + { + "epoch": 0.4012584583884741, + "grad_norm": 0.09573419392108917, + "learning_rate": 1.8007209191719444e-05, + "loss": 9.1229, + "step": 80350 + }, + { + "epoch": 0.40130839721341355, + "grad_norm": 0.09707140922546387, + "learning_rate": 1.800570727677789e-05, + "loss": 9.1014, + "step": 80360 + }, + { + "epoch": 0.401358336038353, + "grad_norm": 0.08955365419387817, + "learning_rate": 1.800420536183634e-05, + "loss": 9.1242, + "step": 80370 + }, + { + "epoch": 0.40140827486329245, + "grad_norm": 0.09678129851818085, + "learning_rate": 1.800270344689479e-05, + "loss": 9.1202, + "step": 80380 + }, + { + "epoch": 0.4014582136882319, + "grad_norm": 0.09080725908279419, + "learning_rate": 1.800120153195324e-05, + "loss": 9.1198, + "step": 80390 + }, + { + "epoch": 0.40150815251317135, + "grad_norm": 0.09258688241243362, + "learning_rate": 1.799969961701169e-05, + "loss": 9.1001, + "step": 80400 + }, + { + "epoch": 0.4015580913381108, + "grad_norm": 0.09553803503513336, + "learning_rate": 1.7998197702070138e-05, + "loss": 9.1137, + "step": 80410 + }, + { + "epoch": 0.40160803016305024, + "grad_norm": 0.09325069189071655, + "learning_rate": 1.7996695787128588e-05, + "loss": 9.1155, + "step": 80420 + }, + { + "epoch": 0.4016579689879897, + "grad_norm": 0.09590888768434525, + "learning_rate": 1.799519387218704e-05, + "loss": 9.1142, + "step": 80430 + }, + { + "epoch": 0.40170790781292914, + "grad_norm": 0.08667058497667313, + "learning_rate": 1.799369195724549e-05, + "loss": 9.1148, + "step": 80440 + }, + { + "epoch": 0.4017578466378686, + "grad_norm": 0.088908351957798, + "learning_rate": 1.799219004230394e-05, + "loss": 9.1099, + "step": 80450 + }, + { + "epoch": 0.40180778546280804, + "grad_norm": 0.09739526361227036, + "learning_rate": 1.799068812736239e-05, + "loss": 9.114, + "step": 80460 + }, + { + "epoch": 0.4018577242877475, + "grad_norm": 0.09346520155668259, + "learning_rate": 1.7989186212420836e-05, + "loss": 9.1227, + "step": 80470 + }, + { + "epoch": 0.40190766311268694, + "grad_norm": 0.09403660148382187, + "learning_rate": 1.7987684297479286e-05, + "loss": 9.123, + "step": 80480 + }, + { + "epoch": 0.4019576019376264, + "grad_norm": 0.09151037782430649, + "learning_rate": 1.7986182382537736e-05, + "loss": 9.109, + "step": 80490 + }, + { + "epoch": 0.40200754076256584, + "grad_norm": 0.09313507378101349, + "learning_rate": 1.7984680467596186e-05, + "loss": 9.102, + "step": 80500 + }, + { + "epoch": 0.4020574795875053, + "grad_norm": 0.09583766758441925, + "learning_rate": 1.7983178552654636e-05, + "loss": 9.095, + "step": 80510 + }, + { + "epoch": 0.40210741841244474, + "grad_norm": 0.10357358306646347, + "learning_rate": 1.7981676637713083e-05, + "loss": 9.1297, + "step": 80520 + }, + { + "epoch": 0.4021573572373842, + "grad_norm": 0.09157849848270416, + "learning_rate": 1.7980174722771533e-05, + "loss": 9.0986, + "step": 80530 + }, + { + "epoch": 0.40220729606232364, + "grad_norm": 0.0905398353934288, + "learning_rate": 1.7978672807829984e-05, + "loss": 9.1144, + "step": 80540 + }, + { + "epoch": 0.4022572348872631, + "grad_norm": 0.09321150183677673, + "learning_rate": 1.7977170892888434e-05, + "loss": 9.1098, + "step": 80550 + }, + { + "epoch": 0.40230717371220254, + "grad_norm": 0.09091385453939438, + "learning_rate": 1.7975668977946884e-05, + "loss": 9.1172, + "step": 80560 + }, + { + "epoch": 0.402357112537142, + "grad_norm": 0.09568551182746887, + "learning_rate": 1.797416706300533e-05, + "loss": 9.1055, + "step": 80570 + }, + { + "epoch": 0.40240705136208144, + "grad_norm": 0.08929819613695145, + "learning_rate": 1.797266514806378e-05, + "loss": 9.1045, + "step": 80580 + }, + { + "epoch": 0.4024569901870209, + "grad_norm": 0.09343628585338593, + "learning_rate": 1.797116323312223e-05, + "loss": 9.0965, + "step": 80590 + }, + { + "epoch": 0.40250692901196033, + "grad_norm": 0.0918562188744545, + "learning_rate": 1.796966131818068e-05, + "loss": 9.103, + "step": 80600 + }, + { + "epoch": 0.4025568678368998, + "grad_norm": 0.09256646037101746, + "learning_rate": 1.796815940323913e-05, + "loss": 9.1031, + "step": 80610 + }, + { + "epoch": 0.40260680666183923, + "grad_norm": 0.08834106475114822, + "learning_rate": 1.7966657488297578e-05, + "loss": 9.1066, + "step": 80620 + }, + { + "epoch": 0.4026567454867787, + "grad_norm": 0.09646911174058914, + "learning_rate": 1.796515557335603e-05, + "loss": 9.1038, + "step": 80630 + }, + { + "epoch": 0.40270668431171813, + "grad_norm": 0.09727106243371964, + "learning_rate": 1.796365365841448e-05, + "loss": 9.1046, + "step": 80640 + }, + { + "epoch": 0.4027566231366576, + "grad_norm": 0.0980573520064354, + "learning_rate": 1.796215174347293e-05, + "loss": 9.1105, + "step": 80650 + }, + { + "epoch": 0.40280656196159703, + "grad_norm": 0.0933801680803299, + "learning_rate": 1.796064982853138e-05, + "loss": 9.1102, + "step": 80660 + }, + { + "epoch": 0.4028565007865365, + "grad_norm": 0.09309963136911392, + "learning_rate": 1.7959147913589826e-05, + "loss": 9.1067, + "step": 80670 + }, + { + "epoch": 0.40290643961147593, + "grad_norm": 0.09479737281799316, + "learning_rate": 1.7957645998648276e-05, + "loss": 9.118, + "step": 80680 + }, + { + "epoch": 0.4029563784364154, + "grad_norm": 0.08766873925924301, + "learning_rate": 1.7956144083706726e-05, + "loss": 9.1088, + "step": 80690 + }, + { + "epoch": 0.40300631726135483, + "grad_norm": 0.10010071843862534, + "learning_rate": 1.7954642168765176e-05, + "loss": 9.1077, + "step": 80700 + }, + { + "epoch": 0.4030562560862943, + "grad_norm": 0.09243807941675186, + "learning_rate": 1.7953140253823626e-05, + "loss": 9.1047, + "step": 80710 + }, + { + "epoch": 0.40310619491123373, + "grad_norm": 0.0926956832408905, + "learning_rate": 1.7951638338882073e-05, + "loss": 9.1029, + "step": 80720 + }, + { + "epoch": 0.4031561337361732, + "grad_norm": 0.09393865615129471, + "learning_rate": 1.7950136423940523e-05, + "loss": 9.1034, + "step": 80730 + }, + { + "epoch": 0.4032060725611126, + "grad_norm": 0.09327403455972672, + "learning_rate": 1.7948634508998974e-05, + "loss": 9.1105, + "step": 80740 + }, + { + "epoch": 0.4032560113860521, + "grad_norm": 0.09919864684343338, + "learning_rate": 1.7947132594057424e-05, + "loss": 9.1182, + "step": 80750 + }, + { + "epoch": 0.4033059502109915, + "grad_norm": 0.09142395853996277, + "learning_rate": 1.7945630679115874e-05, + "loss": 9.1112, + "step": 80760 + }, + { + "epoch": 0.403355889035931, + "grad_norm": 0.09137079864740372, + "learning_rate": 1.794412876417432e-05, + "loss": 9.1173, + "step": 80770 + }, + { + "epoch": 0.4034058278608704, + "grad_norm": 0.09005033224821091, + "learning_rate": 1.7942626849232774e-05, + "loss": 9.116, + "step": 80780 + }, + { + "epoch": 0.4034557666858099, + "grad_norm": 0.09195708483457565, + "learning_rate": 1.794112493429122e-05, + "loss": 9.1215, + "step": 80790 + }, + { + "epoch": 0.4035057055107493, + "grad_norm": 0.09235309809446335, + "learning_rate": 1.793962301934967e-05, + "loss": 9.1151, + "step": 80800 + }, + { + "epoch": 0.4035556443356888, + "grad_norm": 0.09343268722295761, + "learning_rate": 1.793812110440812e-05, + "loss": 9.1282, + "step": 80810 + }, + { + "epoch": 0.4036055831606282, + "grad_norm": 0.09357087314128876, + "learning_rate": 1.793661918946657e-05, + "loss": 9.1123, + "step": 80820 + }, + { + "epoch": 0.4036555219855677, + "grad_norm": 0.08820904046297073, + "learning_rate": 1.7935117274525022e-05, + "loss": 9.1184, + "step": 80830 + }, + { + "epoch": 0.4037054608105071, + "grad_norm": 0.08932943642139435, + "learning_rate": 1.793361535958347e-05, + "loss": 9.1211, + "step": 80840 + }, + { + "epoch": 0.4037553996354466, + "grad_norm": 0.09997081011533737, + "learning_rate": 1.793211344464192e-05, + "loss": 9.0972, + "step": 80850 + }, + { + "epoch": 0.403805338460386, + "grad_norm": 0.09369473904371262, + "learning_rate": 1.793061152970037e-05, + "loss": 9.1223, + "step": 80860 + }, + { + "epoch": 0.4038552772853255, + "grad_norm": 0.09159389138221741, + "learning_rate": 1.792910961475882e-05, + "loss": 9.1404, + "step": 80870 + }, + { + "epoch": 0.4039052161102649, + "grad_norm": 0.09258721768856049, + "learning_rate": 1.792760769981727e-05, + "loss": 9.1047, + "step": 80880 + }, + { + "epoch": 0.4039551549352044, + "grad_norm": 0.0894956886768341, + "learning_rate": 1.7926105784875716e-05, + "loss": 9.1137, + "step": 80890 + }, + { + "epoch": 0.4040050937601438, + "grad_norm": 0.09068527817726135, + "learning_rate": 1.7924603869934166e-05, + "loss": 9.1043, + "step": 80900 + }, + { + "epoch": 0.4040550325850833, + "grad_norm": 0.09284330904483795, + "learning_rate": 1.7923101954992616e-05, + "loss": 9.1099, + "step": 80910 + }, + { + "epoch": 0.4041049714100227, + "grad_norm": 0.10179001837968826, + "learning_rate": 1.7921600040051067e-05, + "loss": 9.0992, + "step": 80920 + }, + { + "epoch": 0.4041549102349622, + "grad_norm": 0.09713611751794815, + "learning_rate": 1.7920098125109517e-05, + "loss": 9.1143, + "step": 80930 + }, + { + "epoch": 0.4042048490599016, + "grad_norm": 0.10156739503145218, + "learning_rate": 1.7918596210167964e-05, + "loss": 9.1069, + "step": 80940 + }, + { + "epoch": 0.4042547878848411, + "grad_norm": 0.09405851364135742, + "learning_rate": 1.7917094295226414e-05, + "loss": 9.1072, + "step": 80950 + }, + { + "epoch": 0.4043047267097805, + "grad_norm": 0.0946517214179039, + "learning_rate": 1.7915592380284864e-05, + "loss": 9.1207, + "step": 80960 + }, + { + "epoch": 0.40435466553472, + "grad_norm": 0.09372934699058533, + "learning_rate": 1.7914090465343314e-05, + "loss": 9.1101, + "step": 80970 + }, + { + "epoch": 0.4044046043596594, + "grad_norm": 0.08745318651199341, + "learning_rate": 1.7912588550401764e-05, + "loss": 9.1133, + "step": 80980 + }, + { + "epoch": 0.4044545431845989, + "grad_norm": 0.09735770523548126, + "learning_rate": 1.791108663546021e-05, + "loss": 9.1006, + "step": 80990 + }, + { + "epoch": 0.4045044820095383, + "grad_norm": 0.10448341071605682, + "learning_rate": 1.790958472051866e-05, + "loss": 9.0897, + "step": 81000 + }, + { + "epoch": 0.4045544208344778, + "grad_norm": 0.09520471096038818, + "learning_rate": 1.790808280557711e-05, + "loss": 9.1102, + "step": 81010 + }, + { + "epoch": 0.4046043596594172, + "grad_norm": 0.09410350024700165, + "learning_rate": 1.790658089063556e-05, + "loss": 9.1109, + "step": 81020 + }, + { + "epoch": 0.4046542984843567, + "grad_norm": 0.09858789294958115, + "learning_rate": 1.7905078975694012e-05, + "loss": 9.1101, + "step": 81030 + }, + { + "epoch": 0.4047042373092961, + "grad_norm": 0.09166577458381653, + "learning_rate": 1.790357706075246e-05, + "loss": 9.1214, + "step": 81040 + }, + { + "epoch": 0.4047541761342356, + "grad_norm": 0.0967087671160698, + "learning_rate": 1.790207514581091e-05, + "loss": 9.1012, + "step": 81050 + }, + { + "epoch": 0.404804114959175, + "grad_norm": 0.09214182943105698, + "learning_rate": 1.790057323086936e-05, + "loss": 9.1048, + "step": 81060 + }, + { + "epoch": 0.4048540537841145, + "grad_norm": 0.09241543710231781, + "learning_rate": 1.789907131592781e-05, + "loss": 9.1192, + "step": 81070 + }, + { + "epoch": 0.4049039926090539, + "grad_norm": 0.09916938096284866, + "learning_rate": 1.789756940098626e-05, + "loss": 9.101, + "step": 81080 + }, + { + "epoch": 0.40495393143399333, + "grad_norm": 0.09278815984725952, + "learning_rate": 1.7896067486044706e-05, + "loss": 9.1098, + "step": 81090 + }, + { + "epoch": 0.4050038702589328, + "grad_norm": 0.09353374689817429, + "learning_rate": 1.789456557110316e-05, + "loss": 9.1081, + "step": 81100 + }, + { + "epoch": 0.40505380908387223, + "grad_norm": 0.09639580547809601, + "learning_rate": 1.7893063656161606e-05, + "loss": 9.0991, + "step": 81110 + }, + { + "epoch": 0.4051037479088117, + "grad_norm": 0.09381458908319473, + "learning_rate": 1.7891561741220057e-05, + "loss": 9.1231, + "step": 81120 + }, + { + "epoch": 0.40515368673375113, + "grad_norm": 0.09496497362852097, + "learning_rate": 1.7890059826278507e-05, + "loss": 9.1022, + "step": 81130 + }, + { + "epoch": 0.4052036255586906, + "grad_norm": 0.09724625945091248, + "learning_rate": 1.7888557911336954e-05, + "loss": 9.1116, + "step": 81140 + }, + { + "epoch": 0.40525356438363, + "grad_norm": 0.09216088056564331, + "learning_rate": 1.7887055996395407e-05, + "loss": 9.1035, + "step": 81150 + }, + { + "epoch": 0.4053035032085695, + "grad_norm": 0.09402379393577576, + "learning_rate": 1.7885554081453854e-05, + "loss": 9.1074, + "step": 81160 + }, + { + "epoch": 0.4053534420335089, + "grad_norm": 0.09243787080049515, + "learning_rate": 1.7884052166512304e-05, + "loss": 9.0979, + "step": 81170 + }, + { + "epoch": 0.4054033808584484, + "grad_norm": 0.0954827070236206, + "learning_rate": 1.7882550251570754e-05, + "loss": 9.104, + "step": 81180 + }, + { + "epoch": 0.4054533196833878, + "grad_norm": 0.09604278206825256, + "learning_rate": 1.78810483366292e-05, + "loss": 9.0999, + "step": 81190 + }, + { + "epoch": 0.4055032585083273, + "grad_norm": 0.09881725907325745, + "learning_rate": 1.7879546421687655e-05, + "loss": 9.1049, + "step": 81200 + }, + { + "epoch": 0.4055531973332667, + "grad_norm": 0.09090153127908707, + "learning_rate": 1.78780445067461e-05, + "loss": 9.1028, + "step": 81210 + }, + { + "epoch": 0.4056031361582062, + "grad_norm": 0.09422878175973892, + "learning_rate": 1.787654259180455e-05, + "loss": 9.0912, + "step": 81220 + }, + { + "epoch": 0.4056530749831456, + "grad_norm": 0.09033548831939697, + "learning_rate": 1.7875040676863002e-05, + "loss": 9.1124, + "step": 81230 + }, + { + "epoch": 0.4057030138080851, + "grad_norm": 0.08588909357786179, + "learning_rate": 1.787353876192145e-05, + "loss": 9.0967, + "step": 81240 + }, + { + "epoch": 0.4057529526330245, + "grad_norm": 0.0940273180603981, + "learning_rate": 1.7872036846979902e-05, + "loss": 9.1105, + "step": 81250 + }, + { + "epoch": 0.405802891457964, + "grad_norm": 0.0950455591082573, + "learning_rate": 1.787053493203835e-05, + "loss": 9.1095, + "step": 81260 + }, + { + "epoch": 0.4058528302829034, + "grad_norm": 0.09447299689054489, + "learning_rate": 1.78690330170968e-05, + "loss": 9.1007, + "step": 81270 + }, + { + "epoch": 0.4059027691078429, + "grad_norm": 0.09144210070371628, + "learning_rate": 1.786753110215525e-05, + "loss": 9.1033, + "step": 81280 + }, + { + "epoch": 0.4059527079327823, + "grad_norm": 0.08806926012039185, + "learning_rate": 1.7866029187213696e-05, + "loss": 9.105, + "step": 81290 + }, + { + "epoch": 0.4060026467577218, + "grad_norm": 0.09366438537836075, + "learning_rate": 1.786452727227215e-05, + "loss": 9.0969, + "step": 81300 + }, + { + "epoch": 0.4060525855826612, + "grad_norm": 0.0918484702706337, + "learning_rate": 1.7863025357330596e-05, + "loss": 9.1047, + "step": 81310 + }, + { + "epoch": 0.4061025244076007, + "grad_norm": 0.09249420464038849, + "learning_rate": 1.7861523442389047e-05, + "loss": 9.0934, + "step": 81320 + }, + { + "epoch": 0.4061524632325401, + "grad_norm": 0.09651041775941849, + "learning_rate": 1.7860021527447497e-05, + "loss": 9.1173, + "step": 81330 + }, + { + "epoch": 0.4062024020574796, + "grad_norm": 0.09468764811754227, + "learning_rate": 1.7858519612505944e-05, + "loss": 9.0975, + "step": 81340 + }, + { + "epoch": 0.406252340882419, + "grad_norm": 0.09159006178379059, + "learning_rate": 1.7857017697564397e-05, + "loss": 9.108, + "step": 81350 + }, + { + "epoch": 0.4063022797073585, + "grad_norm": 0.08911147713661194, + "learning_rate": 1.7855515782622844e-05, + "loss": 9.1011, + "step": 81360 + }, + { + "epoch": 0.4063522185322979, + "grad_norm": 0.09557855129241943, + "learning_rate": 1.7854013867681294e-05, + "loss": 9.1114, + "step": 81370 + }, + { + "epoch": 0.4064021573572374, + "grad_norm": 0.09145300090312958, + "learning_rate": 1.7852511952739744e-05, + "loss": 9.0937, + "step": 81380 + }, + { + "epoch": 0.4064520961821768, + "grad_norm": 0.09059559553861618, + "learning_rate": 1.785101003779819e-05, + "loss": 9.1113, + "step": 81390 + }, + { + "epoch": 0.4065020350071163, + "grad_norm": 0.09117017686367035, + "learning_rate": 1.7849508122856645e-05, + "loss": 9.1032, + "step": 81400 + }, + { + "epoch": 0.4065519738320557, + "grad_norm": 0.09077339619398117, + "learning_rate": 1.784800620791509e-05, + "loss": 9.102, + "step": 81410 + }, + { + "epoch": 0.4066019126569952, + "grad_norm": 0.0927463248372078, + "learning_rate": 1.7846504292973545e-05, + "loss": 9.1111, + "step": 81420 + }, + { + "epoch": 0.4066518514819346, + "grad_norm": 0.09298672527074814, + "learning_rate": 1.7845002378031992e-05, + "loss": 9.1062, + "step": 81430 + }, + { + "epoch": 0.4067017903068741, + "grad_norm": 0.09268884360790253, + "learning_rate": 1.784350046309044e-05, + "loss": 9.0985, + "step": 81440 + }, + { + "epoch": 0.4067517291318135, + "grad_norm": 0.09739190340042114, + "learning_rate": 1.7841998548148892e-05, + "loss": 9.1097, + "step": 81450 + }, + { + "epoch": 0.406801667956753, + "grad_norm": 0.08956177532672882, + "learning_rate": 1.784049663320734e-05, + "loss": 9.104, + "step": 81460 + }, + { + "epoch": 0.4068516067816924, + "grad_norm": 0.09477641433477402, + "learning_rate": 1.7838994718265792e-05, + "loss": 9.1018, + "step": 81470 + }, + { + "epoch": 0.4069015456066319, + "grad_norm": 0.09133501350879669, + "learning_rate": 1.783749280332424e-05, + "loss": 9.1138, + "step": 81480 + }, + { + "epoch": 0.4069514844315713, + "grad_norm": 0.09000437706708908, + "learning_rate": 1.7835990888382686e-05, + "loss": 9.1056, + "step": 81490 + }, + { + "epoch": 0.4070014232565108, + "grad_norm": 0.09407982975244522, + "learning_rate": 1.783448897344114e-05, + "loss": 9.1159, + "step": 81500 + }, + { + "epoch": 0.4070513620814502, + "grad_norm": 0.09220529347658157, + "learning_rate": 1.7832987058499586e-05, + "loss": 9.0999, + "step": 81510 + }, + { + "epoch": 0.4071013009063897, + "grad_norm": 0.10050280392169952, + "learning_rate": 1.783148514355804e-05, + "loss": 9.0938, + "step": 81520 + }, + { + "epoch": 0.4071512397313291, + "grad_norm": 0.09240719676017761, + "learning_rate": 1.7829983228616487e-05, + "loss": 9.1099, + "step": 81530 + }, + { + "epoch": 0.4072011785562686, + "grad_norm": 0.09365271776914597, + "learning_rate": 1.7828481313674934e-05, + "loss": 9.105, + "step": 81540 + }, + { + "epoch": 0.407251117381208, + "grad_norm": 0.0895438864827156, + "learning_rate": 1.7826979398733387e-05, + "loss": 9.0933, + "step": 81550 + }, + { + "epoch": 0.4073010562061475, + "grad_norm": 0.09172806888818741, + "learning_rate": 1.7825477483791834e-05, + "loss": 9.0977, + "step": 81560 + }, + { + "epoch": 0.4073509950310869, + "grad_norm": 0.09493505954742432, + "learning_rate": 1.7823975568850287e-05, + "loss": 9.1043, + "step": 81570 + }, + { + "epoch": 0.4074009338560264, + "grad_norm": 0.08669907599687576, + "learning_rate": 1.7822473653908734e-05, + "loss": 9.0986, + "step": 81580 + }, + { + "epoch": 0.4074508726809658, + "grad_norm": 0.09328383207321167, + "learning_rate": 1.782097173896718e-05, + "loss": 9.0989, + "step": 81590 + }, + { + "epoch": 0.4075008115059053, + "grad_norm": 0.09149834513664246, + "learning_rate": 1.7819469824025635e-05, + "loss": 9.114, + "step": 81600 + }, + { + "epoch": 0.4075507503308447, + "grad_norm": 0.08791434019804001, + "learning_rate": 1.781796790908408e-05, + "loss": 9.1065, + "step": 81610 + }, + { + "epoch": 0.4076006891557842, + "grad_norm": 0.09626881778240204, + "learning_rate": 1.7816465994142535e-05, + "loss": 9.0977, + "step": 81620 + }, + { + "epoch": 0.4076506279807236, + "grad_norm": 0.08901573717594147, + "learning_rate": 1.7814964079200982e-05, + "loss": 9.1001, + "step": 81630 + }, + { + "epoch": 0.4077005668056631, + "grad_norm": 0.09143085032701492, + "learning_rate": 1.781346216425943e-05, + "loss": 9.0981, + "step": 81640 + }, + { + "epoch": 0.4077505056306025, + "grad_norm": 0.09885165840387344, + "learning_rate": 1.7811960249317882e-05, + "loss": 9.0968, + "step": 81650 + }, + { + "epoch": 0.407800444455542, + "grad_norm": 0.09109603613615036, + "learning_rate": 1.781045833437633e-05, + "loss": 9.0985, + "step": 81660 + }, + { + "epoch": 0.4078503832804814, + "grad_norm": 0.09030817449092865, + "learning_rate": 1.7808956419434783e-05, + "loss": 9.1018, + "step": 81670 + }, + { + "epoch": 0.4079003221054209, + "grad_norm": 0.09494989365339279, + "learning_rate": 1.780745450449323e-05, + "loss": 9.1095, + "step": 81680 + }, + { + "epoch": 0.4079502609303603, + "grad_norm": 0.09289959818124771, + "learning_rate": 1.7805952589551676e-05, + "loss": 9.0857, + "step": 81690 + }, + { + "epoch": 0.4080001997552998, + "grad_norm": 0.09349525719881058, + "learning_rate": 1.780445067461013e-05, + "loss": 9.097, + "step": 81700 + }, + { + "epoch": 0.4080501385802392, + "grad_norm": 0.08992131799459457, + "learning_rate": 1.7802948759668576e-05, + "loss": 9.0873, + "step": 81710 + }, + { + "epoch": 0.4081000774051787, + "grad_norm": 0.09782059490680695, + "learning_rate": 1.780144684472703e-05, + "loss": 9.0824, + "step": 81720 + }, + { + "epoch": 0.4081500162301181, + "grad_norm": 0.09610580652952194, + "learning_rate": 1.7799944929785477e-05, + "loss": 9.1044, + "step": 81730 + }, + { + "epoch": 0.4081999550550576, + "grad_norm": 0.09353209286928177, + "learning_rate": 1.7798443014843927e-05, + "loss": 9.0979, + "step": 81740 + }, + { + "epoch": 0.408249893879997, + "grad_norm": 0.08947628736495972, + "learning_rate": 1.7796941099902377e-05, + "loss": 9.0948, + "step": 81750 + }, + { + "epoch": 0.40829983270493647, + "grad_norm": 0.09303894639015198, + "learning_rate": 1.7795439184960824e-05, + "loss": 9.0996, + "step": 81760 + }, + { + "epoch": 0.4083497715298759, + "grad_norm": 0.09388189017772675, + "learning_rate": 1.7793937270019278e-05, + "loss": 9.1086, + "step": 81770 + }, + { + "epoch": 0.40839971035481537, + "grad_norm": 0.09600455313920975, + "learning_rate": 1.7792435355077724e-05, + "loss": 9.0987, + "step": 81780 + }, + { + "epoch": 0.4084496491797548, + "grad_norm": 0.091989666223526, + "learning_rate": 1.7790933440136174e-05, + "loss": 9.1066, + "step": 81790 + }, + { + "epoch": 0.40849958800469427, + "grad_norm": 0.09617997705936432, + "learning_rate": 1.7789431525194625e-05, + "loss": 9.1038, + "step": 81800 + }, + { + "epoch": 0.4085495268296337, + "grad_norm": 0.09295359253883362, + "learning_rate": 1.778792961025307e-05, + "loss": 9.0986, + "step": 81810 + }, + { + "epoch": 0.40859946565457317, + "grad_norm": 0.09218577295541763, + "learning_rate": 1.7786427695311525e-05, + "loss": 9.1062, + "step": 81820 + }, + { + "epoch": 0.4086494044795126, + "grad_norm": 0.09626112878322601, + "learning_rate": 1.7784925780369972e-05, + "loss": 9.0865, + "step": 81830 + }, + { + "epoch": 0.40869934330445207, + "grad_norm": 0.08941785991191864, + "learning_rate": 1.7783423865428422e-05, + "loss": 9.0962, + "step": 81840 + }, + { + "epoch": 0.4087492821293915, + "grad_norm": 0.0907067134976387, + "learning_rate": 1.7781921950486872e-05, + "loss": 9.1065, + "step": 81850 + }, + { + "epoch": 0.40879922095433097, + "grad_norm": 0.08822061866521835, + "learning_rate": 1.778042003554532e-05, + "loss": 9.1054, + "step": 81860 + }, + { + "epoch": 0.4088491597792704, + "grad_norm": 0.08900550752878189, + "learning_rate": 1.7778918120603773e-05, + "loss": 9.1029, + "step": 81870 + }, + { + "epoch": 0.40889909860420987, + "grad_norm": 0.09885650873184204, + "learning_rate": 1.777741620566222e-05, + "loss": 9.0903, + "step": 81880 + }, + { + "epoch": 0.4089490374291493, + "grad_norm": 0.09264232963323593, + "learning_rate": 1.777591429072067e-05, + "loss": 9.0968, + "step": 81890 + }, + { + "epoch": 0.40899897625408876, + "grad_norm": 0.0919133648276329, + "learning_rate": 1.777441237577912e-05, + "loss": 9.0921, + "step": 81900 + }, + { + "epoch": 0.4090489150790282, + "grad_norm": 0.09178303927183151, + "learning_rate": 1.7772910460837566e-05, + "loss": 9.0978, + "step": 81910 + }, + { + "epoch": 0.40909885390396766, + "grad_norm": 0.09064862877130508, + "learning_rate": 1.777140854589602e-05, + "loss": 9.1053, + "step": 81920 + }, + { + "epoch": 0.4091487927289071, + "grad_norm": 0.0949510857462883, + "learning_rate": 1.7769906630954467e-05, + "loss": 9.1046, + "step": 81930 + }, + { + "epoch": 0.40919873155384656, + "grad_norm": 0.09381455928087234, + "learning_rate": 1.7768404716012917e-05, + "loss": 9.1019, + "step": 81940 + }, + { + "epoch": 0.409248670378786, + "grad_norm": 0.0917896181344986, + "learning_rate": 1.7766902801071367e-05, + "loss": 9.0914, + "step": 81950 + }, + { + "epoch": 0.40929860920372546, + "grad_norm": 0.09546224027872086, + "learning_rate": 1.7765400886129814e-05, + "loss": 9.0897, + "step": 81960 + }, + { + "epoch": 0.4093485480286649, + "grad_norm": 0.09134580194950104, + "learning_rate": 1.7763898971188268e-05, + "loss": 9.0924, + "step": 81970 + }, + { + "epoch": 0.40939848685360436, + "grad_norm": 0.09422563761472702, + "learning_rate": 1.7762397056246714e-05, + "loss": 9.1046, + "step": 81980 + }, + { + "epoch": 0.4094484256785438, + "grad_norm": 0.09437835216522217, + "learning_rate": 1.7760895141305164e-05, + "loss": 9.1111, + "step": 81990 + }, + { + "epoch": 0.40949836450348326, + "grad_norm": 0.09388506412506104, + "learning_rate": 1.7759393226363615e-05, + "loss": 9.0865, + "step": 82000 + }, + { + "epoch": 0.4095483033284227, + "grad_norm": 0.09779687970876694, + "learning_rate": 1.775789131142206e-05, + "loss": 9.0946, + "step": 82010 + }, + { + "epoch": 0.40959824215336216, + "grad_norm": 0.08977722376585007, + "learning_rate": 1.7756389396480515e-05, + "loss": 9.1029, + "step": 82020 + }, + { + "epoch": 0.4096481809783016, + "grad_norm": 0.09358174353837967, + "learning_rate": 1.7754887481538962e-05, + "loss": 9.0842, + "step": 82030 + }, + { + "epoch": 0.40969811980324106, + "grad_norm": 0.09870479255914688, + "learning_rate": 1.7753385566597412e-05, + "loss": 9.0784, + "step": 82040 + }, + { + "epoch": 0.4097480586281805, + "grad_norm": 0.09142067283391953, + "learning_rate": 1.7751883651655862e-05, + "loss": 9.0896, + "step": 82050 + }, + { + "epoch": 0.40979799745311996, + "grad_norm": 0.08930368721485138, + "learning_rate": 1.7750381736714312e-05, + "loss": 9.1012, + "step": 82060 + }, + { + "epoch": 0.4098479362780594, + "grad_norm": 0.09085031598806381, + "learning_rate": 1.7748879821772763e-05, + "loss": 9.0979, + "step": 82070 + }, + { + "epoch": 0.4098978751029988, + "grad_norm": 0.09649605304002762, + "learning_rate": 1.774737790683121e-05, + "loss": 9.0945, + "step": 82080 + }, + { + "epoch": 0.4099478139279383, + "grad_norm": 0.09378213435411453, + "learning_rate": 1.774587599188966e-05, + "loss": 9.1019, + "step": 82090 + }, + { + "epoch": 0.4099977527528777, + "grad_norm": 0.09320469200611115, + "learning_rate": 1.774437407694811e-05, + "loss": 9.0815, + "step": 82100 + }, + { + "epoch": 0.4100476915778172, + "grad_norm": 0.09025924652814865, + "learning_rate": 1.774287216200656e-05, + "loss": 9.1096, + "step": 82110 + }, + { + "epoch": 0.4100976304027566, + "grad_norm": 0.09439290314912796, + "learning_rate": 1.774137024706501e-05, + "loss": 9.1072, + "step": 82120 + }, + { + "epoch": 0.4101475692276961, + "grad_norm": 0.09332076460123062, + "learning_rate": 1.7739868332123457e-05, + "loss": 9.104, + "step": 82130 + }, + { + "epoch": 0.4101975080526355, + "grad_norm": 0.10301212221384048, + "learning_rate": 1.7738366417181907e-05, + "loss": 9.0906, + "step": 82140 + }, + { + "epoch": 0.410247446877575, + "grad_norm": 0.08951026201248169, + "learning_rate": 1.7736864502240357e-05, + "loss": 9.1012, + "step": 82150 + }, + { + "epoch": 0.4102973857025144, + "grad_norm": 0.09387951344251633, + "learning_rate": 1.7735362587298807e-05, + "loss": 9.1009, + "step": 82160 + }, + { + "epoch": 0.41034732452745387, + "grad_norm": 0.0935429260134697, + "learning_rate": 1.7733860672357258e-05, + "loss": 9.0949, + "step": 82170 + }, + { + "epoch": 0.4103972633523933, + "grad_norm": 0.09872562438249588, + "learning_rate": 1.7732358757415704e-05, + "loss": 9.0956, + "step": 82180 + }, + { + "epoch": 0.41044720217733277, + "grad_norm": 0.09391787648200989, + "learning_rate": 1.7730856842474155e-05, + "loss": 9.0856, + "step": 82190 + }, + { + "epoch": 0.4104971410022722, + "grad_norm": 0.09664194285869598, + "learning_rate": 1.7729354927532605e-05, + "loss": 9.0931, + "step": 82200 + }, + { + "epoch": 0.41054707982721167, + "grad_norm": 0.10238590836524963, + "learning_rate": 1.7727853012591055e-05, + "loss": 9.0853, + "step": 82210 + }, + { + "epoch": 0.4105970186521511, + "grad_norm": 0.08652331680059433, + "learning_rate": 1.7726351097649505e-05, + "loss": 9.1032, + "step": 82220 + }, + { + "epoch": 0.41064695747709057, + "grad_norm": 0.09455471485853195, + "learning_rate": 1.7724849182707952e-05, + "loss": 9.0888, + "step": 82230 + }, + { + "epoch": 0.41069689630203, + "grad_norm": 0.09298815578222275, + "learning_rate": 1.7723347267766402e-05, + "loss": 9.0829, + "step": 82240 + }, + { + "epoch": 0.41074683512696947, + "grad_norm": 0.09878503531217575, + "learning_rate": 1.7721845352824852e-05, + "loss": 9.0886, + "step": 82250 + }, + { + "epoch": 0.4107967739519089, + "grad_norm": 0.09556316584348679, + "learning_rate": 1.7720343437883302e-05, + "loss": 9.0951, + "step": 82260 + }, + { + "epoch": 0.41084671277684837, + "grad_norm": 0.094581738114357, + "learning_rate": 1.7718841522941753e-05, + "loss": 9.0941, + "step": 82270 + }, + { + "epoch": 0.4108966516017878, + "grad_norm": 0.09531483799219131, + "learning_rate": 1.77173396080002e-05, + "loss": 9.0941, + "step": 82280 + }, + { + "epoch": 0.41094659042672727, + "grad_norm": 0.09399186819791794, + "learning_rate": 1.771583769305865e-05, + "loss": 9.0851, + "step": 82290 + }, + { + "epoch": 0.4109965292516667, + "grad_norm": 0.09608032554388046, + "learning_rate": 1.77143357781171e-05, + "loss": 9.0931, + "step": 82300 + }, + { + "epoch": 0.41104646807660616, + "grad_norm": 0.0954754576086998, + "learning_rate": 1.771283386317555e-05, + "loss": 9.1091, + "step": 82310 + }, + { + "epoch": 0.4110964069015456, + "grad_norm": 0.08752164989709854, + "learning_rate": 1.7711331948234e-05, + "loss": 9.0904, + "step": 82320 + }, + { + "epoch": 0.41114634572648506, + "grad_norm": 0.09011632949113846, + "learning_rate": 1.7709830033292447e-05, + "loss": 9.1022, + "step": 82330 + }, + { + "epoch": 0.4111962845514245, + "grad_norm": 0.08889009058475494, + "learning_rate": 1.7708328118350897e-05, + "loss": 9.0813, + "step": 82340 + }, + { + "epoch": 0.41124622337636396, + "grad_norm": 0.0934472531080246, + "learning_rate": 1.7706826203409347e-05, + "loss": 9.084, + "step": 82350 + }, + { + "epoch": 0.4112961622013034, + "grad_norm": 0.0992710068821907, + "learning_rate": 1.7705324288467797e-05, + "loss": 9.1012, + "step": 82360 + }, + { + "epoch": 0.41134610102624286, + "grad_norm": 0.0951753556728363, + "learning_rate": 1.7703822373526248e-05, + "loss": 9.0896, + "step": 82370 + }, + { + "epoch": 0.4113960398511823, + "grad_norm": 0.09202361106872559, + "learning_rate": 1.7702320458584694e-05, + "loss": 9.0929, + "step": 82380 + }, + { + "epoch": 0.41144597867612176, + "grad_norm": 0.09348968416452408, + "learning_rate": 1.7700818543643145e-05, + "loss": 9.0928, + "step": 82390 + }, + { + "epoch": 0.4114959175010612, + "grad_norm": 0.0965779572725296, + "learning_rate": 1.7699316628701595e-05, + "loss": 9.086, + "step": 82400 + }, + { + "epoch": 0.41154585632600066, + "grad_norm": 0.09712937474250793, + "learning_rate": 1.7697814713760045e-05, + "loss": 9.085, + "step": 82410 + }, + { + "epoch": 0.4115957951509401, + "grad_norm": 0.09527904540300369, + "learning_rate": 1.7696312798818495e-05, + "loss": 9.0898, + "step": 82420 + }, + { + "epoch": 0.41164573397587956, + "grad_norm": 0.09039422124624252, + "learning_rate": 1.7694810883876945e-05, + "loss": 9.0958, + "step": 82430 + }, + { + "epoch": 0.411695672800819, + "grad_norm": 0.09513916820287704, + "learning_rate": 1.7693308968935392e-05, + "loss": 9.085, + "step": 82440 + }, + { + "epoch": 0.41174561162575846, + "grad_norm": 0.09290063381195068, + "learning_rate": 1.7691807053993842e-05, + "loss": 9.0877, + "step": 82450 + }, + { + "epoch": 0.4117955504506979, + "grad_norm": 0.09197252988815308, + "learning_rate": 1.7690305139052292e-05, + "loss": 9.0885, + "step": 82460 + }, + { + "epoch": 0.41184548927563736, + "grad_norm": 0.0950205847620964, + "learning_rate": 1.7688803224110743e-05, + "loss": 9.0964, + "step": 82470 + }, + { + "epoch": 0.4118954281005768, + "grad_norm": 0.09569496661424637, + "learning_rate": 1.7687301309169193e-05, + "loss": 9.0962, + "step": 82480 + }, + { + "epoch": 0.41194536692551625, + "grad_norm": 0.09163631498813629, + "learning_rate": 1.768579939422764e-05, + "loss": 9.101, + "step": 82490 + }, + { + "epoch": 0.4119953057504557, + "grad_norm": 0.09723185002803802, + "learning_rate": 1.768429747928609e-05, + "loss": 9.0751, + "step": 82500 + }, + { + "epoch": 0.41204524457539515, + "grad_norm": 0.09386081993579865, + "learning_rate": 1.768279556434454e-05, + "loss": 9.0936, + "step": 82510 + }, + { + "epoch": 0.4120951834003346, + "grad_norm": 0.09162143617868423, + "learning_rate": 1.768129364940299e-05, + "loss": 9.1021, + "step": 82520 + }, + { + "epoch": 0.41214512222527405, + "grad_norm": 0.09791895002126694, + "learning_rate": 1.767979173446144e-05, + "loss": 9.0871, + "step": 82530 + }, + { + "epoch": 0.4121950610502135, + "grad_norm": 0.09404843300580978, + "learning_rate": 1.7678289819519887e-05, + "loss": 9.0875, + "step": 82540 + }, + { + "epoch": 0.41224499987515295, + "grad_norm": 0.09097657352685928, + "learning_rate": 1.7676787904578337e-05, + "loss": 9.0881, + "step": 82550 + }, + { + "epoch": 0.4122949387000924, + "grad_norm": 0.090410977602005, + "learning_rate": 1.7675285989636787e-05, + "loss": 9.0898, + "step": 82560 + }, + { + "epoch": 0.41234487752503185, + "grad_norm": 0.09892985969781876, + "learning_rate": 1.7673784074695238e-05, + "loss": 9.0896, + "step": 82570 + }, + { + "epoch": 0.41239481634997127, + "grad_norm": 0.09155625849962234, + "learning_rate": 1.7672282159753688e-05, + "loss": 9.0865, + "step": 82580 + }, + { + "epoch": 0.41244475517491075, + "grad_norm": 0.09150595217943192, + "learning_rate": 1.7670780244812135e-05, + "loss": 9.0801, + "step": 82590 + }, + { + "epoch": 0.41249469399985017, + "grad_norm": 0.09165318310260773, + "learning_rate": 1.7669278329870585e-05, + "loss": 9.0751, + "step": 82600 + }, + { + "epoch": 0.41254463282478965, + "grad_norm": 0.09589849412441254, + "learning_rate": 1.7667776414929035e-05, + "loss": 9.0826, + "step": 82610 + }, + { + "epoch": 0.41259457164972907, + "grad_norm": 0.10229449719190598, + "learning_rate": 1.7666274499987485e-05, + "loss": 9.0724, + "step": 82620 + }, + { + "epoch": 0.41264451047466855, + "grad_norm": 0.09139035642147064, + "learning_rate": 1.7664772585045935e-05, + "loss": 9.091, + "step": 82630 + }, + { + "epoch": 0.41269444929960797, + "grad_norm": 0.09130216389894485, + "learning_rate": 1.7663270670104382e-05, + "loss": 9.0797, + "step": 82640 + }, + { + "epoch": 0.41274438812454745, + "grad_norm": 0.08872245997190475, + "learning_rate": 1.7661768755162832e-05, + "loss": 9.0946, + "step": 82650 + }, + { + "epoch": 0.41279432694948687, + "grad_norm": 0.09483634680509567, + "learning_rate": 1.7660266840221282e-05, + "loss": 9.0982, + "step": 82660 + }, + { + "epoch": 0.41284426577442634, + "grad_norm": 0.10040302574634552, + "learning_rate": 1.7658764925279733e-05, + "loss": 9.0882, + "step": 82670 + }, + { + "epoch": 0.41289420459936577, + "grad_norm": 0.09902846813201904, + "learning_rate": 1.7657263010338183e-05, + "loss": 9.0838, + "step": 82680 + }, + { + "epoch": 0.41294414342430524, + "grad_norm": 0.090021513402462, + "learning_rate": 1.765576109539663e-05, + "loss": 9.0854, + "step": 82690 + }, + { + "epoch": 0.41299408224924467, + "grad_norm": 0.09512148797512054, + "learning_rate": 1.765425918045508e-05, + "loss": 9.0922, + "step": 82700 + }, + { + "epoch": 0.41304402107418414, + "grad_norm": 0.09530332684516907, + "learning_rate": 1.765275726551353e-05, + "loss": 9.0939, + "step": 82710 + }, + { + "epoch": 0.41309395989912356, + "grad_norm": 0.09510703384876251, + "learning_rate": 1.765125535057198e-05, + "loss": 9.0849, + "step": 82720 + }, + { + "epoch": 0.41314389872406304, + "grad_norm": 0.09195013344287872, + "learning_rate": 1.764975343563043e-05, + "loss": 9.0738, + "step": 82730 + }, + { + "epoch": 0.41319383754900246, + "grad_norm": 0.09670493006706238, + "learning_rate": 1.7648251520688877e-05, + "loss": 9.0946, + "step": 82740 + }, + { + "epoch": 0.41324377637394194, + "grad_norm": 0.09696520864963531, + "learning_rate": 1.764674960574733e-05, + "loss": 9.0757, + "step": 82750 + }, + { + "epoch": 0.41329371519888136, + "grad_norm": 0.09648612141609192, + "learning_rate": 1.7645247690805777e-05, + "loss": 9.0891, + "step": 82760 + }, + { + "epoch": 0.41334365402382084, + "grad_norm": 0.09383885562419891, + "learning_rate": 1.7643745775864228e-05, + "loss": 9.0929, + "step": 82770 + }, + { + "epoch": 0.41339359284876026, + "grad_norm": 0.09602250903844833, + "learning_rate": 1.7642243860922678e-05, + "loss": 9.0904, + "step": 82780 + }, + { + "epoch": 0.41344353167369974, + "grad_norm": 0.08740779012441635, + "learning_rate": 1.7640741945981125e-05, + "loss": 9.0999, + "step": 82790 + }, + { + "epoch": 0.41349347049863916, + "grad_norm": 0.10102899372577667, + "learning_rate": 1.7639240031039578e-05, + "loss": 9.0845, + "step": 82800 + }, + { + "epoch": 0.41354340932357864, + "grad_norm": 0.10466084629297256, + "learning_rate": 1.7637738116098025e-05, + "loss": 9.0695, + "step": 82810 + }, + { + "epoch": 0.41359334814851806, + "grad_norm": 0.09442269057035446, + "learning_rate": 1.7636236201156475e-05, + "loss": 9.0806, + "step": 82820 + }, + { + "epoch": 0.41364328697345754, + "grad_norm": 0.09268420934677124, + "learning_rate": 1.7634734286214925e-05, + "loss": 9.1006, + "step": 82830 + }, + { + "epoch": 0.41369322579839696, + "grad_norm": 0.09057000279426575, + "learning_rate": 1.7633232371273372e-05, + "loss": 9.0809, + "step": 82840 + }, + { + "epoch": 0.41374316462333643, + "grad_norm": 0.0951695516705513, + "learning_rate": 1.7631730456331826e-05, + "loss": 9.0833, + "step": 82850 + }, + { + "epoch": 0.41379310344827586, + "grad_norm": 0.09882236272096634, + "learning_rate": 1.7630228541390272e-05, + "loss": 9.0878, + "step": 82860 + }, + { + "epoch": 0.41384304227321533, + "grad_norm": 0.09375642985105515, + "learning_rate": 1.7628726626448723e-05, + "loss": 9.0873, + "step": 82870 + }, + { + "epoch": 0.41389298109815476, + "grad_norm": 0.08775999397039413, + "learning_rate": 1.7627224711507173e-05, + "loss": 9.1052, + "step": 82880 + }, + { + "epoch": 0.41394291992309423, + "grad_norm": 0.09294060617685318, + "learning_rate": 1.762572279656562e-05, + "loss": 9.0823, + "step": 82890 + }, + { + "epoch": 0.41399285874803365, + "grad_norm": 0.10115856677293777, + "learning_rate": 1.7624220881624073e-05, + "loss": 9.0824, + "step": 82900 + }, + { + "epoch": 0.41404279757297313, + "grad_norm": 0.09972229599952698, + "learning_rate": 1.762271896668252e-05, + "loss": 9.0816, + "step": 82910 + }, + { + "epoch": 0.41409273639791255, + "grad_norm": 0.0935303345322609, + "learning_rate": 1.762121705174097e-05, + "loss": 9.0848, + "step": 82920 + }, + { + "epoch": 0.41414267522285203, + "grad_norm": 0.09236697852611542, + "learning_rate": 1.761971513679942e-05, + "loss": 9.091, + "step": 82930 + }, + { + "epoch": 0.41419261404779145, + "grad_norm": 0.08565404266119003, + "learning_rate": 1.7618213221857867e-05, + "loss": 9.0823, + "step": 82940 + }, + { + "epoch": 0.41424255287273093, + "grad_norm": 0.09559847414493561, + "learning_rate": 1.761671130691632e-05, + "loss": 9.0797, + "step": 82950 + }, + { + "epoch": 0.41429249169767035, + "grad_norm": 0.09396830946207047, + "learning_rate": 1.7615209391974767e-05, + "loss": 9.088, + "step": 82960 + }, + { + "epoch": 0.41434243052260983, + "grad_norm": 0.09308158606290817, + "learning_rate": 1.7613707477033218e-05, + "loss": 9.0753, + "step": 82970 + }, + { + "epoch": 0.41439236934754925, + "grad_norm": 0.10932270437479019, + "learning_rate": 1.7612205562091668e-05, + "loss": 9.0863, + "step": 82980 + }, + { + "epoch": 0.4144423081724887, + "grad_norm": 0.09359308332204819, + "learning_rate": 1.7610703647150115e-05, + "loss": 9.084, + "step": 82990 + }, + { + "epoch": 0.41449224699742815, + "grad_norm": 0.09583938866853714, + "learning_rate": 1.7609201732208568e-05, + "loss": 9.075, + "step": 83000 + }, + { + "epoch": 0.4145421858223676, + "grad_norm": 0.09451348334550858, + "learning_rate": 1.7607699817267015e-05, + "loss": 9.0976, + "step": 83010 + }, + { + "epoch": 0.41459212464730705, + "grad_norm": 0.09366924315690994, + "learning_rate": 1.7606197902325465e-05, + "loss": 9.0789, + "step": 83020 + }, + { + "epoch": 0.4146420634722465, + "grad_norm": 0.09320022910833359, + "learning_rate": 1.7604695987383915e-05, + "loss": 9.0808, + "step": 83030 + }, + { + "epoch": 0.41469200229718595, + "grad_norm": 0.09090333431959152, + "learning_rate": 1.7603194072442362e-05, + "loss": 9.0839, + "step": 83040 + }, + { + "epoch": 0.4147419411221254, + "grad_norm": 0.093370221555233, + "learning_rate": 1.7601692157500816e-05, + "loss": 9.0735, + "step": 83050 + }, + { + "epoch": 0.41479187994706485, + "grad_norm": 0.09438198059797287, + "learning_rate": 1.7600190242559262e-05, + "loss": 9.085, + "step": 83060 + }, + { + "epoch": 0.41484181877200427, + "grad_norm": 0.09503324329853058, + "learning_rate": 1.7598688327617716e-05, + "loss": 9.087, + "step": 83070 + }, + { + "epoch": 0.41489175759694374, + "grad_norm": 0.09370449930429459, + "learning_rate": 1.7597186412676163e-05, + "loss": 9.0811, + "step": 83080 + }, + { + "epoch": 0.41494169642188317, + "grad_norm": 0.09288004785776138, + "learning_rate": 1.759568449773461e-05, + "loss": 9.0904, + "step": 83090 + }, + { + "epoch": 0.41499163524682264, + "grad_norm": 0.09713056683540344, + "learning_rate": 1.7594182582793063e-05, + "loss": 9.0817, + "step": 83100 + }, + { + "epoch": 0.41504157407176206, + "grad_norm": 0.09737755358219147, + "learning_rate": 1.759268066785151e-05, + "loss": 9.071, + "step": 83110 + }, + { + "epoch": 0.41509151289670154, + "grad_norm": 0.10059583187103271, + "learning_rate": 1.7591178752909963e-05, + "loss": 9.0756, + "step": 83120 + }, + { + "epoch": 0.41514145172164096, + "grad_norm": 0.0912448912858963, + "learning_rate": 1.758967683796841e-05, + "loss": 9.0848, + "step": 83130 + }, + { + "epoch": 0.41519139054658044, + "grad_norm": 0.09705382585525513, + "learning_rate": 1.7588174923026857e-05, + "loss": 9.0865, + "step": 83140 + }, + { + "epoch": 0.41524132937151986, + "grad_norm": 0.08912192285060883, + "learning_rate": 1.758667300808531e-05, + "loss": 9.0884, + "step": 83150 + }, + { + "epoch": 0.41529126819645934, + "grad_norm": 0.09693977981805801, + "learning_rate": 1.7585171093143757e-05, + "loss": 9.081, + "step": 83160 + }, + { + "epoch": 0.41534120702139876, + "grad_norm": 0.09407562017440796, + "learning_rate": 1.758366917820221e-05, + "loss": 9.0875, + "step": 83170 + }, + { + "epoch": 0.41539114584633824, + "grad_norm": 0.0872037410736084, + "learning_rate": 1.7582167263260658e-05, + "loss": 9.0934, + "step": 83180 + }, + { + "epoch": 0.41544108467127766, + "grad_norm": 0.09205826371908188, + "learning_rate": 1.7580665348319105e-05, + "loss": 9.0891, + "step": 83190 + }, + { + "epoch": 0.41549102349621714, + "grad_norm": 0.09072985500097275, + "learning_rate": 1.7579163433377558e-05, + "loss": 9.0847, + "step": 83200 + }, + { + "epoch": 0.41554096232115656, + "grad_norm": 0.08977384865283966, + "learning_rate": 1.7577661518436005e-05, + "loss": 9.0795, + "step": 83210 + }, + { + "epoch": 0.41559090114609604, + "grad_norm": 0.0946430042386055, + "learning_rate": 1.757615960349446e-05, + "loss": 9.0804, + "step": 83220 + }, + { + "epoch": 0.41564083997103546, + "grad_norm": 0.09836602956056595, + "learning_rate": 1.7574657688552905e-05, + "loss": 9.0745, + "step": 83230 + }, + { + "epoch": 0.41569077879597494, + "grad_norm": 0.09133485704660416, + "learning_rate": 1.7573155773611352e-05, + "loss": 9.0921, + "step": 83240 + }, + { + "epoch": 0.41574071762091436, + "grad_norm": 0.093567855656147, + "learning_rate": 1.7571653858669806e-05, + "loss": 9.0733, + "step": 83250 + }, + { + "epoch": 0.41579065644585383, + "grad_norm": 0.09800833463668823, + "learning_rate": 1.7570151943728252e-05, + "loss": 9.0626, + "step": 83260 + }, + { + "epoch": 0.41584059527079326, + "grad_norm": 0.1007581278681755, + "learning_rate": 1.7568650028786706e-05, + "loss": 9.0695, + "step": 83270 + }, + { + "epoch": 0.41589053409573273, + "grad_norm": 0.09314744919538498, + "learning_rate": 1.7567148113845153e-05, + "loss": 9.0963, + "step": 83280 + }, + { + "epoch": 0.41594047292067216, + "grad_norm": 0.09255973249673843, + "learning_rate": 1.75656461989036e-05, + "loss": 9.0841, + "step": 83290 + }, + { + "epoch": 0.41599041174561163, + "grad_norm": 0.0947415828704834, + "learning_rate": 1.7564144283962053e-05, + "loss": 9.0759, + "step": 83300 + }, + { + "epoch": 0.41604035057055105, + "grad_norm": 0.09376344829797745, + "learning_rate": 1.75626423690205e-05, + "loss": 9.0787, + "step": 83310 + }, + { + "epoch": 0.41609028939549053, + "grad_norm": 0.09048786014318466, + "learning_rate": 1.7561140454078954e-05, + "loss": 9.0731, + "step": 83320 + }, + { + "epoch": 0.41614022822042995, + "grad_norm": 0.09429807960987091, + "learning_rate": 1.75596385391374e-05, + "loss": 9.0738, + "step": 83330 + }, + { + "epoch": 0.41619016704536943, + "grad_norm": 0.0935714915394783, + "learning_rate": 1.7558136624195847e-05, + "loss": 9.065, + "step": 83340 + }, + { + "epoch": 0.41624010587030885, + "grad_norm": 0.09685614705085754, + "learning_rate": 1.75566347092543e-05, + "loss": 9.0758, + "step": 83350 + }, + { + "epoch": 0.41629004469524833, + "grad_norm": 0.09270419180393219, + "learning_rate": 1.7555132794312747e-05, + "loss": 9.0771, + "step": 83360 + }, + { + "epoch": 0.41633998352018775, + "grad_norm": 0.09448056668043137, + "learning_rate": 1.75536308793712e-05, + "loss": 9.0852, + "step": 83370 + }, + { + "epoch": 0.41638992234512723, + "grad_norm": 0.09013166278600693, + "learning_rate": 1.7552128964429648e-05, + "loss": 9.0791, + "step": 83380 + }, + { + "epoch": 0.41643986117006665, + "grad_norm": 0.09527698159217834, + "learning_rate": 1.7550627049488098e-05, + "loss": 9.0773, + "step": 83390 + }, + { + "epoch": 0.4164897999950061, + "grad_norm": 0.09563423693180084, + "learning_rate": 1.7549125134546548e-05, + "loss": 9.0723, + "step": 83400 + }, + { + "epoch": 0.41653973881994555, + "grad_norm": 0.09608939290046692, + "learning_rate": 1.7547623219604995e-05, + "loss": 9.0768, + "step": 83410 + }, + { + "epoch": 0.416589677644885, + "grad_norm": 0.09871455281972885, + "learning_rate": 1.754612130466345e-05, + "loss": 9.0737, + "step": 83420 + }, + { + "epoch": 0.41663961646982445, + "grad_norm": 0.09225298464298248, + "learning_rate": 1.7544619389721895e-05, + "loss": 9.0819, + "step": 83430 + }, + { + "epoch": 0.4166895552947639, + "grad_norm": 0.09640655666589737, + "learning_rate": 1.7543117474780345e-05, + "loss": 9.0798, + "step": 83440 + }, + { + "epoch": 0.41673949411970335, + "grad_norm": 0.09529406577348709, + "learning_rate": 1.7541615559838796e-05, + "loss": 9.0874, + "step": 83450 + }, + { + "epoch": 0.4167894329446428, + "grad_norm": 0.09108598530292511, + "learning_rate": 1.7540113644897242e-05, + "loss": 9.071, + "step": 83460 + }, + { + "epoch": 0.41683937176958225, + "grad_norm": 0.09113084524869919, + "learning_rate": 1.7538611729955696e-05, + "loss": 9.0852, + "step": 83470 + }, + { + "epoch": 0.4168893105945217, + "grad_norm": 0.09776317328214645, + "learning_rate": 1.7537109815014143e-05, + "loss": 9.0882, + "step": 83480 + }, + { + "epoch": 0.41693924941946114, + "grad_norm": 0.09335512667894363, + "learning_rate": 1.7535607900072593e-05, + "loss": 9.077, + "step": 83490 + }, + { + "epoch": 0.4169891882444006, + "grad_norm": 0.09077412635087967, + "learning_rate": 1.7534105985131043e-05, + "loss": 9.0792, + "step": 83500 + }, + { + "epoch": 0.41703912706934004, + "grad_norm": 0.1023026630282402, + "learning_rate": 1.753260407018949e-05, + "loss": 9.0743, + "step": 83510 + }, + { + "epoch": 0.4170890658942795, + "grad_norm": 0.09414954483509064, + "learning_rate": 1.7531102155247944e-05, + "loss": 9.0811, + "step": 83520 + }, + { + "epoch": 0.41713900471921894, + "grad_norm": 0.09618096053600311, + "learning_rate": 1.752960024030639e-05, + "loss": 9.0919, + "step": 83530 + }, + { + "epoch": 0.4171889435441584, + "grad_norm": 0.0886245146393776, + "learning_rate": 1.752809832536484e-05, + "loss": 9.0966, + "step": 83540 + }, + { + "epoch": 0.41723888236909784, + "grad_norm": 0.09386541694402695, + "learning_rate": 1.752659641042329e-05, + "loss": 9.0889, + "step": 83550 + }, + { + "epoch": 0.4172888211940373, + "grad_norm": 0.08851998299360275, + "learning_rate": 1.7525094495481737e-05, + "loss": 9.0748, + "step": 83560 + }, + { + "epoch": 0.41733876001897674, + "grad_norm": 0.1024288460612297, + "learning_rate": 1.752359258054019e-05, + "loss": 9.0864, + "step": 83570 + }, + { + "epoch": 0.4173886988439162, + "grad_norm": 0.09188997000455856, + "learning_rate": 1.7522090665598638e-05, + "loss": 9.0743, + "step": 83580 + }, + { + "epoch": 0.41743863766885564, + "grad_norm": 0.09162962436676025, + "learning_rate": 1.7520588750657088e-05, + "loss": 9.0847, + "step": 83590 + }, + { + "epoch": 0.4174885764937951, + "grad_norm": 0.0950562134385109, + "learning_rate": 1.7519086835715538e-05, + "loss": 9.0715, + "step": 83600 + }, + { + "epoch": 0.41753851531873454, + "grad_norm": 0.09186748415231705, + "learning_rate": 1.7517584920773985e-05, + "loss": 9.072, + "step": 83610 + }, + { + "epoch": 0.417588454143674, + "grad_norm": 0.0918838381767273, + "learning_rate": 1.751608300583244e-05, + "loss": 9.0721, + "step": 83620 + }, + { + "epoch": 0.41763839296861344, + "grad_norm": 0.09041111171245575, + "learning_rate": 1.7514581090890885e-05, + "loss": 9.0816, + "step": 83630 + }, + { + "epoch": 0.4176883317935529, + "grad_norm": 0.08900655061006546, + "learning_rate": 1.7513079175949336e-05, + "loss": 9.0725, + "step": 83640 + }, + { + "epoch": 0.41773827061849234, + "grad_norm": 0.09458507597446442, + "learning_rate": 1.7511577261007786e-05, + "loss": 9.0837, + "step": 83650 + }, + { + "epoch": 0.4177882094434318, + "grad_norm": 0.08990652859210968, + "learning_rate": 1.7510075346066232e-05, + "loss": 9.0666, + "step": 83660 + }, + { + "epoch": 0.41783814826837123, + "grad_norm": 0.09383875876665115, + "learning_rate": 1.7508573431124686e-05, + "loss": 9.0684, + "step": 83670 + }, + { + "epoch": 0.4178880870933107, + "grad_norm": 0.0938841700553894, + "learning_rate": 1.7507071516183133e-05, + "loss": 9.076, + "step": 83680 + }, + { + "epoch": 0.41793802591825013, + "grad_norm": 0.0932869017124176, + "learning_rate": 1.7505569601241583e-05, + "loss": 9.0731, + "step": 83690 + }, + { + "epoch": 0.4179879647431896, + "grad_norm": 0.09501129388809204, + "learning_rate": 1.7504067686300033e-05, + "loss": 9.0626, + "step": 83700 + }, + { + "epoch": 0.41803790356812903, + "grad_norm": 0.09345944225788116, + "learning_rate": 1.7502565771358483e-05, + "loss": 9.0749, + "step": 83710 + }, + { + "epoch": 0.4180878423930685, + "grad_norm": 0.09460506588220596, + "learning_rate": 1.7501063856416934e-05, + "loss": 9.0764, + "step": 83720 + }, + { + "epoch": 0.41813778121800793, + "grad_norm": 0.09743547439575195, + "learning_rate": 1.749956194147538e-05, + "loss": 9.0757, + "step": 83730 + }, + { + "epoch": 0.4181877200429474, + "grad_norm": 0.09816765040159225, + "learning_rate": 1.749806002653383e-05, + "loss": 9.0767, + "step": 83740 + }, + { + "epoch": 0.41823765886788683, + "grad_norm": 0.09210207313299179, + "learning_rate": 1.749655811159228e-05, + "loss": 9.0721, + "step": 83750 + }, + { + "epoch": 0.4182875976928263, + "grad_norm": 0.09752412140369415, + "learning_rate": 1.749505619665073e-05, + "loss": 9.0679, + "step": 83760 + }, + { + "epoch": 0.41833753651776573, + "grad_norm": 0.09351370483636856, + "learning_rate": 1.749355428170918e-05, + "loss": 9.0862, + "step": 83770 + }, + { + "epoch": 0.4183874753427052, + "grad_norm": 0.09450095891952515, + "learning_rate": 1.7492052366767628e-05, + "loss": 9.0659, + "step": 83780 + }, + { + "epoch": 0.41843741416764463, + "grad_norm": 0.09616388380527496, + "learning_rate": 1.7490550451826078e-05, + "loss": 9.0898, + "step": 83790 + }, + { + "epoch": 0.4184873529925841, + "grad_norm": 0.09121173620223999, + "learning_rate": 1.7489048536884528e-05, + "loss": 9.1012, + "step": 83800 + }, + { + "epoch": 0.4185372918175235, + "grad_norm": 0.09462346136569977, + "learning_rate": 1.748754662194298e-05, + "loss": 9.0969, + "step": 83810 + }, + { + "epoch": 0.418587230642463, + "grad_norm": 0.09324286133050919, + "learning_rate": 1.748604470700143e-05, + "loss": 9.0746, + "step": 83820 + }, + { + "epoch": 0.4186371694674024, + "grad_norm": 0.09117257595062256, + "learning_rate": 1.7484542792059875e-05, + "loss": 9.0724, + "step": 83830 + }, + { + "epoch": 0.4186871082923419, + "grad_norm": 0.09084919095039368, + "learning_rate": 1.7483040877118326e-05, + "loss": 9.0775, + "step": 83840 + }, + { + "epoch": 0.4187370471172813, + "grad_norm": 0.09522408992052078, + "learning_rate": 1.7481538962176776e-05, + "loss": 9.0747, + "step": 83850 + }, + { + "epoch": 0.4187869859422208, + "grad_norm": 0.09409532696008682, + "learning_rate": 1.7480037047235226e-05, + "loss": 9.0663, + "step": 83860 + }, + { + "epoch": 0.4188369247671602, + "grad_norm": 0.08830953389406204, + "learning_rate": 1.7478535132293676e-05, + "loss": 9.0784, + "step": 83870 + }, + { + "epoch": 0.4188868635920997, + "grad_norm": 0.0929829478263855, + "learning_rate": 1.7477033217352123e-05, + "loss": 9.0728, + "step": 83880 + }, + { + "epoch": 0.4189368024170391, + "grad_norm": 0.09145011007785797, + "learning_rate": 1.7475531302410573e-05, + "loss": 9.0703, + "step": 83890 + }, + { + "epoch": 0.4189867412419786, + "grad_norm": 0.09224165230989456, + "learning_rate": 1.7474029387469023e-05, + "loss": 9.077, + "step": 83900 + }, + { + "epoch": 0.419036680066918, + "grad_norm": 0.09355958551168442, + "learning_rate": 1.7472527472527473e-05, + "loss": 9.0668, + "step": 83910 + }, + { + "epoch": 0.4190866188918575, + "grad_norm": 0.09556030482053757, + "learning_rate": 1.7471025557585924e-05, + "loss": 9.0659, + "step": 83920 + }, + { + "epoch": 0.4191365577167969, + "grad_norm": 0.09351398795843124, + "learning_rate": 1.746952364264437e-05, + "loss": 9.0652, + "step": 83930 + }, + { + "epoch": 0.4191864965417364, + "grad_norm": 0.09275408834218979, + "learning_rate": 1.746802172770282e-05, + "loss": 9.0618, + "step": 83940 + }, + { + "epoch": 0.4192364353666758, + "grad_norm": 0.0906294509768486, + "learning_rate": 1.746651981276127e-05, + "loss": 9.0744, + "step": 83950 + }, + { + "epoch": 0.4192863741916153, + "grad_norm": 0.09481019526720047, + "learning_rate": 1.746501789781972e-05, + "loss": 9.0686, + "step": 83960 + }, + { + "epoch": 0.4193363130165547, + "grad_norm": 0.0955205038189888, + "learning_rate": 1.746351598287817e-05, + "loss": 9.0717, + "step": 83970 + }, + { + "epoch": 0.4193862518414942, + "grad_norm": 0.09485220909118652, + "learning_rate": 1.7462014067936618e-05, + "loss": 9.0709, + "step": 83980 + }, + { + "epoch": 0.4194361906664336, + "grad_norm": 0.09109359234571457, + "learning_rate": 1.7460512152995068e-05, + "loss": 9.0645, + "step": 83990 + }, + { + "epoch": 0.4194861294913731, + "grad_norm": 0.09005378186702728, + "learning_rate": 1.7459010238053518e-05, + "loss": 9.0704, + "step": 84000 + }, + { + "epoch": 0.4195360683163125, + "grad_norm": 0.09958352148532867, + "learning_rate": 1.745750832311197e-05, + "loss": 9.0769, + "step": 84010 + }, + { + "epoch": 0.419586007141252, + "grad_norm": 0.09399083256721497, + "learning_rate": 1.745600640817042e-05, + "loss": 9.0654, + "step": 84020 + }, + { + "epoch": 0.4196359459661914, + "grad_norm": 0.09548033028841019, + "learning_rate": 1.745450449322887e-05, + "loss": 9.0775, + "step": 84030 + }, + { + "epoch": 0.4196858847911309, + "grad_norm": 0.09409498423337936, + "learning_rate": 1.7453002578287316e-05, + "loss": 9.0756, + "step": 84040 + }, + { + "epoch": 0.4197358236160703, + "grad_norm": 0.09729974716901779, + "learning_rate": 1.7451500663345766e-05, + "loss": 9.0749, + "step": 84050 + }, + { + "epoch": 0.41978576244100974, + "grad_norm": 0.09663716703653336, + "learning_rate": 1.7449998748404216e-05, + "loss": 9.069, + "step": 84060 + }, + { + "epoch": 0.4198357012659492, + "grad_norm": 0.09065845608711243, + "learning_rate": 1.7448496833462666e-05, + "loss": 9.066, + "step": 84070 + }, + { + "epoch": 0.41988564009088863, + "grad_norm": 0.09165765345096588, + "learning_rate": 1.7446994918521116e-05, + "loss": 9.0866, + "step": 84080 + }, + { + "epoch": 0.4199355789158281, + "grad_norm": 0.09514272212982178, + "learning_rate": 1.7445493003579563e-05, + "loss": 9.07, + "step": 84090 + }, + { + "epoch": 0.41998551774076753, + "grad_norm": 0.090760737657547, + "learning_rate": 1.7443991088638013e-05, + "loss": 9.0838, + "step": 84100 + }, + { + "epoch": 0.420035456565707, + "grad_norm": 0.09050191193819046, + "learning_rate": 1.7442489173696463e-05, + "loss": 9.0707, + "step": 84110 + }, + { + "epoch": 0.42008539539064643, + "grad_norm": 0.09378772228956223, + "learning_rate": 1.7440987258754914e-05, + "loss": 9.0724, + "step": 84120 + }, + { + "epoch": 0.4201353342155859, + "grad_norm": 0.09341657161712646, + "learning_rate": 1.7439485343813364e-05, + "loss": 9.067, + "step": 84130 + }, + { + "epoch": 0.42018527304052533, + "grad_norm": 0.09716533869504929, + "learning_rate": 1.743798342887181e-05, + "loss": 9.0632, + "step": 84140 + }, + { + "epoch": 0.4202352118654648, + "grad_norm": 0.09457621723413467, + "learning_rate": 1.743648151393026e-05, + "loss": 9.0649, + "step": 84150 + }, + { + "epoch": 0.42028515069040423, + "grad_norm": 0.0924045741558075, + "learning_rate": 1.743497959898871e-05, + "loss": 9.0646, + "step": 84160 + }, + { + "epoch": 0.4203350895153437, + "grad_norm": 0.09211113303899765, + "learning_rate": 1.743347768404716e-05, + "loss": 9.0715, + "step": 84170 + }, + { + "epoch": 0.42038502834028313, + "grad_norm": 0.09424125403165817, + "learning_rate": 1.743197576910561e-05, + "loss": 9.069, + "step": 84180 + }, + { + "epoch": 0.4204349671652226, + "grad_norm": 0.08973129838705063, + "learning_rate": 1.7430473854164058e-05, + "loss": 9.0637, + "step": 84190 + }, + { + "epoch": 0.42048490599016203, + "grad_norm": 0.09753432869911194, + "learning_rate": 1.7428971939222508e-05, + "loss": 9.0612, + "step": 84200 + }, + { + "epoch": 0.4205348448151015, + "grad_norm": 0.09066448360681534, + "learning_rate": 1.742747002428096e-05, + "loss": 9.0633, + "step": 84210 + }, + { + "epoch": 0.4205847836400409, + "grad_norm": 0.09251467883586884, + "learning_rate": 1.742596810933941e-05, + "loss": 9.0689, + "step": 84220 + }, + { + "epoch": 0.4206347224649804, + "grad_norm": 0.09440715610980988, + "learning_rate": 1.742446619439786e-05, + "loss": 9.0761, + "step": 84230 + }, + { + "epoch": 0.4206846612899198, + "grad_norm": 0.09155986458063126, + "learning_rate": 1.7422964279456306e-05, + "loss": 9.0703, + "step": 84240 + }, + { + "epoch": 0.4207346001148593, + "grad_norm": 0.0903191938996315, + "learning_rate": 1.7421462364514756e-05, + "loss": 9.0739, + "step": 84250 + }, + { + "epoch": 0.4207845389397987, + "grad_norm": 0.09451457858085632, + "learning_rate": 1.7419960449573206e-05, + "loss": 9.0768, + "step": 84260 + }, + { + "epoch": 0.4208344777647382, + "grad_norm": 0.09495178610086441, + "learning_rate": 1.7418458534631656e-05, + "loss": 9.0598, + "step": 84270 + }, + { + "epoch": 0.4208844165896776, + "grad_norm": 0.09594985842704773, + "learning_rate": 1.7416956619690106e-05, + "loss": 9.0666, + "step": 84280 + }, + { + "epoch": 0.4209343554146171, + "grad_norm": 0.09474395960569382, + "learning_rate": 1.7415454704748553e-05, + "loss": 9.0648, + "step": 84290 + }, + { + "epoch": 0.4209842942395565, + "grad_norm": 0.10143403708934784, + "learning_rate": 1.7413952789807003e-05, + "loss": 9.0528, + "step": 84300 + }, + { + "epoch": 0.421034233064496, + "grad_norm": 0.09076518565416336, + "learning_rate": 1.7412450874865453e-05, + "loss": 9.0731, + "step": 84310 + }, + { + "epoch": 0.4210841718894354, + "grad_norm": 0.0921441987156868, + "learning_rate": 1.7410948959923904e-05, + "loss": 9.067, + "step": 84320 + }, + { + "epoch": 0.4211341107143749, + "grad_norm": 0.09396324306726456, + "learning_rate": 1.7409447044982354e-05, + "loss": 9.0719, + "step": 84330 + }, + { + "epoch": 0.4211840495393143, + "grad_norm": 0.0950896143913269, + "learning_rate": 1.74079451300408e-05, + "loss": 9.0815, + "step": 84340 + }, + { + "epoch": 0.4212339883642538, + "grad_norm": 0.08926611393690109, + "learning_rate": 1.7406443215099254e-05, + "loss": 9.0629, + "step": 84350 + }, + { + "epoch": 0.4212839271891932, + "grad_norm": 0.09594748169183731, + "learning_rate": 1.74049413001577e-05, + "loss": 9.0707, + "step": 84360 + }, + { + "epoch": 0.4213338660141327, + "grad_norm": 0.09475325793027878, + "learning_rate": 1.740343938521615e-05, + "loss": 9.074, + "step": 84370 + }, + { + "epoch": 0.4213838048390721, + "grad_norm": 0.09772231429815292, + "learning_rate": 1.74019374702746e-05, + "loss": 9.0669, + "step": 84380 + }, + { + "epoch": 0.4214337436640116, + "grad_norm": 0.09714603424072266, + "learning_rate": 1.7400435555333048e-05, + "loss": 9.0703, + "step": 84390 + }, + { + "epoch": 0.421483682488951, + "grad_norm": 0.09235162287950516, + "learning_rate": 1.73989336403915e-05, + "loss": 9.059, + "step": 84400 + }, + { + "epoch": 0.4215336213138905, + "grad_norm": 0.0921136885881424, + "learning_rate": 1.739743172544995e-05, + "loss": 9.0542, + "step": 84410 + }, + { + "epoch": 0.4215835601388299, + "grad_norm": 0.09008321166038513, + "learning_rate": 1.73959298105084e-05, + "loss": 9.066, + "step": 84420 + }, + { + "epoch": 0.4216334989637694, + "grad_norm": 0.0914497822523117, + "learning_rate": 1.739442789556685e-05, + "loss": 9.0638, + "step": 84430 + }, + { + "epoch": 0.4216834377887088, + "grad_norm": 0.09600444883108139, + "learning_rate": 1.7392925980625296e-05, + "loss": 9.0733, + "step": 84440 + }, + { + "epoch": 0.4217333766136483, + "grad_norm": 0.09309738129377365, + "learning_rate": 1.739142406568375e-05, + "loss": 9.0743, + "step": 84450 + }, + { + "epoch": 0.4217833154385877, + "grad_norm": 0.09112364053726196, + "learning_rate": 1.7389922150742196e-05, + "loss": 9.0722, + "step": 84460 + }, + { + "epoch": 0.4218332542635272, + "grad_norm": 0.09220999479293823, + "learning_rate": 1.7388420235800646e-05, + "loss": 9.0659, + "step": 84470 + }, + { + "epoch": 0.4218831930884666, + "grad_norm": 0.09681863337755203, + "learning_rate": 1.7386918320859096e-05, + "loss": 9.0701, + "step": 84480 + }, + { + "epoch": 0.4219331319134061, + "grad_norm": 0.09077723324298859, + "learning_rate": 1.7385416405917543e-05, + "loss": 9.0616, + "step": 84490 + }, + { + "epoch": 0.4219830707383455, + "grad_norm": 0.09060750901699066, + "learning_rate": 1.7383914490975997e-05, + "loss": 9.06, + "step": 84500 + }, + { + "epoch": 0.422033009563285, + "grad_norm": 0.09467575699090958, + "learning_rate": 1.7382412576034443e-05, + "loss": 9.0742, + "step": 84510 + }, + { + "epoch": 0.4220829483882244, + "grad_norm": 0.09085453301668167, + "learning_rate": 1.7380910661092894e-05, + "loss": 9.0705, + "step": 84520 + }, + { + "epoch": 0.4221328872131639, + "grad_norm": 0.09209267795085907, + "learning_rate": 1.7379408746151344e-05, + "loss": 9.0544, + "step": 84530 + }, + { + "epoch": 0.4221828260381033, + "grad_norm": 0.09571056067943573, + "learning_rate": 1.737790683120979e-05, + "loss": 9.0732, + "step": 84540 + }, + { + "epoch": 0.4222327648630428, + "grad_norm": 0.09486297518014908, + "learning_rate": 1.7376404916268244e-05, + "loss": 9.0753, + "step": 84550 + }, + { + "epoch": 0.4222827036879822, + "grad_norm": 0.09144411236047745, + "learning_rate": 1.737490300132669e-05, + "loss": 9.0669, + "step": 84560 + }, + { + "epoch": 0.4223326425129217, + "grad_norm": 0.09834633022546768, + "learning_rate": 1.737340108638514e-05, + "loss": 9.072, + "step": 84570 + }, + { + "epoch": 0.4223825813378611, + "grad_norm": 0.09790683537721634, + "learning_rate": 1.737189917144359e-05, + "loss": 9.0566, + "step": 84580 + }, + { + "epoch": 0.4224325201628006, + "grad_norm": 0.09472320228815079, + "learning_rate": 1.7370397256502038e-05, + "loss": 9.0709, + "step": 84590 + }, + { + "epoch": 0.42248245898774, + "grad_norm": 0.0928400382399559, + "learning_rate": 1.736889534156049e-05, + "loss": 9.063, + "step": 84600 + }, + { + "epoch": 0.4225323978126795, + "grad_norm": 0.09452667087316513, + "learning_rate": 1.736739342661894e-05, + "loss": 9.0671, + "step": 84610 + }, + { + "epoch": 0.4225823366376189, + "grad_norm": 0.09719450771808624, + "learning_rate": 1.736589151167739e-05, + "loss": 9.0658, + "step": 84620 + }, + { + "epoch": 0.4226322754625584, + "grad_norm": 0.08982671052217484, + "learning_rate": 1.736438959673584e-05, + "loss": 9.0629, + "step": 84630 + }, + { + "epoch": 0.4226822142874978, + "grad_norm": 0.08864292502403259, + "learning_rate": 1.7362887681794286e-05, + "loss": 9.0618, + "step": 84640 + }, + { + "epoch": 0.4227321531124373, + "grad_norm": 0.09744229912757874, + "learning_rate": 1.736138576685274e-05, + "loss": 9.0557, + "step": 84650 + }, + { + "epoch": 0.4227820919373767, + "grad_norm": 0.09893699735403061, + "learning_rate": 1.7359883851911186e-05, + "loss": 9.067, + "step": 84660 + }, + { + "epoch": 0.4228320307623162, + "grad_norm": 0.09269163012504578, + "learning_rate": 1.735838193696964e-05, + "loss": 9.0676, + "step": 84670 + }, + { + "epoch": 0.4228819695872556, + "grad_norm": 0.09532048553228378, + "learning_rate": 1.7356880022028086e-05, + "loss": 9.0715, + "step": 84680 + }, + { + "epoch": 0.4229319084121951, + "grad_norm": 0.09102669358253479, + "learning_rate": 1.7355378107086533e-05, + "loss": 9.0674, + "step": 84690 + }, + { + "epoch": 0.4229818472371345, + "grad_norm": 0.09197783470153809, + "learning_rate": 1.7353876192144987e-05, + "loss": 9.0677, + "step": 84700 + }, + { + "epoch": 0.423031786062074, + "grad_norm": 0.09266051650047302, + "learning_rate": 1.7352374277203433e-05, + "loss": 9.0652, + "step": 84710 + }, + { + "epoch": 0.4230817248870134, + "grad_norm": 0.09375884383916855, + "learning_rate": 1.7350872362261887e-05, + "loss": 9.0622, + "step": 84720 + }, + { + "epoch": 0.4231316637119529, + "grad_norm": 0.09187036752700806, + "learning_rate": 1.7349370447320334e-05, + "loss": 9.0654, + "step": 84730 + }, + { + "epoch": 0.4231816025368923, + "grad_norm": 0.09767387062311172, + "learning_rate": 1.734786853237878e-05, + "loss": 9.074, + "step": 84740 + }, + { + "epoch": 0.4232315413618318, + "grad_norm": 0.09047545492649078, + "learning_rate": 1.7346366617437234e-05, + "loss": 9.0664, + "step": 84750 + }, + { + "epoch": 0.4232814801867712, + "grad_norm": 0.09297401458024979, + "learning_rate": 1.734486470249568e-05, + "loss": 9.073, + "step": 84760 + }, + { + "epoch": 0.4233314190117107, + "grad_norm": 0.0917564183473587, + "learning_rate": 1.7343362787554135e-05, + "loss": 9.064, + "step": 84770 + }, + { + "epoch": 0.4233813578366501, + "grad_norm": 0.09373626857995987, + "learning_rate": 1.734186087261258e-05, + "loss": 9.0597, + "step": 84780 + }, + { + "epoch": 0.4234312966615896, + "grad_norm": 0.09210002422332764, + "learning_rate": 1.7340358957671028e-05, + "loss": 9.0762, + "step": 84790 + }, + { + "epoch": 0.423481235486529, + "grad_norm": 0.09276442974805832, + "learning_rate": 1.733885704272948e-05, + "loss": 9.0565, + "step": 84800 + }, + { + "epoch": 0.4235311743114685, + "grad_norm": 0.09392396360635757, + "learning_rate": 1.733735512778793e-05, + "loss": 9.0691, + "step": 84810 + }, + { + "epoch": 0.4235811131364079, + "grad_norm": 0.09003560990095139, + "learning_rate": 1.7335853212846382e-05, + "loss": 9.0619, + "step": 84820 + }, + { + "epoch": 0.42363105196134737, + "grad_norm": 0.09067775309085846, + "learning_rate": 1.733435129790483e-05, + "loss": 9.0586, + "step": 84830 + }, + { + "epoch": 0.4236809907862868, + "grad_norm": 0.0904303789138794, + "learning_rate": 1.7332849382963276e-05, + "loss": 9.054, + "step": 84840 + }, + { + "epoch": 0.42373092961122627, + "grad_norm": 0.09555737674236298, + "learning_rate": 1.733134746802173e-05, + "loss": 9.0534, + "step": 84850 + }, + { + "epoch": 0.4237808684361657, + "grad_norm": 0.100531667470932, + "learning_rate": 1.7329845553080176e-05, + "loss": 9.0776, + "step": 84860 + }, + { + "epoch": 0.42383080726110517, + "grad_norm": 0.09057488292455673, + "learning_rate": 1.732834363813863e-05, + "loss": 9.0704, + "step": 84870 + }, + { + "epoch": 0.4238807460860446, + "grad_norm": 0.09145992249250412, + "learning_rate": 1.7326841723197076e-05, + "loss": 9.0489, + "step": 84880 + }, + { + "epoch": 0.42393068491098407, + "grad_norm": 0.0870828926563263, + "learning_rate": 1.7325339808255523e-05, + "loss": 9.0725, + "step": 84890 + }, + { + "epoch": 0.4239806237359235, + "grad_norm": 0.09605187177658081, + "learning_rate": 1.7323837893313977e-05, + "loss": 9.0487, + "step": 84900 + }, + { + "epoch": 0.42403056256086297, + "grad_norm": 0.09087910503149033, + "learning_rate": 1.7322335978372423e-05, + "loss": 9.0577, + "step": 84910 + }, + { + "epoch": 0.4240805013858024, + "grad_norm": 0.09988180547952652, + "learning_rate": 1.7320834063430877e-05, + "loss": 9.0576, + "step": 84920 + }, + { + "epoch": 0.42413044021074187, + "grad_norm": 0.09044559299945831, + "learning_rate": 1.7319332148489324e-05, + "loss": 9.0767, + "step": 84930 + }, + { + "epoch": 0.4241803790356813, + "grad_norm": 0.09994645416736603, + "learning_rate": 1.731783023354777e-05, + "loss": 9.0492, + "step": 84940 + }, + { + "epoch": 0.42423031786062076, + "grad_norm": 0.09559715539216995, + "learning_rate": 1.7316328318606224e-05, + "loss": 9.0664, + "step": 84950 + }, + { + "epoch": 0.4242802566855602, + "grad_norm": 0.08689738065004349, + "learning_rate": 1.731482640366467e-05, + "loss": 9.0722, + "step": 84960 + }, + { + "epoch": 0.42433019551049966, + "grad_norm": 0.10374455153942108, + "learning_rate": 1.7313324488723125e-05, + "loss": 9.0643, + "step": 84970 + }, + { + "epoch": 0.4243801343354391, + "grad_norm": 0.0957735925912857, + "learning_rate": 1.731182257378157e-05, + "loss": 9.0582, + "step": 84980 + }, + { + "epoch": 0.42443007316037856, + "grad_norm": 0.09154541045427322, + "learning_rate": 1.731032065884002e-05, + "loss": 9.06, + "step": 84990 + }, + { + "epoch": 0.424480011985318, + "grad_norm": 0.09392927587032318, + "learning_rate": 1.730881874389847e-05, + "loss": 9.0737, + "step": 85000 + }, + { + "epoch": 0.42452995081025746, + "grad_norm": 0.09651625156402588, + "learning_rate": 1.730731682895692e-05, + "loss": 9.0567, + "step": 85010 + }, + { + "epoch": 0.4245798896351969, + "grad_norm": 0.09320859611034393, + "learning_rate": 1.7305814914015372e-05, + "loss": 9.0656, + "step": 85020 + }, + { + "epoch": 0.42462982846013636, + "grad_norm": 0.09693901240825653, + "learning_rate": 1.730431299907382e-05, + "loss": 9.0709, + "step": 85030 + }, + { + "epoch": 0.4246797672850758, + "grad_norm": 0.09250442683696747, + "learning_rate": 1.730281108413227e-05, + "loss": 9.059, + "step": 85040 + }, + { + "epoch": 0.4247297061100152, + "grad_norm": 0.09281265735626221, + "learning_rate": 1.730130916919072e-05, + "loss": 9.0618, + "step": 85050 + }, + { + "epoch": 0.4247796449349547, + "grad_norm": 0.09139450639486313, + "learning_rate": 1.7299807254249166e-05, + "loss": 9.0574, + "step": 85060 + }, + { + "epoch": 0.4248295837598941, + "grad_norm": 0.09575843065977097, + "learning_rate": 1.729830533930762e-05, + "loss": 9.0575, + "step": 85070 + }, + { + "epoch": 0.4248795225848336, + "grad_norm": 0.09058381617069244, + "learning_rate": 1.7296803424366066e-05, + "loss": 9.0458, + "step": 85080 + }, + { + "epoch": 0.424929461409773, + "grad_norm": 0.09589716792106628, + "learning_rate": 1.7295301509424516e-05, + "loss": 9.0613, + "step": 85090 + }, + { + "epoch": 0.4249794002347125, + "grad_norm": 0.09588911384344101, + "learning_rate": 1.7293799594482967e-05, + "loss": 9.0706, + "step": 85100 + }, + { + "epoch": 0.4250293390596519, + "grad_norm": 0.08973708748817444, + "learning_rate": 1.7292297679541413e-05, + "loss": 9.0601, + "step": 85110 + }, + { + "epoch": 0.4250792778845914, + "grad_norm": 0.09021148830652237, + "learning_rate": 1.7290795764599867e-05, + "loss": 9.0696, + "step": 85120 + }, + { + "epoch": 0.4251292167095308, + "grad_norm": 0.08464304357767105, + "learning_rate": 1.7289293849658314e-05, + "loss": 9.0598, + "step": 85130 + }, + { + "epoch": 0.4251791555344703, + "grad_norm": 0.09651916474103928, + "learning_rate": 1.7287791934716764e-05, + "loss": 9.0664, + "step": 85140 + }, + { + "epoch": 0.4252290943594097, + "grad_norm": 0.09342118352651596, + "learning_rate": 1.7286290019775214e-05, + "loss": 9.0592, + "step": 85150 + }, + { + "epoch": 0.4252790331843492, + "grad_norm": 0.09311968088150024, + "learning_rate": 1.728478810483366e-05, + "loss": 9.0665, + "step": 85160 + }, + { + "epoch": 0.4253289720092886, + "grad_norm": 0.08970993012189865, + "learning_rate": 1.7283286189892115e-05, + "loss": 9.0662, + "step": 85170 + }, + { + "epoch": 0.4253789108342281, + "grad_norm": 0.09633956849575043, + "learning_rate": 1.728178427495056e-05, + "loss": 9.0603, + "step": 85180 + }, + { + "epoch": 0.4254288496591675, + "grad_norm": 0.09612761437892914, + "learning_rate": 1.728028236000901e-05, + "loss": 9.0691, + "step": 85190 + }, + { + "epoch": 0.425478788484107, + "grad_norm": 0.0876702144742012, + "learning_rate": 1.727878044506746e-05, + "loss": 9.0624, + "step": 85200 + }, + { + "epoch": 0.4255287273090464, + "grad_norm": 0.09160758554935455, + "learning_rate": 1.727727853012591e-05, + "loss": 9.0717, + "step": 85210 + }, + { + "epoch": 0.4255786661339859, + "grad_norm": 0.09074576944112778, + "learning_rate": 1.7275776615184362e-05, + "loss": 9.0706, + "step": 85220 + }, + { + "epoch": 0.4256286049589253, + "grad_norm": 0.08834607899188995, + "learning_rate": 1.727427470024281e-05, + "loss": 9.06, + "step": 85230 + }, + { + "epoch": 0.42567854378386477, + "grad_norm": 0.09007090330123901, + "learning_rate": 1.727277278530126e-05, + "loss": 9.0507, + "step": 85240 + }, + { + "epoch": 0.4257284826088042, + "grad_norm": 0.09260173887014389, + "learning_rate": 1.727127087035971e-05, + "loss": 9.0542, + "step": 85250 + }, + { + "epoch": 0.42577842143374367, + "grad_norm": 0.0915934219956398, + "learning_rate": 1.7269768955418156e-05, + "loss": 9.0627, + "step": 85260 + }, + { + "epoch": 0.4258283602586831, + "grad_norm": 0.09786804020404816, + "learning_rate": 1.726826704047661e-05, + "loss": 9.0529, + "step": 85270 + }, + { + "epoch": 0.42587829908362257, + "grad_norm": 0.09305689483880997, + "learning_rate": 1.7266765125535056e-05, + "loss": 9.0447, + "step": 85280 + }, + { + "epoch": 0.425928237908562, + "grad_norm": 0.09683950990438461, + "learning_rate": 1.7265263210593507e-05, + "loss": 9.0657, + "step": 85290 + }, + { + "epoch": 0.42597817673350147, + "grad_norm": 0.08442121744155884, + "learning_rate": 1.7263761295651957e-05, + "loss": 9.065, + "step": 85300 + }, + { + "epoch": 0.4260281155584409, + "grad_norm": 0.09468364715576172, + "learning_rate": 1.7262259380710403e-05, + "loss": 9.0679, + "step": 85310 + }, + { + "epoch": 0.42607805438338037, + "grad_norm": 0.09155453741550446, + "learning_rate": 1.7260757465768857e-05, + "loss": 9.0554, + "step": 85320 + }, + { + "epoch": 0.4261279932083198, + "grad_norm": 0.09124631434679031, + "learning_rate": 1.7259255550827304e-05, + "loss": 9.0581, + "step": 85330 + }, + { + "epoch": 0.42617793203325927, + "grad_norm": 0.09624254703521729, + "learning_rate": 1.7257753635885757e-05, + "loss": 9.0521, + "step": 85340 + }, + { + "epoch": 0.4262278708581987, + "grad_norm": 0.09301303327083588, + "learning_rate": 1.7256251720944204e-05, + "loss": 9.0474, + "step": 85350 + }, + { + "epoch": 0.42627780968313816, + "grad_norm": 0.09513042867183685, + "learning_rate": 1.7254749806002654e-05, + "loss": 9.0544, + "step": 85360 + }, + { + "epoch": 0.4263277485080776, + "grad_norm": 0.08930395543575287, + "learning_rate": 1.7253247891061105e-05, + "loss": 9.0651, + "step": 85370 + }, + { + "epoch": 0.42637768733301706, + "grad_norm": 0.09650768339633942, + "learning_rate": 1.725174597611955e-05, + "loss": 9.057, + "step": 85380 + }, + { + "epoch": 0.4264276261579565, + "grad_norm": 0.09203314781188965, + "learning_rate": 1.7250244061178005e-05, + "loss": 9.0673, + "step": 85390 + }, + { + "epoch": 0.42647756498289596, + "grad_norm": 0.09252451360225677, + "learning_rate": 1.7248742146236452e-05, + "loss": 9.0654, + "step": 85400 + }, + { + "epoch": 0.4265275038078354, + "grad_norm": 0.09380394965410233, + "learning_rate": 1.7247240231294902e-05, + "loss": 9.0483, + "step": 85410 + }, + { + "epoch": 0.42657744263277486, + "grad_norm": 0.09272889792919159, + "learning_rate": 1.7245738316353352e-05, + "loss": 9.0737, + "step": 85420 + }, + { + "epoch": 0.4266273814577143, + "grad_norm": 0.09201991558074951, + "learning_rate": 1.72442364014118e-05, + "loss": 9.0526, + "step": 85430 + }, + { + "epoch": 0.42667732028265376, + "grad_norm": 0.09079743921756744, + "learning_rate": 1.7242734486470252e-05, + "loss": 9.062, + "step": 85440 + }, + { + "epoch": 0.4267272591075932, + "grad_norm": 0.09716988354921341, + "learning_rate": 1.72412325715287e-05, + "loss": 9.0581, + "step": 85450 + }, + { + "epoch": 0.42677719793253266, + "grad_norm": 0.10238094627857208, + "learning_rate": 1.723973065658715e-05, + "loss": 9.0545, + "step": 85460 + }, + { + "epoch": 0.4268271367574721, + "grad_norm": 0.09213068336248398, + "learning_rate": 1.72382287416456e-05, + "loss": 9.0599, + "step": 85470 + }, + { + "epoch": 0.42687707558241156, + "grad_norm": 0.0878293439745903, + "learning_rate": 1.7236726826704046e-05, + "loss": 9.058, + "step": 85480 + }, + { + "epoch": 0.426927014407351, + "grad_norm": 0.09897506982088089, + "learning_rate": 1.72352249117625e-05, + "loss": 9.0604, + "step": 85490 + }, + { + "epoch": 0.42697695323229046, + "grad_norm": 0.09527362138032913, + "learning_rate": 1.7233722996820947e-05, + "loss": 9.0577, + "step": 85500 + }, + { + "epoch": 0.4270268920572299, + "grad_norm": 0.09203501790761948, + "learning_rate": 1.7232221081879397e-05, + "loss": 9.0629, + "step": 85510 + }, + { + "epoch": 0.42707683088216936, + "grad_norm": 0.09153282642364502, + "learning_rate": 1.7230719166937847e-05, + "loss": 9.0658, + "step": 85520 + }, + { + "epoch": 0.4271267697071088, + "grad_norm": 0.09082131832838058, + "learning_rate": 1.7229217251996294e-05, + "loss": 9.0558, + "step": 85530 + }, + { + "epoch": 0.42717670853204825, + "grad_norm": 0.09474789351224899, + "learning_rate": 1.7227715337054747e-05, + "loss": 9.0649, + "step": 85540 + }, + { + "epoch": 0.4272266473569877, + "grad_norm": 0.09650855511426926, + "learning_rate": 1.7226213422113194e-05, + "loss": 9.0518, + "step": 85550 + }, + { + "epoch": 0.42727658618192715, + "grad_norm": 0.09041815251111984, + "learning_rate": 1.7224711507171644e-05, + "loss": 9.0682, + "step": 85560 + }, + { + "epoch": 0.4273265250068666, + "grad_norm": 0.09136392921209335, + "learning_rate": 1.7223209592230095e-05, + "loss": 9.0575, + "step": 85570 + }, + { + "epoch": 0.42737646383180605, + "grad_norm": 0.09177730977535248, + "learning_rate": 1.722170767728854e-05, + "loss": 9.0442, + "step": 85580 + }, + { + "epoch": 0.4274264026567455, + "grad_norm": 0.09383508563041687, + "learning_rate": 1.7220205762346995e-05, + "loss": 9.0745, + "step": 85590 + }, + { + "epoch": 0.42747634148168495, + "grad_norm": 0.08896640688180923, + "learning_rate": 1.7218703847405442e-05, + "loss": 9.0603, + "step": 85600 + }, + { + "epoch": 0.4275262803066244, + "grad_norm": 0.0970030426979065, + "learning_rate": 1.7217201932463892e-05, + "loss": 9.0605, + "step": 85610 + }, + { + "epoch": 0.42757621913156385, + "grad_norm": 0.09975878149271011, + "learning_rate": 1.7215700017522342e-05, + "loss": 9.0549, + "step": 85620 + }, + { + "epoch": 0.42762615795650327, + "grad_norm": 0.09379398822784424, + "learning_rate": 1.721419810258079e-05, + "loss": 9.0637, + "step": 85630 + }, + { + "epoch": 0.42767609678144275, + "grad_norm": 0.09030137956142426, + "learning_rate": 1.7212696187639242e-05, + "loss": 9.0584, + "step": 85640 + }, + { + "epoch": 0.42772603560638217, + "grad_norm": 0.09938555955886841, + "learning_rate": 1.721119427269769e-05, + "loss": 9.0446, + "step": 85650 + }, + { + "epoch": 0.42777597443132165, + "grad_norm": 0.09401064366102219, + "learning_rate": 1.720969235775614e-05, + "loss": 9.0472, + "step": 85660 + }, + { + "epoch": 0.42782591325626107, + "grad_norm": 0.10197415202856064, + "learning_rate": 1.720819044281459e-05, + "loss": 9.0556, + "step": 85670 + }, + { + "epoch": 0.42787585208120055, + "grad_norm": 0.09055991470813751, + "learning_rate": 1.720668852787304e-05, + "loss": 9.0479, + "step": 85680 + }, + { + "epoch": 0.42792579090613997, + "grad_norm": 0.0905674397945404, + "learning_rate": 1.720518661293149e-05, + "loss": 9.0593, + "step": 85690 + }, + { + "epoch": 0.42797572973107945, + "grad_norm": 0.09657610207796097, + "learning_rate": 1.7203684697989937e-05, + "loss": 9.0484, + "step": 85700 + }, + { + "epoch": 0.42802566855601887, + "grad_norm": 0.09140429645776749, + "learning_rate": 1.7202182783048387e-05, + "loss": 9.0471, + "step": 85710 + }, + { + "epoch": 0.42807560738095835, + "grad_norm": 0.09539390355348587, + "learning_rate": 1.7200680868106837e-05, + "loss": 9.0487, + "step": 85720 + }, + { + "epoch": 0.42812554620589777, + "grad_norm": 0.09347163140773773, + "learning_rate": 1.7199178953165287e-05, + "loss": 9.0556, + "step": 85730 + }, + { + "epoch": 0.42817548503083724, + "grad_norm": 0.08956640213727951, + "learning_rate": 1.7197677038223737e-05, + "loss": 9.0475, + "step": 85740 + }, + { + "epoch": 0.42822542385577667, + "grad_norm": 0.09470704942941666, + "learning_rate": 1.7196175123282184e-05, + "loss": 9.0453, + "step": 85750 + }, + { + "epoch": 0.42827536268071614, + "grad_norm": 0.0948714017868042, + "learning_rate": 1.7194673208340634e-05, + "loss": 9.0497, + "step": 85760 + }, + { + "epoch": 0.42832530150565556, + "grad_norm": 0.09796865284442902, + "learning_rate": 1.7193171293399085e-05, + "loss": 9.0603, + "step": 85770 + }, + { + "epoch": 0.42837524033059504, + "grad_norm": 0.09614430367946625, + "learning_rate": 1.7191669378457535e-05, + "loss": 9.0641, + "step": 85780 + }, + { + "epoch": 0.42842517915553446, + "grad_norm": 0.09698038548231125, + "learning_rate": 1.7190167463515985e-05, + "loss": 9.0549, + "step": 85790 + }, + { + "epoch": 0.42847511798047394, + "grad_norm": 0.08912073075771332, + "learning_rate": 1.7188665548574432e-05, + "loss": 9.0442, + "step": 85800 + }, + { + "epoch": 0.42852505680541336, + "grad_norm": 0.0899166464805603, + "learning_rate": 1.7187163633632882e-05, + "loss": 9.04, + "step": 85810 + }, + { + "epoch": 0.42857499563035284, + "grad_norm": 0.09650041162967682, + "learning_rate": 1.7185661718691332e-05, + "loss": 9.0408, + "step": 85820 + }, + { + "epoch": 0.42862493445529226, + "grad_norm": 0.09182535111904144, + "learning_rate": 1.7184159803749782e-05, + "loss": 9.0617, + "step": 85830 + }, + { + "epoch": 0.42867487328023174, + "grad_norm": 0.09780148416757584, + "learning_rate": 1.7182657888808232e-05, + "loss": 9.0625, + "step": 85840 + }, + { + "epoch": 0.42872481210517116, + "grad_norm": 0.09503006935119629, + "learning_rate": 1.718115597386668e-05, + "loss": 9.0631, + "step": 85850 + }, + { + "epoch": 0.42877475093011064, + "grad_norm": 0.09269234538078308, + "learning_rate": 1.717965405892513e-05, + "loss": 9.055, + "step": 85860 + }, + { + "epoch": 0.42882468975505006, + "grad_norm": 0.09522657096385956, + "learning_rate": 1.717815214398358e-05, + "loss": 9.0593, + "step": 85870 + }, + { + "epoch": 0.42887462857998954, + "grad_norm": 0.09069998562335968, + "learning_rate": 1.717665022904203e-05, + "loss": 9.0425, + "step": 85880 + }, + { + "epoch": 0.42892456740492896, + "grad_norm": 0.09758871048688889, + "learning_rate": 1.717514831410048e-05, + "loss": 9.0392, + "step": 85890 + }, + { + "epoch": 0.42897450622986844, + "grad_norm": 0.09257560968399048, + "learning_rate": 1.7173646399158927e-05, + "loss": 9.0638, + "step": 85900 + }, + { + "epoch": 0.42902444505480786, + "grad_norm": 0.09189421683549881, + "learning_rate": 1.7172144484217377e-05, + "loss": 9.0469, + "step": 85910 + }, + { + "epoch": 0.42907438387974733, + "grad_norm": 0.09272563457489014, + "learning_rate": 1.7170642569275827e-05, + "loss": 9.0536, + "step": 85920 + }, + { + "epoch": 0.42912432270468676, + "grad_norm": 0.08748695999383926, + "learning_rate": 1.7169140654334277e-05, + "loss": 9.0641, + "step": 85930 + }, + { + "epoch": 0.42917426152962623, + "grad_norm": 0.09138987958431244, + "learning_rate": 1.7167638739392727e-05, + "loss": 9.057, + "step": 85940 + }, + { + "epoch": 0.42922420035456565, + "grad_norm": 0.09145860373973846, + "learning_rate": 1.7166136824451174e-05, + "loss": 9.0571, + "step": 85950 + }, + { + "epoch": 0.42927413917950513, + "grad_norm": 0.09510332345962524, + "learning_rate": 1.7164634909509624e-05, + "loss": 9.0455, + "step": 85960 + }, + { + "epoch": 0.42932407800444455, + "grad_norm": 0.09964827448129654, + "learning_rate": 1.7163132994568075e-05, + "loss": 9.0313, + "step": 85970 + }, + { + "epoch": 0.42937401682938403, + "grad_norm": 0.09246429055929184, + "learning_rate": 1.7161631079626525e-05, + "loss": 9.0544, + "step": 85980 + }, + { + "epoch": 0.42942395565432345, + "grad_norm": 0.10065965354442596, + "learning_rate": 1.7160129164684975e-05, + "loss": 9.0548, + "step": 85990 + }, + { + "epoch": 0.42947389447926293, + "grad_norm": 0.09086819738149643, + "learning_rate": 1.7158627249743425e-05, + "loss": 9.0661, + "step": 86000 + }, + { + "epoch": 0.42952383330420235, + "grad_norm": 0.09284672141075134, + "learning_rate": 1.7157125334801872e-05, + "loss": 9.0489, + "step": 86010 + }, + { + "epoch": 0.42957377212914183, + "grad_norm": 0.09478854387998581, + "learning_rate": 1.7155623419860322e-05, + "loss": 9.0511, + "step": 86020 + }, + { + "epoch": 0.42962371095408125, + "grad_norm": 0.0997512936592102, + "learning_rate": 1.7154121504918772e-05, + "loss": 9.0386, + "step": 86030 + }, + { + "epoch": 0.42967364977902067, + "grad_norm": 0.0923648402094841, + "learning_rate": 1.7152619589977222e-05, + "loss": 9.0533, + "step": 86040 + }, + { + "epoch": 0.42972358860396015, + "grad_norm": 0.09019633382558823, + "learning_rate": 1.7151117675035673e-05, + "loss": 9.0481, + "step": 86050 + }, + { + "epoch": 0.42977352742889957, + "grad_norm": 0.09562560170888901, + "learning_rate": 1.714961576009412e-05, + "loss": 9.0553, + "step": 86060 + }, + { + "epoch": 0.42982346625383905, + "grad_norm": 0.09037364274263382, + "learning_rate": 1.714811384515257e-05, + "loss": 9.0621, + "step": 86070 + }, + { + "epoch": 0.42987340507877847, + "grad_norm": 0.09504220634698868, + "learning_rate": 1.714661193021102e-05, + "loss": 9.0369, + "step": 86080 + }, + { + "epoch": 0.42992334390371795, + "grad_norm": 0.09927166253328323, + "learning_rate": 1.714511001526947e-05, + "loss": 9.0363, + "step": 86090 + }, + { + "epoch": 0.42997328272865737, + "grad_norm": 0.09197903424501419, + "learning_rate": 1.714360810032792e-05, + "loss": 9.0647, + "step": 86100 + }, + { + "epoch": 0.43002322155359685, + "grad_norm": 0.08889306336641312, + "learning_rate": 1.7142106185386367e-05, + "loss": 9.0551, + "step": 86110 + }, + { + "epoch": 0.43007316037853627, + "grad_norm": 0.09508626163005829, + "learning_rate": 1.7140604270444817e-05, + "loss": 9.0492, + "step": 86120 + }, + { + "epoch": 0.43012309920347574, + "grad_norm": 0.0925615131855011, + "learning_rate": 1.7139102355503267e-05, + "loss": 9.0463, + "step": 86130 + }, + { + "epoch": 0.43017303802841517, + "grad_norm": 0.0957641676068306, + "learning_rate": 1.7137600440561717e-05, + "loss": 9.0549, + "step": 86140 + }, + { + "epoch": 0.43022297685335464, + "grad_norm": 0.09168253093957901, + "learning_rate": 1.7136098525620168e-05, + "loss": 9.0504, + "step": 86150 + }, + { + "epoch": 0.43027291567829407, + "grad_norm": 0.09538407623767853, + "learning_rate": 1.7134596610678614e-05, + "loss": 9.0464, + "step": 86160 + }, + { + "epoch": 0.43032285450323354, + "grad_norm": 0.09280803054571152, + "learning_rate": 1.7133094695737065e-05, + "loss": 9.0527, + "step": 86170 + }, + { + "epoch": 0.43037279332817296, + "grad_norm": 0.09550796449184418, + "learning_rate": 1.7131592780795515e-05, + "loss": 9.0521, + "step": 86180 + }, + { + "epoch": 0.43042273215311244, + "grad_norm": 0.09481924027204514, + "learning_rate": 1.7130090865853965e-05, + "loss": 9.058, + "step": 86190 + }, + { + "epoch": 0.43047267097805186, + "grad_norm": 0.0906360000371933, + "learning_rate": 1.7128588950912415e-05, + "loss": 9.0544, + "step": 86200 + }, + { + "epoch": 0.43052260980299134, + "grad_norm": 0.1016702726483345, + "learning_rate": 1.7127087035970862e-05, + "loss": 9.0553, + "step": 86210 + }, + { + "epoch": 0.43057254862793076, + "grad_norm": 0.0934482216835022, + "learning_rate": 1.7125585121029312e-05, + "loss": 9.0411, + "step": 86220 + }, + { + "epoch": 0.43062248745287024, + "grad_norm": 0.09161151945590973, + "learning_rate": 1.7124083206087762e-05, + "loss": 9.054, + "step": 86230 + }, + { + "epoch": 0.43067242627780966, + "grad_norm": 0.09385374188423157, + "learning_rate": 1.7122581291146212e-05, + "loss": 9.0394, + "step": 86240 + }, + { + "epoch": 0.43072236510274914, + "grad_norm": 0.09769625961780548, + "learning_rate": 1.7121079376204663e-05, + "loss": 9.0444, + "step": 86250 + }, + { + "epoch": 0.43077230392768856, + "grad_norm": 0.09559006243944168, + "learning_rate": 1.711957746126311e-05, + "loss": 9.0386, + "step": 86260 + }, + { + "epoch": 0.43082224275262804, + "grad_norm": 0.0927807167172432, + "learning_rate": 1.711807554632156e-05, + "loss": 9.064, + "step": 86270 + }, + { + "epoch": 0.43087218157756746, + "grad_norm": 0.09350989013910294, + "learning_rate": 1.711657363138001e-05, + "loss": 9.0505, + "step": 86280 + }, + { + "epoch": 0.43092212040250694, + "grad_norm": 0.09480515867471695, + "learning_rate": 1.711507171643846e-05, + "loss": 9.0377, + "step": 86290 + }, + { + "epoch": 0.43097205922744636, + "grad_norm": 0.0930512323975563, + "learning_rate": 1.711356980149691e-05, + "loss": 9.0554, + "step": 86300 + }, + { + "epoch": 0.43102199805238584, + "grad_norm": 0.09537147730588913, + "learning_rate": 1.7112067886555357e-05, + "loss": 9.0552, + "step": 86310 + }, + { + "epoch": 0.43107193687732526, + "grad_norm": 0.09353245049715042, + "learning_rate": 1.711056597161381e-05, + "loss": 9.043, + "step": 86320 + }, + { + "epoch": 0.43112187570226473, + "grad_norm": 0.09246620535850525, + "learning_rate": 1.7109064056672257e-05, + "loss": 9.0528, + "step": 86330 + }, + { + "epoch": 0.43117181452720416, + "grad_norm": 0.09530563652515411, + "learning_rate": 1.7107562141730707e-05, + "loss": 9.0463, + "step": 86340 + }, + { + "epoch": 0.43122175335214363, + "grad_norm": 0.09027519822120667, + "learning_rate": 1.7106060226789158e-05, + "loss": 9.0485, + "step": 86350 + }, + { + "epoch": 0.43127169217708305, + "grad_norm": 0.09005729854106903, + "learning_rate": 1.7104558311847604e-05, + "loss": 9.0492, + "step": 86360 + }, + { + "epoch": 0.43132163100202253, + "grad_norm": 0.0943072959780693, + "learning_rate": 1.7103056396906058e-05, + "loss": 9.054, + "step": 86370 + }, + { + "epoch": 0.43137156982696195, + "grad_norm": 0.09449702501296997, + "learning_rate": 1.7101554481964505e-05, + "loss": 9.0423, + "step": 86380 + }, + { + "epoch": 0.43142150865190143, + "grad_norm": 0.09583057463169098, + "learning_rate": 1.7100052567022955e-05, + "loss": 9.0625, + "step": 86390 + }, + { + "epoch": 0.43147144747684085, + "grad_norm": 0.09351655840873718, + "learning_rate": 1.7098550652081405e-05, + "loss": 9.0511, + "step": 86400 + }, + { + "epoch": 0.43152138630178033, + "grad_norm": 0.0870162844657898, + "learning_rate": 1.7097048737139852e-05, + "loss": 9.0509, + "step": 86410 + }, + { + "epoch": 0.43157132512671975, + "grad_norm": 0.09593592584133148, + "learning_rate": 1.7095546822198306e-05, + "loss": 9.0419, + "step": 86420 + }, + { + "epoch": 0.43162126395165923, + "grad_norm": 0.0929575264453888, + "learning_rate": 1.7094044907256752e-05, + "loss": 9.0523, + "step": 86430 + }, + { + "epoch": 0.43167120277659865, + "grad_norm": 0.09491048753261566, + "learning_rate": 1.7092542992315202e-05, + "loss": 9.0308, + "step": 86440 + }, + { + "epoch": 0.4317211416015381, + "grad_norm": 0.09691637009382248, + "learning_rate": 1.7091041077373653e-05, + "loss": 9.0335, + "step": 86450 + }, + { + "epoch": 0.43177108042647755, + "grad_norm": 0.09719613939523697, + "learning_rate": 1.70895391624321e-05, + "loss": 9.0532, + "step": 86460 + }, + { + "epoch": 0.431821019251417, + "grad_norm": 0.08485887944698334, + "learning_rate": 1.7088037247490553e-05, + "loss": 9.045, + "step": 86470 + }, + { + "epoch": 0.43187095807635645, + "grad_norm": 0.09217669814825058, + "learning_rate": 1.7086535332549e-05, + "loss": 9.0437, + "step": 86480 + }, + { + "epoch": 0.4319208969012959, + "grad_norm": 0.09682594239711761, + "learning_rate": 1.708503341760745e-05, + "loss": 9.0475, + "step": 86490 + }, + { + "epoch": 0.43197083572623535, + "grad_norm": 0.09146720916032791, + "learning_rate": 1.70835315026659e-05, + "loss": 9.0369, + "step": 86500 + }, + { + "epoch": 0.4320207745511748, + "grad_norm": 0.09073561429977417, + "learning_rate": 1.7082029587724347e-05, + "loss": 9.0416, + "step": 86510 + }, + { + "epoch": 0.43207071337611425, + "grad_norm": 0.09679694473743439, + "learning_rate": 1.70805276727828e-05, + "loss": 9.0619, + "step": 86520 + }, + { + "epoch": 0.4321206522010537, + "grad_norm": 0.09839240461587906, + "learning_rate": 1.7079025757841247e-05, + "loss": 9.0497, + "step": 86530 + }, + { + "epoch": 0.43217059102599314, + "grad_norm": 0.09931270033121109, + "learning_rate": 1.7077523842899697e-05, + "loss": 9.055, + "step": 86540 + }, + { + "epoch": 0.4322205298509326, + "grad_norm": 0.09096875041723251, + "learning_rate": 1.7076021927958148e-05, + "loss": 9.0453, + "step": 86550 + }, + { + "epoch": 0.43227046867587204, + "grad_norm": 0.08868992328643799, + "learning_rate": 1.7074520013016594e-05, + "loss": 9.0377, + "step": 86560 + }, + { + "epoch": 0.4323204075008115, + "grad_norm": 0.09178798645734787, + "learning_rate": 1.7073018098075048e-05, + "loss": 9.0494, + "step": 86570 + }, + { + "epoch": 0.43237034632575094, + "grad_norm": 0.09545016288757324, + "learning_rate": 1.7071516183133495e-05, + "loss": 9.0489, + "step": 86580 + }, + { + "epoch": 0.4324202851506904, + "grad_norm": 0.0965125560760498, + "learning_rate": 1.7070014268191945e-05, + "loss": 9.0364, + "step": 86590 + }, + { + "epoch": 0.43247022397562984, + "grad_norm": 0.09696787595748901, + "learning_rate": 1.7068512353250395e-05, + "loss": 9.0447, + "step": 86600 + }, + { + "epoch": 0.4325201628005693, + "grad_norm": 0.09214460849761963, + "learning_rate": 1.7067010438308842e-05, + "loss": 9.0573, + "step": 86610 + }, + { + "epoch": 0.43257010162550874, + "grad_norm": 0.09552972763776779, + "learning_rate": 1.7065508523367296e-05, + "loss": 9.0402, + "step": 86620 + }, + { + "epoch": 0.4326200404504482, + "grad_norm": 0.09437466412782669, + "learning_rate": 1.7064006608425742e-05, + "loss": 9.0492, + "step": 86630 + }, + { + "epoch": 0.43266997927538764, + "grad_norm": 0.09180578589439392, + "learning_rate": 1.7062504693484196e-05, + "loss": 9.0461, + "step": 86640 + }, + { + "epoch": 0.4327199181003271, + "grad_norm": 0.0951187014579773, + "learning_rate": 1.7061002778542643e-05, + "loss": 9.0486, + "step": 86650 + }, + { + "epoch": 0.43276985692526654, + "grad_norm": 0.0927475169301033, + "learning_rate": 1.705950086360109e-05, + "loss": 9.04, + "step": 86660 + }, + { + "epoch": 0.432819795750206, + "grad_norm": 0.09508585184812546, + "learning_rate": 1.7057998948659543e-05, + "loss": 9.0499, + "step": 86670 + }, + { + "epoch": 0.43286973457514544, + "grad_norm": 0.09572357684373856, + "learning_rate": 1.705649703371799e-05, + "loss": 9.0267, + "step": 86680 + }, + { + "epoch": 0.4329196734000849, + "grad_norm": 0.09100820124149323, + "learning_rate": 1.7054995118776443e-05, + "loss": 9.0476, + "step": 86690 + }, + { + "epoch": 0.43296961222502434, + "grad_norm": 0.09594081342220306, + "learning_rate": 1.705349320383489e-05, + "loss": 9.0425, + "step": 86700 + }, + { + "epoch": 0.4330195510499638, + "grad_norm": 0.09707988053560257, + "learning_rate": 1.7051991288893337e-05, + "loss": 9.0399, + "step": 86710 + }, + { + "epoch": 0.43306948987490324, + "grad_norm": 0.09649763256311417, + "learning_rate": 1.705048937395179e-05, + "loss": 9.041, + "step": 86720 + }, + { + "epoch": 0.4331194286998427, + "grad_norm": 0.0913090854883194, + "learning_rate": 1.7048987459010237e-05, + "loss": 9.0297, + "step": 86730 + }, + { + "epoch": 0.43316936752478213, + "grad_norm": 0.09227044880390167, + "learning_rate": 1.704748554406869e-05, + "loss": 9.0434, + "step": 86740 + }, + { + "epoch": 0.4332193063497216, + "grad_norm": 0.09608667343854904, + "learning_rate": 1.7045983629127138e-05, + "loss": 9.0428, + "step": 86750 + }, + { + "epoch": 0.43326924517466103, + "grad_norm": 0.09107893705368042, + "learning_rate": 1.7044481714185584e-05, + "loss": 9.0533, + "step": 86760 + }, + { + "epoch": 0.4333191839996005, + "grad_norm": 0.09340152889490128, + "learning_rate": 1.7042979799244038e-05, + "loss": 9.0453, + "step": 86770 + }, + { + "epoch": 0.43336912282453993, + "grad_norm": 0.0952560156583786, + "learning_rate": 1.7041477884302485e-05, + "loss": 9.0421, + "step": 86780 + }, + { + "epoch": 0.4334190616494794, + "grad_norm": 0.09432324022054672, + "learning_rate": 1.703997596936094e-05, + "loss": 9.0402, + "step": 86790 + }, + { + "epoch": 0.43346900047441883, + "grad_norm": 0.09584697335958481, + "learning_rate": 1.7038474054419385e-05, + "loss": 9.0374, + "step": 86800 + }, + { + "epoch": 0.4335189392993583, + "grad_norm": 0.09084445238113403, + "learning_rate": 1.7036972139477832e-05, + "loss": 9.0376, + "step": 86810 + }, + { + "epoch": 0.43356887812429773, + "grad_norm": 0.09363221377134323, + "learning_rate": 1.7035470224536286e-05, + "loss": 9.0522, + "step": 86820 + }, + { + "epoch": 0.4336188169492372, + "grad_norm": 0.09301798045635223, + "learning_rate": 1.7033968309594732e-05, + "loss": 9.0359, + "step": 86830 + }, + { + "epoch": 0.43366875577417663, + "grad_norm": 0.09216799587011337, + "learning_rate": 1.7032466394653186e-05, + "loss": 9.0435, + "step": 86840 + }, + { + "epoch": 0.4337186945991161, + "grad_norm": 0.09435062110424042, + "learning_rate": 1.7030964479711633e-05, + "loss": 9.0433, + "step": 86850 + }, + { + "epoch": 0.4337686334240555, + "grad_norm": 0.08969014137983322, + "learning_rate": 1.702946256477008e-05, + "loss": 9.0326, + "step": 86860 + }, + { + "epoch": 0.433818572248995, + "grad_norm": 0.09770084917545319, + "learning_rate": 1.7027960649828533e-05, + "loss": 9.0366, + "step": 86870 + }, + { + "epoch": 0.4338685110739344, + "grad_norm": 0.0956239253282547, + "learning_rate": 1.702645873488698e-05, + "loss": 9.0302, + "step": 86880 + }, + { + "epoch": 0.4339184498988739, + "grad_norm": 0.09152868390083313, + "learning_rate": 1.7024956819945433e-05, + "loss": 9.0351, + "step": 86890 + }, + { + "epoch": 0.4339683887238133, + "grad_norm": 0.09322716295719147, + "learning_rate": 1.702345490500388e-05, + "loss": 9.0341, + "step": 86900 + }, + { + "epoch": 0.4340183275487528, + "grad_norm": 0.08923067152500153, + "learning_rate": 1.7021952990062327e-05, + "loss": 9.0475, + "step": 86910 + }, + { + "epoch": 0.4340682663736922, + "grad_norm": 0.09851467609405518, + "learning_rate": 1.702045107512078e-05, + "loss": 9.0337, + "step": 86920 + }, + { + "epoch": 0.4341182051986317, + "grad_norm": 0.09365841746330261, + "learning_rate": 1.7018949160179227e-05, + "loss": 9.0379, + "step": 86930 + }, + { + "epoch": 0.4341681440235711, + "grad_norm": 0.09401962906122208, + "learning_rate": 1.701744724523768e-05, + "loss": 9.053, + "step": 86940 + }, + { + "epoch": 0.4342180828485106, + "grad_norm": 0.09199943393468857, + "learning_rate": 1.7015945330296128e-05, + "loss": 9.0282, + "step": 86950 + }, + { + "epoch": 0.43426802167345, + "grad_norm": 0.09017936885356903, + "learning_rate": 1.7014443415354578e-05, + "loss": 9.0395, + "step": 86960 + }, + { + "epoch": 0.4343179604983895, + "grad_norm": 0.09726618975400925, + "learning_rate": 1.7012941500413028e-05, + "loss": 9.0404, + "step": 86970 + }, + { + "epoch": 0.4343678993233289, + "grad_norm": 0.0918731540441513, + "learning_rate": 1.7011439585471475e-05, + "loss": 9.0201, + "step": 86980 + }, + { + "epoch": 0.4344178381482684, + "grad_norm": 0.09042965620756149, + "learning_rate": 1.700993767052993e-05, + "loss": 9.035, + "step": 86990 + }, + { + "epoch": 0.4344677769732078, + "grad_norm": 0.09324657171964645, + "learning_rate": 1.7008435755588375e-05, + "loss": 9.0361, + "step": 87000 + }, + { + "epoch": 0.4345177157981473, + "grad_norm": 0.09195488691329956, + "learning_rate": 1.7006933840646825e-05, + "loss": 9.049, + "step": 87010 + }, + { + "epoch": 0.4345676546230867, + "grad_norm": 0.08934830129146576, + "learning_rate": 1.7005431925705276e-05, + "loss": 9.0324, + "step": 87020 + }, + { + "epoch": 0.43461759344802614, + "grad_norm": 0.0945764034986496, + "learning_rate": 1.7003930010763722e-05, + "loss": 9.0484, + "step": 87030 + }, + { + "epoch": 0.4346675322729656, + "grad_norm": 0.09223344922065735, + "learning_rate": 1.7002428095822176e-05, + "loss": 9.0433, + "step": 87040 + }, + { + "epoch": 0.43471747109790504, + "grad_norm": 0.09764394164085388, + "learning_rate": 1.7000926180880623e-05, + "loss": 9.0318, + "step": 87050 + }, + { + "epoch": 0.4347674099228445, + "grad_norm": 0.08980736136436462, + "learning_rate": 1.6999424265939073e-05, + "loss": 9.0387, + "step": 87060 + }, + { + "epoch": 0.43481734874778394, + "grad_norm": 0.0914120152592659, + "learning_rate": 1.6997922350997523e-05, + "loss": 9.052, + "step": 87070 + }, + { + "epoch": 0.4348672875727234, + "grad_norm": 0.09246059507131577, + "learning_rate": 1.699642043605597e-05, + "loss": 9.0412, + "step": 87080 + }, + { + "epoch": 0.43491722639766284, + "grad_norm": 0.09065555036067963, + "learning_rate": 1.6994918521114423e-05, + "loss": 9.035, + "step": 87090 + }, + { + "epoch": 0.4349671652226023, + "grad_norm": 0.0936904326081276, + "learning_rate": 1.699341660617287e-05, + "loss": 9.0367, + "step": 87100 + }, + { + "epoch": 0.43501710404754174, + "grad_norm": 0.1051855981349945, + "learning_rate": 1.699191469123132e-05, + "loss": 9.0431, + "step": 87110 + }, + { + "epoch": 0.4350670428724812, + "grad_norm": 0.08944005519151688, + "learning_rate": 1.699041277628977e-05, + "loss": 9.0287, + "step": 87120 + }, + { + "epoch": 0.43511698169742064, + "grad_norm": 0.09485218673944473, + "learning_rate": 1.6988910861348217e-05, + "loss": 9.0493, + "step": 87130 + }, + { + "epoch": 0.4351669205223601, + "grad_norm": 0.09217767417430878, + "learning_rate": 1.698740894640667e-05, + "loss": 9.0362, + "step": 87140 + }, + { + "epoch": 0.43521685934729953, + "grad_norm": 0.09442111849784851, + "learning_rate": 1.6985907031465118e-05, + "loss": 9.0246, + "step": 87150 + }, + { + "epoch": 0.435266798172239, + "grad_norm": 0.10553400963544846, + "learning_rate": 1.6984405116523568e-05, + "loss": 9.0343, + "step": 87160 + }, + { + "epoch": 0.43531673699717843, + "grad_norm": 0.08932023495435715, + "learning_rate": 1.6982903201582018e-05, + "loss": 9.0449, + "step": 87170 + }, + { + "epoch": 0.4353666758221179, + "grad_norm": 0.09344980865716934, + "learning_rate": 1.6981401286640465e-05, + "loss": 9.0279, + "step": 87180 + }, + { + "epoch": 0.43541661464705733, + "grad_norm": 0.09347915649414062, + "learning_rate": 1.697989937169892e-05, + "loss": 9.0341, + "step": 87190 + }, + { + "epoch": 0.4354665534719968, + "grad_norm": 0.0941552221775055, + "learning_rate": 1.6978397456757365e-05, + "loss": 9.0331, + "step": 87200 + }, + { + "epoch": 0.43551649229693623, + "grad_norm": 0.09416943788528442, + "learning_rate": 1.6976895541815815e-05, + "loss": 9.0384, + "step": 87210 + }, + { + "epoch": 0.4355664311218757, + "grad_norm": 0.0943121612071991, + "learning_rate": 1.6975393626874266e-05, + "loss": 9.038, + "step": 87220 + }, + { + "epoch": 0.43561636994681513, + "grad_norm": 0.09376168996095657, + "learning_rate": 1.6973891711932712e-05, + "loss": 9.0261, + "step": 87230 + }, + { + "epoch": 0.4356663087717546, + "grad_norm": 0.09052184224128723, + "learning_rate": 1.6972389796991166e-05, + "loss": 9.0356, + "step": 87240 + }, + { + "epoch": 0.43571624759669403, + "grad_norm": 0.0898606926202774, + "learning_rate": 1.6970887882049613e-05, + "loss": 9.0347, + "step": 87250 + }, + { + "epoch": 0.4357661864216335, + "grad_norm": 0.09599337726831436, + "learning_rate": 1.6969385967108063e-05, + "loss": 9.0272, + "step": 87260 + }, + { + "epoch": 0.4358161252465729, + "grad_norm": 0.093189537525177, + "learning_rate": 1.6967884052166513e-05, + "loss": 9.033, + "step": 87270 + }, + { + "epoch": 0.4358660640715124, + "grad_norm": 0.09416911005973816, + "learning_rate": 1.6966382137224963e-05, + "loss": 9.0371, + "step": 87280 + }, + { + "epoch": 0.4359160028964518, + "grad_norm": 0.09415122121572495, + "learning_rate": 1.6964880222283413e-05, + "loss": 9.0361, + "step": 87290 + }, + { + "epoch": 0.4359659417213913, + "grad_norm": 0.09333626925945282, + "learning_rate": 1.696337830734186e-05, + "loss": 9.0317, + "step": 87300 + }, + { + "epoch": 0.4360158805463307, + "grad_norm": 0.09361656755208969, + "learning_rate": 1.696187639240031e-05, + "loss": 9.0233, + "step": 87310 + }, + { + "epoch": 0.4360658193712702, + "grad_norm": 0.0928577408194542, + "learning_rate": 1.696037447745876e-05, + "loss": 9.044, + "step": 87320 + }, + { + "epoch": 0.4361157581962096, + "grad_norm": 0.09233950078487396, + "learning_rate": 1.695887256251721e-05, + "loss": 9.0395, + "step": 87330 + }, + { + "epoch": 0.4361656970211491, + "grad_norm": 0.09364930540323257, + "learning_rate": 1.695737064757566e-05, + "loss": 9.0438, + "step": 87340 + }, + { + "epoch": 0.4362156358460885, + "grad_norm": 0.09141746908426285, + "learning_rate": 1.6955868732634108e-05, + "loss": 9.0329, + "step": 87350 + }, + { + "epoch": 0.436265574671028, + "grad_norm": 0.08941878378391266, + "learning_rate": 1.6954366817692558e-05, + "loss": 9.0257, + "step": 87360 + }, + { + "epoch": 0.4363155134959674, + "grad_norm": 0.09561556577682495, + "learning_rate": 1.6952864902751008e-05, + "loss": 9.0315, + "step": 87370 + }, + { + "epoch": 0.4363654523209069, + "grad_norm": 0.09164825826883316, + "learning_rate": 1.6951362987809458e-05, + "loss": 9.027, + "step": 87380 + }, + { + "epoch": 0.4364153911458463, + "grad_norm": 0.0955611914396286, + "learning_rate": 1.694986107286791e-05, + "loss": 9.034, + "step": 87390 + }, + { + "epoch": 0.4364653299707858, + "grad_norm": 0.09065394103527069, + "learning_rate": 1.6948359157926355e-05, + "loss": 9.0308, + "step": 87400 + }, + { + "epoch": 0.4365152687957252, + "grad_norm": 0.09663858264684677, + "learning_rate": 1.6946857242984805e-05, + "loss": 9.0187, + "step": 87410 + }, + { + "epoch": 0.4365652076206647, + "grad_norm": 0.0890546515583992, + "learning_rate": 1.6945355328043256e-05, + "loss": 9.0392, + "step": 87420 + }, + { + "epoch": 0.4366151464456041, + "grad_norm": 0.09616228938102722, + "learning_rate": 1.6943853413101706e-05, + "loss": 9.0342, + "step": 87430 + }, + { + "epoch": 0.4366650852705436, + "grad_norm": 0.10570753365755081, + "learning_rate": 1.6942351498160156e-05, + "loss": 9.0383, + "step": 87440 + }, + { + "epoch": 0.436715024095483, + "grad_norm": 0.09239917248487473, + "learning_rate": 1.6940849583218603e-05, + "loss": 9.0355, + "step": 87450 + }, + { + "epoch": 0.4367649629204225, + "grad_norm": 0.09370817989110947, + "learning_rate": 1.6939347668277053e-05, + "loss": 9.0242, + "step": 87460 + }, + { + "epoch": 0.4368149017453619, + "grad_norm": 0.09490583091974258, + "learning_rate": 1.6937845753335503e-05, + "loss": 9.0441, + "step": 87470 + }, + { + "epoch": 0.4368648405703014, + "grad_norm": 0.0928240418434143, + "learning_rate": 1.6936343838393953e-05, + "loss": 9.0362, + "step": 87480 + }, + { + "epoch": 0.4369147793952408, + "grad_norm": 0.09075388312339783, + "learning_rate": 1.6934841923452403e-05, + "loss": 9.0322, + "step": 87490 + }, + { + "epoch": 0.4369647182201803, + "grad_norm": 0.09307154268026352, + "learning_rate": 1.693334000851085e-05, + "loss": 9.0344, + "step": 87500 + }, + { + "epoch": 0.4370146570451197, + "grad_norm": 0.0918651670217514, + "learning_rate": 1.69318380935693e-05, + "loss": 9.0393, + "step": 87510 + }, + { + "epoch": 0.4370645958700592, + "grad_norm": 0.0964551717042923, + "learning_rate": 1.693033617862775e-05, + "loss": 9.0446, + "step": 87520 + }, + { + "epoch": 0.4371145346949986, + "grad_norm": 0.09495722502470016, + "learning_rate": 1.69288342636862e-05, + "loss": 9.0284, + "step": 87530 + }, + { + "epoch": 0.4371644735199381, + "grad_norm": 0.09483332186937332, + "learning_rate": 1.692733234874465e-05, + "loss": 9.0341, + "step": 87540 + }, + { + "epoch": 0.4372144123448775, + "grad_norm": 0.08984597027301788, + "learning_rate": 1.6925830433803098e-05, + "loss": 9.0351, + "step": 87550 + }, + { + "epoch": 0.437264351169817, + "grad_norm": 0.09564351290464401, + "learning_rate": 1.6924328518861548e-05, + "loss": 9.023, + "step": 87560 + }, + { + "epoch": 0.4373142899947564, + "grad_norm": 0.09509795159101486, + "learning_rate": 1.6922826603919998e-05, + "loss": 9.0338, + "step": 87570 + }, + { + "epoch": 0.4373642288196959, + "grad_norm": 0.09072297811508179, + "learning_rate": 1.6921324688978448e-05, + "loss": 9.0442, + "step": 87580 + }, + { + "epoch": 0.4374141676446353, + "grad_norm": 0.09715980291366577, + "learning_rate": 1.69198227740369e-05, + "loss": 9.0252, + "step": 87590 + }, + { + "epoch": 0.4374641064695748, + "grad_norm": 0.09227906912565231, + "learning_rate": 1.691832085909535e-05, + "loss": 9.0273, + "step": 87600 + }, + { + "epoch": 0.4375140452945142, + "grad_norm": 0.10533630102872849, + "learning_rate": 1.6916818944153795e-05, + "loss": 9.0322, + "step": 87610 + }, + { + "epoch": 0.4375639841194537, + "grad_norm": 0.09329358488321304, + "learning_rate": 1.6915317029212246e-05, + "loss": 9.0214, + "step": 87620 + }, + { + "epoch": 0.4376139229443931, + "grad_norm": 0.09366413950920105, + "learning_rate": 1.6913815114270696e-05, + "loss": 9.0308, + "step": 87630 + }, + { + "epoch": 0.4376638617693326, + "grad_norm": 0.10361622273921967, + "learning_rate": 1.6912313199329146e-05, + "loss": 9.0371, + "step": 87640 + }, + { + "epoch": 0.437713800594272, + "grad_norm": 0.09569653123617172, + "learning_rate": 1.6910811284387596e-05, + "loss": 9.0347, + "step": 87650 + }, + { + "epoch": 0.4377637394192115, + "grad_norm": 0.09628830850124359, + "learning_rate": 1.6909309369446043e-05, + "loss": 9.0361, + "step": 87660 + }, + { + "epoch": 0.4378136782441509, + "grad_norm": 0.09476719051599503, + "learning_rate": 1.6907807454504493e-05, + "loss": 9.0287, + "step": 87670 + }, + { + "epoch": 0.4378636170690904, + "grad_norm": 0.09486107528209686, + "learning_rate": 1.6906305539562943e-05, + "loss": 9.0361, + "step": 87680 + }, + { + "epoch": 0.4379135558940298, + "grad_norm": 0.08894047141075134, + "learning_rate": 1.6904803624621393e-05, + "loss": 9.0196, + "step": 87690 + }, + { + "epoch": 0.4379634947189693, + "grad_norm": 0.09540939331054688, + "learning_rate": 1.6903301709679844e-05, + "loss": 9.039, + "step": 87700 + }, + { + "epoch": 0.4380134335439087, + "grad_norm": 0.09612215310335159, + "learning_rate": 1.690179979473829e-05, + "loss": 9.0244, + "step": 87710 + }, + { + "epoch": 0.4380633723688482, + "grad_norm": 0.0884467363357544, + "learning_rate": 1.690029787979674e-05, + "loss": 9.0421, + "step": 87720 + }, + { + "epoch": 0.4381133111937876, + "grad_norm": 0.09004423022270203, + "learning_rate": 1.689879596485519e-05, + "loss": 9.0254, + "step": 87730 + }, + { + "epoch": 0.4381632500187271, + "grad_norm": 0.09423902630805969, + "learning_rate": 1.689729404991364e-05, + "loss": 9.0198, + "step": 87740 + }, + { + "epoch": 0.4382131888436665, + "grad_norm": 0.09269099682569504, + "learning_rate": 1.689579213497209e-05, + "loss": 9.0444, + "step": 87750 + }, + { + "epoch": 0.438263127668606, + "grad_norm": 0.0918196514248848, + "learning_rate": 1.6894290220030538e-05, + "loss": 9.024, + "step": 87760 + }, + { + "epoch": 0.4383130664935454, + "grad_norm": 0.09599689394235611, + "learning_rate": 1.6892788305088988e-05, + "loss": 9.0233, + "step": 87770 + }, + { + "epoch": 0.4383630053184849, + "grad_norm": 0.09426075220108032, + "learning_rate": 1.6891286390147438e-05, + "loss": 9.0151, + "step": 87780 + }, + { + "epoch": 0.4384129441434243, + "grad_norm": 0.09202740341424942, + "learning_rate": 1.688978447520589e-05, + "loss": 9.0304, + "step": 87790 + }, + { + "epoch": 0.4384628829683638, + "grad_norm": 0.09728818386793137, + "learning_rate": 1.688828256026434e-05, + "loss": 9.0325, + "step": 87800 + }, + { + "epoch": 0.4385128217933032, + "grad_norm": 0.09795814007520676, + "learning_rate": 1.6886780645322785e-05, + "loss": 9.0093, + "step": 87810 + }, + { + "epoch": 0.4385627606182427, + "grad_norm": 0.0872364342212677, + "learning_rate": 1.6885278730381236e-05, + "loss": 9.0299, + "step": 87820 + }, + { + "epoch": 0.4386126994431821, + "grad_norm": 0.09706996381282806, + "learning_rate": 1.6883776815439686e-05, + "loss": 9.0341, + "step": 87830 + }, + { + "epoch": 0.4386626382681216, + "grad_norm": 0.08879675716161728, + "learning_rate": 1.6882274900498136e-05, + "loss": 9.0358, + "step": 87840 + }, + { + "epoch": 0.438712577093061, + "grad_norm": 0.09184462577104568, + "learning_rate": 1.6880772985556586e-05, + "loss": 9.0178, + "step": 87850 + }, + { + "epoch": 0.4387625159180005, + "grad_norm": 0.09184456616640091, + "learning_rate": 1.6879271070615033e-05, + "loss": 9.0246, + "step": 87860 + }, + { + "epoch": 0.4388124547429399, + "grad_norm": 0.08778972923755646, + "learning_rate": 1.6877769155673483e-05, + "loss": 9.0291, + "step": 87870 + }, + { + "epoch": 0.43886239356787937, + "grad_norm": 0.09503248333930969, + "learning_rate": 1.6876267240731933e-05, + "loss": 9.0374, + "step": 87880 + }, + { + "epoch": 0.4389123323928188, + "grad_norm": 0.08978200703859329, + "learning_rate": 1.6874765325790383e-05, + "loss": 9.0381, + "step": 87890 + }, + { + "epoch": 0.43896227121775827, + "grad_norm": 0.098361074924469, + "learning_rate": 1.6873263410848834e-05, + "loss": 9.0196, + "step": 87900 + }, + { + "epoch": 0.4390122100426977, + "grad_norm": 0.09547007083892822, + "learning_rate": 1.687176149590728e-05, + "loss": 9.0084, + "step": 87910 + }, + { + "epoch": 0.43906214886763717, + "grad_norm": 0.09400343149900436, + "learning_rate": 1.6870259580965734e-05, + "loss": 9.0175, + "step": 87920 + }, + { + "epoch": 0.4391120876925766, + "grad_norm": 0.09383000433444977, + "learning_rate": 1.686875766602418e-05, + "loss": 9.0114, + "step": 87930 + }, + { + "epoch": 0.43916202651751607, + "grad_norm": 0.09463747590780258, + "learning_rate": 1.686725575108263e-05, + "loss": 9.0246, + "step": 87940 + }, + { + "epoch": 0.4392119653424555, + "grad_norm": 0.09992758929729462, + "learning_rate": 1.686575383614108e-05, + "loss": 9.0191, + "step": 87950 + }, + { + "epoch": 0.43926190416739497, + "grad_norm": 0.09506499767303467, + "learning_rate": 1.6864251921199528e-05, + "loss": 9.0212, + "step": 87960 + }, + { + "epoch": 0.4393118429923344, + "grad_norm": 0.09309326857328415, + "learning_rate": 1.686275000625798e-05, + "loss": 9.0304, + "step": 87970 + }, + { + "epoch": 0.43936178181727387, + "grad_norm": 0.09387887269258499, + "learning_rate": 1.686124809131643e-05, + "loss": 9.0263, + "step": 87980 + }, + { + "epoch": 0.4394117206422133, + "grad_norm": 0.09608437120914459, + "learning_rate": 1.685974617637488e-05, + "loss": 9.0282, + "step": 87990 + }, + { + "epoch": 0.43946165946715277, + "grad_norm": 0.10036759823560715, + "learning_rate": 1.685824426143333e-05, + "loss": 9.0298, + "step": 88000 + }, + { + "epoch": 0.4395115982920922, + "grad_norm": 0.09929006546735764, + "learning_rate": 1.6856742346491775e-05, + "loss": 9.0269, + "step": 88010 + }, + { + "epoch": 0.4395615371170316, + "grad_norm": 0.08878950029611588, + "learning_rate": 1.685524043155023e-05, + "loss": 9.0244, + "step": 88020 + }, + { + "epoch": 0.4396114759419711, + "grad_norm": 0.09790655225515366, + "learning_rate": 1.6853738516608676e-05, + "loss": 9.0289, + "step": 88030 + }, + { + "epoch": 0.4396614147669105, + "grad_norm": 0.09261323511600494, + "learning_rate": 1.6852236601667126e-05, + "loss": 9.032, + "step": 88040 + }, + { + "epoch": 0.43971135359185, + "grad_norm": 0.09531623125076294, + "learning_rate": 1.6850734686725576e-05, + "loss": 9.0355, + "step": 88050 + }, + { + "epoch": 0.4397612924167894, + "grad_norm": 0.09662208706140518, + "learning_rate": 1.6849232771784023e-05, + "loss": 9.0177, + "step": 88060 + }, + { + "epoch": 0.4398112312417289, + "grad_norm": 0.09118068218231201, + "learning_rate": 1.6847730856842477e-05, + "loss": 9.017, + "step": 88070 + }, + { + "epoch": 0.4398611700666683, + "grad_norm": 0.09556883573532104, + "learning_rate": 1.6846228941900923e-05, + "loss": 9.0367, + "step": 88080 + }, + { + "epoch": 0.4399111088916078, + "grad_norm": 0.08983524143695831, + "learning_rate": 1.6844727026959373e-05, + "loss": 9.0285, + "step": 88090 + }, + { + "epoch": 0.4399610477165472, + "grad_norm": 0.09559369832277298, + "learning_rate": 1.6843225112017824e-05, + "loss": 9.0309, + "step": 88100 + }, + { + "epoch": 0.4400109865414867, + "grad_norm": 0.09884733706712723, + "learning_rate": 1.684172319707627e-05, + "loss": 9.0156, + "step": 88110 + }, + { + "epoch": 0.4400609253664261, + "grad_norm": 0.08653125166893005, + "learning_rate": 1.6840221282134724e-05, + "loss": 9.0307, + "step": 88120 + }, + { + "epoch": 0.4401108641913656, + "grad_norm": 0.09182146936655045, + "learning_rate": 1.683871936719317e-05, + "loss": 9.0374, + "step": 88130 + }, + { + "epoch": 0.440160803016305, + "grad_norm": 0.091147780418396, + "learning_rate": 1.683721745225162e-05, + "loss": 9.0369, + "step": 88140 + }, + { + "epoch": 0.4402107418412445, + "grad_norm": 0.09663031250238419, + "learning_rate": 1.683571553731007e-05, + "loss": 9.0242, + "step": 88150 + }, + { + "epoch": 0.4402606806661839, + "grad_norm": 0.09547109156847, + "learning_rate": 1.6834213622368518e-05, + "loss": 9.0246, + "step": 88160 + }, + { + "epoch": 0.4403106194911234, + "grad_norm": 0.09579718112945557, + "learning_rate": 1.683271170742697e-05, + "loss": 9.022, + "step": 88170 + }, + { + "epoch": 0.4403605583160628, + "grad_norm": 0.09461940824985504, + "learning_rate": 1.683120979248542e-05, + "loss": 9.0285, + "step": 88180 + }, + { + "epoch": 0.4404104971410023, + "grad_norm": 0.09216568619012833, + "learning_rate": 1.682970787754387e-05, + "loss": 9.0253, + "step": 88190 + }, + { + "epoch": 0.4404604359659417, + "grad_norm": 0.08952445536851883, + "learning_rate": 1.682820596260232e-05, + "loss": 9.0147, + "step": 88200 + }, + { + "epoch": 0.4405103747908812, + "grad_norm": 0.09351477026939392, + "learning_rate": 1.6826704047660765e-05, + "loss": 9.0297, + "step": 88210 + }, + { + "epoch": 0.4405603136158206, + "grad_norm": 0.09434119611978531, + "learning_rate": 1.682520213271922e-05, + "loss": 9.0314, + "step": 88220 + }, + { + "epoch": 0.4406102524407601, + "grad_norm": 0.09438735991716385, + "learning_rate": 1.6823700217777666e-05, + "loss": 9.0262, + "step": 88230 + }, + { + "epoch": 0.4406601912656995, + "grad_norm": 0.08906865119934082, + "learning_rate": 1.6822198302836116e-05, + "loss": 9.0275, + "step": 88240 + }, + { + "epoch": 0.440710130090639, + "grad_norm": 0.09299363940954208, + "learning_rate": 1.6820696387894566e-05, + "loss": 9.0376, + "step": 88250 + }, + { + "epoch": 0.4407600689155784, + "grad_norm": 0.09099581837654114, + "learning_rate": 1.6819194472953013e-05, + "loss": 9.03, + "step": 88260 + }, + { + "epoch": 0.4408100077405179, + "grad_norm": 0.08823831379413605, + "learning_rate": 1.6817692558011467e-05, + "loss": 9.0356, + "step": 88270 + }, + { + "epoch": 0.4408599465654573, + "grad_norm": 0.0898958370089531, + "learning_rate": 1.6816190643069913e-05, + "loss": 9.0328, + "step": 88280 + }, + { + "epoch": 0.44090988539039677, + "grad_norm": 0.09236274659633636, + "learning_rate": 1.6814688728128367e-05, + "loss": 9.0355, + "step": 88290 + }, + { + "epoch": 0.4409598242153362, + "grad_norm": 0.08883727341890335, + "learning_rate": 1.6813186813186814e-05, + "loss": 9.0354, + "step": 88300 + }, + { + "epoch": 0.44100976304027567, + "grad_norm": 0.09699812531471252, + "learning_rate": 1.681168489824526e-05, + "loss": 9.0269, + "step": 88310 + }, + { + "epoch": 0.4410597018652151, + "grad_norm": 0.09533574432134628, + "learning_rate": 1.6810182983303714e-05, + "loss": 9.0313, + "step": 88320 + }, + { + "epoch": 0.44110964069015457, + "grad_norm": 0.09363710880279541, + "learning_rate": 1.680868106836216e-05, + "loss": 9.0249, + "step": 88330 + }, + { + "epoch": 0.441159579515094, + "grad_norm": 0.09056729823350906, + "learning_rate": 1.6807179153420614e-05, + "loss": 9.013, + "step": 88340 + }, + { + "epoch": 0.44120951834003347, + "grad_norm": 0.08941638469696045, + "learning_rate": 1.680567723847906e-05, + "loss": 9.0277, + "step": 88350 + }, + { + "epoch": 0.4412594571649729, + "grad_norm": 0.09786325693130493, + "learning_rate": 1.6804175323537508e-05, + "loss": 9.0257, + "step": 88360 + }, + { + "epoch": 0.44130939598991237, + "grad_norm": 0.09090113639831543, + "learning_rate": 1.680267340859596e-05, + "loss": 9.0268, + "step": 88370 + }, + { + "epoch": 0.4413593348148518, + "grad_norm": 0.09401243180036545, + "learning_rate": 1.680117149365441e-05, + "loss": 9.0274, + "step": 88380 + }, + { + "epoch": 0.44140927363979127, + "grad_norm": 0.09182031452655792, + "learning_rate": 1.6799669578712862e-05, + "loss": 9.0217, + "step": 88390 + }, + { + "epoch": 0.4414592124647307, + "grad_norm": 0.09471350908279419, + "learning_rate": 1.679816766377131e-05, + "loss": 9.0152, + "step": 88400 + }, + { + "epoch": 0.44150915128967017, + "grad_norm": 0.0950457975268364, + "learning_rate": 1.6796665748829755e-05, + "loss": 9.0367, + "step": 88410 + }, + { + "epoch": 0.4415590901146096, + "grad_norm": 0.09334580600261688, + "learning_rate": 1.679516383388821e-05, + "loss": 9.0324, + "step": 88420 + }, + { + "epoch": 0.44160902893954906, + "grad_norm": 0.09345501661300659, + "learning_rate": 1.6793661918946656e-05, + "loss": 9.0421, + "step": 88430 + }, + { + "epoch": 0.4416589677644885, + "grad_norm": 0.09563722461462021, + "learning_rate": 1.679216000400511e-05, + "loss": 9.0331, + "step": 88440 + }, + { + "epoch": 0.44170890658942796, + "grad_norm": 0.09366366267204285, + "learning_rate": 1.6790658089063556e-05, + "loss": 9.022, + "step": 88450 + }, + { + "epoch": 0.4417588454143674, + "grad_norm": 0.09602910280227661, + "learning_rate": 1.6789156174122003e-05, + "loss": 9.0281, + "step": 88460 + }, + { + "epoch": 0.44180878423930686, + "grad_norm": 0.09258101880550385, + "learning_rate": 1.6787654259180457e-05, + "loss": 9.0255, + "step": 88470 + }, + { + "epoch": 0.4418587230642463, + "grad_norm": 0.09126337617635727, + "learning_rate": 1.6786152344238903e-05, + "loss": 9.0267, + "step": 88480 + }, + { + "epoch": 0.44190866188918576, + "grad_norm": 0.0905313566327095, + "learning_rate": 1.6784650429297357e-05, + "loss": 9.0256, + "step": 88490 + }, + { + "epoch": 0.4419586007141252, + "grad_norm": 0.09191538393497467, + "learning_rate": 1.6783148514355804e-05, + "loss": 9.0323, + "step": 88500 + }, + { + "epoch": 0.44200853953906466, + "grad_norm": 0.08896350860595703, + "learning_rate": 1.678164659941425e-05, + "loss": 9.0267, + "step": 88510 + }, + { + "epoch": 0.4420584783640041, + "grad_norm": 0.0963086411356926, + "learning_rate": 1.6780144684472704e-05, + "loss": 9.0095, + "step": 88520 + }, + { + "epoch": 0.44210841718894356, + "grad_norm": 0.09583403170108795, + "learning_rate": 1.677864276953115e-05, + "loss": 9.0238, + "step": 88530 + }, + { + "epoch": 0.442158356013883, + "grad_norm": 0.09216772019863129, + "learning_rate": 1.6777140854589604e-05, + "loss": 9.0123, + "step": 88540 + }, + { + "epoch": 0.44220829483882246, + "grad_norm": 0.09259237349033356, + "learning_rate": 1.677563893964805e-05, + "loss": 9.024, + "step": 88550 + }, + { + "epoch": 0.4422582336637619, + "grad_norm": 0.09866563975811005, + "learning_rate": 1.6774137024706498e-05, + "loss": 9.0138, + "step": 88560 + }, + { + "epoch": 0.44230817248870136, + "grad_norm": 0.09354985505342484, + "learning_rate": 1.677263510976495e-05, + "loss": 9.026, + "step": 88570 + }, + { + "epoch": 0.4423581113136408, + "grad_norm": 0.10093557089567184, + "learning_rate": 1.67711331948234e-05, + "loss": 9.0229, + "step": 88580 + }, + { + "epoch": 0.44240805013858026, + "grad_norm": 0.09272787719964981, + "learning_rate": 1.6769631279881852e-05, + "loss": 9.0249, + "step": 88590 + }, + { + "epoch": 0.4424579889635197, + "grad_norm": 0.09445803612470627, + "learning_rate": 1.67681293649403e-05, + "loss": 9.0102, + "step": 88600 + }, + { + "epoch": 0.44250792778845915, + "grad_norm": 0.09491786360740662, + "learning_rate": 1.676662744999875e-05, + "loss": 9.0247, + "step": 88610 + }, + { + "epoch": 0.4425578666133986, + "grad_norm": 0.09124266356229782, + "learning_rate": 1.67651255350572e-05, + "loss": 9.0244, + "step": 88620 + }, + { + "epoch": 0.44260780543833805, + "grad_norm": 0.09176667034626007, + "learning_rate": 1.6763623620115646e-05, + "loss": 9.0165, + "step": 88630 + }, + { + "epoch": 0.4426577442632775, + "grad_norm": 0.10504433512687683, + "learning_rate": 1.67621217051741e-05, + "loss": 9.0253, + "step": 88640 + }, + { + "epoch": 0.44270768308821695, + "grad_norm": 0.09985138475894928, + "learning_rate": 1.6760619790232546e-05, + "loss": 9.0125, + "step": 88650 + }, + { + "epoch": 0.4427576219131564, + "grad_norm": 0.0883818119764328, + "learning_rate": 1.6759117875290996e-05, + "loss": 9.0167, + "step": 88660 + }, + { + "epoch": 0.44280756073809585, + "grad_norm": 0.08730896562337875, + "learning_rate": 1.6757615960349447e-05, + "loss": 9.033, + "step": 88670 + }, + { + "epoch": 0.4428574995630353, + "grad_norm": 0.08953774720430374, + "learning_rate": 1.6756114045407893e-05, + "loss": 9.0397, + "step": 88680 + }, + { + "epoch": 0.44290743838797475, + "grad_norm": 0.09112643450498581, + "learning_rate": 1.6754612130466347e-05, + "loss": 9.0137, + "step": 88690 + }, + { + "epoch": 0.44295737721291417, + "grad_norm": 0.09895043820142746, + "learning_rate": 1.6753110215524794e-05, + "loss": 9.0121, + "step": 88700 + }, + { + "epoch": 0.44300731603785365, + "grad_norm": 0.09690089523792267, + "learning_rate": 1.6751608300583244e-05, + "loss": 9.0322, + "step": 88710 + }, + { + "epoch": 0.44305725486279307, + "grad_norm": 0.09116695076227188, + "learning_rate": 1.6750106385641694e-05, + "loss": 9.0266, + "step": 88720 + }, + { + "epoch": 0.44310719368773255, + "grad_norm": 0.09873844683170319, + "learning_rate": 1.674860447070014e-05, + "loss": 9.0144, + "step": 88730 + }, + { + "epoch": 0.44315713251267197, + "grad_norm": 0.08854333311319351, + "learning_rate": 1.6747102555758594e-05, + "loss": 9.0264, + "step": 88740 + }, + { + "epoch": 0.44320707133761145, + "grad_norm": 0.09332463145256042, + "learning_rate": 1.674560064081704e-05, + "loss": 9.0221, + "step": 88750 + }, + { + "epoch": 0.44325701016255087, + "grad_norm": 0.0980052724480629, + "learning_rate": 1.674409872587549e-05, + "loss": 9.0311, + "step": 88760 + }, + { + "epoch": 0.44330694898749035, + "grad_norm": 0.09149298816919327, + "learning_rate": 1.674259681093394e-05, + "loss": 9.0343, + "step": 88770 + }, + { + "epoch": 0.44335688781242977, + "grad_norm": 0.0906062051653862, + "learning_rate": 1.674109489599239e-05, + "loss": 9.0131, + "step": 88780 + }, + { + "epoch": 0.44340682663736924, + "grad_norm": 0.09233875572681427, + "learning_rate": 1.6739592981050842e-05, + "loss": 9.0148, + "step": 88790 + }, + { + "epoch": 0.44345676546230867, + "grad_norm": 0.09254756569862366, + "learning_rate": 1.673809106610929e-05, + "loss": 9.0121, + "step": 88800 + }, + { + "epoch": 0.44350670428724814, + "grad_norm": 0.0948517769575119, + "learning_rate": 1.673658915116774e-05, + "loss": 9.0205, + "step": 88810 + }, + { + "epoch": 0.44355664311218757, + "grad_norm": 0.0976976752281189, + "learning_rate": 1.673508723622619e-05, + "loss": 9.0087, + "step": 88820 + }, + { + "epoch": 0.44360658193712704, + "grad_norm": 0.0981476828455925, + "learning_rate": 1.6733585321284636e-05, + "loss": 9.0068, + "step": 88830 + }, + { + "epoch": 0.44365652076206646, + "grad_norm": 0.09623267501592636, + "learning_rate": 1.673208340634309e-05, + "loss": 9.0195, + "step": 88840 + }, + { + "epoch": 0.44370645958700594, + "grad_norm": 0.09452231973409653, + "learning_rate": 1.6730581491401536e-05, + "loss": 9.016, + "step": 88850 + }, + { + "epoch": 0.44375639841194536, + "grad_norm": 0.09390220791101456, + "learning_rate": 1.6729079576459986e-05, + "loss": 9.0134, + "step": 88860 + }, + { + "epoch": 0.44380633723688484, + "grad_norm": 0.09678220003843307, + "learning_rate": 1.6727577661518437e-05, + "loss": 9.012, + "step": 88870 + }, + { + "epoch": 0.44385627606182426, + "grad_norm": 0.09419732540845871, + "learning_rate": 1.6726075746576883e-05, + "loss": 9.0304, + "step": 88880 + }, + { + "epoch": 0.44390621488676374, + "grad_norm": 0.08998965471982956, + "learning_rate": 1.6724573831635337e-05, + "loss": 9.0247, + "step": 88890 + }, + { + "epoch": 0.44395615371170316, + "grad_norm": 0.09407693147659302, + "learning_rate": 1.6723071916693784e-05, + "loss": 9.0233, + "step": 88900 + }, + { + "epoch": 0.44400609253664264, + "grad_norm": 0.094719298183918, + "learning_rate": 1.6721570001752234e-05, + "loss": 9.0212, + "step": 88910 + }, + { + "epoch": 0.44405603136158206, + "grad_norm": 0.09490375965833664, + "learning_rate": 1.6720068086810684e-05, + "loss": 9.0231, + "step": 88920 + }, + { + "epoch": 0.44410597018652154, + "grad_norm": 0.09172754734754562, + "learning_rate": 1.6718566171869134e-05, + "loss": 9.0039, + "step": 88930 + }, + { + "epoch": 0.44415590901146096, + "grad_norm": 0.09175007045269012, + "learning_rate": 1.6717064256927584e-05, + "loss": 9.0265, + "step": 88940 + }, + { + "epoch": 0.44420584783640044, + "grad_norm": 0.0903623029589653, + "learning_rate": 1.671556234198603e-05, + "loss": 9.0105, + "step": 88950 + }, + { + "epoch": 0.44425578666133986, + "grad_norm": 0.09337350726127625, + "learning_rate": 1.671406042704448e-05, + "loss": 9.0146, + "step": 88960 + }, + { + "epoch": 0.44430572548627933, + "grad_norm": 0.0933094471693039, + "learning_rate": 1.671255851210293e-05, + "loss": 9.0125, + "step": 88970 + }, + { + "epoch": 0.44435566431121876, + "grad_norm": 0.0940171629190445, + "learning_rate": 1.6711056597161382e-05, + "loss": 9.0088, + "step": 88980 + }, + { + "epoch": 0.44440560313615823, + "grad_norm": 0.0906391590833664, + "learning_rate": 1.6709554682219832e-05, + "loss": 9.0209, + "step": 88990 + }, + { + "epoch": 0.44445554196109766, + "grad_norm": 0.09381049126386642, + "learning_rate": 1.670805276727828e-05, + "loss": 9.0177, + "step": 89000 + }, + { + "epoch": 0.4445054807860371, + "grad_norm": 0.09514244645833969, + "learning_rate": 1.670655085233673e-05, + "loss": 9.0153, + "step": 89010 + }, + { + "epoch": 0.44455541961097655, + "grad_norm": 0.09447330236434937, + "learning_rate": 1.670504893739518e-05, + "loss": 9.0064, + "step": 89020 + }, + { + "epoch": 0.444605358435916, + "grad_norm": 0.09046784043312073, + "learning_rate": 1.670354702245363e-05, + "loss": 9.011, + "step": 89030 + }, + { + "epoch": 0.44465529726085545, + "grad_norm": 0.09899982064962387, + "learning_rate": 1.670204510751208e-05, + "loss": 9.0164, + "step": 89040 + }, + { + "epoch": 0.4447052360857949, + "grad_norm": 0.0965995118021965, + "learning_rate": 1.6700543192570526e-05, + "loss": 9.0145, + "step": 89050 + }, + { + "epoch": 0.44475517491073435, + "grad_norm": 0.09488395601511002, + "learning_rate": 1.6699041277628976e-05, + "loss": 9.0108, + "step": 89060 + }, + { + "epoch": 0.4448051137356738, + "grad_norm": 0.09230642020702362, + "learning_rate": 1.6697539362687427e-05, + "loss": 9.0086, + "step": 89070 + }, + { + "epoch": 0.44485505256061325, + "grad_norm": 0.10005513578653336, + "learning_rate": 1.6696037447745877e-05, + "loss": 9.0129, + "step": 89080 + }, + { + "epoch": 0.4449049913855527, + "grad_norm": 0.09370043128728867, + "learning_rate": 1.6694535532804327e-05, + "loss": 9.0219, + "step": 89090 + }, + { + "epoch": 0.44495493021049215, + "grad_norm": 0.09334039688110352, + "learning_rate": 1.6693033617862774e-05, + "loss": 9.0091, + "step": 89100 + }, + { + "epoch": 0.44500486903543157, + "grad_norm": 0.09181877970695496, + "learning_rate": 1.6691531702921224e-05, + "loss": 9.0125, + "step": 89110 + }, + { + "epoch": 0.44505480786037105, + "grad_norm": 0.09253057092428207, + "learning_rate": 1.6690029787979674e-05, + "loss": 9.0284, + "step": 89120 + }, + { + "epoch": 0.44510474668531047, + "grad_norm": 0.08906669169664383, + "learning_rate": 1.6688527873038124e-05, + "loss": 9.0251, + "step": 89130 + }, + { + "epoch": 0.44515468551024995, + "grad_norm": 0.09391651302576065, + "learning_rate": 1.6687025958096574e-05, + "loss": 9.0138, + "step": 89140 + }, + { + "epoch": 0.44520462433518937, + "grad_norm": 0.09748826175928116, + "learning_rate": 1.668552404315502e-05, + "loss": 9.0223, + "step": 89150 + }, + { + "epoch": 0.44525456316012885, + "grad_norm": 0.09539515525102615, + "learning_rate": 1.668402212821347e-05, + "loss": 8.9965, + "step": 89160 + }, + { + "epoch": 0.44530450198506827, + "grad_norm": 0.0892728641629219, + "learning_rate": 1.668252021327192e-05, + "loss": 9.0155, + "step": 89170 + }, + { + "epoch": 0.44535444081000775, + "grad_norm": 0.09498701989650726, + "learning_rate": 1.6681018298330372e-05, + "loss": 9.0113, + "step": 89180 + }, + { + "epoch": 0.44540437963494717, + "grad_norm": 0.09873063117265701, + "learning_rate": 1.6679516383388822e-05, + "loss": 9.0206, + "step": 89190 + }, + { + "epoch": 0.44545431845988664, + "grad_norm": 0.093873530626297, + "learning_rate": 1.667801446844727e-05, + "loss": 9.0141, + "step": 89200 + }, + { + "epoch": 0.44550425728482607, + "grad_norm": 0.09323079884052277, + "learning_rate": 1.667651255350572e-05, + "loss": 9.0133, + "step": 89210 + }, + { + "epoch": 0.44555419610976554, + "grad_norm": 0.09667900949716568, + "learning_rate": 1.667501063856417e-05, + "loss": 9.0206, + "step": 89220 + }, + { + "epoch": 0.44560413493470497, + "grad_norm": 0.1016487404704094, + "learning_rate": 1.667350872362262e-05, + "loss": 9.036, + "step": 89230 + }, + { + "epoch": 0.44565407375964444, + "grad_norm": 0.09627226740121841, + "learning_rate": 1.667200680868107e-05, + "loss": 9.0168, + "step": 89240 + }, + { + "epoch": 0.44570401258458386, + "grad_norm": 0.09360161423683167, + "learning_rate": 1.667050489373952e-05, + "loss": 9.0101, + "step": 89250 + }, + { + "epoch": 0.44575395140952334, + "grad_norm": 0.08747625350952148, + "learning_rate": 1.6669002978797966e-05, + "loss": 9.0054, + "step": 89260 + }, + { + "epoch": 0.44580389023446276, + "grad_norm": 0.09256748855113983, + "learning_rate": 1.6667501063856417e-05, + "loss": 9.0228, + "step": 89270 + }, + { + "epoch": 0.44585382905940224, + "grad_norm": 0.09370601922273636, + "learning_rate": 1.6665999148914867e-05, + "loss": 9.0211, + "step": 89280 + }, + { + "epoch": 0.44590376788434166, + "grad_norm": 0.09242367744445801, + "learning_rate": 1.6664497233973317e-05, + "loss": 9.0083, + "step": 89290 + }, + { + "epoch": 0.44595370670928114, + "grad_norm": 0.0926143079996109, + "learning_rate": 1.6662995319031767e-05, + "loss": 9.0202, + "step": 89300 + }, + { + "epoch": 0.44600364553422056, + "grad_norm": 0.0988818109035492, + "learning_rate": 1.6661493404090214e-05, + "loss": 9.0048, + "step": 89310 + }, + { + "epoch": 0.44605358435916004, + "grad_norm": 0.09795771539211273, + "learning_rate": 1.6659991489148664e-05, + "loss": 9.0216, + "step": 89320 + }, + { + "epoch": 0.44610352318409946, + "grad_norm": 0.09289045631885529, + "learning_rate": 1.6658489574207114e-05, + "loss": 9.0187, + "step": 89330 + }, + { + "epoch": 0.44615346200903894, + "grad_norm": 0.09757115691900253, + "learning_rate": 1.6656987659265564e-05, + "loss": 9.0029, + "step": 89340 + }, + { + "epoch": 0.44620340083397836, + "grad_norm": 0.09858351945877075, + "learning_rate": 1.6655485744324015e-05, + "loss": 9.0203, + "step": 89350 + }, + { + "epoch": 0.44625333965891784, + "grad_norm": 0.09712626785039902, + "learning_rate": 1.665398382938246e-05, + "loss": 9.0095, + "step": 89360 + }, + { + "epoch": 0.44630327848385726, + "grad_norm": 0.09400411695241928, + "learning_rate": 1.665248191444091e-05, + "loss": 9.007, + "step": 89370 + }, + { + "epoch": 0.44635321730879673, + "grad_norm": 0.0991569310426712, + "learning_rate": 1.6650979999499362e-05, + "loss": 9.0118, + "step": 89380 + }, + { + "epoch": 0.44640315613373616, + "grad_norm": 0.09559716284275055, + "learning_rate": 1.6649478084557812e-05, + "loss": 8.9983, + "step": 89390 + }, + { + "epoch": 0.44645309495867563, + "grad_norm": 0.09568917006254196, + "learning_rate": 1.6647976169616262e-05, + "loss": 9.0076, + "step": 89400 + }, + { + "epoch": 0.44650303378361506, + "grad_norm": 0.09183740615844727, + "learning_rate": 1.664647425467471e-05, + "loss": 9.0106, + "step": 89410 + }, + { + "epoch": 0.44655297260855453, + "grad_norm": 0.0893789678812027, + "learning_rate": 1.664497233973316e-05, + "loss": 9.0001, + "step": 89420 + }, + { + "epoch": 0.44660291143349395, + "grad_norm": 0.09366993606090546, + "learning_rate": 1.664347042479161e-05, + "loss": 9.0137, + "step": 89430 + }, + { + "epoch": 0.44665285025843343, + "grad_norm": 0.09987689554691315, + "learning_rate": 1.664196850985006e-05, + "loss": 9.0049, + "step": 89440 + }, + { + "epoch": 0.44670278908337285, + "grad_norm": 0.09278554469347, + "learning_rate": 1.664046659490851e-05, + "loss": 9.0149, + "step": 89450 + }, + { + "epoch": 0.44675272790831233, + "grad_norm": 0.08935824036598206, + "learning_rate": 1.6638964679966956e-05, + "loss": 9.0228, + "step": 89460 + }, + { + "epoch": 0.44680266673325175, + "grad_norm": 0.08843576163053513, + "learning_rate": 1.6637462765025407e-05, + "loss": 9.0136, + "step": 89470 + }, + { + "epoch": 0.44685260555819123, + "grad_norm": 0.09229853004217148, + "learning_rate": 1.6635960850083857e-05, + "loss": 9.0065, + "step": 89480 + }, + { + "epoch": 0.44690254438313065, + "grad_norm": 0.09317019581794739, + "learning_rate": 1.6634458935142307e-05, + "loss": 9.0074, + "step": 89490 + }, + { + "epoch": 0.44695248320807013, + "grad_norm": 0.09514672309160233, + "learning_rate": 1.6632957020200757e-05, + "loss": 9.0009, + "step": 89500 + }, + { + "epoch": 0.44700242203300955, + "grad_norm": 0.09481526166200638, + "learning_rate": 1.6631455105259207e-05, + "loss": 9.0216, + "step": 89510 + }, + { + "epoch": 0.447052360857949, + "grad_norm": 0.09262920171022415, + "learning_rate": 1.6629953190317654e-05, + "loss": 9.0146, + "step": 89520 + }, + { + "epoch": 0.44710229968288845, + "grad_norm": 0.09552852809429169, + "learning_rate": 1.6628451275376104e-05, + "loss": 9.0086, + "step": 89530 + }, + { + "epoch": 0.4471522385078279, + "grad_norm": 0.09410271048545837, + "learning_rate": 1.6626949360434554e-05, + "loss": 9.0166, + "step": 89540 + }, + { + "epoch": 0.44720217733276735, + "grad_norm": 0.09333918243646622, + "learning_rate": 1.6625447445493005e-05, + "loss": 9.0171, + "step": 89550 + }, + { + "epoch": 0.4472521161577068, + "grad_norm": 0.09849578142166138, + "learning_rate": 1.6623945530551455e-05, + "loss": 9.0056, + "step": 89560 + }, + { + "epoch": 0.44730205498264625, + "grad_norm": 0.09501996636390686, + "learning_rate": 1.6622443615609905e-05, + "loss": 9.0066, + "step": 89570 + }, + { + "epoch": 0.4473519938075857, + "grad_norm": 0.09807047247886658, + "learning_rate": 1.6620941700668352e-05, + "loss": 9.0202, + "step": 89580 + }, + { + "epoch": 0.44740193263252515, + "grad_norm": 0.09879709780216217, + "learning_rate": 1.6619439785726802e-05, + "loss": 9.0203, + "step": 89590 + }, + { + "epoch": 0.4474518714574646, + "grad_norm": 0.08965755254030228, + "learning_rate": 1.6617937870785252e-05, + "loss": 9.0089, + "step": 89600 + }, + { + "epoch": 0.44750181028240404, + "grad_norm": 0.09854756295681, + "learning_rate": 1.6616435955843702e-05, + "loss": 9.0021, + "step": 89610 + }, + { + "epoch": 0.4475517491073435, + "grad_norm": 0.09752686321735382, + "learning_rate": 1.6614934040902153e-05, + "loss": 9.0026, + "step": 89620 + }, + { + "epoch": 0.44760168793228294, + "grad_norm": 0.09632866829633713, + "learning_rate": 1.66134321259606e-05, + "loss": 9.0084, + "step": 89630 + }, + { + "epoch": 0.4476516267572224, + "grad_norm": 0.09156852960586548, + "learning_rate": 1.661193021101905e-05, + "loss": 9.0133, + "step": 89640 + }, + { + "epoch": 0.44770156558216184, + "grad_norm": 0.08667552471160889, + "learning_rate": 1.66104282960775e-05, + "loss": 9.0044, + "step": 89650 + }, + { + "epoch": 0.4477515044071013, + "grad_norm": 0.09284383803606033, + "learning_rate": 1.660892638113595e-05, + "loss": 9.0035, + "step": 89660 + }, + { + "epoch": 0.44780144323204074, + "grad_norm": 0.09126190096139908, + "learning_rate": 1.66074244661944e-05, + "loss": 9.014, + "step": 89670 + }, + { + "epoch": 0.4478513820569802, + "grad_norm": 0.10024043917655945, + "learning_rate": 1.6605922551252847e-05, + "loss": 8.9948, + "step": 89680 + }, + { + "epoch": 0.44790132088191964, + "grad_norm": 0.09772263467311859, + "learning_rate": 1.6604420636311297e-05, + "loss": 9.0078, + "step": 89690 + }, + { + "epoch": 0.4479512597068591, + "grad_norm": 0.09419326484203339, + "learning_rate": 1.6602918721369747e-05, + "loss": 9.0066, + "step": 89700 + }, + { + "epoch": 0.44800119853179854, + "grad_norm": 0.08692155033349991, + "learning_rate": 1.6601416806428197e-05, + "loss": 9.0163, + "step": 89710 + }, + { + "epoch": 0.448051137356738, + "grad_norm": 0.09567783027887344, + "learning_rate": 1.6599914891486648e-05, + "loss": 9.0016, + "step": 89720 + }, + { + "epoch": 0.44810107618167744, + "grad_norm": 0.0902228057384491, + "learning_rate": 1.6598412976545094e-05, + "loss": 9.0098, + "step": 89730 + }, + { + "epoch": 0.4481510150066169, + "grad_norm": 0.09784096479415894, + "learning_rate": 1.6596911061603545e-05, + "loss": 9.0028, + "step": 89740 + }, + { + "epoch": 0.44820095383155634, + "grad_norm": 0.09646725654602051, + "learning_rate": 1.6595409146661995e-05, + "loss": 8.9891, + "step": 89750 + }, + { + "epoch": 0.4482508926564958, + "grad_norm": 0.09333818405866623, + "learning_rate": 1.6593907231720445e-05, + "loss": 8.9968, + "step": 89760 + }, + { + "epoch": 0.44830083148143524, + "grad_norm": 0.09000226110219955, + "learning_rate": 1.6592405316778895e-05, + "loss": 9.0099, + "step": 89770 + }, + { + "epoch": 0.4483507703063747, + "grad_norm": 0.09583406150341034, + "learning_rate": 1.6590903401837342e-05, + "loss": 9.0108, + "step": 89780 + }, + { + "epoch": 0.44840070913131413, + "grad_norm": 0.09547807276248932, + "learning_rate": 1.6589401486895792e-05, + "loss": 9.0058, + "step": 89790 + }, + { + "epoch": 0.4484506479562536, + "grad_norm": 0.09241116046905518, + "learning_rate": 1.6587899571954242e-05, + "loss": 8.9989, + "step": 89800 + }, + { + "epoch": 0.44850058678119303, + "grad_norm": 0.09252715855836868, + "learning_rate": 1.6586397657012692e-05, + "loss": 9.0156, + "step": 89810 + }, + { + "epoch": 0.4485505256061325, + "grad_norm": 0.09530036151409149, + "learning_rate": 1.6584895742071143e-05, + "loss": 9.0116, + "step": 89820 + }, + { + "epoch": 0.44860046443107193, + "grad_norm": 0.08951985090970993, + "learning_rate": 1.658339382712959e-05, + "loss": 9.0116, + "step": 89830 + }, + { + "epoch": 0.4486504032560114, + "grad_norm": 0.09694526344537735, + "learning_rate": 1.658189191218804e-05, + "loss": 8.9973, + "step": 89840 + }, + { + "epoch": 0.44870034208095083, + "grad_norm": 0.09165141731500626, + "learning_rate": 1.658038999724649e-05, + "loss": 9.0058, + "step": 89850 + }, + { + "epoch": 0.4487502809058903, + "grad_norm": 0.09192651510238647, + "learning_rate": 1.657888808230494e-05, + "loss": 9.0044, + "step": 89860 + }, + { + "epoch": 0.44880021973082973, + "grad_norm": 0.08684073388576508, + "learning_rate": 1.657738616736339e-05, + "loss": 9.0147, + "step": 89870 + }, + { + "epoch": 0.4488501585557692, + "grad_norm": 0.09026368707418442, + "learning_rate": 1.6575884252421837e-05, + "loss": 8.9914, + "step": 89880 + }, + { + "epoch": 0.44890009738070863, + "grad_norm": 0.09688662737607956, + "learning_rate": 1.657438233748029e-05, + "loss": 9.0052, + "step": 89890 + }, + { + "epoch": 0.4489500362056481, + "grad_norm": 0.09581411629915237, + "learning_rate": 1.6572880422538737e-05, + "loss": 8.9905, + "step": 89900 + }, + { + "epoch": 0.44899997503058753, + "grad_norm": 0.1003839373588562, + "learning_rate": 1.6571378507597187e-05, + "loss": 9.0148, + "step": 89910 + }, + { + "epoch": 0.449049913855527, + "grad_norm": 0.09192468225955963, + "learning_rate": 1.6569876592655638e-05, + "loss": 9.0248, + "step": 89920 + }, + { + "epoch": 0.4490998526804664, + "grad_norm": 0.09126332402229309, + "learning_rate": 1.6568374677714084e-05, + "loss": 9.0122, + "step": 89930 + }, + { + "epoch": 0.4491497915054059, + "grad_norm": 0.0944957584142685, + "learning_rate": 1.6566872762772538e-05, + "loss": 9.0082, + "step": 89940 + }, + { + "epoch": 0.4491997303303453, + "grad_norm": 0.0930357500910759, + "learning_rate": 1.6565370847830985e-05, + "loss": 8.9977, + "step": 89950 + }, + { + "epoch": 0.4492496691552848, + "grad_norm": 0.09657001495361328, + "learning_rate": 1.6563868932889435e-05, + "loss": 9.006, + "step": 89960 + }, + { + "epoch": 0.4492996079802242, + "grad_norm": 0.09886710345745087, + "learning_rate": 1.6562367017947885e-05, + "loss": 9.0056, + "step": 89970 + }, + { + "epoch": 0.4493495468051637, + "grad_norm": 0.0890527293086052, + "learning_rate": 1.6560865103006332e-05, + "loss": 9.0023, + "step": 89980 + }, + { + "epoch": 0.4493994856301031, + "grad_norm": 0.0896250382065773, + "learning_rate": 1.6559363188064785e-05, + "loss": 9.0064, + "step": 89990 + }, + { + "epoch": 0.44944942445504255, + "grad_norm": 0.09379280358552933, + "learning_rate": 1.6557861273123232e-05, + "loss": 9.0103, + "step": 90000 + }, + { + "epoch": 0.449499363279982, + "grad_norm": 0.09411850571632385, + "learning_rate": 1.6556359358181682e-05, + "loss": 9.0098, + "step": 90010 + }, + { + "epoch": 0.44954930210492144, + "grad_norm": 0.08976029604673386, + "learning_rate": 1.6554857443240133e-05, + "loss": 9.0026, + "step": 90020 + }, + { + "epoch": 0.4495992409298609, + "grad_norm": 0.0921340063214302, + "learning_rate": 1.655335552829858e-05, + "loss": 9.0126, + "step": 90030 + }, + { + "epoch": 0.44964917975480034, + "grad_norm": 0.09887304157018661, + "learning_rate": 1.6551853613357033e-05, + "loss": 9.0057, + "step": 90040 + }, + { + "epoch": 0.4496991185797398, + "grad_norm": 0.09693904966115952, + "learning_rate": 1.655035169841548e-05, + "loss": 8.9959, + "step": 90050 + }, + { + "epoch": 0.44974905740467924, + "grad_norm": 0.09151757508516312, + "learning_rate": 1.654884978347393e-05, + "loss": 9.0101, + "step": 90060 + }, + { + "epoch": 0.4497989962296187, + "grad_norm": 0.1048135980963707, + "learning_rate": 1.654734786853238e-05, + "loss": 8.9994, + "step": 90070 + }, + { + "epoch": 0.44984893505455814, + "grad_norm": 0.09414626657962799, + "learning_rate": 1.6545845953590827e-05, + "loss": 9.0041, + "step": 90080 + }, + { + "epoch": 0.4498988738794976, + "grad_norm": 0.0916576012969017, + "learning_rate": 1.654434403864928e-05, + "loss": 8.9947, + "step": 90090 + }, + { + "epoch": 0.44994881270443704, + "grad_norm": 0.09090348333120346, + "learning_rate": 1.6542842123707727e-05, + "loss": 8.9906, + "step": 90100 + }, + { + "epoch": 0.4499987515293765, + "grad_norm": 0.0916331559419632, + "learning_rate": 1.6541340208766177e-05, + "loss": 9.0048, + "step": 90110 + }, + { + "epoch": 0.45004869035431594, + "grad_norm": 0.09484575688838959, + "learning_rate": 1.6539838293824628e-05, + "loss": 9.0064, + "step": 90120 + }, + { + "epoch": 0.4500986291792554, + "grad_norm": 0.09016244858503342, + "learning_rate": 1.6538336378883074e-05, + "loss": 9.0041, + "step": 90130 + }, + { + "epoch": 0.45014856800419484, + "grad_norm": 0.09121375530958176, + "learning_rate": 1.6536834463941528e-05, + "loss": 9.0068, + "step": 90140 + }, + { + "epoch": 0.4501985068291343, + "grad_norm": 0.09483055770397186, + "learning_rate": 1.6535332548999975e-05, + "loss": 9.0051, + "step": 90150 + }, + { + "epoch": 0.45024844565407374, + "grad_norm": 0.09125950932502747, + "learning_rate": 1.6533830634058425e-05, + "loss": 8.9956, + "step": 90160 + }, + { + "epoch": 0.4502983844790132, + "grad_norm": 0.09234707057476044, + "learning_rate": 1.6532328719116875e-05, + "loss": 9.0004, + "step": 90170 + }, + { + "epoch": 0.45034832330395264, + "grad_norm": 0.09413158893585205, + "learning_rate": 1.6530826804175322e-05, + "loss": 8.9987, + "step": 90180 + }, + { + "epoch": 0.4503982621288921, + "grad_norm": 0.09710296243429184, + "learning_rate": 1.6529324889233775e-05, + "loss": 8.9944, + "step": 90190 + }, + { + "epoch": 0.45044820095383153, + "grad_norm": 0.0949583351612091, + "learning_rate": 1.6527822974292222e-05, + "loss": 9.003, + "step": 90200 + }, + { + "epoch": 0.450498139778771, + "grad_norm": 0.09892937541007996, + "learning_rate": 1.6526321059350676e-05, + "loss": 8.9983, + "step": 90210 + }, + { + "epoch": 0.45054807860371043, + "grad_norm": 0.09644380211830139, + "learning_rate": 1.6524819144409123e-05, + "loss": 8.9919, + "step": 90220 + }, + { + "epoch": 0.4505980174286499, + "grad_norm": 0.09606988728046417, + "learning_rate": 1.652331722946757e-05, + "loss": 9.0063, + "step": 90230 + }, + { + "epoch": 0.45064795625358933, + "grad_norm": 0.09773648530244827, + "learning_rate": 1.6521815314526023e-05, + "loss": 8.9835, + "step": 90240 + }, + { + "epoch": 0.4506978950785288, + "grad_norm": 0.09979324787855148, + "learning_rate": 1.652031339958447e-05, + "loss": 8.9963, + "step": 90250 + }, + { + "epoch": 0.45074783390346823, + "grad_norm": 0.09351041167974472, + "learning_rate": 1.6518811484642923e-05, + "loss": 9.0103, + "step": 90260 + }, + { + "epoch": 0.4507977727284077, + "grad_norm": 0.09703806042671204, + "learning_rate": 1.651730956970137e-05, + "loss": 8.9954, + "step": 90270 + }, + { + "epoch": 0.45084771155334713, + "grad_norm": 0.09339191764593124, + "learning_rate": 1.6515807654759817e-05, + "loss": 9.0095, + "step": 90280 + }, + { + "epoch": 0.4508976503782866, + "grad_norm": 0.09220651537179947, + "learning_rate": 1.651430573981827e-05, + "loss": 8.9959, + "step": 90290 + }, + { + "epoch": 0.45094758920322603, + "grad_norm": 0.09635385125875473, + "learning_rate": 1.6512803824876717e-05, + "loss": 8.9845, + "step": 90300 + }, + { + "epoch": 0.4509975280281655, + "grad_norm": 0.08985468745231628, + "learning_rate": 1.651130190993517e-05, + "loss": 9.0037, + "step": 90310 + }, + { + "epoch": 0.45104746685310493, + "grad_norm": 0.0972968265414238, + "learning_rate": 1.6509799994993618e-05, + "loss": 8.9896, + "step": 90320 + }, + { + "epoch": 0.4510974056780444, + "grad_norm": 0.09027385711669922, + "learning_rate": 1.6508298080052064e-05, + "loss": 9.0151, + "step": 90330 + }, + { + "epoch": 0.4511473445029838, + "grad_norm": 0.09425677359104156, + "learning_rate": 1.6506796165110518e-05, + "loss": 9.0061, + "step": 90340 + }, + { + "epoch": 0.4511972833279233, + "grad_norm": 0.09738679975271225, + "learning_rate": 1.6505294250168965e-05, + "loss": 8.9966, + "step": 90350 + }, + { + "epoch": 0.4512472221528627, + "grad_norm": 0.08872897177934647, + "learning_rate": 1.6503792335227418e-05, + "loss": 9.0142, + "step": 90360 + }, + { + "epoch": 0.4512971609778022, + "grad_norm": 0.09494879096746445, + "learning_rate": 1.6502290420285865e-05, + "loss": 9.0005, + "step": 90370 + }, + { + "epoch": 0.4513470998027416, + "grad_norm": 0.09621385484933853, + "learning_rate": 1.6500788505344312e-05, + "loss": 9.0012, + "step": 90380 + }, + { + "epoch": 0.4513970386276811, + "grad_norm": 0.09302900731563568, + "learning_rate": 1.6499286590402765e-05, + "loss": 9.0184, + "step": 90390 + }, + { + "epoch": 0.4514469774526205, + "grad_norm": 0.09508734941482544, + "learning_rate": 1.6497784675461212e-05, + "loss": 9.007, + "step": 90400 + }, + { + "epoch": 0.45149691627756, + "grad_norm": 0.09385523945093155, + "learning_rate": 1.6496282760519666e-05, + "loss": 9.006, + "step": 90410 + }, + { + "epoch": 0.4515468551024994, + "grad_norm": 0.09022466838359833, + "learning_rate": 1.6494780845578113e-05, + "loss": 9.0222, + "step": 90420 + }, + { + "epoch": 0.4515967939274389, + "grad_norm": 0.09833023697137833, + "learning_rate": 1.649327893063656e-05, + "loss": 8.985, + "step": 90430 + }, + { + "epoch": 0.4516467327523783, + "grad_norm": 0.09564008563756943, + "learning_rate": 1.6491777015695013e-05, + "loss": 9.0073, + "step": 90440 + }, + { + "epoch": 0.4516966715773178, + "grad_norm": 0.0922297015786171, + "learning_rate": 1.649027510075346e-05, + "loss": 8.9923, + "step": 90450 + }, + { + "epoch": 0.4517466104022572, + "grad_norm": 0.09411637485027313, + "learning_rate": 1.6488773185811913e-05, + "loss": 9.0043, + "step": 90460 + }, + { + "epoch": 0.4517965492271967, + "grad_norm": 0.09667911380529404, + "learning_rate": 1.648727127087036e-05, + "loss": 8.9967, + "step": 90470 + }, + { + "epoch": 0.4518464880521361, + "grad_norm": 0.09421195834875107, + "learning_rate": 1.6485769355928807e-05, + "loss": 9.0058, + "step": 90480 + }, + { + "epoch": 0.4518964268770756, + "grad_norm": 0.09238166362047195, + "learning_rate": 1.648426744098726e-05, + "loss": 9.0087, + "step": 90490 + }, + { + "epoch": 0.451946365702015, + "grad_norm": 0.09417644143104553, + "learning_rate": 1.6482765526045707e-05, + "loss": 9.0012, + "step": 90500 + }, + { + "epoch": 0.4519963045269545, + "grad_norm": 0.09228955209255219, + "learning_rate": 1.648126361110416e-05, + "loss": 9.0016, + "step": 90510 + }, + { + "epoch": 0.4520462433518939, + "grad_norm": 0.09172794967889786, + "learning_rate": 1.6479761696162608e-05, + "loss": 8.9866, + "step": 90520 + }, + { + "epoch": 0.4520961821768334, + "grad_norm": 0.10119758546352386, + "learning_rate": 1.6478259781221058e-05, + "loss": 9.0031, + "step": 90530 + }, + { + "epoch": 0.4521461210017728, + "grad_norm": 0.10038284212350845, + "learning_rate": 1.6476757866279508e-05, + "loss": 9.0136, + "step": 90540 + }, + { + "epoch": 0.4521960598267123, + "grad_norm": 0.09320379048585892, + "learning_rate": 1.6475255951337955e-05, + "loss": 8.9853, + "step": 90550 + }, + { + "epoch": 0.4522459986516517, + "grad_norm": 0.08812331408262253, + "learning_rate": 1.647375403639641e-05, + "loss": 9.0052, + "step": 90560 + }, + { + "epoch": 0.4522959374765912, + "grad_norm": 0.08946395665407181, + "learning_rate": 1.6472252121454855e-05, + "loss": 9.0011, + "step": 90570 + }, + { + "epoch": 0.4523458763015306, + "grad_norm": 0.09697841852903366, + "learning_rate": 1.6470750206513305e-05, + "loss": 9.004, + "step": 90580 + }, + { + "epoch": 0.4523958151264701, + "grad_norm": 0.09299259632825851, + "learning_rate": 1.6469248291571755e-05, + "loss": 8.9935, + "step": 90590 + }, + { + "epoch": 0.4524457539514095, + "grad_norm": 0.09182324260473251, + "learning_rate": 1.6467746376630202e-05, + "loss": 9.0015, + "step": 90600 + }, + { + "epoch": 0.452495692776349, + "grad_norm": 0.08882872015237808, + "learning_rate": 1.6466244461688656e-05, + "loss": 9.0002, + "step": 90610 + }, + { + "epoch": 0.4525456316012884, + "grad_norm": 0.08870082348585129, + "learning_rate": 1.6464742546747103e-05, + "loss": 8.9936, + "step": 90620 + }, + { + "epoch": 0.4525955704262279, + "grad_norm": 0.09134729206562042, + "learning_rate": 1.6463240631805553e-05, + "loss": 8.9908, + "step": 90630 + }, + { + "epoch": 0.4526455092511673, + "grad_norm": 0.09212338924407959, + "learning_rate": 1.6461738716864003e-05, + "loss": 9.0031, + "step": 90640 + }, + { + "epoch": 0.4526954480761068, + "grad_norm": 0.09525448828935623, + "learning_rate": 1.646023680192245e-05, + "loss": 8.998, + "step": 90650 + }, + { + "epoch": 0.4527453869010462, + "grad_norm": 0.09147049486637115, + "learning_rate": 1.6458734886980903e-05, + "loss": 9.0005, + "step": 90660 + }, + { + "epoch": 0.4527953257259857, + "grad_norm": 0.09832822531461716, + "learning_rate": 1.645723297203935e-05, + "loss": 8.9923, + "step": 90670 + }, + { + "epoch": 0.4528452645509251, + "grad_norm": 0.09704606980085373, + "learning_rate": 1.64557310570978e-05, + "loss": 8.9927, + "step": 90680 + }, + { + "epoch": 0.4528952033758646, + "grad_norm": 0.09421838819980621, + "learning_rate": 1.645422914215625e-05, + "loss": 9.0079, + "step": 90690 + }, + { + "epoch": 0.452945142200804, + "grad_norm": 0.09166116267442703, + "learning_rate": 1.6452727227214697e-05, + "loss": 8.9912, + "step": 90700 + }, + { + "epoch": 0.4529950810257435, + "grad_norm": 0.0929771363735199, + "learning_rate": 1.645122531227315e-05, + "loss": 8.9928, + "step": 90710 + }, + { + "epoch": 0.4530450198506829, + "grad_norm": 0.09662994742393494, + "learning_rate": 1.6449723397331598e-05, + "loss": 8.997, + "step": 90720 + }, + { + "epoch": 0.4530949586756224, + "grad_norm": 0.09966935217380524, + "learning_rate": 1.6448221482390048e-05, + "loss": 8.9901, + "step": 90730 + }, + { + "epoch": 0.4531448975005618, + "grad_norm": 0.09319128841161728, + "learning_rate": 1.6446719567448498e-05, + "loss": 8.9974, + "step": 90740 + }, + { + "epoch": 0.4531948363255013, + "grad_norm": 0.09141194820404053, + "learning_rate": 1.6445217652506945e-05, + "loss": 9.0124, + "step": 90750 + }, + { + "epoch": 0.4532447751504407, + "grad_norm": 0.09347580373287201, + "learning_rate": 1.64437157375654e-05, + "loss": 8.996, + "step": 90760 + }, + { + "epoch": 0.4532947139753802, + "grad_norm": 0.09879345446825027, + "learning_rate": 1.6442213822623845e-05, + "loss": 8.9926, + "step": 90770 + }, + { + "epoch": 0.4533446528003196, + "grad_norm": 0.09470192342996597, + "learning_rate": 1.6440711907682295e-05, + "loss": 9.0014, + "step": 90780 + }, + { + "epoch": 0.4533945916252591, + "grad_norm": 0.09187999367713928, + "learning_rate": 1.6439209992740745e-05, + "loss": 9.0035, + "step": 90790 + }, + { + "epoch": 0.4534445304501985, + "grad_norm": 0.0892479196190834, + "learning_rate": 1.6437708077799192e-05, + "loss": 9.0248, + "step": 90800 + }, + { + "epoch": 0.453494469275138, + "grad_norm": 0.09171134233474731, + "learning_rate": 1.6436206162857646e-05, + "loss": 8.9885, + "step": 90810 + }, + { + "epoch": 0.4535444081000774, + "grad_norm": 0.09449323266744614, + "learning_rate": 1.6434704247916093e-05, + "loss": 9.0067, + "step": 90820 + }, + { + "epoch": 0.4535943469250169, + "grad_norm": 0.09131663292646408, + "learning_rate": 1.6433202332974543e-05, + "loss": 8.9834, + "step": 90830 + }, + { + "epoch": 0.4536442857499563, + "grad_norm": 0.0928645208477974, + "learning_rate": 1.6431700418032993e-05, + "loss": 8.9925, + "step": 90840 + }, + { + "epoch": 0.4536942245748958, + "grad_norm": 0.09293372929096222, + "learning_rate": 1.6430198503091443e-05, + "loss": 8.998, + "step": 90850 + }, + { + "epoch": 0.4537441633998352, + "grad_norm": 0.09626687318086624, + "learning_rate": 1.6428696588149893e-05, + "loss": 9.0024, + "step": 90860 + }, + { + "epoch": 0.4537941022247747, + "grad_norm": 0.09633610397577286, + "learning_rate": 1.642719467320834e-05, + "loss": 8.9846, + "step": 90870 + }, + { + "epoch": 0.4538440410497141, + "grad_norm": 0.09213805943727493, + "learning_rate": 1.642569275826679e-05, + "loss": 8.9934, + "step": 90880 + }, + { + "epoch": 0.4538939798746536, + "grad_norm": 0.09418069571256638, + "learning_rate": 1.642419084332524e-05, + "loss": 8.987, + "step": 90890 + }, + { + "epoch": 0.453943918699593, + "grad_norm": 0.09727146476507187, + "learning_rate": 1.642268892838369e-05, + "loss": 8.9973, + "step": 90900 + }, + { + "epoch": 0.4539938575245325, + "grad_norm": 0.09132228046655655, + "learning_rate": 1.642118701344214e-05, + "loss": 9.0022, + "step": 90910 + }, + { + "epoch": 0.4540437963494719, + "grad_norm": 0.09720085561275482, + "learning_rate": 1.6419685098500588e-05, + "loss": 9.0058, + "step": 90920 + }, + { + "epoch": 0.4540937351744114, + "grad_norm": 0.08539389073848724, + "learning_rate": 1.6418183183559038e-05, + "loss": 9.0059, + "step": 90930 + }, + { + "epoch": 0.4541436739993508, + "grad_norm": 0.09037039428949356, + "learning_rate": 1.6416681268617488e-05, + "loss": 8.9984, + "step": 90940 + }, + { + "epoch": 0.45419361282429027, + "grad_norm": 0.09338293969631195, + "learning_rate": 1.6415179353675938e-05, + "loss": 8.998, + "step": 90950 + }, + { + "epoch": 0.4542435516492297, + "grad_norm": 0.09810806810855865, + "learning_rate": 1.641367743873439e-05, + "loss": 8.9885, + "step": 90960 + }, + { + "epoch": 0.45429349047416917, + "grad_norm": 0.09057030826807022, + "learning_rate": 1.6412175523792835e-05, + "loss": 8.9988, + "step": 90970 + }, + { + "epoch": 0.4543434292991086, + "grad_norm": 0.09103839844465256, + "learning_rate": 1.6410673608851285e-05, + "loss": 8.9834, + "step": 90980 + }, + { + "epoch": 0.454393368124048, + "grad_norm": 0.09123621881008148, + "learning_rate": 1.6409171693909735e-05, + "loss": 8.9904, + "step": 90990 + }, + { + "epoch": 0.4544433069489875, + "grad_norm": 0.09989268332719803, + "learning_rate": 1.6407669778968186e-05, + "loss": 8.9973, + "step": 91000 + }, + { + "epoch": 0.4544932457739269, + "grad_norm": 0.09387070685625076, + "learning_rate": 1.6406167864026636e-05, + "loss": 8.9821, + "step": 91010 + }, + { + "epoch": 0.4545431845988664, + "grad_norm": 0.09730041772127151, + "learning_rate": 1.6404665949085083e-05, + "loss": 8.9905, + "step": 91020 + }, + { + "epoch": 0.4545931234238058, + "grad_norm": 0.09198877215385437, + "learning_rate": 1.6403164034143533e-05, + "loss": 8.9827, + "step": 91030 + }, + { + "epoch": 0.4546430622487453, + "grad_norm": 0.10066971182823181, + "learning_rate": 1.6401662119201983e-05, + "loss": 8.9825, + "step": 91040 + }, + { + "epoch": 0.4546930010736847, + "grad_norm": 0.09631640464067459, + "learning_rate": 1.6400160204260433e-05, + "loss": 8.9979, + "step": 91050 + }, + { + "epoch": 0.4547429398986242, + "grad_norm": 0.09197012335062027, + "learning_rate": 1.6398658289318883e-05, + "loss": 8.9957, + "step": 91060 + }, + { + "epoch": 0.4547928787235636, + "grad_norm": 0.09876050055027008, + "learning_rate": 1.639715637437733e-05, + "loss": 8.9956, + "step": 91070 + }, + { + "epoch": 0.4548428175485031, + "grad_norm": 0.09191004186868668, + "learning_rate": 1.639565445943578e-05, + "loss": 8.9973, + "step": 91080 + }, + { + "epoch": 0.4548927563734425, + "grad_norm": 0.09481745958328247, + "learning_rate": 1.639415254449423e-05, + "loss": 9.0117, + "step": 91090 + }, + { + "epoch": 0.454942695198382, + "grad_norm": 0.09424830228090286, + "learning_rate": 1.639265062955268e-05, + "loss": 9.0045, + "step": 91100 + }, + { + "epoch": 0.4549926340233214, + "grad_norm": 0.09049578011035919, + "learning_rate": 1.639114871461113e-05, + "loss": 9.0088, + "step": 91110 + }, + { + "epoch": 0.4550425728482609, + "grad_norm": 0.09442015737295151, + "learning_rate": 1.6389646799669578e-05, + "loss": 8.9877, + "step": 91120 + }, + { + "epoch": 0.4550925116732003, + "grad_norm": 0.09153556078672409, + "learning_rate": 1.6388144884728028e-05, + "loss": 8.9957, + "step": 91130 + }, + { + "epoch": 0.4551424504981398, + "grad_norm": 0.08938729763031006, + "learning_rate": 1.6386642969786478e-05, + "loss": 8.9926, + "step": 91140 + }, + { + "epoch": 0.4551923893230792, + "grad_norm": 0.10056551545858383, + "learning_rate": 1.6385141054844928e-05, + "loss": 8.9848, + "step": 91150 + }, + { + "epoch": 0.4552423281480187, + "grad_norm": 0.09079209715127945, + "learning_rate": 1.638363913990338e-05, + "loss": 8.9895, + "step": 91160 + }, + { + "epoch": 0.4552922669729581, + "grad_norm": 0.08931328356266022, + "learning_rate": 1.6382137224961825e-05, + "loss": 8.9885, + "step": 91170 + }, + { + "epoch": 0.4553422057978976, + "grad_norm": 0.09365954250097275, + "learning_rate": 1.6380635310020275e-05, + "loss": 8.9991, + "step": 91180 + }, + { + "epoch": 0.455392144622837, + "grad_norm": 0.0947716236114502, + "learning_rate": 1.6379133395078725e-05, + "loss": 8.993, + "step": 91190 + }, + { + "epoch": 0.4554420834477765, + "grad_norm": 0.09569630026817322, + "learning_rate": 1.6377631480137176e-05, + "loss": 8.9796, + "step": 91200 + }, + { + "epoch": 0.4554920222727159, + "grad_norm": 0.09704722464084625, + "learning_rate": 1.6376129565195626e-05, + "loss": 8.9829, + "step": 91210 + }, + { + "epoch": 0.4555419610976554, + "grad_norm": 0.0929335281252861, + "learning_rate": 1.6374627650254076e-05, + "loss": 9.0032, + "step": 91220 + }, + { + "epoch": 0.4555918999225948, + "grad_norm": 0.09553791582584381, + "learning_rate": 1.6373125735312523e-05, + "loss": 8.9954, + "step": 91230 + }, + { + "epoch": 0.4556418387475343, + "grad_norm": 0.09323886781930923, + "learning_rate": 1.6371623820370973e-05, + "loss": 9.0015, + "step": 91240 + }, + { + "epoch": 0.4556917775724737, + "grad_norm": 0.09437048435211182, + "learning_rate": 1.6370121905429423e-05, + "loss": 9.0025, + "step": 91250 + }, + { + "epoch": 0.4557417163974132, + "grad_norm": 0.09459199756383896, + "learning_rate": 1.6368619990487873e-05, + "loss": 8.9955, + "step": 91260 + }, + { + "epoch": 0.4557916552223526, + "grad_norm": 0.09053412079811096, + "learning_rate": 1.6367118075546324e-05, + "loss": 8.9967, + "step": 91270 + }, + { + "epoch": 0.4558415940472921, + "grad_norm": 0.0967337116599083, + "learning_rate": 1.636561616060477e-05, + "loss": 8.9871, + "step": 91280 + }, + { + "epoch": 0.4558915328722315, + "grad_norm": 0.09642165154218674, + "learning_rate": 1.636411424566322e-05, + "loss": 9.0127, + "step": 91290 + }, + { + "epoch": 0.455941471697171, + "grad_norm": 0.09764105081558228, + "learning_rate": 1.636261233072167e-05, + "loss": 9.0031, + "step": 91300 + }, + { + "epoch": 0.4559914105221104, + "grad_norm": 0.08854164928197861, + "learning_rate": 1.636111041578012e-05, + "loss": 8.9874, + "step": 91310 + }, + { + "epoch": 0.4560413493470499, + "grad_norm": 0.09614869207143784, + "learning_rate": 1.635960850083857e-05, + "loss": 8.977, + "step": 91320 + }, + { + "epoch": 0.4560912881719893, + "grad_norm": 0.09570986032485962, + "learning_rate": 1.6358106585897018e-05, + "loss": 8.9986, + "step": 91330 + }, + { + "epoch": 0.4561412269969288, + "grad_norm": 0.0936940461397171, + "learning_rate": 1.6356604670955468e-05, + "loss": 8.988, + "step": 91340 + }, + { + "epoch": 0.4561911658218682, + "grad_norm": 0.08797448128461838, + "learning_rate": 1.6355102756013918e-05, + "loss": 8.998, + "step": 91350 + }, + { + "epoch": 0.45624110464680767, + "grad_norm": 0.0959649458527565, + "learning_rate": 1.635360084107237e-05, + "loss": 8.9901, + "step": 91360 + }, + { + "epoch": 0.4562910434717471, + "grad_norm": 0.08933617919683456, + "learning_rate": 1.635209892613082e-05, + "loss": 9.0037, + "step": 91370 + }, + { + "epoch": 0.45634098229668657, + "grad_norm": 0.09029172360897064, + "learning_rate": 1.6350597011189265e-05, + "loss": 8.9963, + "step": 91380 + }, + { + "epoch": 0.456390921121626, + "grad_norm": 0.0927506536245346, + "learning_rate": 1.6349095096247716e-05, + "loss": 8.9835, + "step": 91390 + }, + { + "epoch": 0.45644085994656547, + "grad_norm": 0.08801010996103287, + "learning_rate": 1.6347593181306166e-05, + "loss": 8.9871, + "step": 91400 + }, + { + "epoch": 0.4564907987715049, + "grad_norm": 0.09252002835273743, + "learning_rate": 1.6346091266364616e-05, + "loss": 9.0016, + "step": 91410 + }, + { + "epoch": 0.45654073759644437, + "grad_norm": 0.09484586119651794, + "learning_rate": 1.6344589351423066e-05, + "loss": 8.9857, + "step": 91420 + }, + { + "epoch": 0.4565906764213838, + "grad_norm": 0.10182981193065643, + "learning_rate": 1.6343087436481513e-05, + "loss": 8.9776, + "step": 91430 + }, + { + "epoch": 0.45664061524632327, + "grad_norm": 0.0948307141661644, + "learning_rate": 1.6341585521539963e-05, + "loss": 8.9847, + "step": 91440 + }, + { + "epoch": 0.4566905540712627, + "grad_norm": 0.0894957110285759, + "learning_rate": 1.6340083606598413e-05, + "loss": 8.9764, + "step": 91450 + }, + { + "epoch": 0.45674049289620217, + "grad_norm": 0.09226840734481812, + "learning_rate": 1.6338581691656863e-05, + "loss": 8.9938, + "step": 91460 + }, + { + "epoch": 0.4567904317211416, + "grad_norm": 0.09212039411067963, + "learning_rate": 1.6337079776715314e-05, + "loss": 8.9952, + "step": 91470 + }, + { + "epoch": 0.45684037054608106, + "grad_norm": 0.09596432745456696, + "learning_rate": 1.633557786177376e-05, + "loss": 8.9927, + "step": 91480 + }, + { + "epoch": 0.4568903093710205, + "grad_norm": 0.09584586322307587, + "learning_rate": 1.633407594683221e-05, + "loss": 8.9862, + "step": 91490 + }, + { + "epoch": 0.45694024819595996, + "grad_norm": 0.09555765986442566, + "learning_rate": 1.633257403189066e-05, + "loss": 8.9894, + "step": 91500 + }, + { + "epoch": 0.4569901870208994, + "grad_norm": 0.09596934914588928, + "learning_rate": 1.633107211694911e-05, + "loss": 8.9971, + "step": 91510 + }, + { + "epoch": 0.45704012584583886, + "grad_norm": 0.09511646628379822, + "learning_rate": 1.632957020200756e-05, + "loss": 8.9951, + "step": 91520 + }, + { + "epoch": 0.4570900646707783, + "grad_norm": 0.09373897314071655, + "learning_rate": 1.6328068287066008e-05, + "loss": 8.9797, + "step": 91530 + }, + { + "epoch": 0.45714000349571776, + "grad_norm": 0.0908622294664383, + "learning_rate": 1.632656637212446e-05, + "loss": 9.0091, + "step": 91540 + }, + { + "epoch": 0.4571899423206572, + "grad_norm": 0.09412607550621033, + "learning_rate": 1.6325064457182908e-05, + "loss": 8.9875, + "step": 91550 + }, + { + "epoch": 0.45723988114559666, + "grad_norm": 0.09757382422685623, + "learning_rate": 1.632356254224136e-05, + "loss": 8.9888, + "step": 91560 + }, + { + "epoch": 0.4572898199705361, + "grad_norm": 0.09289320558309555, + "learning_rate": 1.632206062729981e-05, + "loss": 8.9767, + "step": 91570 + }, + { + "epoch": 0.45733975879547556, + "grad_norm": 0.08800926059484482, + "learning_rate": 1.6320558712358255e-05, + "loss": 8.9755, + "step": 91580 + }, + { + "epoch": 0.457389697620415, + "grad_norm": 0.09186160564422607, + "learning_rate": 1.631905679741671e-05, + "loss": 8.992, + "step": 91590 + }, + { + "epoch": 0.45743963644535446, + "grad_norm": 0.09121951460838318, + "learning_rate": 1.6317554882475156e-05, + "loss": 8.9873, + "step": 91600 + }, + { + "epoch": 0.4574895752702939, + "grad_norm": 0.09226646274328232, + "learning_rate": 1.6316052967533606e-05, + "loss": 8.9873, + "step": 91610 + }, + { + "epoch": 0.45753951409523336, + "grad_norm": 0.0960613414645195, + "learning_rate": 1.6314551052592056e-05, + "loss": 8.9843, + "step": 91620 + }, + { + "epoch": 0.4575894529201728, + "grad_norm": 0.10511282831430435, + "learning_rate": 1.6313049137650503e-05, + "loss": 8.9846, + "step": 91630 + }, + { + "epoch": 0.45763939174511226, + "grad_norm": 0.09277866035699844, + "learning_rate": 1.6311547222708956e-05, + "loss": 8.9834, + "step": 91640 + }, + { + "epoch": 0.4576893305700517, + "grad_norm": 0.09160225838422775, + "learning_rate": 1.6310045307767403e-05, + "loss": 8.9996, + "step": 91650 + }, + { + "epoch": 0.45773926939499116, + "grad_norm": 0.08975838869810104, + "learning_rate": 1.6308543392825853e-05, + "loss": 8.9845, + "step": 91660 + }, + { + "epoch": 0.4577892082199306, + "grad_norm": 0.09508371353149414, + "learning_rate": 1.6307041477884304e-05, + "loss": 8.9995, + "step": 91670 + }, + { + "epoch": 0.45783914704487005, + "grad_norm": 0.09741570800542831, + "learning_rate": 1.630553956294275e-05, + "loss": 8.9987, + "step": 91680 + }, + { + "epoch": 0.4578890858698095, + "grad_norm": 0.09434978663921356, + "learning_rate": 1.6304037648001204e-05, + "loss": 8.9934, + "step": 91690 + }, + { + "epoch": 0.45793902469474895, + "grad_norm": 0.0960199385881424, + "learning_rate": 1.630253573305965e-05, + "loss": 8.9918, + "step": 91700 + }, + { + "epoch": 0.4579889635196884, + "grad_norm": 0.09522512555122375, + "learning_rate": 1.63010338181181e-05, + "loss": 8.9856, + "step": 91710 + }, + { + "epoch": 0.45803890234462785, + "grad_norm": 0.09180162101984024, + "learning_rate": 1.629953190317655e-05, + "loss": 8.986, + "step": 91720 + }, + { + "epoch": 0.4580888411695673, + "grad_norm": 0.09206482768058777, + "learning_rate": 1.6298029988234998e-05, + "loss": 8.9953, + "step": 91730 + }, + { + "epoch": 0.45813877999450675, + "grad_norm": 0.09780178964138031, + "learning_rate": 1.629652807329345e-05, + "loss": 8.9888, + "step": 91740 + }, + { + "epoch": 0.4581887188194462, + "grad_norm": 0.09504421800374985, + "learning_rate": 1.6295026158351898e-05, + "loss": 8.9846, + "step": 91750 + }, + { + "epoch": 0.45823865764438565, + "grad_norm": 0.09662793576717377, + "learning_rate": 1.629352424341035e-05, + "loss": 8.9828, + "step": 91760 + }, + { + "epoch": 0.45828859646932507, + "grad_norm": 0.09310472756624222, + "learning_rate": 1.62920223284688e-05, + "loss": 9.0069, + "step": 91770 + }, + { + "epoch": 0.45833853529426455, + "grad_norm": 0.09177019447088242, + "learning_rate": 1.6290520413527245e-05, + "loss": 8.9781, + "step": 91780 + }, + { + "epoch": 0.45838847411920397, + "grad_norm": 0.09365440160036087, + "learning_rate": 1.62890184985857e-05, + "loss": 8.9753, + "step": 91790 + }, + { + "epoch": 0.45843841294414345, + "grad_norm": 0.09410764276981354, + "learning_rate": 1.6287516583644146e-05, + "loss": 8.977, + "step": 91800 + }, + { + "epoch": 0.45848835176908287, + "grad_norm": 0.09689280390739441, + "learning_rate": 1.6286014668702596e-05, + "loss": 8.9838, + "step": 91810 + }, + { + "epoch": 0.45853829059402235, + "grad_norm": 0.09834188967943192, + "learning_rate": 1.6284512753761046e-05, + "loss": 8.98, + "step": 91820 + }, + { + "epoch": 0.45858822941896177, + "grad_norm": 0.10252006351947784, + "learning_rate": 1.6283010838819493e-05, + "loss": 8.9913, + "step": 91830 + }, + { + "epoch": 0.45863816824390125, + "grad_norm": 0.09507527202367783, + "learning_rate": 1.6281508923877946e-05, + "loss": 8.9938, + "step": 91840 + }, + { + "epoch": 0.45868810706884067, + "grad_norm": 0.09752275049686432, + "learning_rate": 1.6280007008936393e-05, + "loss": 8.9923, + "step": 91850 + }, + { + "epoch": 0.45873804589378014, + "grad_norm": 0.10007300972938538, + "learning_rate": 1.6278505093994847e-05, + "loss": 9.0013, + "step": 91860 + }, + { + "epoch": 0.45878798471871957, + "grad_norm": 0.09385237097740173, + "learning_rate": 1.6277003179053294e-05, + "loss": 8.975, + "step": 91870 + }, + { + "epoch": 0.45883792354365904, + "grad_norm": 0.09489066153764725, + "learning_rate": 1.627550126411174e-05, + "loss": 8.9761, + "step": 91880 + }, + { + "epoch": 0.45888786236859846, + "grad_norm": 0.0923139825463295, + "learning_rate": 1.6273999349170194e-05, + "loss": 8.9836, + "step": 91890 + }, + { + "epoch": 0.45893780119353794, + "grad_norm": 0.09415728598833084, + "learning_rate": 1.627249743422864e-05, + "loss": 8.984, + "step": 91900 + }, + { + "epoch": 0.45898774001847736, + "grad_norm": 0.08979801088571548, + "learning_rate": 1.6270995519287094e-05, + "loss": 8.9757, + "step": 91910 + }, + { + "epoch": 0.45903767884341684, + "grad_norm": 0.09274829924106598, + "learning_rate": 1.626949360434554e-05, + "loss": 8.9781, + "step": 91920 + }, + { + "epoch": 0.45908761766835626, + "grad_norm": 0.09117092192173004, + "learning_rate": 1.6267991689403988e-05, + "loss": 8.9811, + "step": 91930 + }, + { + "epoch": 0.45913755649329574, + "grad_norm": 0.09696992486715317, + "learning_rate": 1.626648977446244e-05, + "loss": 8.9823, + "step": 91940 + }, + { + "epoch": 0.45918749531823516, + "grad_norm": 0.09253089129924774, + "learning_rate": 1.6264987859520888e-05, + "loss": 8.9919, + "step": 91950 + }, + { + "epoch": 0.45923743414317464, + "grad_norm": 0.09517562389373779, + "learning_rate": 1.6263485944579342e-05, + "loss": 9.0024, + "step": 91960 + }, + { + "epoch": 0.45928737296811406, + "grad_norm": 0.09681175649166107, + "learning_rate": 1.626198402963779e-05, + "loss": 8.9724, + "step": 91970 + }, + { + "epoch": 0.4593373117930535, + "grad_norm": 0.08983436971902847, + "learning_rate": 1.6260482114696235e-05, + "loss": 8.9792, + "step": 91980 + }, + { + "epoch": 0.45938725061799296, + "grad_norm": 0.0926215648651123, + "learning_rate": 1.625898019975469e-05, + "loss": 8.9755, + "step": 91990 + }, + { + "epoch": 0.4594371894429324, + "grad_norm": 0.09400998055934906, + "learning_rate": 1.6257478284813136e-05, + "loss": 8.9726, + "step": 92000 + }, + { + "epoch": 0.45948712826787186, + "grad_norm": 0.09896674752235413, + "learning_rate": 1.625597636987159e-05, + "loss": 8.9919, + "step": 92010 + }, + { + "epoch": 0.4595370670928113, + "grad_norm": 0.09126316010951996, + "learning_rate": 1.6254474454930036e-05, + "loss": 8.9908, + "step": 92020 + }, + { + "epoch": 0.45958700591775076, + "grad_norm": 0.09881032258272171, + "learning_rate": 1.6252972539988483e-05, + "loss": 8.9719, + "step": 92030 + }, + { + "epoch": 0.4596369447426902, + "grad_norm": 0.09345117956399918, + "learning_rate": 1.6251470625046936e-05, + "loss": 8.9801, + "step": 92040 + }, + { + "epoch": 0.45968688356762966, + "grad_norm": 0.0911770686507225, + "learning_rate": 1.6249968710105383e-05, + "loss": 8.9831, + "step": 92050 + }, + { + "epoch": 0.4597368223925691, + "grad_norm": 0.09339731186628342, + "learning_rate": 1.6248466795163837e-05, + "loss": 8.9748, + "step": 92060 + }, + { + "epoch": 0.45978676121750855, + "grad_norm": 0.09199456870555878, + "learning_rate": 1.6246964880222284e-05, + "loss": 8.9885, + "step": 92070 + }, + { + "epoch": 0.459836700042448, + "grad_norm": 0.09440702199935913, + "learning_rate": 1.624546296528073e-05, + "loss": 8.9864, + "step": 92080 + }, + { + "epoch": 0.45988663886738745, + "grad_norm": 0.09405873715877533, + "learning_rate": 1.6243961050339184e-05, + "loss": 8.9815, + "step": 92090 + }, + { + "epoch": 0.4599365776923269, + "grad_norm": 0.09130971878767014, + "learning_rate": 1.624245913539763e-05, + "loss": 8.9784, + "step": 92100 + }, + { + "epoch": 0.45998651651726635, + "grad_norm": 0.09779450297355652, + "learning_rate": 1.6240957220456084e-05, + "loss": 8.9849, + "step": 92110 + }, + { + "epoch": 0.4600364553422058, + "grad_norm": 0.09931951016187668, + "learning_rate": 1.623945530551453e-05, + "loss": 8.9855, + "step": 92120 + }, + { + "epoch": 0.46008639416714525, + "grad_norm": 0.08796689659357071, + "learning_rate": 1.6237953390572978e-05, + "loss": 8.9822, + "step": 92130 + }, + { + "epoch": 0.4601363329920847, + "grad_norm": 0.09218308329582214, + "learning_rate": 1.623645147563143e-05, + "loss": 8.9771, + "step": 92140 + }, + { + "epoch": 0.46018627181702415, + "grad_norm": 0.09494258463382721, + "learning_rate": 1.6234949560689878e-05, + "loss": 8.9803, + "step": 92150 + }, + { + "epoch": 0.4602362106419636, + "grad_norm": 0.09072136878967285, + "learning_rate": 1.6233447645748332e-05, + "loss": 8.9806, + "step": 92160 + }, + { + "epoch": 0.46028614946690305, + "grad_norm": 0.09422437846660614, + "learning_rate": 1.623194573080678e-05, + "loss": 8.992, + "step": 92170 + }, + { + "epoch": 0.46033608829184247, + "grad_norm": 0.09309976547956467, + "learning_rate": 1.623044381586523e-05, + "loss": 8.9822, + "step": 92180 + }, + { + "epoch": 0.46038602711678195, + "grad_norm": 0.09436167776584625, + "learning_rate": 1.622894190092368e-05, + "loss": 8.9833, + "step": 92190 + }, + { + "epoch": 0.46043596594172137, + "grad_norm": 0.09215915203094482, + "learning_rate": 1.6227439985982126e-05, + "loss": 8.9867, + "step": 92200 + }, + { + "epoch": 0.46048590476666085, + "grad_norm": 0.0927964299917221, + "learning_rate": 1.622593807104058e-05, + "loss": 8.9965, + "step": 92210 + }, + { + "epoch": 0.46053584359160027, + "grad_norm": 0.08990570157766342, + "learning_rate": 1.6224436156099026e-05, + "loss": 8.9703, + "step": 92220 + }, + { + "epoch": 0.46058578241653975, + "grad_norm": 0.09287389367818832, + "learning_rate": 1.6222934241157476e-05, + "loss": 8.9852, + "step": 92230 + }, + { + "epoch": 0.46063572124147917, + "grad_norm": 0.09232201427221298, + "learning_rate": 1.6221432326215926e-05, + "loss": 8.9978, + "step": 92240 + }, + { + "epoch": 0.46068566006641865, + "grad_norm": 0.09181149303913116, + "learning_rate": 1.6219930411274373e-05, + "loss": 8.9753, + "step": 92250 + }, + { + "epoch": 0.46073559889135807, + "grad_norm": 0.08810899406671524, + "learning_rate": 1.6218428496332827e-05, + "loss": 8.9912, + "step": 92260 + }, + { + "epoch": 0.46078553771629754, + "grad_norm": 0.09229025989770889, + "learning_rate": 1.6216926581391274e-05, + "loss": 8.9822, + "step": 92270 + }, + { + "epoch": 0.46083547654123697, + "grad_norm": 0.09612645953893661, + "learning_rate": 1.6215424666449724e-05, + "loss": 8.9762, + "step": 92280 + }, + { + "epoch": 0.46088541536617644, + "grad_norm": 0.09475258737802505, + "learning_rate": 1.6213922751508174e-05, + "loss": 8.9794, + "step": 92290 + }, + { + "epoch": 0.46093535419111586, + "grad_norm": 0.08649539202451706, + "learning_rate": 1.621242083656662e-05, + "loss": 8.9705, + "step": 92300 + }, + { + "epoch": 0.46098529301605534, + "grad_norm": 0.09077353775501251, + "learning_rate": 1.6210918921625074e-05, + "loss": 8.9873, + "step": 92310 + }, + { + "epoch": 0.46103523184099476, + "grad_norm": 0.09602386504411697, + "learning_rate": 1.620941700668352e-05, + "loss": 8.9859, + "step": 92320 + }, + { + "epoch": 0.46108517066593424, + "grad_norm": 0.09134723991155624, + "learning_rate": 1.620791509174197e-05, + "loss": 8.9876, + "step": 92330 + }, + { + "epoch": 0.46113510949087366, + "grad_norm": 0.09485665708780289, + "learning_rate": 1.620641317680042e-05, + "loss": 8.9749, + "step": 92340 + }, + { + "epoch": 0.46118504831581314, + "grad_norm": 0.089450903236866, + "learning_rate": 1.6204911261858868e-05, + "loss": 8.9815, + "step": 92350 + }, + { + "epoch": 0.46123498714075256, + "grad_norm": 0.09187912195920944, + "learning_rate": 1.6203409346917322e-05, + "loss": 8.9729, + "step": 92360 + }, + { + "epoch": 0.46128492596569204, + "grad_norm": 0.09176033735275269, + "learning_rate": 1.620190743197577e-05, + "loss": 8.9813, + "step": 92370 + }, + { + "epoch": 0.46133486479063146, + "grad_norm": 0.09702310711145401, + "learning_rate": 1.620040551703422e-05, + "loss": 8.9771, + "step": 92380 + }, + { + "epoch": 0.46138480361557094, + "grad_norm": 0.09183552861213684, + "learning_rate": 1.619890360209267e-05, + "loss": 8.9827, + "step": 92390 + }, + { + "epoch": 0.46143474244051036, + "grad_norm": 0.09094628691673279, + "learning_rate": 1.6197401687151116e-05, + "loss": 8.9792, + "step": 92400 + }, + { + "epoch": 0.46148468126544984, + "grad_norm": 0.09234099090099335, + "learning_rate": 1.619589977220957e-05, + "loss": 8.9879, + "step": 92410 + }, + { + "epoch": 0.46153462009038926, + "grad_norm": 0.0963030532002449, + "learning_rate": 1.6194397857268016e-05, + "loss": 8.9812, + "step": 92420 + }, + { + "epoch": 0.46158455891532874, + "grad_norm": 0.09307033568620682, + "learning_rate": 1.6192895942326466e-05, + "loss": 8.9806, + "step": 92430 + }, + { + "epoch": 0.46163449774026816, + "grad_norm": 0.08998160809278488, + "learning_rate": 1.6191394027384916e-05, + "loss": 8.9827, + "step": 92440 + }, + { + "epoch": 0.46168443656520763, + "grad_norm": 0.09202240407466888, + "learning_rate": 1.6189892112443363e-05, + "loss": 8.9737, + "step": 92450 + }, + { + "epoch": 0.46173437539014706, + "grad_norm": 0.08661884814500809, + "learning_rate": 1.6188390197501817e-05, + "loss": 8.9747, + "step": 92460 + }, + { + "epoch": 0.46178431421508653, + "grad_norm": 0.08981632441282272, + "learning_rate": 1.6186888282560264e-05, + "loss": 8.973, + "step": 92470 + }, + { + "epoch": 0.46183425304002595, + "grad_norm": 0.09431172162294388, + "learning_rate": 1.6185386367618714e-05, + "loss": 8.972, + "step": 92480 + }, + { + "epoch": 0.46188419186496543, + "grad_norm": 0.09960649162530899, + "learning_rate": 1.6183884452677164e-05, + "loss": 8.9833, + "step": 92490 + }, + { + "epoch": 0.46193413068990485, + "grad_norm": 0.09025320410728455, + "learning_rate": 1.6182382537735614e-05, + "loss": 8.9745, + "step": 92500 + }, + { + "epoch": 0.46198406951484433, + "grad_norm": 0.09079807996749878, + "learning_rate": 1.6180880622794064e-05, + "loss": 8.9746, + "step": 92510 + }, + { + "epoch": 0.46203400833978375, + "grad_norm": 0.09874525666236877, + "learning_rate": 1.617937870785251e-05, + "loss": 8.9767, + "step": 92520 + }, + { + "epoch": 0.46208394716472323, + "grad_norm": 0.09728190302848816, + "learning_rate": 1.617787679291096e-05, + "loss": 8.9802, + "step": 92530 + }, + { + "epoch": 0.46213388598966265, + "grad_norm": 0.09223682433366776, + "learning_rate": 1.617637487796941e-05, + "loss": 8.9872, + "step": 92540 + }, + { + "epoch": 0.46218382481460213, + "grad_norm": 0.09065338224172592, + "learning_rate": 1.617487296302786e-05, + "loss": 8.9868, + "step": 92550 + }, + { + "epoch": 0.46223376363954155, + "grad_norm": 0.09436984360218048, + "learning_rate": 1.6173371048086312e-05, + "loss": 8.989, + "step": 92560 + }, + { + "epoch": 0.46228370246448103, + "grad_norm": 0.09117233753204346, + "learning_rate": 1.617186913314476e-05, + "loss": 8.9857, + "step": 92570 + }, + { + "epoch": 0.46233364128942045, + "grad_norm": 0.09342578798532486, + "learning_rate": 1.617036721820321e-05, + "loss": 8.9786, + "step": 92580 + }, + { + "epoch": 0.4623835801143599, + "grad_norm": 0.09534727782011032, + "learning_rate": 1.616886530326166e-05, + "loss": 8.9899, + "step": 92590 + }, + { + "epoch": 0.46243351893929935, + "grad_norm": 0.09320477396249771, + "learning_rate": 1.616736338832011e-05, + "loss": 8.9657, + "step": 92600 + }, + { + "epoch": 0.4624834577642388, + "grad_norm": 0.09745677560567856, + "learning_rate": 1.616586147337856e-05, + "loss": 8.9708, + "step": 92610 + }, + { + "epoch": 0.46253339658917825, + "grad_norm": 0.09360513091087341, + "learning_rate": 1.6164359558437006e-05, + "loss": 8.9846, + "step": 92620 + }, + { + "epoch": 0.4625833354141177, + "grad_norm": 0.09421921521425247, + "learning_rate": 1.6162857643495456e-05, + "loss": 8.9829, + "step": 92630 + }, + { + "epoch": 0.46263327423905715, + "grad_norm": 0.10035274922847748, + "learning_rate": 1.6161355728553906e-05, + "loss": 8.9633, + "step": 92640 + }, + { + "epoch": 0.4626832130639966, + "grad_norm": 0.09385058283805847, + "learning_rate": 1.6159853813612357e-05, + "loss": 8.9702, + "step": 92650 + }, + { + "epoch": 0.46273315188893605, + "grad_norm": 0.0909324511885643, + "learning_rate": 1.6158351898670807e-05, + "loss": 8.9831, + "step": 92660 + }, + { + "epoch": 0.4627830907138755, + "grad_norm": 0.08683860301971436, + "learning_rate": 1.6156849983729254e-05, + "loss": 8.9797, + "step": 92670 + }, + { + "epoch": 0.46283302953881494, + "grad_norm": 0.09413138777017593, + "learning_rate": 1.6155348068787704e-05, + "loss": 8.9752, + "step": 92680 + }, + { + "epoch": 0.4628829683637544, + "grad_norm": 0.09523726254701614, + "learning_rate": 1.6153846153846154e-05, + "loss": 8.9746, + "step": 92690 + }, + { + "epoch": 0.46293290718869384, + "grad_norm": 0.09190825372934341, + "learning_rate": 1.6152344238904604e-05, + "loss": 8.9821, + "step": 92700 + }, + { + "epoch": 0.4629828460136333, + "grad_norm": 0.09379702806472778, + "learning_rate": 1.6150842323963054e-05, + "loss": 8.9772, + "step": 92710 + }, + { + "epoch": 0.46303278483857274, + "grad_norm": 0.09580748528242111, + "learning_rate": 1.61493404090215e-05, + "loss": 8.9785, + "step": 92720 + }, + { + "epoch": 0.4630827236635122, + "grad_norm": 0.10296031832695007, + "learning_rate": 1.614783849407995e-05, + "loss": 8.9668, + "step": 92730 + }, + { + "epoch": 0.46313266248845164, + "grad_norm": 0.09141591936349869, + "learning_rate": 1.61463365791384e-05, + "loss": 8.9757, + "step": 92740 + }, + { + "epoch": 0.4631826013133911, + "grad_norm": 0.09824127703905106, + "learning_rate": 1.614483466419685e-05, + "loss": 8.9831, + "step": 92750 + }, + { + "epoch": 0.46323254013833054, + "grad_norm": 0.0958201214671135, + "learning_rate": 1.6143332749255302e-05, + "loss": 8.9645, + "step": 92760 + }, + { + "epoch": 0.46328247896327, + "grad_norm": 0.08969037979841232, + "learning_rate": 1.614183083431375e-05, + "loss": 8.9695, + "step": 92770 + }, + { + "epoch": 0.46333241778820944, + "grad_norm": 0.0948222279548645, + "learning_rate": 1.61403289193722e-05, + "loss": 8.9856, + "step": 92780 + }, + { + "epoch": 0.4633823566131489, + "grad_norm": 0.08855247497558594, + "learning_rate": 1.613882700443065e-05, + "loss": 8.9811, + "step": 92790 + }, + { + "epoch": 0.46343229543808834, + "grad_norm": 0.08753466606140137, + "learning_rate": 1.61373250894891e-05, + "loss": 8.9728, + "step": 92800 + }, + { + "epoch": 0.4634822342630278, + "grad_norm": 0.09330339729785919, + "learning_rate": 1.613582317454755e-05, + "loss": 8.9759, + "step": 92810 + }, + { + "epoch": 0.46353217308796724, + "grad_norm": 0.0953167974948883, + "learning_rate": 1.6134321259606e-05, + "loss": 8.9906, + "step": 92820 + }, + { + "epoch": 0.4635821119129067, + "grad_norm": 0.09375966340303421, + "learning_rate": 1.6132819344664446e-05, + "loss": 8.9694, + "step": 92830 + }, + { + "epoch": 0.46363205073784614, + "grad_norm": 0.08928825706243515, + "learning_rate": 1.6131317429722897e-05, + "loss": 8.9641, + "step": 92840 + }, + { + "epoch": 0.4636819895627856, + "grad_norm": 0.09437457472085953, + "learning_rate": 1.6129815514781347e-05, + "loss": 8.9668, + "step": 92850 + }, + { + "epoch": 0.46373192838772503, + "grad_norm": 0.0952828899025917, + "learning_rate": 1.6128313599839797e-05, + "loss": 8.9885, + "step": 92860 + }, + { + "epoch": 0.4637818672126645, + "grad_norm": 0.08954987674951553, + "learning_rate": 1.6126811684898247e-05, + "loss": 8.9779, + "step": 92870 + }, + { + "epoch": 0.46383180603760393, + "grad_norm": 0.095025435090065, + "learning_rate": 1.6125309769956694e-05, + "loss": 8.978, + "step": 92880 + }, + { + "epoch": 0.4638817448625434, + "grad_norm": 0.09305167198181152, + "learning_rate": 1.6123807855015144e-05, + "loss": 8.975, + "step": 92890 + }, + { + "epoch": 0.46393168368748283, + "grad_norm": 0.09365760535001755, + "learning_rate": 1.6122305940073594e-05, + "loss": 8.9796, + "step": 92900 + }, + { + "epoch": 0.4639816225124223, + "grad_norm": 0.0907236710190773, + "learning_rate": 1.6120804025132044e-05, + "loss": 8.9814, + "step": 92910 + }, + { + "epoch": 0.46403156133736173, + "grad_norm": 0.09334593266248703, + "learning_rate": 1.6119302110190495e-05, + "loss": 8.9935, + "step": 92920 + }, + { + "epoch": 0.4640815001623012, + "grad_norm": 0.0942806750535965, + "learning_rate": 1.611780019524894e-05, + "loss": 8.968, + "step": 92930 + }, + { + "epoch": 0.46413143898724063, + "grad_norm": 0.09642312675714493, + "learning_rate": 1.611629828030739e-05, + "loss": 8.9773, + "step": 92940 + }, + { + "epoch": 0.4641813778121801, + "grad_norm": 0.10480822622776031, + "learning_rate": 1.611479636536584e-05, + "loss": 8.9643, + "step": 92950 + }, + { + "epoch": 0.46423131663711953, + "grad_norm": 0.09461807459592819, + "learning_rate": 1.6113294450424292e-05, + "loss": 8.9774, + "step": 92960 + }, + { + "epoch": 0.46428125546205895, + "grad_norm": 0.09156806021928787, + "learning_rate": 1.6111792535482742e-05, + "loss": 8.983, + "step": 92970 + }, + { + "epoch": 0.4643311942869984, + "grad_norm": 0.09091312438249588, + "learning_rate": 1.611029062054119e-05, + "loss": 8.9742, + "step": 92980 + }, + { + "epoch": 0.46438113311193785, + "grad_norm": 0.09511202573776245, + "learning_rate": 1.610878870559964e-05, + "loss": 8.9784, + "step": 92990 + }, + { + "epoch": 0.4644310719368773, + "grad_norm": 0.09062772989273071, + "learning_rate": 1.610728679065809e-05, + "loss": 8.9831, + "step": 93000 + }, + { + "epoch": 0.46448101076181675, + "grad_norm": 0.09874343872070312, + "learning_rate": 1.610578487571654e-05, + "loss": 8.9679, + "step": 93010 + }, + { + "epoch": 0.4645309495867562, + "grad_norm": 0.09453938156366348, + "learning_rate": 1.610428296077499e-05, + "loss": 8.9649, + "step": 93020 + }, + { + "epoch": 0.46458088841169565, + "grad_norm": 0.09480418264865875, + "learning_rate": 1.6102781045833436e-05, + "loss": 8.9785, + "step": 93030 + }, + { + "epoch": 0.4646308272366351, + "grad_norm": 0.08807534724473953, + "learning_rate": 1.6101279130891887e-05, + "loss": 8.9728, + "step": 93040 + }, + { + "epoch": 0.46468076606157455, + "grad_norm": 0.09540516883134842, + "learning_rate": 1.6099777215950337e-05, + "loss": 8.9793, + "step": 93050 + }, + { + "epoch": 0.464730704886514, + "grad_norm": 0.09324967861175537, + "learning_rate": 1.6098275301008787e-05, + "loss": 8.9792, + "step": 93060 + }, + { + "epoch": 0.46478064371145345, + "grad_norm": 0.08924508839845657, + "learning_rate": 1.6096773386067237e-05, + "loss": 8.9898, + "step": 93070 + }, + { + "epoch": 0.4648305825363929, + "grad_norm": 0.09416824579238892, + "learning_rate": 1.6095271471125684e-05, + "loss": 8.9758, + "step": 93080 + }, + { + "epoch": 0.46488052136133234, + "grad_norm": 0.09986349195241928, + "learning_rate": 1.6093769556184134e-05, + "loss": 8.9774, + "step": 93090 + }, + { + "epoch": 0.4649304601862718, + "grad_norm": 0.10064547508955002, + "learning_rate": 1.6092267641242584e-05, + "loss": 8.9643, + "step": 93100 + }, + { + "epoch": 0.46498039901121124, + "grad_norm": 0.09657155722379684, + "learning_rate": 1.6090765726301034e-05, + "loss": 8.992, + "step": 93110 + }, + { + "epoch": 0.4650303378361507, + "grad_norm": 0.09835278242826462, + "learning_rate": 1.6089263811359485e-05, + "loss": 8.9773, + "step": 93120 + }, + { + "epoch": 0.46508027666109014, + "grad_norm": 0.09444111585617065, + "learning_rate": 1.608776189641793e-05, + "loss": 8.9678, + "step": 93130 + }, + { + "epoch": 0.4651302154860296, + "grad_norm": 0.09481803327798843, + "learning_rate": 1.6086259981476385e-05, + "loss": 8.9745, + "step": 93140 + }, + { + "epoch": 0.46518015431096904, + "grad_norm": 0.09177707135677338, + "learning_rate": 1.6084758066534832e-05, + "loss": 8.973, + "step": 93150 + }, + { + "epoch": 0.4652300931359085, + "grad_norm": 0.09027852863073349, + "learning_rate": 1.6083256151593282e-05, + "loss": 8.9625, + "step": 93160 + }, + { + "epoch": 0.46528003196084794, + "grad_norm": 0.0904780849814415, + "learning_rate": 1.6081754236651732e-05, + "loss": 8.9686, + "step": 93170 + }, + { + "epoch": 0.4653299707857874, + "grad_norm": 0.09435466676950455, + "learning_rate": 1.608025232171018e-05, + "loss": 8.9654, + "step": 93180 + }, + { + "epoch": 0.46537990961072684, + "grad_norm": 0.09567007422447205, + "learning_rate": 1.6078750406768632e-05, + "loss": 8.9825, + "step": 93190 + }, + { + "epoch": 0.4654298484356663, + "grad_norm": 0.09451539069414139, + "learning_rate": 1.607724849182708e-05, + "loss": 8.9788, + "step": 93200 + }, + { + "epoch": 0.46547978726060574, + "grad_norm": 0.09207061678171158, + "learning_rate": 1.607574657688553e-05, + "loss": 8.972, + "step": 93210 + }, + { + "epoch": 0.4655297260855452, + "grad_norm": 0.09581438452005386, + "learning_rate": 1.607424466194398e-05, + "loss": 8.9847, + "step": 93220 + }, + { + "epoch": 0.46557966491048464, + "grad_norm": 0.09088779985904694, + "learning_rate": 1.6072742747002426e-05, + "loss": 8.9714, + "step": 93230 + }, + { + "epoch": 0.4656296037354241, + "grad_norm": 0.09318163245916367, + "learning_rate": 1.607124083206088e-05, + "loss": 8.9726, + "step": 93240 + }, + { + "epoch": 0.46567954256036354, + "grad_norm": 0.09258940815925598, + "learning_rate": 1.6069738917119327e-05, + "loss": 8.9878, + "step": 93250 + }, + { + "epoch": 0.465729481385303, + "grad_norm": 0.09311637282371521, + "learning_rate": 1.6068237002177777e-05, + "loss": 8.9817, + "step": 93260 + }, + { + "epoch": 0.46577942021024243, + "grad_norm": 0.09517588466405869, + "learning_rate": 1.6066735087236227e-05, + "loss": 8.9635, + "step": 93270 + }, + { + "epoch": 0.4658293590351819, + "grad_norm": 0.08702480047941208, + "learning_rate": 1.6065233172294674e-05, + "loss": 8.9762, + "step": 93280 + }, + { + "epoch": 0.46587929786012133, + "grad_norm": 0.0963396281003952, + "learning_rate": 1.6063731257353127e-05, + "loss": 8.967, + "step": 93290 + }, + { + "epoch": 0.4659292366850608, + "grad_norm": 0.09384248405694962, + "learning_rate": 1.6062229342411574e-05, + "loss": 8.9723, + "step": 93300 + }, + { + "epoch": 0.46597917551000023, + "grad_norm": 0.1044907197356224, + "learning_rate": 1.6060727427470024e-05, + "loss": 8.9606, + "step": 93310 + }, + { + "epoch": 0.4660291143349397, + "grad_norm": 0.09456614404916763, + "learning_rate": 1.6059225512528475e-05, + "loss": 8.9805, + "step": 93320 + }, + { + "epoch": 0.46607905315987913, + "grad_norm": 0.09772161394357681, + "learning_rate": 1.605772359758692e-05, + "loss": 8.967, + "step": 93330 + }, + { + "epoch": 0.4661289919848186, + "grad_norm": 0.08841816335916519, + "learning_rate": 1.6056221682645375e-05, + "loss": 8.9762, + "step": 93340 + }, + { + "epoch": 0.46617893080975803, + "grad_norm": 0.0914144292473793, + "learning_rate": 1.6054719767703822e-05, + "loss": 8.978, + "step": 93350 + }, + { + "epoch": 0.4662288696346975, + "grad_norm": 0.09596528857946396, + "learning_rate": 1.6053217852762272e-05, + "loss": 8.956, + "step": 93360 + }, + { + "epoch": 0.46627880845963693, + "grad_norm": 0.09019865840673447, + "learning_rate": 1.6051715937820722e-05, + "loss": 8.965, + "step": 93370 + }, + { + "epoch": 0.4663287472845764, + "grad_norm": 0.09683724492788315, + "learning_rate": 1.605021402287917e-05, + "loss": 8.9587, + "step": 93380 + }, + { + "epoch": 0.4663786861095158, + "grad_norm": 0.08984372764825821, + "learning_rate": 1.6048712107937622e-05, + "loss": 8.9803, + "step": 93390 + }, + { + "epoch": 0.4664286249344553, + "grad_norm": 0.09242388606071472, + "learning_rate": 1.604721019299607e-05, + "loss": 8.991, + "step": 93400 + }, + { + "epoch": 0.4664785637593947, + "grad_norm": 0.0912506952881813, + "learning_rate": 1.604570827805452e-05, + "loss": 8.9865, + "step": 93410 + }, + { + "epoch": 0.4665285025843342, + "grad_norm": 0.09702800214290619, + "learning_rate": 1.604420636311297e-05, + "loss": 8.9536, + "step": 93420 + }, + { + "epoch": 0.4665784414092736, + "grad_norm": 0.09083381295204163, + "learning_rate": 1.6042704448171416e-05, + "loss": 8.9659, + "step": 93430 + }, + { + "epoch": 0.4666283802342131, + "grad_norm": 0.09859057515859604, + "learning_rate": 1.604120253322987e-05, + "loss": 8.9757, + "step": 93440 + }, + { + "epoch": 0.4666783190591525, + "grad_norm": 0.09232448786497116, + "learning_rate": 1.6039700618288317e-05, + "loss": 8.9693, + "step": 93450 + }, + { + "epoch": 0.466728257884092, + "grad_norm": 0.09486878663301468, + "learning_rate": 1.603819870334677e-05, + "loss": 8.9813, + "step": 93460 + }, + { + "epoch": 0.4667781967090314, + "grad_norm": 0.09302292764186859, + "learning_rate": 1.6036696788405217e-05, + "loss": 8.9782, + "step": 93470 + }, + { + "epoch": 0.4668281355339709, + "grad_norm": 0.09293092787265778, + "learning_rate": 1.6035194873463664e-05, + "loss": 8.9629, + "step": 93480 + }, + { + "epoch": 0.4668780743589103, + "grad_norm": 0.09174246340990067, + "learning_rate": 1.6033692958522117e-05, + "loss": 8.9694, + "step": 93490 + }, + { + "epoch": 0.4669280131838498, + "grad_norm": 0.09290467202663422, + "learning_rate": 1.6032191043580564e-05, + "loss": 8.9646, + "step": 93500 + }, + { + "epoch": 0.4669779520087892, + "grad_norm": 0.09408622980117798, + "learning_rate": 1.6030689128639018e-05, + "loss": 8.9694, + "step": 93510 + }, + { + "epoch": 0.4670278908337287, + "grad_norm": 0.09096479415893555, + "learning_rate": 1.6029187213697465e-05, + "loss": 8.966, + "step": 93520 + }, + { + "epoch": 0.4670778296586681, + "grad_norm": 0.09430520981550217, + "learning_rate": 1.602768529875591e-05, + "loss": 8.9614, + "step": 93530 + }, + { + "epoch": 0.4671277684836076, + "grad_norm": 0.09501869231462479, + "learning_rate": 1.6026183383814365e-05, + "loss": 8.954, + "step": 93540 + }, + { + "epoch": 0.467177707308547, + "grad_norm": 0.09236142039299011, + "learning_rate": 1.6024681468872812e-05, + "loss": 8.9749, + "step": 93550 + }, + { + "epoch": 0.4672276461334865, + "grad_norm": 0.09646128863096237, + "learning_rate": 1.6023179553931265e-05, + "loss": 8.985, + "step": 93560 + }, + { + "epoch": 0.4672775849584259, + "grad_norm": 0.08994904160499573, + "learning_rate": 1.6021677638989712e-05, + "loss": 8.9734, + "step": 93570 + }, + { + "epoch": 0.4673275237833654, + "grad_norm": 0.0974574014544487, + "learning_rate": 1.602017572404816e-05, + "loss": 8.9715, + "step": 93580 + }, + { + "epoch": 0.4673774626083048, + "grad_norm": 0.09206629544496536, + "learning_rate": 1.6018673809106612e-05, + "loss": 8.9758, + "step": 93590 + }, + { + "epoch": 0.4674274014332443, + "grad_norm": 0.08921888470649719, + "learning_rate": 1.601717189416506e-05, + "loss": 8.9747, + "step": 93600 + }, + { + "epoch": 0.4674773402581837, + "grad_norm": 0.09754341095685959, + "learning_rate": 1.6015669979223513e-05, + "loss": 8.9635, + "step": 93610 + }, + { + "epoch": 0.4675272790831232, + "grad_norm": 0.09427353739738464, + "learning_rate": 1.601416806428196e-05, + "loss": 8.9758, + "step": 93620 + }, + { + "epoch": 0.4675772179080626, + "grad_norm": 0.09223822504281998, + "learning_rate": 1.6012666149340406e-05, + "loss": 8.9795, + "step": 93630 + }, + { + "epoch": 0.4676271567330021, + "grad_norm": 0.09685010462999344, + "learning_rate": 1.601116423439886e-05, + "loss": 8.9705, + "step": 93640 + }, + { + "epoch": 0.4676770955579415, + "grad_norm": 0.09444907307624817, + "learning_rate": 1.6009662319457307e-05, + "loss": 8.9722, + "step": 93650 + }, + { + "epoch": 0.467727034382881, + "grad_norm": 0.09394410252571106, + "learning_rate": 1.600816040451576e-05, + "loss": 8.9806, + "step": 93660 + }, + { + "epoch": 0.4677769732078204, + "grad_norm": 0.09267649054527283, + "learning_rate": 1.6006658489574207e-05, + "loss": 8.9722, + "step": 93670 + }, + { + "epoch": 0.4678269120327599, + "grad_norm": 0.08718396723270416, + "learning_rate": 1.6005156574632654e-05, + "loss": 8.9693, + "step": 93680 + }, + { + "epoch": 0.4678768508576993, + "grad_norm": 0.08936282247304916, + "learning_rate": 1.6003654659691107e-05, + "loss": 8.9726, + "step": 93690 + }, + { + "epoch": 0.4679267896826388, + "grad_norm": 0.09415874630212784, + "learning_rate": 1.6002152744749554e-05, + "loss": 8.9622, + "step": 93700 + }, + { + "epoch": 0.4679767285075782, + "grad_norm": 0.09426844865083694, + "learning_rate": 1.6000650829808008e-05, + "loss": 8.9738, + "step": 93710 + }, + { + "epoch": 0.4680266673325177, + "grad_norm": 0.09191013872623444, + "learning_rate": 1.5999148914866455e-05, + "loss": 8.9705, + "step": 93720 + }, + { + "epoch": 0.4680766061574571, + "grad_norm": 0.09426682442426682, + "learning_rate": 1.5997646999924905e-05, + "loss": 8.9628, + "step": 93730 + }, + { + "epoch": 0.4681265449823966, + "grad_norm": 0.08494877815246582, + "learning_rate": 1.5996145084983355e-05, + "loss": 8.9671, + "step": 93740 + }, + { + "epoch": 0.468176483807336, + "grad_norm": 0.09612907469272614, + "learning_rate": 1.5994643170041802e-05, + "loss": 8.9614, + "step": 93750 + }, + { + "epoch": 0.4682264226322755, + "grad_norm": 0.09567809104919434, + "learning_rate": 1.5993141255100255e-05, + "loss": 8.9663, + "step": 93760 + }, + { + "epoch": 0.4682763614572149, + "grad_norm": 0.09354501962661743, + "learning_rate": 1.5991639340158702e-05, + "loss": 8.9696, + "step": 93770 + }, + { + "epoch": 0.4683263002821544, + "grad_norm": 0.09527242928743362, + "learning_rate": 1.5990137425217152e-05, + "loss": 8.976, + "step": 93780 + }, + { + "epoch": 0.4683762391070938, + "grad_norm": 0.09733609855175018, + "learning_rate": 1.5988635510275602e-05, + "loss": 8.9744, + "step": 93790 + }, + { + "epoch": 0.4684261779320333, + "grad_norm": 0.10153631865978241, + "learning_rate": 1.598713359533405e-05, + "loss": 8.9545, + "step": 93800 + }, + { + "epoch": 0.4684761167569727, + "grad_norm": 0.09621601551771164, + "learning_rate": 1.5985631680392503e-05, + "loss": 8.992, + "step": 93810 + }, + { + "epoch": 0.4685260555819122, + "grad_norm": 0.09544295817613602, + "learning_rate": 1.598412976545095e-05, + "loss": 8.9597, + "step": 93820 + }, + { + "epoch": 0.4685759944068516, + "grad_norm": 0.09087269753217697, + "learning_rate": 1.59826278505094e-05, + "loss": 8.9515, + "step": 93830 + }, + { + "epoch": 0.4686259332317911, + "grad_norm": 0.09846234321594238, + "learning_rate": 1.598112593556785e-05, + "loss": 8.9714, + "step": 93840 + }, + { + "epoch": 0.4686758720567305, + "grad_norm": 0.09184664487838745, + "learning_rate": 1.5979624020626297e-05, + "loss": 8.9678, + "step": 93850 + }, + { + "epoch": 0.46872581088167, + "grad_norm": 0.0921328142285347, + "learning_rate": 1.597812210568475e-05, + "loss": 8.9638, + "step": 93860 + }, + { + "epoch": 0.4687757497066094, + "grad_norm": 0.09225039184093475, + "learning_rate": 1.5976620190743197e-05, + "loss": 8.9748, + "step": 93870 + }, + { + "epoch": 0.4688256885315489, + "grad_norm": 0.09426619857549667, + "learning_rate": 1.5975118275801647e-05, + "loss": 8.957, + "step": 93880 + }, + { + "epoch": 0.4688756273564883, + "grad_norm": 0.08906444162130356, + "learning_rate": 1.5973616360860097e-05, + "loss": 8.9666, + "step": 93890 + }, + { + "epoch": 0.4689255661814278, + "grad_norm": 0.09076976776123047, + "learning_rate": 1.5972114445918544e-05, + "loss": 8.9806, + "step": 93900 + }, + { + "epoch": 0.4689755050063672, + "grad_norm": 0.09307584911584854, + "learning_rate": 1.5970612530976998e-05, + "loss": 8.9683, + "step": 93910 + }, + { + "epoch": 0.4690254438313067, + "grad_norm": 0.09109794348478317, + "learning_rate": 1.5969110616035445e-05, + "loss": 8.9724, + "step": 93920 + }, + { + "epoch": 0.4690753826562461, + "grad_norm": 0.09649874269962311, + "learning_rate": 1.5967608701093895e-05, + "loss": 8.9707, + "step": 93930 + }, + { + "epoch": 0.4691253214811856, + "grad_norm": 0.09002482146024704, + "learning_rate": 1.5966106786152345e-05, + "loss": 8.9689, + "step": 93940 + }, + { + "epoch": 0.469175260306125, + "grad_norm": 0.0938982143998146, + "learning_rate": 1.5964604871210792e-05, + "loss": 8.9652, + "step": 93950 + }, + { + "epoch": 0.4692251991310644, + "grad_norm": 0.0954759269952774, + "learning_rate": 1.5963102956269245e-05, + "loss": 8.9681, + "step": 93960 + }, + { + "epoch": 0.4692751379560039, + "grad_norm": 0.09431935846805573, + "learning_rate": 1.5961601041327692e-05, + "loss": 8.9643, + "step": 93970 + }, + { + "epoch": 0.4693250767809433, + "grad_norm": 0.10007733106613159, + "learning_rate": 1.5960099126386142e-05, + "loss": 8.9655, + "step": 93980 + }, + { + "epoch": 0.4693750156058828, + "grad_norm": 0.09566391259431839, + "learning_rate": 1.5958597211444592e-05, + "loss": 8.9486, + "step": 93990 + }, + { + "epoch": 0.4694249544308222, + "grad_norm": 0.08696243166923523, + "learning_rate": 1.595709529650304e-05, + "loss": 8.9544, + "step": 94000 + }, + { + "epoch": 0.4694748932557617, + "grad_norm": 0.0935448408126831, + "learning_rate": 1.5955593381561493e-05, + "loss": 8.9584, + "step": 94010 + }, + { + "epoch": 0.4695248320807011, + "grad_norm": 0.09453287720680237, + "learning_rate": 1.595409146661994e-05, + "loss": 8.9592, + "step": 94020 + }, + { + "epoch": 0.4695747709056406, + "grad_norm": 0.0924343541264534, + "learning_rate": 1.5952589551678393e-05, + "loss": 8.9641, + "step": 94030 + }, + { + "epoch": 0.46962470973058, + "grad_norm": 0.09673213213682175, + "learning_rate": 1.595108763673684e-05, + "loss": 8.9499, + "step": 94040 + }, + { + "epoch": 0.4696746485555195, + "grad_norm": 0.08831555396318436, + "learning_rate": 1.5949585721795287e-05, + "loss": 8.9571, + "step": 94050 + }, + { + "epoch": 0.4697245873804589, + "grad_norm": 0.09080936759710312, + "learning_rate": 1.594808380685374e-05, + "loss": 8.9697, + "step": 94060 + }, + { + "epoch": 0.4697745262053984, + "grad_norm": 0.09151973575353622, + "learning_rate": 1.5946581891912187e-05, + "loss": 8.9607, + "step": 94070 + }, + { + "epoch": 0.4698244650303378, + "grad_norm": 0.09453890472650528, + "learning_rate": 1.594507997697064e-05, + "loss": 8.9754, + "step": 94080 + }, + { + "epoch": 0.4698744038552773, + "grad_norm": 0.09043656289577484, + "learning_rate": 1.5943578062029087e-05, + "loss": 8.9796, + "step": 94090 + }, + { + "epoch": 0.4699243426802167, + "grad_norm": 0.09902900457382202, + "learning_rate": 1.5942076147087534e-05, + "loss": 8.9608, + "step": 94100 + }, + { + "epoch": 0.4699742815051562, + "grad_norm": 0.09724241495132446, + "learning_rate": 1.5940574232145988e-05, + "loss": 8.9604, + "step": 94110 + }, + { + "epoch": 0.4700242203300956, + "grad_norm": 0.09389027208089828, + "learning_rate": 1.5939072317204435e-05, + "loss": 8.9627, + "step": 94120 + }, + { + "epoch": 0.4700741591550351, + "grad_norm": 0.08884572237730026, + "learning_rate": 1.5937570402262888e-05, + "loss": 8.9623, + "step": 94130 + }, + { + "epoch": 0.4701240979799745, + "grad_norm": 0.09251714497804642, + "learning_rate": 1.5936068487321335e-05, + "loss": 8.9787, + "step": 94140 + }, + { + "epoch": 0.470174036804914, + "grad_norm": 0.09012773633003235, + "learning_rate": 1.5934566572379785e-05, + "loss": 8.9716, + "step": 94150 + }, + { + "epoch": 0.4702239756298534, + "grad_norm": 0.09131991118192673, + "learning_rate": 1.5933064657438235e-05, + "loss": 8.9629, + "step": 94160 + }, + { + "epoch": 0.4702739144547929, + "grad_norm": 0.09043431282043457, + "learning_rate": 1.5931562742496682e-05, + "loss": 8.9654, + "step": 94170 + }, + { + "epoch": 0.4703238532797323, + "grad_norm": 0.09283377230167389, + "learning_rate": 1.5930060827555136e-05, + "loss": 8.9507, + "step": 94180 + }, + { + "epoch": 0.4703737921046718, + "grad_norm": 0.09180030971765518, + "learning_rate": 1.5928558912613582e-05, + "loss": 8.9708, + "step": 94190 + }, + { + "epoch": 0.4704237309296112, + "grad_norm": 0.08900587260723114, + "learning_rate": 1.5927056997672033e-05, + "loss": 8.9548, + "step": 94200 + }, + { + "epoch": 0.4704736697545507, + "grad_norm": 0.10260471701622009, + "learning_rate": 1.5925555082730483e-05, + "loss": 8.9641, + "step": 94210 + }, + { + "epoch": 0.4705236085794901, + "grad_norm": 0.09451324492692947, + "learning_rate": 1.592405316778893e-05, + "loss": 8.9697, + "step": 94220 + }, + { + "epoch": 0.4705735474044296, + "grad_norm": 0.1005653440952301, + "learning_rate": 1.5922551252847383e-05, + "loss": 8.9598, + "step": 94230 + }, + { + "epoch": 0.470623486229369, + "grad_norm": 0.09918899834156036, + "learning_rate": 1.592104933790583e-05, + "loss": 8.9575, + "step": 94240 + }, + { + "epoch": 0.4706734250543085, + "grad_norm": 0.09747923910617828, + "learning_rate": 1.591954742296428e-05, + "loss": 8.9644, + "step": 94250 + }, + { + "epoch": 0.4707233638792479, + "grad_norm": 0.09829793125391006, + "learning_rate": 1.591804550802273e-05, + "loss": 8.9704, + "step": 94260 + }, + { + "epoch": 0.4707733027041874, + "grad_norm": 0.10006602108478546, + "learning_rate": 1.5916543593081177e-05, + "loss": 8.9625, + "step": 94270 + }, + { + "epoch": 0.4708232415291268, + "grad_norm": 0.09519977867603302, + "learning_rate": 1.591504167813963e-05, + "loss": 8.9572, + "step": 94280 + }, + { + "epoch": 0.4708731803540663, + "grad_norm": 0.09414579719305038, + "learning_rate": 1.5913539763198077e-05, + "loss": 8.9618, + "step": 94290 + }, + { + "epoch": 0.4709231191790057, + "grad_norm": 0.09214229881763458, + "learning_rate": 1.5912037848256528e-05, + "loss": 8.968, + "step": 94300 + }, + { + "epoch": 0.4709730580039452, + "grad_norm": 0.09208973497152328, + "learning_rate": 1.5910535933314978e-05, + "loss": 8.9711, + "step": 94310 + }, + { + "epoch": 0.4710229968288846, + "grad_norm": 0.09273689985275269, + "learning_rate": 1.5909034018373425e-05, + "loss": 8.9477, + "step": 94320 + }, + { + "epoch": 0.4710729356538241, + "grad_norm": 0.09345366805791855, + "learning_rate": 1.5907532103431878e-05, + "loss": 8.9459, + "step": 94330 + }, + { + "epoch": 0.4711228744787635, + "grad_norm": 0.09309998899698257, + "learning_rate": 1.5906030188490325e-05, + "loss": 8.9669, + "step": 94340 + }, + { + "epoch": 0.471172813303703, + "grad_norm": 0.0877714604139328, + "learning_rate": 1.5904528273548775e-05, + "loss": 8.967, + "step": 94350 + }, + { + "epoch": 0.4712227521286424, + "grad_norm": 0.09679785370826721, + "learning_rate": 1.5903026358607225e-05, + "loss": 8.9599, + "step": 94360 + }, + { + "epoch": 0.4712726909535819, + "grad_norm": 0.10014106333255768, + "learning_rate": 1.5901524443665672e-05, + "loss": 8.9567, + "step": 94370 + }, + { + "epoch": 0.4713226297785213, + "grad_norm": 0.09142360091209412, + "learning_rate": 1.5900022528724126e-05, + "loss": 8.9641, + "step": 94380 + }, + { + "epoch": 0.4713725686034608, + "grad_norm": 0.09642203897237778, + "learning_rate": 1.5898520613782573e-05, + "loss": 8.9582, + "step": 94390 + }, + { + "epoch": 0.4714225074284002, + "grad_norm": 0.0904778316617012, + "learning_rate": 1.5897018698841023e-05, + "loss": 8.9714, + "step": 94400 + }, + { + "epoch": 0.47147244625333967, + "grad_norm": 0.09670837968587875, + "learning_rate": 1.5895516783899473e-05, + "loss": 8.9466, + "step": 94410 + }, + { + "epoch": 0.4715223850782791, + "grad_norm": 0.09427211433649063, + "learning_rate": 1.589401486895792e-05, + "loss": 8.9493, + "step": 94420 + }, + { + "epoch": 0.47157232390321857, + "grad_norm": 0.09509798139333725, + "learning_rate": 1.5892512954016373e-05, + "loss": 8.9536, + "step": 94430 + }, + { + "epoch": 0.471622262728158, + "grad_norm": 0.09237992763519287, + "learning_rate": 1.589101103907482e-05, + "loss": 8.9698, + "step": 94440 + }, + { + "epoch": 0.47167220155309747, + "grad_norm": 0.09039132297039032, + "learning_rate": 1.588950912413327e-05, + "loss": 8.96, + "step": 94450 + }, + { + "epoch": 0.4717221403780369, + "grad_norm": 0.09362545609474182, + "learning_rate": 1.588800720919172e-05, + "loss": 8.9552, + "step": 94460 + }, + { + "epoch": 0.47177207920297637, + "grad_norm": 0.09487809240818024, + "learning_rate": 1.588650529425017e-05, + "loss": 8.9529, + "step": 94470 + }, + { + "epoch": 0.4718220180279158, + "grad_norm": 0.09452398866415024, + "learning_rate": 1.588500337930862e-05, + "loss": 8.9601, + "step": 94480 + }, + { + "epoch": 0.47187195685285527, + "grad_norm": 0.08807900547981262, + "learning_rate": 1.5883501464367068e-05, + "loss": 8.9602, + "step": 94490 + }, + { + "epoch": 0.4719218956777947, + "grad_norm": 0.09054072201251984, + "learning_rate": 1.5881999549425518e-05, + "loss": 8.9705, + "step": 94500 + }, + { + "epoch": 0.47197183450273417, + "grad_norm": 0.0914943590760231, + "learning_rate": 1.5880497634483968e-05, + "loss": 8.9667, + "step": 94510 + }, + { + "epoch": 0.4720217733276736, + "grad_norm": 0.09846024960279465, + "learning_rate": 1.5878995719542418e-05, + "loss": 8.964, + "step": 94520 + }, + { + "epoch": 0.47207171215261307, + "grad_norm": 0.08973458409309387, + "learning_rate": 1.5877493804600868e-05, + "loss": 8.9691, + "step": 94530 + }, + { + "epoch": 0.4721216509775525, + "grad_norm": 0.09380858391523361, + "learning_rate": 1.5875991889659315e-05, + "loss": 8.9595, + "step": 94540 + }, + { + "epoch": 0.47217158980249196, + "grad_norm": 0.09617038071155548, + "learning_rate": 1.5874489974717765e-05, + "loss": 8.9691, + "step": 94550 + }, + { + "epoch": 0.4722215286274314, + "grad_norm": 0.09885700792074203, + "learning_rate": 1.5872988059776215e-05, + "loss": 8.9462, + "step": 94560 + }, + { + "epoch": 0.47227146745237086, + "grad_norm": 0.08958373963832855, + "learning_rate": 1.5871486144834666e-05, + "loss": 8.9666, + "step": 94570 + }, + { + "epoch": 0.4723214062773103, + "grad_norm": 0.09378182142972946, + "learning_rate": 1.5869984229893116e-05, + "loss": 8.9554, + "step": 94580 + }, + { + "epoch": 0.47237134510224976, + "grad_norm": 0.09632568061351776, + "learning_rate": 1.5868482314951563e-05, + "loss": 8.9563, + "step": 94590 + }, + { + "epoch": 0.4724212839271892, + "grad_norm": 0.09778321534395218, + "learning_rate": 1.5866980400010013e-05, + "loss": 8.9532, + "step": 94600 + }, + { + "epoch": 0.47247122275212866, + "grad_norm": 0.08909459412097931, + "learning_rate": 1.5865478485068463e-05, + "loss": 8.9759, + "step": 94610 + }, + { + "epoch": 0.4725211615770681, + "grad_norm": 0.09531407058238983, + "learning_rate": 1.5863976570126913e-05, + "loss": 8.9577, + "step": 94620 + }, + { + "epoch": 0.47257110040200756, + "grad_norm": 0.09269222617149353, + "learning_rate": 1.5862474655185363e-05, + "loss": 8.959, + "step": 94630 + }, + { + "epoch": 0.472621039226947, + "grad_norm": 0.09362973272800446, + "learning_rate": 1.586097274024381e-05, + "loss": 8.953, + "step": 94640 + }, + { + "epoch": 0.47267097805188646, + "grad_norm": 0.09257864207029343, + "learning_rate": 1.585947082530226e-05, + "loss": 8.9646, + "step": 94650 + }, + { + "epoch": 0.4727209168768259, + "grad_norm": 0.10312341898679733, + "learning_rate": 1.585796891036071e-05, + "loss": 8.9662, + "step": 94660 + }, + { + "epoch": 0.47277085570176536, + "grad_norm": 0.09162463247776031, + "learning_rate": 1.585646699541916e-05, + "loss": 8.9528, + "step": 94670 + }, + { + "epoch": 0.4728207945267048, + "grad_norm": 0.09410829097032547, + "learning_rate": 1.585496508047761e-05, + "loss": 8.9487, + "step": 94680 + }, + { + "epoch": 0.47287073335164426, + "grad_norm": 0.09216289222240448, + "learning_rate": 1.5853463165536058e-05, + "loss": 8.9574, + "step": 94690 + }, + { + "epoch": 0.4729206721765837, + "grad_norm": 0.09374202787876129, + "learning_rate": 1.5851961250594508e-05, + "loss": 8.9629, + "step": 94700 + }, + { + "epoch": 0.47297061100152316, + "grad_norm": 0.09870696812868118, + "learning_rate": 1.5850459335652958e-05, + "loss": 8.9696, + "step": 94710 + }, + { + "epoch": 0.4730205498264626, + "grad_norm": 0.0899372324347496, + "learning_rate": 1.5848957420711408e-05, + "loss": 8.9589, + "step": 94720 + }, + { + "epoch": 0.47307048865140205, + "grad_norm": 0.0908857062458992, + "learning_rate": 1.5847455505769858e-05, + "loss": 8.9605, + "step": 94730 + }, + { + "epoch": 0.4731204274763415, + "grad_norm": 0.08634936064481735, + "learning_rate": 1.5845953590828305e-05, + "loss": 8.9659, + "step": 94740 + }, + { + "epoch": 0.47317036630128095, + "grad_norm": 0.09862346947193146, + "learning_rate": 1.5844451675886755e-05, + "loss": 8.9518, + "step": 94750 + }, + { + "epoch": 0.4732203051262204, + "grad_norm": 0.09859306365251541, + "learning_rate": 1.5842949760945205e-05, + "loss": 8.9509, + "step": 94760 + }, + { + "epoch": 0.47327024395115985, + "grad_norm": 0.09249970316886902, + "learning_rate": 1.5841447846003656e-05, + "loss": 8.9593, + "step": 94770 + }, + { + "epoch": 0.4733201827760993, + "grad_norm": 0.09957744181156158, + "learning_rate": 1.5839945931062106e-05, + "loss": 8.9597, + "step": 94780 + }, + { + "epoch": 0.47337012160103875, + "grad_norm": 0.0913504958152771, + "learning_rate": 1.5838444016120556e-05, + "loss": 8.968, + "step": 94790 + }, + { + "epoch": 0.4734200604259782, + "grad_norm": 0.09559644013643265, + "learning_rate": 1.5836942101179003e-05, + "loss": 8.9594, + "step": 94800 + }, + { + "epoch": 0.47346999925091765, + "grad_norm": 0.09303098171949387, + "learning_rate": 1.5835440186237453e-05, + "loss": 8.9535, + "step": 94810 + }, + { + "epoch": 0.47351993807585707, + "grad_norm": 0.09096693992614746, + "learning_rate": 1.5833938271295903e-05, + "loss": 8.9503, + "step": 94820 + }, + { + "epoch": 0.47356987690079655, + "grad_norm": 0.09463432431221008, + "learning_rate": 1.5832436356354353e-05, + "loss": 8.9561, + "step": 94830 + }, + { + "epoch": 0.47361981572573597, + "grad_norm": 0.09306497126817703, + "learning_rate": 1.5830934441412803e-05, + "loss": 8.9495, + "step": 94840 + }, + { + "epoch": 0.47366975455067545, + "grad_norm": 0.09330885857343674, + "learning_rate": 1.582943252647125e-05, + "loss": 8.9607, + "step": 94850 + }, + { + "epoch": 0.47371969337561487, + "grad_norm": 0.0966872051358223, + "learning_rate": 1.58279306115297e-05, + "loss": 8.9557, + "step": 94860 + }, + { + "epoch": 0.47376963220055435, + "grad_norm": 0.08871348202228546, + "learning_rate": 1.582642869658815e-05, + "loss": 8.9677, + "step": 94870 + }, + { + "epoch": 0.47381957102549377, + "grad_norm": 0.090989850461483, + "learning_rate": 1.58249267816466e-05, + "loss": 8.9661, + "step": 94880 + }, + { + "epoch": 0.47386950985043325, + "grad_norm": 0.0886576771736145, + "learning_rate": 1.582342486670505e-05, + "loss": 8.9559, + "step": 94890 + }, + { + "epoch": 0.47391944867537267, + "grad_norm": 0.09537599235773087, + "learning_rate": 1.5821922951763498e-05, + "loss": 8.9496, + "step": 94900 + }, + { + "epoch": 0.47396938750031214, + "grad_norm": 0.08645906299352646, + "learning_rate": 1.5820421036821948e-05, + "loss": 8.9599, + "step": 94910 + }, + { + "epoch": 0.47401932632525157, + "grad_norm": 0.0953529104590416, + "learning_rate": 1.5818919121880398e-05, + "loss": 8.9579, + "step": 94920 + }, + { + "epoch": 0.47406926515019104, + "grad_norm": 0.10060861706733704, + "learning_rate": 1.5817417206938848e-05, + "loss": 8.9599, + "step": 94930 + }, + { + "epoch": 0.47411920397513047, + "grad_norm": 0.09143946319818497, + "learning_rate": 1.58159152919973e-05, + "loss": 8.9482, + "step": 94940 + }, + { + "epoch": 0.4741691428000699, + "grad_norm": 0.09554131329059601, + "learning_rate": 1.5814413377055745e-05, + "loss": 8.9516, + "step": 94950 + }, + { + "epoch": 0.47421908162500936, + "grad_norm": 0.09513738751411438, + "learning_rate": 1.5812911462114195e-05, + "loss": 8.9709, + "step": 94960 + }, + { + "epoch": 0.4742690204499488, + "grad_norm": 0.09006723016500473, + "learning_rate": 1.5811409547172646e-05, + "loss": 8.9641, + "step": 94970 + }, + { + "epoch": 0.47431895927488826, + "grad_norm": 0.08737257868051529, + "learning_rate": 1.5809907632231096e-05, + "loss": 8.9553, + "step": 94980 + }, + { + "epoch": 0.4743688980998277, + "grad_norm": 0.09388820081949234, + "learning_rate": 1.5808405717289546e-05, + "loss": 8.9634, + "step": 94990 + }, + { + "epoch": 0.47441883692476716, + "grad_norm": 0.0908634141087532, + "learning_rate": 1.5806903802347993e-05, + "loss": 8.954, + "step": 95000 + }, + { + "epoch": 0.4744687757497066, + "grad_norm": 0.09147128462791443, + "learning_rate": 1.5805401887406443e-05, + "loss": 8.9601, + "step": 95010 + }, + { + "epoch": 0.47451871457464606, + "grad_norm": 0.09336107224225998, + "learning_rate": 1.5803899972464893e-05, + "loss": 8.9496, + "step": 95020 + }, + { + "epoch": 0.4745686533995855, + "grad_norm": 0.09616447985172272, + "learning_rate": 1.5802398057523343e-05, + "loss": 8.9433, + "step": 95030 + }, + { + "epoch": 0.47461859222452496, + "grad_norm": 0.09238088130950928, + "learning_rate": 1.5800896142581793e-05, + "loss": 8.9607, + "step": 95040 + }, + { + "epoch": 0.4746685310494644, + "grad_norm": 0.09115064889192581, + "learning_rate": 1.579939422764024e-05, + "loss": 8.9525, + "step": 95050 + }, + { + "epoch": 0.47471846987440386, + "grad_norm": 0.09081438928842545, + "learning_rate": 1.579789231269869e-05, + "loss": 8.9477, + "step": 95060 + }, + { + "epoch": 0.4747684086993433, + "grad_norm": 0.09441505372524261, + "learning_rate": 1.579639039775714e-05, + "loss": 8.9469, + "step": 95070 + }, + { + "epoch": 0.47481834752428276, + "grad_norm": 0.0935746282339096, + "learning_rate": 1.579488848281559e-05, + "loss": 8.978, + "step": 95080 + }, + { + "epoch": 0.4748682863492222, + "grad_norm": 0.09515486657619476, + "learning_rate": 1.579338656787404e-05, + "loss": 8.9698, + "step": 95090 + }, + { + "epoch": 0.47491822517416166, + "grad_norm": 0.10014467686414719, + "learning_rate": 1.5791884652932488e-05, + "loss": 8.9462, + "step": 95100 + }, + { + "epoch": 0.4749681639991011, + "grad_norm": 0.09181597828865051, + "learning_rate": 1.579038273799094e-05, + "loss": 8.9549, + "step": 95110 + }, + { + "epoch": 0.47501810282404056, + "grad_norm": 0.09579970687627792, + "learning_rate": 1.5788880823049388e-05, + "loss": 8.9546, + "step": 95120 + }, + { + "epoch": 0.47506804164898, + "grad_norm": 0.0962633490562439, + "learning_rate": 1.5787378908107838e-05, + "loss": 8.9442, + "step": 95130 + }, + { + "epoch": 0.47511798047391945, + "grad_norm": 0.09618624299764633, + "learning_rate": 1.578587699316629e-05, + "loss": 8.9584, + "step": 95140 + }, + { + "epoch": 0.4751679192988589, + "grad_norm": 0.08641847223043442, + "learning_rate": 1.5784375078224735e-05, + "loss": 8.9636, + "step": 95150 + }, + { + "epoch": 0.47521785812379835, + "grad_norm": 0.0992530807852745, + "learning_rate": 1.578287316328319e-05, + "loss": 8.9572, + "step": 95160 + }, + { + "epoch": 0.4752677969487378, + "grad_norm": 0.09517235308885574, + "learning_rate": 1.5781371248341636e-05, + "loss": 8.9476, + "step": 95170 + }, + { + "epoch": 0.47531773577367725, + "grad_norm": 0.09363456070423126, + "learning_rate": 1.5779869333400086e-05, + "loss": 8.9558, + "step": 95180 + }, + { + "epoch": 0.4753676745986167, + "grad_norm": 0.0897691547870636, + "learning_rate": 1.5778367418458536e-05, + "loss": 8.9506, + "step": 95190 + }, + { + "epoch": 0.47541761342355615, + "grad_norm": 0.09347530454397202, + "learning_rate": 1.5776865503516983e-05, + "loss": 8.9519, + "step": 95200 + }, + { + "epoch": 0.4754675522484956, + "grad_norm": 0.09927235543727875, + "learning_rate": 1.5775363588575436e-05, + "loss": 8.9348, + "step": 95210 + }, + { + "epoch": 0.47551749107343505, + "grad_norm": 0.09206566959619522, + "learning_rate": 1.5773861673633883e-05, + "loss": 8.9514, + "step": 95220 + }, + { + "epoch": 0.47556742989837447, + "grad_norm": 0.09086683392524719, + "learning_rate": 1.5772359758692333e-05, + "loss": 8.9393, + "step": 95230 + }, + { + "epoch": 0.47561736872331395, + "grad_norm": 0.0883910059928894, + "learning_rate": 1.5770857843750783e-05, + "loss": 8.9475, + "step": 95240 + }, + { + "epoch": 0.47566730754825337, + "grad_norm": 0.09318321943283081, + "learning_rate": 1.576935592880923e-05, + "loss": 8.9376, + "step": 95250 + }, + { + "epoch": 0.47571724637319285, + "grad_norm": 0.09458793699741364, + "learning_rate": 1.5767854013867684e-05, + "loss": 8.9374, + "step": 95260 + }, + { + "epoch": 0.47576718519813227, + "grad_norm": 0.09189481288194656, + "learning_rate": 1.576635209892613e-05, + "loss": 8.946, + "step": 95270 + }, + { + "epoch": 0.47581712402307175, + "grad_norm": 0.09554219245910645, + "learning_rate": 1.576485018398458e-05, + "loss": 8.9526, + "step": 95280 + }, + { + "epoch": 0.47586706284801117, + "grad_norm": 0.09450355917215347, + "learning_rate": 1.576334826904303e-05, + "loss": 8.957, + "step": 95290 + }, + { + "epoch": 0.47591700167295065, + "grad_norm": 0.08791901171207428, + "learning_rate": 1.5761846354101478e-05, + "loss": 8.9516, + "step": 95300 + }, + { + "epoch": 0.47596694049789007, + "grad_norm": 0.08911871910095215, + "learning_rate": 1.576034443915993e-05, + "loss": 8.9511, + "step": 95310 + }, + { + "epoch": 0.47601687932282954, + "grad_norm": 0.09467475861310959, + "learning_rate": 1.5758842524218378e-05, + "loss": 8.9439, + "step": 95320 + }, + { + "epoch": 0.47606681814776897, + "grad_norm": 0.09240403026342392, + "learning_rate": 1.5757340609276828e-05, + "loss": 8.9443, + "step": 95330 + }, + { + "epoch": 0.47611675697270844, + "grad_norm": 0.09284543991088867, + "learning_rate": 1.575583869433528e-05, + "loss": 8.9489, + "step": 95340 + }, + { + "epoch": 0.47616669579764787, + "grad_norm": 0.0898863673210144, + "learning_rate": 1.5754336779393725e-05, + "loss": 8.9601, + "step": 95350 + }, + { + "epoch": 0.47621663462258734, + "grad_norm": 0.09170831739902496, + "learning_rate": 1.575283486445218e-05, + "loss": 8.9568, + "step": 95360 + }, + { + "epoch": 0.47626657344752676, + "grad_norm": 0.0939890593290329, + "learning_rate": 1.5751332949510626e-05, + "loss": 8.9392, + "step": 95370 + }, + { + "epoch": 0.47631651227246624, + "grad_norm": 0.08675019443035126, + "learning_rate": 1.5749831034569076e-05, + "loss": 8.9485, + "step": 95380 + }, + { + "epoch": 0.47636645109740566, + "grad_norm": 0.08740539848804474, + "learning_rate": 1.5748329119627526e-05, + "loss": 8.9461, + "step": 95390 + }, + { + "epoch": 0.47641638992234514, + "grad_norm": 0.08891706168651581, + "learning_rate": 1.5746827204685973e-05, + "loss": 8.9638, + "step": 95400 + }, + { + "epoch": 0.47646632874728456, + "grad_norm": 0.09209989011287689, + "learning_rate": 1.5745325289744426e-05, + "loss": 8.9543, + "step": 95410 + }, + { + "epoch": 0.47651626757222404, + "grad_norm": 0.09303548187017441, + "learning_rate": 1.5743823374802873e-05, + "loss": 8.9472, + "step": 95420 + }, + { + "epoch": 0.47656620639716346, + "grad_norm": 0.09217501431703568, + "learning_rate": 1.5742321459861327e-05, + "loss": 8.9547, + "step": 95430 + }, + { + "epoch": 0.47661614522210294, + "grad_norm": 0.08850811421871185, + "learning_rate": 1.5740819544919773e-05, + "loss": 8.9523, + "step": 95440 + }, + { + "epoch": 0.47666608404704236, + "grad_norm": 0.09346584975719452, + "learning_rate": 1.573931762997822e-05, + "loss": 8.9549, + "step": 95450 + }, + { + "epoch": 0.47671602287198184, + "grad_norm": 0.09290006756782532, + "learning_rate": 1.5737815715036674e-05, + "loss": 8.9536, + "step": 95460 + }, + { + "epoch": 0.47676596169692126, + "grad_norm": 0.09277894347906113, + "learning_rate": 1.573631380009512e-05, + "loss": 8.9525, + "step": 95470 + }, + { + "epoch": 0.47681590052186074, + "grad_norm": 0.09250402450561523, + "learning_rate": 1.5734811885153574e-05, + "loss": 8.9305, + "step": 95480 + }, + { + "epoch": 0.47686583934680016, + "grad_norm": 0.08638934046030045, + "learning_rate": 1.573330997021202e-05, + "loss": 8.949, + "step": 95490 + }, + { + "epoch": 0.47691577817173963, + "grad_norm": 0.10017383098602295, + "learning_rate": 1.5731808055270468e-05, + "loss": 8.9447, + "step": 95500 + }, + { + "epoch": 0.47696571699667906, + "grad_norm": 0.09218341112136841, + "learning_rate": 1.573030614032892e-05, + "loss": 8.9477, + "step": 95510 + }, + { + "epoch": 0.47701565582161853, + "grad_norm": 0.09426803141832352, + "learning_rate": 1.5728804225387368e-05, + "loss": 8.9439, + "step": 95520 + }, + { + "epoch": 0.47706559464655796, + "grad_norm": 0.08649339526891708, + "learning_rate": 1.572730231044582e-05, + "loss": 8.9515, + "step": 95530 + }, + { + "epoch": 0.47711553347149743, + "grad_norm": 0.09477028250694275, + "learning_rate": 1.572580039550427e-05, + "loss": 8.958, + "step": 95540 + }, + { + "epoch": 0.47716547229643685, + "grad_norm": 0.09427261352539062, + "learning_rate": 1.5724298480562715e-05, + "loss": 8.9478, + "step": 95550 + }, + { + "epoch": 0.47721541112137633, + "grad_norm": 0.09864936023950577, + "learning_rate": 1.572279656562117e-05, + "loss": 8.9301, + "step": 95560 + }, + { + "epoch": 0.47726534994631575, + "grad_norm": 0.09512222558259964, + "learning_rate": 1.5721294650679616e-05, + "loss": 8.9521, + "step": 95570 + }, + { + "epoch": 0.47731528877125523, + "grad_norm": 0.09052599221467972, + "learning_rate": 1.571979273573807e-05, + "loss": 8.949, + "step": 95580 + }, + { + "epoch": 0.47736522759619465, + "grad_norm": 0.09813720732927322, + "learning_rate": 1.5718290820796516e-05, + "loss": 8.9293, + "step": 95590 + }, + { + "epoch": 0.47741516642113413, + "grad_norm": 0.09309673309326172, + "learning_rate": 1.5716788905854963e-05, + "loss": 8.9349, + "step": 95600 + }, + { + "epoch": 0.47746510524607355, + "grad_norm": 0.08683782070875168, + "learning_rate": 1.5715286990913416e-05, + "loss": 8.9493, + "step": 95610 + }, + { + "epoch": 0.47751504407101303, + "grad_norm": 0.09030024707317352, + "learning_rate": 1.5713785075971863e-05, + "loss": 8.9459, + "step": 95620 + }, + { + "epoch": 0.47756498289595245, + "grad_norm": 0.09140963852405548, + "learning_rate": 1.5712283161030317e-05, + "loss": 8.9456, + "step": 95630 + }, + { + "epoch": 0.4776149217208919, + "grad_norm": 0.09844744205474854, + "learning_rate": 1.5710781246088763e-05, + "loss": 8.9562, + "step": 95640 + }, + { + "epoch": 0.47766486054583135, + "grad_norm": 0.09246145933866501, + "learning_rate": 1.570927933114721e-05, + "loss": 8.9507, + "step": 95650 + }, + { + "epoch": 0.4777147993707708, + "grad_norm": 0.09520356357097626, + "learning_rate": 1.5707777416205664e-05, + "loss": 8.9419, + "step": 95660 + }, + { + "epoch": 0.47776473819571025, + "grad_norm": 0.09316331148147583, + "learning_rate": 1.570627550126411e-05, + "loss": 8.9404, + "step": 95670 + }, + { + "epoch": 0.4778146770206497, + "grad_norm": 0.09663616865873337, + "learning_rate": 1.5704773586322564e-05, + "loss": 8.9278, + "step": 95680 + }, + { + "epoch": 0.47786461584558915, + "grad_norm": 0.09337840229272842, + "learning_rate": 1.570327167138101e-05, + "loss": 8.9441, + "step": 95690 + }, + { + "epoch": 0.4779145546705286, + "grad_norm": 0.0952988788485527, + "learning_rate": 1.5701769756439458e-05, + "loss": 8.9642, + "step": 95700 + }, + { + "epoch": 0.47796449349546805, + "grad_norm": 0.09496844559907913, + "learning_rate": 1.570026784149791e-05, + "loss": 8.9474, + "step": 95710 + }, + { + "epoch": 0.4780144323204075, + "grad_norm": 0.09619852900505066, + "learning_rate": 1.5698765926556358e-05, + "loss": 8.9422, + "step": 95720 + }, + { + "epoch": 0.47806437114534694, + "grad_norm": 0.10100951045751572, + "learning_rate": 1.5697264011614812e-05, + "loss": 8.9483, + "step": 95730 + }, + { + "epoch": 0.4781143099702864, + "grad_norm": 0.08543970435857773, + "learning_rate": 1.569576209667326e-05, + "loss": 8.9605, + "step": 95740 + }, + { + "epoch": 0.47816424879522584, + "grad_norm": 0.09065885096788406, + "learning_rate": 1.569426018173171e-05, + "loss": 8.9482, + "step": 95750 + }, + { + "epoch": 0.4782141876201653, + "grad_norm": 0.09514321386814117, + "learning_rate": 1.569275826679016e-05, + "loss": 8.9535, + "step": 95760 + }, + { + "epoch": 0.47826412644510474, + "grad_norm": 0.08945092558860779, + "learning_rate": 1.5691256351848606e-05, + "loss": 8.9462, + "step": 95770 + }, + { + "epoch": 0.4783140652700442, + "grad_norm": 0.09275572746992111, + "learning_rate": 1.568975443690706e-05, + "loss": 8.9564, + "step": 95780 + }, + { + "epoch": 0.47836400409498364, + "grad_norm": 0.09393645077943802, + "learning_rate": 1.5688252521965506e-05, + "loss": 8.9396, + "step": 95790 + }, + { + "epoch": 0.4784139429199231, + "grad_norm": 0.08892881125211716, + "learning_rate": 1.5686750607023956e-05, + "loss": 8.9488, + "step": 95800 + }, + { + "epoch": 0.47846388174486254, + "grad_norm": 0.09092603623867035, + "learning_rate": 1.5685248692082406e-05, + "loss": 8.9466, + "step": 95810 + }, + { + "epoch": 0.478513820569802, + "grad_norm": 0.09448976814746857, + "learning_rate": 1.5683746777140853e-05, + "loss": 8.9469, + "step": 95820 + }, + { + "epoch": 0.47856375939474144, + "grad_norm": 0.0917850136756897, + "learning_rate": 1.5682244862199307e-05, + "loss": 8.9422, + "step": 95830 + }, + { + "epoch": 0.4786136982196809, + "grad_norm": 0.09401079267263412, + "learning_rate": 1.5680742947257754e-05, + "loss": 8.9421, + "step": 95840 + }, + { + "epoch": 0.47866363704462034, + "grad_norm": 0.09111663699150085, + "learning_rate": 1.5679241032316204e-05, + "loss": 8.9468, + "step": 95850 + }, + { + "epoch": 0.4787135758695598, + "grad_norm": 0.09453728049993515, + "learning_rate": 1.5677739117374654e-05, + "loss": 8.962, + "step": 95860 + }, + { + "epoch": 0.47876351469449924, + "grad_norm": 0.09403866529464722, + "learning_rate": 1.56762372024331e-05, + "loss": 8.9499, + "step": 95870 + }, + { + "epoch": 0.4788134535194387, + "grad_norm": 0.09163626283407211, + "learning_rate": 1.5674735287491554e-05, + "loss": 8.9528, + "step": 95880 + }, + { + "epoch": 0.47886339234437814, + "grad_norm": 0.09393519908189774, + "learning_rate": 1.567323337255e-05, + "loss": 8.9393, + "step": 95890 + }, + { + "epoch": 0.4789133311693176, + "grad_norm": 0.09879069030284882, + "learning_rate": 1.567173145760845e-05, + "loss": 8.9523, + "step": 95900 + }, + { + "epoch": 0.47896326999425703, + "grad_norm": 0.09257882833480835, + "learning_rate": 1.56702295426669e-05, + "loss": 8.9472, + "step": 95910 + }, + { + "epoch": 0.47901320881919646, + "grad_norm": 0.09598603844642639, + "learning_rate": 1.5668727627725348e-05, + "loss": 8.9522, + "step": 95920 + }, + { + "epoch": 0.47906314764413593, + "grad_norm": 0.09990739077329636, + "learning_rate": 1.5667225712783802e-05, + "loss": 8.9547, + "step": 95930 + }, + { + "epoch": 0.47911308646907536, + "grad_norm": 0.09603408724069595, + "learning_rate": 1.566572379784225e-05, + "loss": 8.9491, + "step": 95940 + }, + { + "epoch": 0.47916302529401483, + "grad_norm": 0.09766265004873276, + "learning_rate": 1.56642218829007e-05, + "loss": 8.9592, + "step": 95950 + }, + { + "epoch": 0.47921296411895425, + "grad_norm": 0.08897819370031357, + "learning_rate": 1.566271996795915e-05, + "loss": 8.9378, + "step": 95960 + }, + { + "epoch": 0.47926290294389373, + "grad_norm": 0.09036344289779663, + "learning_rate": 1.5661218053017596e-05, + "loss": 8.9585, + "step": 95970 + }, + { + "epoch": 0.47931284176883315, + "grad_norm": 0.0929788202047348, + "learning_rate": 1.565971613807605e-05, + "loss": 8.941, + "step": 95980 + }, + { + "epoch": 0.47936278059377263, + "grad_norm": 0.08704034984111786, + "learning_rate": 1.5658214223134496e-05, + "loss": 8.9409, + "step": 95990 + }, + { + "epoch": 0.47941271941871205, + "grad_norm": 0.08739351481199265, + "learning_rate": 1.5656712308192946e-05, + "loss": 8.9567, + "step": 96000 + }, + { + "epoch": 0.47946265824365153, + "grad_norm": 0.0888032615184784, + "learning_rate": 1.5655210393251396e-05, + "loss": 8.95, + "step": 96010 + }, + { + "epoch": 0.47951259706859095, + "grad_norm": 0.09704812616109848, + "learning_rate": 1.5653708478309843e-05, + "loss": 8.9473, + "step": 96020 + }, + { + "epoch": 0.47956253589353043, + "grad_norm": 0.0939079076051712, + "learning_rate": 1.5652206563368297e-05, + "loss": 8.939, + "step": 96030 + }, + { + "epoch": 0.47961247471846985, + "grad_norm": 0.09814861416816711, + "learning_rate": 1.5650704648426744e-05, + "loss": 8.9571, + "step": 96040 + }, + { + "epoch": 0.4796624135434093, + "grad_norm": 0.09225606173276901, + "learning_rate": 1.5649202733485194e-05, + "loss": 8.9364, + "step": 96050 + }, + { + "epoch": 0.47971235236834875, + "grad_norm": 0.08847086131572723, + "learning_rate": 1.5647700818543644e-05, + "loss": 8.9359, + "step": 96060 + }, + { + "epoch": 0.4797622911932882, + "grad_norm": 0.09193941950798035, + "learning_rate": 1.5646198903602094e-05, + "loss": 8.9464, + "step": 96070 + }, + { + "epoch": 0.47981223001822765, + "grad_norm": 0.09449310600757599, + "learning_rate": 1.5644696988660544e-05, + "loss": 8.9496, + "step": 96080 + }, + { + "epoch": 0.4798621688431671, + "grad_norm": 0.08825480937957764, + "learning_rate": 1.564319507371899e-05, + "loss": 8.9294, + "step": 96090 + }, + { + "epoch": 0.47991210766810655, + "grad_norm": 0.10034892708063126, + "learning_rate": 1.564169315877744e-05, + "loss": 8.9352, + "step": 96100 + }, + { + "epoch": 0.479962046493046, + "grad_norm": 0.08834687620401382, + "learning_rate": 1.564019124383589e-05, + "loss": 8.9406, + "step": 96110 + }, + { + "epoch": 0.48001198531798545, + "grad_norm": 0.09365418553352356, + "learning_rate": 1.563868932889434e-05, + "loss": 8.942, + "step": 96120 + }, + { + "epoch": 0.4800619241429249, + "grad_norm": 0.09072919934988022, + "learning_rate": 1.5637187413952792e-05, + "loss": 8.9519, + "step": 96130 + }, + { + "epoch": 0.48011186296786434, + "grad_norm": 0.09022905677556992, + "learning_rate": 1.563568549901124e-05, + "loss": 8.9463, + "step": 96140 + }, + { + "epoch": 0.4801618017928038, + "grad_norm": 0.09697787463665009, + "learning_rate": 1.563418358406969e-05, + "loss": 8.9448, + "step": 96150 + }, + { + "epoch": 0.48021174061774324, + "grad_norm": 0.09243085980415344, + "learning_rate": 1.563268166912814e-05, + "loss": 8.9423, + "step": 96160 + }, + { + "epoch": 0.4802616794426827, + "grad_norm": 0.08860752731561661, + "learning_rate": 1.563117975418659e-05, + "loss": 8.9517, + "step": 96170 + }, + { + "epoch": 0.48031161826762214, + "grad_norm": 0.09040497988462448, + "learning_rate": 1.562967783924504e-05, + "loss": 8.9399, + "step": 96180 + }, + { + "epoch": 0.4803615570925616, + "grad_norm": 0.09311524033546448, + "learning_rate": 1.5628175924303486e-05, + "loss": 8.929, + "step": 96190 + }, + { + "epoch": 0.48041149591750104, + "grad_norm": 0.08853588253259659, + "learning_rate": 1.5626674009361936e-05, + "loss": 8.9327, + "step": 96200 + }, + { + "epoch": 0.4804614347424405, + "grad_norm": 0.09366882592439651, + "learning_rate": 1.5625172094420386e-05, + "loss": 8.9437, + "step": 96210 + }, + { + "epoch": 0.48051137356737994, + "grad_norm": 0.09654311090707779, + "learning_rate": 1.5623670179478837e-05, + "loss": 8.9322, + "step": 96220 + }, + { + "epoch": 0.4805613123923194, + "grad_norm": 0.09593677520751953, + "learning_rate": 1.5622168264537287e-05, + "loss": 8.9382, + "step": 96230 + }, + { + "epoch": 0.48061125121725884, + "grad_norm": 0.09233121573925018, + "learning_rate": 1.5620666349595734e-05, + "loss": 8.9288, + "step": 96240 + }, + { + "epoch": 0.4806611900421983, + "grad_norm": 0.0946589857339859, + "learning_rate": 1.5619164434654184e-05, + "loss": 8.9519, + "step": 96250 + }, + { + "epoch": 0.48071112886713774, + "grad_norm": 0.09263359010219574, + "learning_rate": 1.5617662519712634e-05, + "loss": 8.9297, + "step": 96260 + }, + { + "epoch": 0.4807610676920772, + "grad_norm": 0.09187857806682587, + "learning_rate": 1.5616160604771084e-05, + "loss": 8.9482, + "step": 96270 + }, + { + "epoch": 0.48081100651701664, + "grad_norm": 0.09805098176002502, + "learning_rate": 1.5614658689829534e-05, + "loss": 8.9409, + "step": 96280 + }, + { + "epoch": 0.4808609453419561, + "grad_norm": 0.09072684496641159, + "learning_rate": 1.561315677488798e-05, + "loss": 8.9378, + "step": 96290 + }, + { + "epoch": 0.48091088416689554, + "grad_norm": 0.0932784229516983, + "learning_rate": 1.561165485994643e-05, + "loss": 8.9335, + "step": 96300 + }, + { + "epoch": 0.480960822991835, + "grad_norm": 0.09350146353244781, + "learning_rate": 1.561015294500488e-05, + "loss": 8.9404, + "step": 96310 + }, + { + "epoch": 0.48101076181677443, + "grad_norm": 0.09393427520990372, + "learning_rate": 1.560865103006333e-05, + "loss": 8.937, + "step": 96320 + }, + { + "epoch": 0.4810607006417139, + "grad_norm": 0.09077678620815277, + "learning_rate": 1.5607149115121782e-05, + "loss": 8.9297, + "step": 96330 + }, + { + "epoch": 0.48111063946665333, + "grad_norm": 0.08859416097402573, + "learning_rate": 1.560564720018023e-05, + "loss": 8.9511, + "step": 96340 + }, + { + "epoch": 0.4811605782915928, + "grad_norm": 0.09834136813879013, + "learning_rate": 1.560414528523868e-05, + "loss": 8.9462, + "step": 96350 + }, + { + "epoch": 0.48121051711653223, + "grad_norm": 0.09708964824676514, + "learning_rate": 1.560264337029713e-05, + "loss": 8.9358, + "step": 96360 + }, + { + "epoch": 0.4812604559414717, + "grad_norm": 0.09646966308355331, + "learning_rate": 1.560114145535558e-05, + "loss": 8.9394, + "step": 96370 + }, + { + "epoch": 0.48131039476641113, + "grad_norm": 0.09074628353118896, + "learning_rate": 1.559963954041403e-05, + "loss": 8.9441, + "step": 96380 + }, + { + "epoch": 0.4813603335913506, + "grad_norm": 0.09401838481426239, + "learning_rate": 1.559813762547248e-05, + "loss": 8.9365, + "step": 96390 + }, + { + "epoch": 0.48141027241629003, + "grad_norm": 0.09368253499269485, + "learning_rate": 1.5596635710530926e-05, + "loss": 8.9477, + "step": 96400 + }, + { + "epoch": 0.4814602112412295, + "grad_norm": 0.09199754893779755, + "learning_rate": 1.5595133795589376e-05, + "loss": 8.9274, + "step": 96410 + }, + { + "epoch": 0.48151015006616893, + "grad_norm": 0.09040980041027069, + "learning_rate": 1.5593631880647827e-05, + "loss": 8.9531, + "step": 96420 + }, + { + "epoch": 0.4815600888911084, + "grad_norm": 0.09219523519277573, + "learning_rate": 1.5592129965706277e-05, + "loss": 8.9384, + "step": 96430 + }, + { + "epoch": 0.48161002771604783, + "grad_norm": 0.09335026144981384, + "learning_rate": 1.5590628050764727e-05, + "loss": 8.9469, + "step": 96440 + }, + { + "epoch": 0.4816599665409873, + "grad_norm": 0.09123118221759796, + "learning_rate": 1.5589126135823174e-05, + "loss": 8.9421, + "step": 96450 + }, + { + "epoch": 0.4817099053659267, + "grad_norm": 0.09461282938718796, + "learning_rate": 1.5587624220881624e-05, + "loss": 8.9259, + "step": 96460 + }, + { + "epoch": 0.4817598441908662, + "grad_norm": 0.09215594083070755, + "learning_rate": 1.5586122305940074e-05, + "loss": 8.9334, + "step": 96470 + }, + { + "epoch": 0.4818097830158056, + "grad_norm": 0.09032759815454483, + "learning_rate": 1.5584620390998524e-05, + "loss": 8.946, + "step": 96480 + }, + { + "epoch": 0.4818597218407451, + "grad_norm": 0.09267091751098633, + "learning_rate": 1.5583118476056974e-05, + "loss": 8.9375, + "step": 96490 + }, + { + "epoch": 0.4819096606656845, + "grad_norm": 0.09548508375883102, + "learning_rate": 1.558161656111542e-05, + "loss": 8.9397, + "step": 96500 + }, + { + "epoch": 0.481959599490624, + "grad_norm": 0.09211142361164093, + "learning_rate": 1.558011464617387e-05, + "loss": 8.943, + "step": 96510 + }, + { + "epoch": 0.4820095383155634, + "grad_norm": 0.09218604117631912, + "learning_rate": 1.557861273123232e-05, + "loss": 8.9385, + "step": 96520 + }, + { + "epoch": 0.4820594771405029, + "grad_norm": 0.09151312708854675, + "learning_rate": 1.5577110816290772e-05, + "loss": 8.9425, + "step": 96530 + }, + { + "epoch": 0.4821094159654423, + "grad_norm": 0.09401395171880722, + "learning_rate": 1.5575608901349222e-05, + "loss": 8.9411, + "step": 96540 + }, + { + "epoch": 0.4821593547903818, + "grad_norm": 0.09752167761325836, + "learning_rate": 1.557410698640767e-05, + "loss": 8.954, + "step": 96550 + }, + { + "epoch": 0.4822092936153212, + "grad_norm": 0.0942562073469162, + "learning_rate": 1.557260507146612e-05, + "loss": 8.9508, + "step": 96560 + }, + { + "epoch": 0.4822592324402607, + "grad_norm": 0.09495299309492111, + "learning_rate": 1.557110315652457e-05, + "loss": 8.9436, + "step": 96570 + }, + { + "epoch": 0.4823091712652001, + "grad_norm": 0.08862724155187607, + "learning_rate": 1.556960124158302e-05, + "loss": 8.9357, + "step": 96580 + }, + { + "epoch": 0.4823591100901396, + "grad_norm": 0.09534230828285217, + "learning_rate": 1.556809932664147e-05, + "loss": 8.938, + "step": 96590 + }, + { + "epoch": 0.482409048915079, + "grad_norm": 0.08949341624975204, + "learning_rate": 1.5566597411699916e-05, + "loss": 8.9301, + "step": 96600 + }, + { + "epoch": 0.4824589877400185, + "grad_norm": 0.08901667594909668, + "learning_rate": 1.5565095496758366e-05, + "loss": 8.9381, + "step": 96610 + }, + { + "epoch": 0.4825089265649579, + "grad_norm": 0.09051734954118729, + "learning_rate": 1.5563593581816817e-05, + "loss": 8.9413, + "step": 96620 + }, + { + "epoch": 0.4825588653898974, + "grad_norm": 0.09591720253229141, + "learning_rate": 1.5562091666875267e-05, + "loss": 8.9389, + "step": 96630 + }, + { + "epoch": 0.4826088042148368, + "grad_norm": 0.09589020907878876, + "learning_rate": 1.5560589751933717e-05, + "loss": 8.9373, + "step": 96640 + }, + { + "epoch": 0.4826587430397763, + "grad_norm": 0.0880039632320404, + "learning_rate": 1.5559087836992164e-05, + "loss": 8.9322, + "step": 96650 + }, + { + "epoch": 0.4827086818647157, + "grad_norm": 0.09445958584547043, + "learning_rate": 1.5557585922050614e-05, + "loss": 8.9557, + "step": 96660 + }, + { + "epoch": 0.4827586206896552, + "grad_norm": 0.09995859116315842, + "learning_rate": 1.5556084007109064e-05, + "loss": 8.9236, + "step": 96670 + }, + { + "epoch": 0.4828085595145946, + "grad_norm": 0.09632241725921631, + "learning_rate": 1.5554582092167514e-05, + "loss": 8.9337, + "step": 96680 + }, + { + "epoch": 0.4828584983395341, + "grad_norm": 0.09016299247741699, + "learning_rate": 1.5553080177225964e-05, + "loss": 8.9381, + "step": 96690 + }, + { + "epoch": 0.4829084371644735, + "grad_norm": 0.08898003399372101, + "learning_rate": 1.555157826228441e-05, + "loss": 8.9435, + "step": 96700 + }, + { + "epoch": 0.482958375989413, + "grad_norm": 0.09666922688484192, + "learning_rate": 1.5550076347342865e-05, + "loss": 8.9445, + "step": 96710 + }, + { + "epoch": 0.4830083148143524, + "grad_norm": 0.09168431162834167, + "learning_rate": 1.554857443240131e-05, + "loss": 8.9312, + "step": 96720 + }, + { + "epoch": 0.4830582536392919, + "grad_norm": 0.09085845202207565, + "learning_rate": 1.5547072517459762e-05, + "loss": 8.9415, + "step": 96730 + }, + { + "epoch": 0.4831081924642313, + "grad_norm": 0.09779160469770432, + "learning_rate": 1.5545570602518212e-05, + "loss": 8.9521, + "step": 96740 + }, + { + "epoch": 0.4831581312891708, + "grad_norm": 0.09065205603837967, + "learning_rate": 1.554406868757666e-05, + "loss": 8.9444, + "step": 96750 + }, + { + "epoch": 0.4832080701141102, + "grad_norm": 0.0941033810377121, + "learning_rate": 1.5542566772635112e-05, + "loss": 8.9362, + "step": 96760 + }, + { + "epoch": 0.4832580089390497, + "grad_norm": 0.09128523617982864, + "learning_rate": 1.554106485769356e-05, + "loss": 8.9498, + "step": 96770 + }, + { + "epoch": 0.4833079477639891, + "grad_norm": 0.09456328302621841, + "learning_rate": 1.553956294275201e-05, + "loss": 8.9385, + "step": 96780 + }, + { + "epoch": 0.4833578865889286, + "grad_norm": 0.090219646692276, + "learning_rate": 1.553806102781046e-05, + "loss": 8.9453, + "step": 96790 + }, + { + "epoch": 0.483407825413868, + "grad_norm": 0.09211535006761551, + "learning_rate": 1.5536559112868906e-05, + "loss": 8.957, + "step": 96800 + }, + { + "epoch": 0.4834577642388075, + "grad_norm": 0.09447281807661057, + "learning_rate": 1.553505719792736e-05, + "loss": 8.9422, + "step": 96810 + }, + { + "epoch": 0.4835077030637469, + "grad_norm": 0.09818176925182343, + "learning_rate": 1.5533555282985807e-05, + "loss": 8.9414, + "step": 96820 + }, + { + "epoch": 0.4835576418886864, + "grad_norm": 0.0923561379313469, + "learning_rate": 1.5532053368044257e-05, + "loss": 8.9497, + "step": 96830 + }, + { + "epoch": 0.4836075807136258, + "grad_norm": 0.09403201192617416, + "learning_rate": 1.5530551453102707e-05, + "loss": 8.9408, + "step": 96840 + }, + { + "epoch": 0.4836575195385653, + "grad_norm": 0.08915037661790848, + "learning_rate": 1.5529049538161154e-05, + "loss": 8.9423, + "step": 96850 + }, + { + "epoch": 0.4837074583635047, + "grad_norm": 0.09625165164470673, + "learning_rate": 1.5527547623219607e-05, + "loss": 8.948, + "step": 96860 + }, + { + "epoch": 0.4837573971884442, + "grad_norm": 0.0959184393286705, + "learning_rate": 1.5526045708278054e-05, + "loss": 8.9383, + "step": 96870 + }, + { + "epoch": 0.4838073360133836, + "grad_norm": 0.09560030698776245, + "learning_rate": 1.5524543793336504e-05, + "loss": 8.9419, + "step": 96880 + }, + { + "epoch": 0.4838572748383231, + "grad_norm": 0.09560041129589081, + "learning_rate": 1.5523041878394954e-05, + "loss": 8.9391, + "step": 96890 + }, + { + "epoch": 0.4839072136632625, + "grad_norm": 0.09270980209112167, + "learning_rate": 1.55215399634534e-05, + "loss": 8.9466, + "step": 96900 + }, + { + "epoch": 0.4839571524882019, + "grad_norm": 0.08851654082536697, + "learning_rate": 1.5520038048511855e-05, + "loss": 8.9342, + "step": 96910 + }, + { + "epoch": 0.4840070913131414, + "grad_norm": 0.08883985877037048, + "learning_rate": 1.55185361335703e-05, + "loss": 8.9362, + "step": 96920 + }, + { + "epoch": 0.4840570301380808, + "grad_norm": 0.09435227513313293, + "learning_rate": 1.5517034218628752e-05, + "loss": 8.9312, + "step": 96930 + }, + { + "epoch": 0.4841069689630203, + "grad_norm": 0.09650822728872299, + "learning_rate": 1.5515532303687202e-05, + "loss": 8.9235, + "step": 96940 + }, + { + "epoch": 0.4841569077879597, + "grad_norm": 0.09342892467975616, + "learning_rate": 1.551403038874565e-05, + "loss": 8.9339, + "step": 96950 + }, + { + "epoch": 0.4842068466128992, + "grad_norm": 0.0937606543302536, + "learning_rate": 1.5512528473804102e-05, + "loss": 8.9385, + "step": 96960 + }, + { + "epoch": 0.4842567854378386, + "grad_norm": 0.09186763316392899, + "learning_rate": 1.551102655886255e-05, + "loss": 8.9308, + "step": 96970 + }, + { + "epoch": 0.4843067242627781, + "grad_norm": 0.09301009774208069, + "learning_rate": 1.5509524643921e-05, + "loss": 8.9326, + "step": 96980 + }, + { + "epoch": 0.4843566630877175, + "grad_norm": 0.09419457614421844, + "learning_rate": 1.550802272897945e-05, + "loss": 8.9292, + "step": 96990 + }, + { + "epoch": 0.484406601912657, + "grad_norm": 0.09167234599590302, + "learning_rate": 1.5506520814037896e-05, + "loss": 8.9431, + "step": 97000 + }, + { + "epoch": 0.4844565407375964, + "grad_norm": 0.0926911011338234, + "learning_rate": 1.550501889909635e-05, + "loss": 8.9269, + "step": 97010 + }, + { + "epoch": 0.4845064795625359, + "grad_norm": 0.09434627741575241, + "learning_rate": 1.5503516984154797e-05, + "loss": 8.9336, + "step": 97020 + }, + { + "epoch": 0.4845564183874753, + "grad_norm": 0.09322822839021683, + "learning_rate": 1.5502015069213247e-05, + "loss": 8.9313, + "step": 97030 + }, + { + "epoch": 0.4846063572124148, + "grad_norm": 0.09197692573070526, + "learning_rate": 1.5500513154271697e-05, + "loss": 8.9304, + "step": 97040 + }, + { + "epoch": 0.4846562960373542, + "grad_norm": 0.09229908883571625, + "learning_rate": 1.5499011239330144e-05, + "loss": 8.9265, + "step": 97050 + }, + { + "epoch": 0.4847062348622937, + "grad_norm": 0.09997537732124329, + "learning_rate": 1.5497509324388597e-05, + "loss": 8.9292, + "step": 97060 + }, + { + "epoch": 0.4847561736872331, + "grad_norm": 0.09335023909807205, + "learning_rate": 1.5496007409447044e-05, + "loss": 8.9234, + "step": 97070 + }, + { + "epoch": 0.4848061125121726, + "grad_norm": 0.09383931756019592, + "learning_rate": 1.5494505494505498e-05, + "loss": 8.9355, + "step": 97080 + }, + { + "epoch": 0.484856051337112, + "grad_norm": 0.09556584805250168, + "learning_rate": 1.5493003579563944e-05, + "loss": 8.9404, + "step": 97090 + }, + { + "epoch": 0.4849059901620515, + "grad_norm": 0.09331072121858597, + "learning_rate": 1.549150166462239e-05, + "loss": 8.935, + "step": 97100 + }, + { + "epoch": 0.4849559289869909, + "grad_norm": 0.09537593275308609, + "learning_rate": 1.5489999749680845e-05, + "loss": 8.9442, + "step": 97110 + }, + { + "epoch": 0.4850058678119304, + "grad_norm": 0.0932040587067604, + "learning_rate": 1.548849783473929e-05, + "loss": 8.9356, + "step": 97120 + }, + { + "epoch": 0.4850558066368698, + "grad_norm": 0.09548848122358322, + "learning_rate": 1.5486995919797745e-05, + "loss": 8.9351, + "step": 97130 + }, + { + "epoch": 0.4851057454618093, + "grad_norm": 0.088288314640522, + "learning_rate": 1.5485494004856192e-05, + "loss": 8.9277, + "step": 97140 + }, + { + "epoch": 0.4851556842867487, + "grad_norm": 0.0967058390378952, + "learning_rate": 1.548399208991464e-05, + "loss": 8.9245, + "step": 97150 + }, + { + "epoch": 0.4852056231116882, + "grad_norm": 0.0918177142739296, + "learning_rate": 1.5482490174973092e-05, + "loss": 8.9246, + "step": 97160 + }, + { + "epoch": 0.4852555619366276, + "grad_norm": 0.09275839477777481, + "learning_rate": 1.548098826003154e-05, + "loss": 8.9332, + "step": 97170 + }, + { + "epoch": 0.4853055007615671, + "grad_norm": 0.09571007639169693, + "learning_rate": 1.5479486345089993e-05, + "loss": 8.9214, + "step": 97180 + }, + { + "epoch": 0.4853554395865065, + "grad_norm": 0.09551675617694855, + "learning_rate": 1.547798443014844e-05, + "loss": 8.9341, + "step": 97190 + }, + { + "epoch": 0.485405378411446, + "grad_norm": 0.09668447822332382, + "learning_rate": 1.5476482515206886e-05, + "loss": 8.9389, + "step": 97200 + }, + { + "epoch": 0.4854553172363854, + "grad_norm": 0.09511365741491318, + "learning_rate": 1.547498060026534e-05, + "loss": 8.9348, + "step": 97210 + }, + { + "epoch": 0.4855052560613249, + "grad_norm": 0.09372630715370178, + "learning_rate": 1.5473478685323787e-05, + "loss": 8.9325, + "step": 97220 + }, + { + "epoch": 0.4855551948862643, + "grad_norm": 0.09546605497598648, + "learning_rate": 1.547197677038224e-05, + "loss": 8.9311, + "step": 97230 + }, + { + "epoch": 0.4856051337112038, + "grad_norm": 0.09527111798524857, + "learning_rate": 1.5470474855440687e-05, + "loss": 8.929, + "step": 97240 + }, + { + "epoch": 0.4856550725361432, + "grad_norm": 0.09080222994089127, + "learning_rate": 1.5468972940499134e-05, + "loss": 8.931, + "step": 97250 + }, + { + "epoch": 0.4857050113610827, + "grad_norm": 0.09486258774995804, + "learning_rate": 1.5467471025557587e-05, + "loss": 8.9347, + "step": 97260 + }, + { + "epoch": 0.4857549501860221, + "grad_norm": 0.09705396741628647, + "learning_rate": 1.5465969110616034e-05, + "loss": 8.9462, + "step": 97270 + }, + { + "epoch": 0.4858048890109616, + "grad_norm": 0.09238271415233612, + "learning_rate": 1.5464467195674488e-05, + "loss": 8.922, + "step": 97280 + }, + { + "epoch": 0.485854827835901, + "grad_norm": 0.09559265524148941, + "learning_rate": 1.5462965280732934e-05, + "loss": 8.9331, + "step": 97290 + }, + { + "epoch": 0.4859047666608405, + "grad_norm": 0.09076651930809021, + "learning_rate": 1.546146336579138e-05, + "loss": 8.9372, + "step": 97300 + }, + { + "epoch": 0.4859547054857799, + "grad_norm": 0.09629295766353607, + "learning_rate": 1.5459961450849835e-05, + "loss": 8.9217, + "step": 97310 + }, + { + "epoch": 0.4860046443107194, + "grad_norm": 0.09511082619428635, + "learning_rate": 1.545845953590828e-05, + "loss": 8.9201, + "step": 97320 + }, + { + "epoch": 0.4860545831356588, + "grad_norm": 0.09342075139284134, + "learning_rate": 1.5456957620966735e-05, + "loss": 8.9417, + "step": 97330 + }, + { + "epoch": 0.4861045219605983, + "grad_norm": 0.09244676679372787, + "learning_rate": 1.5455455706025182e-05, + "loss": 8.9413, + "step": 97340 + }, + { + "epoch": 0.4861544607855377, + "grad_norm": 0.08962726593017578, + "learning_rate": 1.545395379108363e-05, + "loss": 8.9414, + "step": 97350 + }, + { + "epoch": 0.4862043996104772, + "grad_norm": 0.09315306693315506, + "learning_rate": 1.5452451876142082e-05, + "loss": 8.9322, + "step": 97360 + }, + { + "epoch": 0.4862543384354166, + "grad_norm": 0.0958314910531044, + "learning_rate": 1.545094996120053e-05, + "loss": 8.93, + "step": 97370 + }, + { + "epoch": 0.4863042772603561, + "grad_norm": 0.09707090258598328, + "learning_rate": 1.5449448046258983e-05, + "loss": 8.9318, + "step": 97380 + }, + { + "epoch": 0.4863542160852955, + "grad_norm": 0.09278113394975662, + "learning_rate": 1.544794613131743e-05, + "loss": 8.9208, + "step": 97390 + }, + { + "epoch": 0.486404154910235, + "grad_norm": 0.09409945458173752, + "learning_rate": 1.544644421637588e-05, + "loss": 8.9421, + "step": 97400 + }, + { + "epoch": 0.4864540937351744, + "grad_norm": 0.08893690258264542, + "learning_rate": 1.544494230143433e-05, + "loss": 8.9299, + "step": 97410 + }, + { + "epoch": 0.4865040325601139, + "grad_norm": 0.09356432408094406, + "learning_rate": 1.5443440386492777e-05, + "loss": 8.9403, + "step": 97420 + }, + { + "epoch": 0.4865539713850533, + "grad_norm": 0.09327803552150726, + "learning_rate": 1.544193847155123e-05, + "loss": 8.9288, + "step": 97430 + }, + { + "epoch": 0.4866039102099928, + "grad_norm": 0.0947476327419281, + "learning_rate": 1.5440436556609677e-05, + "loss": 8.9193, + "step": 97440 + }, + { + "epoch": 0.4866538490349322, + "grad_norm": 0.09564851969480515, + "learning_rate": 1.5438934641668127e-05, + "loss": 8.9157, + "step": 97450 + }, + { + "epoch": 0.4867037878598717, + "grad_norm": 0.09227867424488068, + "learning_rate": 1.5437432726726577e-05, + "loss": 8.9285, + "step": 97460 + }, + { + "epoch": 0.4867537266848111, + "grad_norm": 0.09659763425588608, + "learning_rate": 1.5435930811785024e-05, + "loss": 8.9351, + "step": 97470 + }, + { + "epoch": 0.48680366550975057, + "grad_norm": 0.09299914538860321, + "learning_rate": 1.5434428896843478e-05, + "loss": 8.9386, + "step": 97480 + }, + { + "epoch": 0.48685360433469, + "grad_norm": 0.09589099138975143, + "learning_rate": 1.5432926981901925e-05, + "loss": 8.9294, + "step": 97490 + }, + { + "epoch": 0.48690354315962947, + "grad_norm": 0.08684620261192322, + "learning_rate": 1.5431425066960375e-05, + "loss": 8.9267, + "step": 97500 + }, + { + "epoch": 0.4869534819845689, + "grad_norm": 0.09718476235866547, + "learning_rate": 1.5429923152018825e-05, + "loss": 8.9199, + "step": 97510 + }, + { + "epoch": 0.48700342080950837, + "grad_norm": 0.09026718139648438, + "learning_rate": 1.542842123707727e-05, + "loss": 8.9492, + "step": 97520 + }, + { + "epoch": 0.4870533596344478, + "grad_norm": 0.09231576323509216, + "learning_rate": 1.5426919322135725e-05, + "loss": 8.931, + "step": 97530 + }, + { + "epoch": 0.48710329845938727, + "grad_norm": 0.09282558411359787, + "learning_rate": 1.5425417407194172e-05, + "loss": 8.9311, + "step": 97540 + }, + { + "epoch": 0.4871532372843267, + "grad_norm": 0.10046909749507904, + "learning_rate": 1.5423915492252622e-05, + "loss": 8.9389, + "step": 97550 + }, + { + "epoch": 0.48720317610926617, + "grad_norm": 0.09517937153577805, + "learning_rate": 1.5422413577311072e-05, + "loss": 8.9321, + "step": 97560 + }, + { + "epoch": 0.4872531149342056, + "grad_norm": 0.09505096822977066, + "learning_rate": 1.542091166236952e-05, + "loss": 8.9198, + "step": 97570 + }, + { + "epoch": 0.48730305375914507, + "grad_norm": 0.08916744589805603, + "learning_rate": 1.5419409747427973e-05, + "loss": 8.9174, + "step": 97580 + }, + { + "epoch": 0.4873529925840845, + "grad_norm": 0.08957254886627197, + "learning_rate": 1.541790783248642e-05, + "loss": 8.9441, + "step": 97590 + }, + { + "epoch": 0.48740293140902397, + "grad_norm": 0.08980575203895569, + "learning_rate": 1.541640591754487e-05, + "loss": 8.9318, + "step": 97600 + }, + { + "epoch": 0.4874528702339634, + "grad_norm": 0.09375443309545517, + "learning_rate": 1.541490400260332e-05, + "loss": 8.9393, + "step": 97610 + }, + { + "epoch": 0.48750280905890286, + "grad_norm": 0.08852649480104446, + "learning_rate": 1.5413402087661767e-05, + "loss": 8.9289, + "step": 97620 + }, + { + "epoch": 0.4875527478838423, + "grad_norm": 0.09620748460292816, + "learning_rate": 1.541190017272022e-05, + "loss": 8.9414, + "step": 97630 + }, + { + "epoch": 0.48760268670878176, + "grad_norm": 0.09633340686559677, + "learning_rate": 1.5410398257778667e-05, + "loss": 8.9263, + "step": 97640 + }, + { + "epoch": 0.4876526255337212, + "grad_norm": 0.0938490703701973, + "learning_rate": 1.5408896342837117e-05, + "loss": 8.932, + "step": 97650 + }, + { + "epoch": 0.48770256435866066, + "grad_norm": 0.08980219811201096, + "learning_rate": 1.5407394427895567e-05, + "loss": 8.913, + "step": 97660 + }, + { + "epoch": 0.4877525031836001, + "grad_norm": 0.09491949528455734, + "learning_rate": 1.5405892512954014e-05, + "loss": 8.9317, + "step": 97670 + }, + { + "epoch": 0.48780244200853956, + "grad_norm": 0.0898803249001503, + "learning_rate": 1.5404390598012468e-05, + "loss": 8.938, + "step": 97680 + }, + { + "epoch": 0.487852380833479, + "grad_norm": 0.09298001229763031, + "learning_rate": 1.5402888683070915e-05, + "loss": 8.9168, + "step": 97690 + }, + { + "epoch": 0.48790231965841846, + "grad_norm": 0.09501669555902481, + "learning_rate": 1.5401386768129365e-05, + "loss": 8.9396, + "step": 97700 + }, + { + "epoch": 0.4879522584833579, + "grad_norm": 0.09508873522281647, + "learning_rate": 1.5399884853187815e-05, + "loss": 8.9306, + "step": 97710 + }, + { + "epoch": 0.48800219730829736, + "grad_norm": 0.0931331217288971, + "learning_rate": 1.5398382938246265e-05, + "loss": 8.9261, + "step": 97720 + }, + { + "epoch": 0.4880521361332368, + "grad_norm": 0.0985851138830185, + "learning_rate": 1.5396881023304715e-05, + "loss": 8.9298, + "step": 97730 + }, + { + "epoch": 0.48810207495817626, + "grad_norm": 0.08908767253160477, + "learning_rate": 1.5395379108363162e-05, + "loss": 8.9204, + "step": 97740 + }, + { + "epoch": 0.4881520137831157, + "grad_norm": 0.09742926806211472, + "learning_rate": 1.5393877193421612e-05, + "loss": 8.9244, + "step": 97750 + }, + { + "epoch": 0.48820195260805516, + "grad_norm": 0.09041773527860641, + "learning_rate": 1.5392375278480062e-05, + "loss": 8.9151, + "step": 97760 + }, + { + "epoch": 0.4882518914329946, + "grad_norm": 0.094319187104702, + "learning_rate": 1.5390873363538513e-05, + "loss": 8.9254, + "step": 97770 + }, + { + "epoch": 0.48830183025793406, + "grad_norm": 0.09709497541189194, + "learning_rate": 1.5389371448596963e-05, + "loss": 8.9349, + "step": 97780 + }, + { + "epoch": 0.4883517690828735, + "grad_norm": 0.09084603935480118, + "learning_rate": 1.538786953365541e-05, + "loss": 8.9242, + "step": 97790 + }, + { + "epoch": 0.48840170790781295, + "grad_norm": 0.09305722266435623, + "learning_rate": 1.538636761871386e-05, + "loss": 8.9348, + "step": 97800 + }, + { + "epoch": 0.4884516467327524, + "grad_norm": 0.09015587717294693, + "learning_rate": 1.538486570377231e-05, + "loss": 8.9306, + "step": 97810 + }, + { + "epoch": 0.48850158555769185, + "grad_norm": 0.0915970653295517, + "learning_rate": 1.538336378883076e-05, + "loss": 8.9168, + "step": 97820 + }, + { + "epoch": 0.4885515243826313, + "grad_norm": 0.0917646586894989, + "learning_rate": 1.538186187388921e-05, + "loss": 8.9245, + "step": 97830 + }, + { + "epoch": 0.48860146320757075, + "grad_norm": 0.09395934641361237, + "learning_rate": 1.5380359958947657e-05, + "loss": 8.9253, + "step": 97840 + }, + { + "epoch": 0.4886514020325102, + "grad_norm": 0.09345404803752899, + "learning_rate": 1.5378858044006107e-05, + "loss": 8.9186, + "step": 97850 + }, + { + "epoch": 0.48870134085744965, + "grad_norm": 0.0977400466799736, + "learning_rate": 1.5377356129064557e-05, + "loss": 8.9381, + "step": 97860 + }, + { + "epoch": 0.4887512796823891, + "grad_norm": 0.09431370347738266, + "learning_rate": 1.5375854214123008e-05, + "loss": 8.925, + "step": 97870 + }, + { + "epoch": 0.48880121850732855, + "grad_norm": 0.09120336174964905, + "learning_rate": 1.5374352299181458e-05, + "loss": 8.9313, + "step": 97880 + }, + { + "epoch": 0.48885115733226797, + "grad_norm": 0.09345964342355728, + "learning_rate": 1.5372850384239905e-05, + "loss": 8.9307, + "step": 97890 + }, + { + "epoch": 0.4889010961572074, + "grad_norm": 0.09451073408126831, + "learning_rate": 1.5371348469298355e-05, + "loss": 8.9228, + "step": 97900 + }, + { + "epoch": 0.48895103498214687, + "grad_norm": 0.08943965286016464, + "learning_rate": 1.5369846554356805e-05, + "loss": 8.9363, + "step": 97910 + }, + { + "epoch": 0.4890009738070863, + "grad_norm": 0.09674328565597534, + "learning_rate": 1.5368344639415255e-05, + "loss": 8.9236, + "step": 97920 + }, + { + "epoch": 0.48905091263202577, + "grad_norm": 0.09601275622844696, + "learning_rate": 1.5366842724473705e-05, + "loss": 8.9287, + "step": 97930 + }, + { + "epoch": 0.4891008514569652, + "grad_norm": 0.09064171463251114, + "learning_rate": 1.5365340809532152e-05, + "loss": 8.9479, + "step": 97940 + }, + { + "epoch": 0.48915079028190467, + "grad_norm": 0.09523607045412064, + "learning_rate": 1.5363838894590602e-05, + "loss": 8.9308, + "step": 97950 + }, + { + "epoch": 0.4892007291068441, + "grad_norm": 0.09979385882616043, + "learning_rate": 1.5362336979649052e-05, + "loss": 8.9273, + "step": 97960 + }, + { + "epoch": 0.48925066793178357, + "grad_norm": 0.09213593602180481, + "learning_rate": 1.5360835064707503e-05, + "loss": 8.9341, + "step": 97970 + }, + { + "epoch": 0.489300606756723, + "grad_norm": 0.09432204067707062, + "learning_rate": 1.5359333149765953e-05, + "loss": 8.9229, + "step": 97980 + }, + { + "epoch": 0.48935054558166247, + "grad_norm": 0.09289470314979553, + "learning_rate": 1.53578312348244e-05, + "loss": 8.9194, + "step": 97990 + }, + { + "epoch": 0.4894004844066019, + "grad_norm": 0.09178686141967773, + "learning_rate": 1.535632931988285e-05, + "loss": 8.9441, + "step": 98000 + }, + { + "epoch": 0.48945042323154136, + "grad_norm": 0.08877766132354736, + "learning_rate": 1.53548274049413e-05, + "loss": 8.9273, + "step": 98010 + }, + { + "epoch": 0.4895003620564808, + "grad_norm": 0.09612318128347397, + "learning_rate": 1.535332548999975e-05, + "loss": 8.9139, + "step": 98020 + }, + { + "epoch": 0.48955030088142026, + "grad_norm": 0.09008832275867462, + "learning_rate": 1.53518235750582e-05, + "loss": 8.9228, + "step": 98030 + }, + { + "epoch": 0.4896002397063597, + "grad_norm": 0.0915464237332344, + "learning_rate": 1.535032166011665e-05, + "loss": 8.9361, + "step": 98040 + }, + { + "epoch": 0.48965017853129916, + "grad_norm": 0.093266561627388, + "learning_rate": 1.5348819745175097e-05, + "loss": 8.9302, + "step": 98050 + }, + { + "epoch": 0.4897001173562386, + "grad_norm": 0.08844608813524246, + "learning_rate": 1.5347317830233547e-05, + "loss": 8.9241, + "step": 98060 + }, + { + "epoch": 0.48975005618117806, + "grad_norm": 0.0893082469701767, + "learning_rate": 1.5345815915291998e-05, + "loss": 8.927, + "step": 98070 + }, + { + "epoch": 0.4897999950061175, + "grad_norm": 0.09313119947910309, + "learning_rate": 1.5344314000350448e-05, + "loss": 8.9324, + "step": 98080 + }, + { + "epoch": 0.48984993383105696, + "grad_norm": 0.09175945073366165, + "learning_rate": 1.5342812085408898e-05, + "loss": 8.9129, + "step": 98090 + }, + { + "epoch": 0.4898998726559964, + "grad_norm": 0.09570962190628052, + "learning_rate": 1.5341310170467345e-05, + "loss": 8.9264, + "step": 98100 + }, + { + "epoch": 0.48994981148093586, + "grad_norm": 0.09470510482788086, + "learning_rate": 1.5339808255525795e-05, + "loss": 8.9235, + "step": 98110 + }, + { + "epoch": 0.4899997503058753, + "grad_norm": 0.09118285030126572, + "learning_rate": 1.5338306340584245e-05, + "loss": 8.9291, + "step": 98120 + }, + { + "epoch": 0.49004968913081476, + "grad_norm": 0.09788258373737335, + "learning_rate": 1.5336804425642695e-05, + "loss": 8.919, + "step": 98130 + }, + { + "epoch": 0.4900996279557542, + "grad_norm": 0.09753192961215973, + "learning_rate": 1.5335302510701145e-05, + "loss": 8.9069, + "step": 98140 + }, + { + "epoch": 0.49014956678069366, + "grad_norm": 0.09691204130649567, + "learning_rate": 1.5333800595759592e-05, + "loss": 8.9225, + "step": 98150 + }, + { + "epoch": 0.4901995056056331, + "grad_norm": 0.0969519168138504, + "learning_rate": 1.5332298680818042e-05, + "loss": 8.9203, + "step": 98160 + }, + { + "epoch": 0.49024944443057256, + "grad_norm": 0.09179043769836426, + "learning_rate": 1.5330796765876493e-05, + "loss": 8.9177, + "step": 98170 + }, + { + "epoch": 0.490299383255512, + "grad_norm": 0.09258826822042465, + "learning_rate": 1.5329294850934943e-05, + "loss": 8.9295, + "step": 98180 + }, + { + "epoch": 0.49034932208045146, + "grad_norm": 0.09368129819631577, + "learning_rate": 1.5327792935993393e-05, + "loss": 8.9187, + "step": 98190 + }, + { + "epoch": 0.4903992609053909, + "grad_norm": 0.09183964878320694, + "learning_rate": 1.5326291021051843e-05, + "loss": 8.9198, + "step": 98200 + }, + { + "epoch": 0.49044919973033035, + "grad_norm": 0.0940631628036499, + "learning_rate": 1.532478910611029e-05, + "loss": 8.9311, + "step": 98210 + }, + { + "epoch": 0.4904991385552698, + "grad_norm": 0.09668076038360596, + "learning_rate": 1.532328719116874e-05, + "loss": 8.9115, + "step": 98220 + }, + { + "epoch": 0.49054907738020925, + "grad_norm": 0.09049348533153534, + "learning_rate": 1.532178527622719e-05, + "loss": 8.9309, + "step": 98230 + }, + { + "epoch": 0.4905990162051487, + "grad_norm": 0.08599221706390381, + "learning_rate": 1.532028336128564e-05, + "loss": 8.9212, + "step": 98240 + }, + { + "epoch": 0.49064895503008815, + "grad_norm": 0.09467752277851105, + "learning_rate": 1.531878144634409e-05, + "loss": 8.9257, + "step": 98250 + }, + { + "epoch": 0.4906988938550276, + "grad_norm": 0.10057593882083893, + "learning_rate": 1.5317279531402537e-05, + "loss": 8.9434, + "step": 98260 + }, + { + "epoch": 0.49074883267996705, + "grad_norm": 0.09494142979383469, + "learning_rate": 1.5315777616460988e-05, + "loss": 8.9185, + "step": 98270 + }, + { + "epoch": 0.4907987715049065, + "grad_norm": 0.09219949692487717, + "learning_rate": 1.5314275701519438e-05, + "loss": 8.9139, + "step": 98280 + }, + { + "epoch": 0.49084871032984595, + "grad_norm": 0.09445098787546158, + "learning_rate": 1.5312773786577888e-05, + "loss": 8.9353, + "step": 98290 + }, + { + "epoch": 0.49089864915478537, + "grad_norm": 0.09249778091907501, + "learning_rate": 1.5311271871636338e-05, + "loss": 8.9202, + "step": 98300 + }, + { + "epoch": 0.49094858797972485, + "grad_norm": 0.09504956007003784, + "learning_rate": 1.5309769956694785e-05, + "loss": 8.9148, + "step": 98310 + }, + { + "epoch": 0.49099852680466427, + "grad_norm": 0.09462475776672363, + "learning_rate": 1.5308268041753235e-05, + "loss": 8.9387, + "step": 98320 + }, + { + "epoch": 0.49104846562960375, + "grad_norm": 0.09308874607086182, + "learning_rate": 1.5306766126811685e-05, + "loss": 8.9143, + "step": 98330 + }, + { + "epoch": 0.49109840445454317, + "grad_norm": 0.09277541190385818, + "learning_rate": 1.5305264211870135e-05, + "loss": 8.9223, + "step": 98340 + }, + { + "epoch": 0.49114834327948265, + "grad_norm": 0.09326548874378204, + "learning_rate": 1.5303762296928586e-05, + "loss": 8.926, + "step": 98350 + }, + { + "epoch": 0.49119828210442207, + "grad_norm": 0.09251368790864944, + "learning_rate": 1.5302260381987036e-05, + "loss": 8.94, + "step": 98360 + }, + { + "epoch": 0.49124822092936155, + "grad_norm": 0.0935811847448349, + "learning_rate": 1.5300758467045483e-05, + "loss": 8.9264, + "step": 98370 + }, + { + "epoch": 0.49129815975430097, + "grad_norm": 0.09226620942354202, + "learning_rate": 1.5299256552103933e-05, + "loss": 8.9254, + "step": 98380 + }, + { + "epoch": 0.49134809857924044, + "grad_norm": 0.09316042810678482, + "learning_rate": 1.5297754637162383e-05, + "loss": 8.9175, + "step": 98390 + }, + { + "epoch": 0.49139803740417987, + "grad_norm": 0.09391681849956512, + "learning_rate": 1.5296252722220833e-05, + "loss": 8.9204, + "step": 98400 + }, + { + "epoch": 0.49144797622911934, + "grad_norm": 0.09176868200302124, + "learning_rate": 1.5294750807279283e-05, + "loss": 8.919, + "step": 98410 + }, + { + "epoch": 0.49149791505405876, + "grad_norm": 0.09538892656564713, + "learning_rate": 1.529324889233773e-05, + "loss": 8.9293, + "step": 98420 + }, + { + "epoch": 0.49154785387899824, + "grad_norm": 0.09601672738790512, + "learning_rate": 1.529174697739618e-05, + "loss": 8.9258, + "step": 98430 + }, + { + "epoch": 0.49159779270393766, + "grad_norm": 0.09024684131145477, + "learning_rate": 1.529024506245463e-05, + "loss": 8.9218, + "step": 98440 + }, + { + "epoch": 0.49164773152887714, + "grad_norm": 0.09166863560676575, + "learning_rate": 1.528874314751308e-05, + "loss": 8.9383, + "step": 98450 + }, + { + "epoch": 0.49169767035381656, + "grad_norm": 0.09486201405525208, + "learning_rate": 1.528724123257153e-05, + "loss": 8.9158, + "step": 98460 + }, + { + "epoch": 0.49174760917875604, + "grad_norm": 0.09765074402093887, + "learning_rate": 1.5285739317629978e-05, + "loss": 8.9163, + "step": 98470 + }, + { + "epoch": 0.49179754800369546, + "grad_norm": 0.09160161763429642, + "learning_rate": 1.5284237402688428e-05, + "loss": 8.9048, + "step": 98480 + }, + { + "epoch": 0.49184748682863494, + "grad_norm": 0.09425348788499832, + "learning_rate": 1.5282735487746878e-05, + "loss": 8.923, + "step": 98490 + }, + { + "epoch": 0.49189742565357436, + "grad_norm": 0.09459834545850754, + "learning_rate": 1.5281233572805328e-05, + "loss": 8.9128, + "step": 98500 + }, + { + "epoch": 0.49194736447851384, + "grad_norm": 0.08558713644742966, + "learning_rate": 1.527973165786378e-05, + "loss": 8.9286, + "step": 98510 + }, + { + "epoch": 0.49199730330345326, + "grad_norm": 0.09137829393148422, + "learning_rate": 1.5278229742922225e-05, + "loss": 8.9298, + "step": 98520 + }, + { + "epoch": 0.49204724212839274, + "grad_norm": 0.09085921198129654, + "learning_rate": 1.5276727827980675e-05, + "loss": 8.8977, + "step": 98530 + }, + { + "epoch": 0.49209718095333216, + "grad_norm": 0.08780453354120255, + "learning_rate": 1.5275225913039125e-05, + "loss": 8.9185, + "step": 98540 + }, + { + "epoch": 0.49214711977827164, + "grad_norm": 0.09832119941711426, + "learning_rate": 1.5273723998097576e-05, + "loss": 8.9146, + "step": 98550 + }, + { + "epoch": 0.49219705860321106, + "grad_norm": 0.09514303505420685, + "learning_rate": 1.5272222083156026e-05, + "loss": 8.9141, + "step": 98560 + }, + { + "epoch": 0.49224699742815053, + "grad_norm": 0.09371118247509003, + "learning_rate": 1.5270720168214473e-05, + "loss": 8.9301, + "step": 98570 + }, + { + "epoch": 0.49229693625308996, + "grad_norm": 0.09175911545753479, + "learning_rate": 1.5269218253272923e-05, + "loss": 8.9148, + "step": 98580 + }, + { + "epoch": 0.49234687507802943, + "grad_norm": 0.09370813518762589, + "learning_rate": 1.5267716338331373e-05, + "loss": 8.917, + "step": 98590 + }, + { + "epoch": 0.49239681390296886, + "grad_norm": 0.09525982290506363, + "learning_rate": 1.5266214423389823e-05, + "loss": 8.9276, + "step": 98600 + }, + { + "epoch": 0.49244675272790833, + "grad_norm": 0.09428591281175613, + "learning_rate": 1.5264712508448273e-05, + "loss": 8.9137, + "step": 98610 + }, + { + "epoch": 0.49249669155284775, + "grad_norm": 0.09742698073387146, + "learning_rate": 1.526321059350672e-05, + "loss": 8.9217, + "step": 98620 + }, + { + "epoch": 0.49254663037778723, + "grad_norm": 0.09069808572530746, + "learning_rate": 1.526170867856517e-05, + "loss": 8.9151, + "step": 98630 + }, + { + "epoch": 0.49259656920272665, + "grad_norm": 0.08807901293039322, + "learning_rate": 1.526020676362362e-05, + "loss": 8.9236, + "step": 98640 + }, + { + "epoch": 0.49264650802766613, + "grad_norm": 0.10254672169685364, + "learning_rate": 1.5258704848682069e-05, + "loss": 8.9263, + "step": 98650 + }, + { + "epoch": 0.49269644685260555, + "grad_norm": 0.09022139757871628, + "learning_rate": 1.525720293374052e-05, + "loss": 8.937, + "step": 98660 + }, + { + "epoch": 0.49274638567754503, + "grad_norm": 0.0958266630768776, + "learning_rate": 1.525570101879897e-05, + "loss": 8.9133, + "step": 98670 + }, + { + "epoch": 0.49279632450248445, + "grad_norm": 0.09768004715442657, + "learning_rate": 1.525419910385742e-05, + "loss": 8.9149, + "step": 98680 + }, + { + "epoch": 0.49284626332742393, + "grad_norm": 0.09794740378856659, + "learning_rate": 1.5252697188915868e-05, + "loss": 8.9157, + "step": 98690 + }, + { + "epoch": 0.49289620215236335, + "grad_norm": 0.09140866249799728, + "learning_rate": 1.5251195273974316e-05, + "loss": 8.931, + "step": 98700 + }, + { + "epoch": 0.4929461409773028, + "grad_norm": 0.09415680915117264, + "learning_rate": 1.5249693359032768e-05, + "loss": 8.9115, + "step": 98710 + }, + { + "epoch": 0.49299607980224225, + "grad_norm": 0.09118063002824783, + "learning_rate": 1.5248191444091217e-05, + "loss": 8.9061, + "step": 98720 + }, + { + "epoch": 0.4930460186271817, + "grad_norm": 0.09346728771924973, + "learning_rate": 1.5246689529149667e-05, + "loss": 8.9145, + "step": 98730 + }, + { + "epoch": 0.49309595745212115, + "grad_norm": 0.0916360467672348, + "learning_rate": 1.5245187614208115e-05, + "loss": 8.9112, + "step": 98740 + }, + { + "epoch": 0.4931458962770606, + "grad_norm": 0.09017834067344666, + "learning_rate": 1.5243685699266564e-05, + "loss": 8.9181, + "step": 98750 + }, + { + "epoch": 0.49319583510200005, + "grad_norm": 0.09475754201412201, + "learning_rate": 1.5242183784325016e-05, + "loss": 8.9081, + "step": 98760 + }, + { + "epoch": 0.4932457739269395, + "grad_norm": 0.09371718764305115, + "learning_rate": 1.5240681869383464e-05, + "loss": 8.9163, + "step": 98770 + }, + { + "epoch": 0.49329571275187895, + "grad_norm": 0.09911168366670609, + "learning_rate": 1.5239179954441915e-05, + "loss": 8.9001, + "step": 98780 + }, + { + "epoch": 0.4933456515768184, + "grad_norm": 0.09791523963212967, + "learning_rate": 1.5237678039500363e-05, + "loss": 8.9206, + "step": 98790 + }, + { + "epoch": 0.49339559040175784, + "grad_norm": 0.09776368737220764, + "learning_rate": 1.5236176124558811e-05, + "loss": 8.9072, + "step": 98800 + }, + { + "epoch": 0.4934455292266973, + "grad_norm": 0.09171529114246368, + "learning_rate": 1.5234674209617263e-05, + "loss": 8.9162, + "step": 98810 + }, + { + "epoch": 0.49349546805163674, + "grad_norm": 0.08931485563516617, + "learning_rate": 1.5233172294675712e-05, + "loss": 8.9238, + "step": 98820 + }, + { + "epoch": 0.4935454068765762, + "grad_norm": 0.09580953419208527, + "learning_rate": 1.5231670379734162e-05, + "loss": 8.9204, + "step": 98830 + }, + { + "epoch": 0.49359534570151564, + "grad_norm": 0.09444009512662888, + "learning_rate": 1.523016846479261e-05, + "loss": 8.9138, + "step": 98840 + }, + { + "epoch": 0.4936452845264551, + "grad_norm": 0.0973835289478302, + "learning_rate": 1.5228666549851059e-05, + "loss": 8.8994, + "step": 98850 + }, + { + "epoch": 0.49369522335139454, + "grad_norm": 0.0919685810804367, + "learning_rate": 1.5227164634909511e-05, + "loss": 8.9286, + "step": 98860 + }, + { + "epoch": 0.493745162176334, + "grad_norm": 0.09695620089769363, + "learning_rate": 1.522566271996796e-05, + "loss": 8.9083, + "step": 98870 + }, + { + "epoch": 0.49379510100127344, + "grad_norm": 0.08825941383838654, + "learning_rate": 1.522416080502641e-05, + "loss": 8.9236, + "step": 98880 + }, + { + "epoch": 0.49384503982621286, + "grad_norm": 0.08976638317108154, + "learning_rate": 1.5222658890084858e-05, + "loss": 8.9358, + "step": 98890 + }, + { + "epoch": 0.49389497865115234, + "grad_norm": 0.09463369101285934, + "learning_rate": 1.5221156975143306e-05, + "loss": 8.9231, + "step": 98900 + }, + { + "epoch": 0.49394491747609176, + "grad_norm": 0.09321442991495132, + "learning_rate": 1.5219655060201758e-05, + "loss": 8.9213, + "step": 98910 + }, + { + "epoch": 0.49399485630103124, + "grad_norm": 0.0944615826010704, + "learning_rate": 1.5218153145260207e-05, + "loss": 8.9208, + "step": 98920 + }, + { + "epoch": 0.49404479512597066, + "grad_norm": 0.09346310794353485, + "learning_rate": 1.5216651230318657e-05, + "loss": 8.9182, + "step": 98930 + }, + { + "epoch": 0.49409473395091014, + "grad_norm": 0.09217849373817444, + "learning_rate": 1.5215149315377106e-05, + "loss": 8.9081, + "step": 98940 + }, + { + "epoch": 0.49414467277584956, + "grad_norm": 0.09414231032133102, + "learning_rate": 1.5213647400435554e-05, + "loss": 8.9238, + "step": 98950 + }, + { + "epoch": 0.49419461160078904, + "grad_norm": 0.09089777618646622, + "learning_rate": 1.5212145485494006e-05, + "loss": 8.9218, + "step": 98960 + }, + { + "epoch": 0.49424455042572846, + "grad_norm": 0.09277298301458359, + "learning_rate": 1.5210643570552454e-05, + "loss": 8.9305, + "step": 98970 + }, + { + "epoch": 0.49429448925066793, + "grad_norm": 0.09524578601121902, + "learning_rate": 1.5209141655610905e-05, + "loss": 8.9063, + "step": 98980 + }, + { + "epoch": 0.49434442807560736, + "grad_norm": 0.09183613210916519, + "learning_rate": 1.5207639740669353e-05, + "loss": 8.9204, + "step": 98990 + }, + { + "epoch": 0.49439436690054683, + "grad_norm": 0.09120965749025345, + "learning_rate": 1.5206137825727805e-05, + "loss": 8.92, + "step": 99000 + }, + { + "epoch": 0.49444430572548626, + "grad_norm": 0.09432142972946167, + "learning_rate": 1.5204635910786253e-05, + "loss": 8.9168, + "step": 99010 + }, + { + "epoch": 0.49449424455042573, + "grad_norm": 0.0890686959028244, + "learning_rate": 1.5203133995844702e-05, + "loss": 8.9168, + "step": 99020 + }, + { + "epoch": 0.49454418337536515, + "grad_norm": 0.09807530790567398, + "learning_rate": 1.5201632080903152e-05, + "loss": 8.9078, + "step": 99030 + }, + { + "epoch": 0.49459412220030463, + "grad_norm": 0.09491509944200516, + "learning_rate": 1.52001301659616e-05, + "loss": 8.909, + "step": 99040 + }, + { + "epoch": 0.49464406102524405, + "grad_norm": 0.09278037399053574, + "learning_rate": 1.5198628251020052e-05, + "loss": 8.9086, + "step": 99050 + }, + { + "epoch": 0.49469399985018353, + "grad_norm": 0.09126697480678558, + "learning_rate": 1.5197126336078501e-05, + "loss": 8.9129, + "step": 99060 + }, + { + "epoch": 0.49474393867512295, + "grad_norm": 0.0916236862540245, + "learning_rate": 1.519562442113695e-05, + "loss": 8.9122, + "step": 99070 + }, + { + "epoch": 0.49479387750006243, + "grad_norm": 0.08973243832588196, + "learning_rate": 1.51941225061954e-05, + "loss": 8.9116, + "step": 99080 + }, + { + "epoch": 0.49484381632500185, + "grad_norm": 0.09231727570295334, + "learning_rate": 1.5192620591253848e-05, + "loss": 8.9159, + "step": 99090 + }, + { + "epoch": 0.49489375514994133, + "grad_norm": 0.09403295069932938, + "learning_rate": 1.51911186763123e-05, + "loss": 8.9249, + "step": 99100 + }, + { + "epoch": 0.49494369397488075, + "grad_norm": 0.09361027926206589, + "learning_rate": 1.5189616761370748e-05, + "loss": 8.9205, + "step": 99110 + }, + { + "epoch": 0.4949936327998202, + "grad_norm": 0.09127476811408997, + "learning_rate": 1.5188114846429197e-05, + "loss": 8.9237, + "step": 99120 + }, + { + "epoch": 0.49504357162475965, + "grad_norm": 0.090853251516819, + "learning_rate": 1.5186612931487647e-05, + "loss": 8.9382, + "step": 99130 + }, + { + "epoch": 0.4950935104496991, + "grad_norm": 0.09697920083999634, + "learning_rate": 1.5185111016546096e-05, + "loss": 8.9156, + "step": 99140 + }, + { + "epoch": 0.49514344927463855, + "grad_norm": 0.09126929193735123, + "learning_rate": 1.5183609101604547e-05, + "loss": 8.9194, + "step": 99150 + }, + { + "epoch": 0.495193388099578, + "grad_norm": 0.0899655893445015, + "learning_rate": 1.5182107186662996e-05, + "loss": 8.9068, + "step": 99160 + }, + { + "epoch": 0.49524332692451745, + "grad_norm": 0.08864665776491165, + "learning_rate": 1.5180605271721444e-05, + "loss": 8.8933, + "step": 99170 + }, + { + "epoch": 0.4952932657494569, + "grad_norm": 0.09634734690189362, + "learning_rate": 1.5179103356779895e-05, + "loss": 8.9322, + "step": 99180 + }, + { + "epoch": 0.49534320457439635, + "grad_norm": 0.0953470766544342, + "learning_rate": 1.5177601441838343e-05, + "loss": 8.9094, + "step": 99190 + }, + { + "epoch": 0.4953931433993358, + "grad_norm": 0.0901167094707489, + "learning_rate": 1.5176099526896795e-05, + "loss": 8.9126, + "step": 99200 + }, + { + "epoch": 0.49544308222427524, + "grad_norm": 0.09353036433458328, + "learning_rate": 1.5174597611955243e-05, + "loss": 8.9145, + "step": 99210 + }, + { + "epoch": 0.4954930210492147, + "grad_norm": 0.0913802906870842, + "learning_rate": 1.5173095697013692e-05, + "loss": 8.9135, + "step": 99220 + }, + { + "epoch": 0.49554295987415414, + "grad_norm": 0.09367360174655914, + "learning_rate": 1.5171593782072142e-05, + "loss": 8.9098, + "step": 99230 + }, + { + "epoch": 0.4955928986990936, + "grad_norm": 0.08964614570140839, + "learning_rate": 1.517009186713059e-05, + "loss": 8.9137, + "step": 99240 + }, + { + "epoch": 0.49564283752403304, + "grad_norm": 0.0972425788640976, + "learning_rate": 1.5168589952189042e-05, + "loss": 8.9079, + "step": 99250 + }, + { + "epoch": 0.4956927763489725, + "grad_norm": 0.08969489485025406, + "learning_rate": 1.5167088037247491e-05, + "loss": 8.9144, + "step": 99260 + }, + { + "epoch": 0.49574271517391194, + "grad_norm": 0.09431774914264679, + "learning_rate": 1.516558612230594e-05, + "loss": 8.9184, + "step": 99270 + }, + { + "epoch": 0.4957926539988514, + "grad_norm": 0.0908089205622673, + "learning_rate": 1.516408420736439e-05, + "loss": 8.9106, + "step": 99280 + }, + { + "epoch": 0.49584259282379084, + "grad_norm": 0.08945730328559875, + "learning_rate": 1.5162582292422838e-05, + "loss": 8.9202, + "step": 99290 + }, + { + "epoch": 0.4958925316487303, + "grad_norm": 0.09563491493463516, + "learning_rate": 1.516108037748129e-05, + "loss": 8.8976, + "step": 99300 + }, + { + "epoch": 0.49594247047366974, + "grad_norm": 0.0948091596364975, + "learning_rate": 1.5159578462539738e-05, + "loss": 8.916, + "step": 99310 + }, + { + "epoch": 0.4959924092986092, + "grad_norm": 0.09705643355846405, + "learning_rate": 1.515807654759819e-05, + "loss": 8.9191, + "step": 99320 + }, + { + "epoch": 0.49604234812354864, + "grad_norm": 0.08930245041847229, + "learning_rate": 1.5156574632656639e-05, + "loss": 8.9, + "step": 99330 + }, + { + "epoch": 0.4960922869484881, + "grad_norm": 0.09772713482379913, + "learning_rate": 1.5155072717715086e-05, + "loss": 8.9179, + "step": 99340 + }, + { + "epoch": 0.49614222577342754, + "grad_norm": 0.0901922956109047, + "learning_rate": 1.5153570802773537e-05, + "loss": 8.9004, + "step": 99350 + }, + { + "epoch": 0.496192164598367, + "grad_norm": 0.09526271373033524, + "learning_rate": 1.5152068887831986e-05, + "loss": 8.9225, + "step": 99360 + }, + { + "epoch": 0.49624210342330644, + "grad_norm": 0.09357525408267975, + "learning_rate": 1.5150566972890438e-05, + "loss": 8.9093, + "step": 99370 + }, + { + "epoch": 0.4962920422482459, + "grad_norm": 0.09719167649745941, + "learning_rate": 1.5149065057948886e-05, + "loss": 8.9092, + "step": 99380 + }, + { + "epoch": 0.49634198107318533, + "grad_norm": 0.09308057278394699, + "learning_rate": 1.5147563143007333e-05, + "loss": 8.8939, + "step": 99390 + }, + { + "epoch": 0.4963919198981248, + "grad_norm": 0.0987476035952568, + "learning_rate": 1.5146061228065785e-05, + "loss": 8.9098, + "step": 99400 + }, + { + "epoch": 0.49644185872306423, + "grad_norm": 0.09410141408443451, + "learning_rate": 1.5144559313124233e-05, + "loss": 8.9034, + "step": 99410 + }, + { + "epoch": 0.4964917975480037, + "grad_norm": 0.09079299867153168, + "learning_rate": 1.5143057398182685e-05, + "loss": 8.9117, + "step": 99420 + }, + { + "epoch": 0.49654173637294313, + "grad_norm": 0.0934637263417244, + "learning_rate": 1.5141555483241134e-05, + "loss": 8.9108, + "step": 99430 + }, + { + "epoch": 0.4965916751978826, + "grad_norm": 0.09323903918266296, + "learning_rate": 1.514005356829958e-05, + "loss": 8.9113, + "step": 99440 + }, + { + "epoch": 0.49664161402282203, + "grad_norm": 0.09298430383205414, + "learning_rate": 1.5138551653358032e-05, + "loss": 8.9119, + "step": 99450 + }, + { + "epoch": 0.4966915528477615, + "grad_norm": 0.09651913493871689, + "learning_rate": 1.5137049738416481e-05, + "loss": 8.8993, + "step": 99460 + }, + { + "epoch": 0.49674149167270093, + "grad_norm": 0.0914125069975853, + "learning_rate": 1.5135547823474933e-05, + "loss": 8.9252, + "step": 99470 + }, + { + "epoch": 0.4967914304976404, + "grad_norm": 0.09153977036476135, + "learning_rate": 1.5134045908533381e-05, + "loss": 8.9252, + "step": 99480 + }, + { + "epoch": 0.49684136932257983, + "grad_norm": 0.08837665617465973, + "learning_rate": 1.5132543993591828e-05, + "loss": 8.8961, + "step": 99490 + }, + { + "epoch": 0.4968913081475193, + "grad_norm": 0.0936015248298645, + "learning_rate": 1.513104207865028e-05, + "loss": 8.9022, + "step": 99500 + }, + { + "epoch": 0.49694124697245873, + "grad_norm": 0.09733084589242935, + "learning_rate": 1.5129540163708728e-05, + "loss": 8.8925, + "step": 99510 + }, + { + "epoch": 0.4969911857973982, + "grad_norm": 0.09360146522521973, + "learning_rate": 1.512803824876718e-05, + "loss": 8.903, + "step": 99520 + }, + { + "epoch": 0.4970411246223376, + "grad_norm": 0.09262694418430328, + "learning_rate": 1.5126536333825629e-05, + "loss": 8.9168, + "step": 99530 + }, + { + "epoch": 0.4970910634472771, + "grad_norm": 0.0910312682390213, + "learning_rate": 1.5125034418884076e-05, + "loss": 8.9155, + "step": 99540 + }, + { + "epoch": 0.4971410022722165, + "grad_norm": 0.09175118058919907, + "learning_rate": 1.5123532503942527e-05, + "loss": 8.905, + "step": 99550 + }, + { + "epoch": 0.497190941097156, + "grad_norm": 0.08740215748548508, + "learning_rate": 1.5122030589000976e-05, + "loss": 8.9235, + "step": 99560 + }, + { + "epoch": 0.4972408799220954, + "grad_norm": 0.09946295619010925, + "learning_rate": 1.5120528674059428e-05, + "loss": 8.9026, + "step": 99570 + }, + { + "epoch": 0.4972908187470349, + "grad_norm": 0.09400948882102966, + "learning_rate": 1.5119026759117876e-05, + "loss": 8.9128, + "step": 99580 + }, + { + "epoch": 0.4973407575719743, + "grad_norm": 0.09512431174516678, + "learning_rate": 1.5117524844176323e-05, + "loss": 8.9141, + "step": 99590 + }, + { + "epoch": 0.4973906963969138, + "grad_norm": 0.09360211342573166, + "learning_rate": 1.5116022929234775e-05, + "loss": 8.9206, + "step": 99600 + }, + { + "epoch": 0.4974406352218532, + "grad_norm": 0.08853219449520111, + "learning_rate": 1.5114521014293223e-05, + "loss": 8.9061, + "step": 99610 + }, + { + "epoch": 0.4974905740467927, + "grad_norm": 0.09550435096025467, + "learning_rate": 1.5113019099351675e-05, + "loss": 8.909, + "step": 99620 + }, + { + "epoch": 0.4975405128717321, + "grad_norm": 0.09092508256435394, + "learning_rate": 1.5111517184410124e-05, + "loss": 8.9078, + "step": 99630 + }, + { + "epoch": 0.4975904516966716, + "grad_norm": 0.09557131677865982, + "learning_rate": 1.5110015269468574e-05, + "loss": 8.9048, + "step": 99640 + }, + { + "epoch": 0.497640390521611, + "grad_norm": 0.09490132331848145, + "learning_rate": 1.5108513354527022e-05, + "loss": 8.9167, + "step": 99650 + }, + { + "epoch": 0.4976903293465505, + "grad_norm": 0.09305530786514282, + "learning_rate": 1.5107011439585471e-05, + "loss": 8.9106, + "step": 99660 + }, + { + "epoch": 0.4977402681714899, + "grad_norm": 0.09075649082660675, + "learning_rate": 1.5105509524643923e-05, + "loss": 8.9161, + "step": 99670 + }, + { + "epoch": 0.4977902069964294, + "grad_norm": 0.0945330560207367, + "learning_rate": 1.5104007609702371e-05, + "loss": 8.9061, + "step": 99680 + }, + { + "epoch": 0.4978401458213688, + "grad_norm": 0.09516996145248413, + "learning_rate": 1.5102505694760821e-05, + "loss": 8.9028, + "step": 99690 + }, + { + "epoch": 0.4978900846463083, + "grad_norm": 0.09055611491203308, + "learning_rate": 1.510100377981927e-05, + "loss": 8.9215, + "step": 99700 + }, + { + "epoch": 0.4979400234712477, + "grad_norm": 0.0914728119969368, + "learning_rate": 1.5099501864877718e-05, + "loss": 8.9231, + "step": 99710 + }, + { + "epoch": 0.4979899622961872, + "grad_norm": 0.08997715264558792, + "learning_rate": 1.509799994993617e-05, + "loss": 8.9054, + "step": 99720 + }, + { + "epoch": 0.4980399011211266, + "grad_norm": 0.10086820274591446, + "learning_rate": 1.5096498034994619e-05, + "loss": 8.9084, + "step": 99730 + }, + { + "epoch": 0.4980898399460661, + "grad_norm": 0.09388542920351028, + "learning_rate": 1.5094996120053069e-05, + "loss": 8.9193, + "step": 99740 + }, + { + "epoch": 0.4981397787710055, + "grad_norm": 0.09304860979318619, + "learning_rate": 1.5093494205111517e-05, + "loss": 8.9002, + "step": 99750 + }, + { + "epoch": 0.498189717595945, + "grad_norm": 0.0892772451043129, + "learning_rate": 1.5091992290169966e-05, + "loss": 8.8994, + "step": 99760 + }, + { + "epoch": 0.4982396564208844, + "grad_norm": 0.09076282382011414, + "learning_rate": 1.5090490375228418e-05, + "loss": 8.9014, + "step": 99770 + }, + { + "epoch": 0.4982895952458239, + "grad_norm": 0.09353473037481308, + "learning_rate": 1.5088988460286866e-05, + "loss": 8.9208, + "step": 99780 + }, + { + "epoch": 0.4983395340707633, + "grad_norm": 0.09274925291538239, + "learning_rate": 1.5087486545345316e-05, + "loss": 8.9072, + "step": 99790 + }, + { + "epoch": 0.4983894728957028, + "grad_norm": 0.09765703231096268, + "learning_rate": 1.5085984630403765e-05, + "loss": 8.9014, + "step": 99800 + }, + { + "epoch": 0.4984394117206422, + "grad_norm": 0.08879239112138748, + "learning_rate": 1.5084482715462213e-05, + "loss": 8.9185, + "step": 99810 + }, + { + "epoch": 0.4984893505455817, + "grad_norm": 0.09869293868541718, + "learning_rate": 1.5082980800520665e-05, + "loss": 8.9048, + "step": 99820 + }, + { + "epoch": 0.4985392893705211, + "grad_norm": 0.09303834289312363, + "learning_rate": 1.5081478885579114e-05, + "loss": 8.9107, + "step": 99830 + }, + { + "epoch": 0.4985892281954606, + "grad_norm": 0.09813646227121353, + "learning_rate": 1.5079976970637564e-05, + "loss": 8.9082, + "step": 99840 + }, + { + "epoch": 0.4986391670204, + "grad_norm": 0.09101364761590958, + "learning_rate": 1.5078475055696012e-05, + "loss": 8.8986, + "step": 99850 + }, + { + "epoch": 0.4986891058453395, + "grad_norm": 0.09561823308467865, + "learning_rate": 1.5076973140754461e-05, + "loss": 8.901, + "step": 99860 + }, + { + "epoch": 0.4987390446702789, + "grad_norm": 0.09230560064315796, + "learning_rate": 1.5075471225812913e-05, + "loss": 8.9104, + "step": 99870 + }, + { + "epoch": 0.49878898349521833, + "grad_norm": 0.10049329698085785, + "learning_rate": 1.5073969310871361e-05, + "loss": 8.9125, + "step": 99880 + }, + { + "epoch": 0.4988389223201578, + "grad_norm": 0.08787727355957031, + "learning_rate": 1.5072467395929811e-05, + "loss": 8.9013, + "step": 99890 + }, + { + "epoch": 0.49888886114509723, + "grad_norm": 0.09034966677427292, + "learning_rate": 1.507096548098826e-05, + "loss": 8.9037, + "step": 99900 + }, + { + "epoch": 0.4989387999700367, + "grad_norm": 0.09500369429588318, + "learning_rate": 1.5069463566046708e-05, + "loss": 8.8933, + "step": 99910 + }, + { + "epoch": 0.4989887387949761, + "grad_norm": 0.09762393683195114, + "learning_rate": 1.506796165110516e-05, + "loss": 8.9136, + "step": 99920 + }, + { + "epoch": 0.4990386776199156, + "grad_norm": 0.09636267274618149, + "learning_rate": 1.5066459736163609e-05, + "loss": 8.9171, + "step": 99930 + }, + { + "epoch": 0.499088616444855, + "grad_norm": 0.09169818460941315, + "learning_rate": 1.5064957821222059e-05, + "loss": 8.9056, + "step": 99940 + }, + { + "epoch": 0.4991385552697945, + "grad_norm": 0.09505058825016022, + "learning_rate": 1.5063455906280507e-05, + "loss": 8.9185, + "step": 99950 + }, + { + "epoch": 0.4991884940947339, + "grad_norm": 0.09004746377468109, + "learning_rate": 1.5061953991338956e-05, + "loss": 8.9026, + "step": 99960 + }, + { + "epoch": 0.4992384329196734, + "grad_norm": 0.08870364725589752, + "learning_rate": 1.5060452076397408e-05, + "loss": 8.9053, + "step": 99970 + }, + { + "epoch": 0.4992883717446128, + "grad_norm": 0.08827803283929825, + "learning_rate": 1.5058950161455856e-05, + "loss": 8.9115, + "step": 99980 + }, + { + "epoch": 0.4993383105695523, + "grad_norm": 0.09465722739696503, + "learning_rate": 1.5057448246514306e-05, + "loss": 8.9056, + "step": 99990 + }, + { + "epoch": 0.4993882493944917, + "grad_norm": 0.0972822979092598, + "learning_rate": 1.5055946331572755e-05, + "loss": 8.9103, + "step": 100000 + }, + { + "epoch": 0.4994381882194312, + "grad_norm": 0.0914982259273529, + "learning_rate": 1.5054444416631207e-05, + "loss": 8.9047, + "step": 100010 + }, + { + "epoch": 0.4994881270443706, + "grad_norm": 0.09300156682729721, + "learning_rate": 1.5052942501689655e-05, + "loss": 8.9028, + "step": 100020 + }, + { + "epoch": 0.4995380658693101, + "grad_norm": 0.0923018679022789, + "learning_rate": 1.5051440586748104e-05, + "loss": 8.9091, + "step": 100030 + }, + { + "epoch": 0.4995880046942495, + "grad_norm": 0.09287258982658386, + "learning_rate": 1.5049938671806554e-05, + "loss": 8.9109, + "step": 100040 + }, + { + "epoch": 0.499637943519189, + "grad_norm": 0.0904991626739502, + "learning_rate": 1.5048436756865002e-05, + "loss": 8.9109, + "step": 100050 + }, + { + "epoch": 0.4996878823441284, + "grad_norm": 0.09570524841547012, + "learning_rate": 1.5046934841923454e-05, + "loss": 8.9005, + "step": 100060 + }, + { + "epoch": 0.4997378211690679, + "grad_norm": 0.09132358431816101, + "learning_rate": 1.5045432926981903e-05, + "loss": 8.9182, + "step": 100070 + }, + { + "epoch": 0.4997877599940073, + "grad_norm": 0.09124463051557541, + "learning_rate": 1.5043931012040351e-05, + "loss": 8.9059, + "step": 100080 + }, + { + "epoch": 0.4998376988189468, + "grad_norm": 0.08917669951915741, + "learning_rate": 1.5042429097098801e-05, + "loss": 8.9083, + "step": 100090 + }, + { + "epoch": 0.4998876376438862, + "grad_norm": 0.09558670222759247, + "learning_rate": 1.504092718215725e-05, + "loss": 8.9074, + "step": 100100 + }, + { + "epoch": 0.4999375764688257, + "grad_norm": 0.09686680883169174, + "learning_rate": 1.5039425267215702e-05, + "loss": 8.9087, + "step": 100110 + }, + { + "epoch": 0.4999875152937651, + "grad_norm": 0.08919398486614227, + "learning_rate": 1.503792335227415e-05, + "loss": 8.911, + "step": 100120 + }, + { + "epoch": 0.5000374541187046, + "grad_norm": 0.09795615077018738, + "learning_rate": 1.5036421437332599e-05, + "loss": 8.8899, + "step": 100130 + }, + { + "epoch": 0.500087392943644, + "grad_norm": 0.09374431520700455, + "learning_rate": 1.5034919522391049e-05, + "loss": 8.896, + "step": 100140 + }, + { + "epoch": 0.5001373317685834, + "grad_norm": 0.09322655200958252, + "learning_rate": 1.5033417607449497e-05, + "loss": 8.901, + "step": 100150 + }, + { + "epoch": 0.500187270593523, + "grad_norm": 0.0963103175163269, + "learning_rate": 1.503191569250795e-05, + "loss": 8.9022, + "step": 100160 + }, + { + "epoch": 0.5002372094184624, + "grad_norm": 0.08915145695209503, + "learning_rate": 1.5030413777566398e-05, + "loss": 8.902, + "step": 100170 + }, + { + "epoch": 0.5002871482434018, + "grad_norm": 0.08930926024913788, + "learning_rate": 1.5028911862624846e-05, + "loss": 8.9118, + "step": 100180 + }, + { + "epoch": 0.5003370870683412, + "grad_norm": 0.09767061471939087, + "learning_rate": 1.5027409947683296e-05, + "loss": 8.9057, + "step": 100190 + }, + { + "epoch": 0.5003870258932808, + "grad_norm": 0.09693524241447449, + "learning_rate": 1.5025908032741745e-05, + "loss": 8.8938, + "step": 100200 + }, + { + "epoch": 0.5004369647182202, + "grad_norm": 0.09122538566589355, + "learning_rate": 1.5024406117800197e-05, + "loss": 8.9038, + "step": 100210 + }, + { + "epoch": 0.5004869035431596, + "grad_norm": 0.0925898402929306, + "learning_rate": 1.5022904202858645e-05, + "loss": 8.9147, + "step": 100220 + }, + { + "epoch": 0.500536842368099, + "grad_norm": 0.08967383205890656, + "learning_rate": 1.5021402287917094e-05, + "loss": 8.9091, + "step": 100230 + }, + { + "epoch": 0.5005867811930386, + "grad_norm": 0.09207455813884735, + "learning_rate": 1.5019900372975544e-05, + "loss": 8.9131, + "step": 100240 + }, + { + "epoch": 0.500636720017978, + "grad_norm": 0.09674876183271408, + "learning_rate": 1.5018398458033992e-05, + "loss": 8.9054, + "step": 100250 + }, + { + "epoch": 0.5006866588429174, + "grad_norm": 0.09284425526857376, + "learning_rate": 1.5016896543092444e-05, + "loss": 8.9048, + "step": 100260 + }, + { + "epoch": 0.5007365976678568, + "grad_norm": 0.09239511936903, + "learning_rate": 1.5015394628150893e-05, + "loss": 8.9162, + "step": 100270 + }, + { + "epoch": 0.5007865364927964, + "grad_norm": 0.0956454798579216, + "learning_rate": 1.5013892713209341e-05, + "loss": 8.9134, + "step": 100280 + }, + { + "epoch": 0.5008364753177358, + "grad_norm": 0.0959239974617958, + "learning_rate": 1.5012390798267791e-05, + "loss": 8.8996, + "step": 100290 + }, + { + "epoch": 0.5008864141426752, + "grad_norm": 0.0976717472076416, + "learning_rate": 1.501088888332624e-05, + "loss": 8.9049, + "step": 100300 + }, + { + "epoch": 0.5009363529676146, + "grad_norm": 0.09285130351781845, + "learning_rate": 1.5009386968384692e-05, + "loss": 8.9057, + "step": 100310 + }, + { + "epoch": 0.5009862917925542, + "grad_norm": 0.08767900615930557, + "learning_rate": 1.500788505344314e-05, + "loss": 8.9035, + "step": 100320 + }, + { + "epoch": 0.5010362306174936, + "grad_norm": 0.09420838207006454, + "learning_rate": 1.5006383138501592e-05, + "loss": 8.8962, + "step": 100330 + }, + { + "epoch": 0.501086169442433, + "grad_norm": 0.09375480562448502, + "learning_rate": 1.5004881223560039e-05, + "loss": 8.9055, + "step": 100340 + }, + { + "epoch": 0.5011361082673724, + "grad_norm": 0.09297312796115875, + "learning_rate": 1.5003379308618487e-05, + "loss": 8.9098, + "step": 100350 + }, + { + "epoch": 0.501186047092312, + "grad_norm": 0.09020781517028809, + "learning_rate": 1.500187739367694e-05, + "loss": 8.9313, + "step": 100360 + }, + { + "epoch": 0.5012359859172514, + "grad_norm": 0.09823616594076157, + "learning_rate": 1.5000375478735388e-05, + "loss": 8.9211, + "step": 100370 + }, + { + "epoch": 0.5012859247421908, + "grad_norm": 0.09264297038316727, + "learning_rate": 1.4998873563793838e-05, + "loss": 8.9147, + "step": 100380 + }, + { + "epoch": 0.5013358635671302, + "grad_norm": 0.09266399592161179, + "learning_rate": 1.4997371648852286e-05, + "loss": 8.9005, + "step": 100390 + }, + { + "epoch": 0.5013858023920698, + "grad_norm": 0.0926387682557106, + "learning_rate": 1.4995869733910737e-05, + "loss": 8.9014, + "step": 100400 + }, + { + "epoch": 0.5014357412170092, + "grad_norm": 0.09379039704799652, + "learning_rate": 1.4994367818969187e-05, + "loss": 8.9031, + "step": 100410 + }, + { + "epoch": 0.5014856800419486, + "grad_norm": 0.09653208404779434, + "learning_rate": 1.4992865904027635e-05, + "loss": 8.9117, + "step": 100420 + }, + { + "epoch": 0.501535618866888, + "grad_norm": 0.09672179073095322, + "learning_rate": 1.4991363989086086e-05, + "loss": 8.9151, + "step": 100430 + }, + { + "epoch": 0.5015855576918276, + "grad_norm": 0.09220828860998154, + "learning_rate": 1.4989862074144534e-05, + "loss": 8.918, + "step": 100440 + }, + { + "epoch": 0.501635496516767, + "grad_norm": 0.08738303184509277, + "learning_rate": 1.4988360159202984e-05, + "loss": 8.8946, + "step": 100450 + }, + { + "epoch": 0.5016854353417064, + "grad_norm": 0.0968918427824974, + "learning_rate": 1.4986858244261434e-05, + "loss": 8.9172, + "step": 100460 + }, + { + "epoch": 0.5017353741666458, + "grad_norm": 0.08795435726642609, + "learning_rate": 1.4985356329319883e-05, + "loss": 8.8957, + "step": 100470 + }, + { + "epoch": 0.5017853129915854, + "grad_norm": 0.09090543538331985, + "learning_rate": 1.4983854414378333e-05, + "loss": 8.9172, + "step": 100480 + }, + { + "epoch": 0.5018352518165248, + "grad_norm": 0.09485392272472382, + "learning_rate": 1.4982352499436782e-05, + "loss": 8.908, + "step": 100490 + }, + { + "epoch": 0.5018851906414642, + "grad_norm": 0.094363734126091, + "learning_rate": 1.4980850584495232e-05, + "loss": 8.9137, + "step": 100500 + }, + { + "epoch": 0.5019351294664036, + "grad_norm": 0.08762264251708984, + "learning_rate": 1.4979348669553682e-05, + "loss": 8.916, + "step": 100510 + }, + { + "epoch": 0.5019850682913432, + "grad_norm": 0.09093135595321655, + "learning_rate": 1.497784675461213e-05, + "loss": 8.9092, + "step": 100520 + }, + { + "epoch": 0.5020350071162826, + "grad_norm": 0.08741986751556396, + "learning_rate": 1.497634483967058e-05, + "loss": 8.9032, + "step": 100530 + }, + { + "epoch": 0.502084945941222, + "grad_norm": 0.09750941395759583, + "learning_rate": 1.4974842924729029e-05, + "loss": 8.908, + "step": 100540 + }, + { + "epoch": 0.5021348847661614, + "grad_norm": 0.09141114354133606, + "learning_rate": 1.497334100978748e-05, + "loss": 8.8956, + "step": 100550 + }, + { + "epoch": 0.502184823591101, + "grad_norm": 0.08919312804937363, + "learning_rate": 1.497183909484593e-05, + "loss": 8.9119, + "step": 100560 + }, + { + "epoch": 0.5022347624160404, + "grad_norm": 0.09717653691768646, + "learning_rate": 1.497033717990438e-05, + "loss": 8.8956, + "step": 100570 + }, + { + "epoch": 0.5022847012409798, + "grad_norm": 0.09845616668462753, + "learning_rate": 1.4968835264962828e-05, + "loss": 8.8947, + "step": 100580 + }, + { + "epoch": 0.5023346400659192, + "grad_norm": 0.09342017769813538, + "learning_rate": 1.4967333350021277e-05, + "loss": 8.903, + "step": 100590 + }, + { + "epoch": 0.5023845788908587, + "grad_norm": 0.08772523701190948, + "learning_rate": 1.4965831435079727e-05, + "loss": 8.9031, + "step": 100600 + }, + { + "epoch": 0.5024345177157982, + "grad_norm": 0.09380695968866348, + "learning_rate": 1.4964329520138177e-05, + "loss": 8.9074, + "step": 100610 + }, + { + "epoch": 0.5024844565407376, + "grad_norm": 0.09516066312789917, + "learning_rate": 1.4962827605196627e-05, + "loss": 8.8997, + "step": 100620 + }, + { + "epoch": 0.502534395365677, + "grad_norm": 0.0951061025261879, + "learning_rate": 1.4961325690255076e-05, + "loss": 8.8925, + "step": 100630 + }, + { + "epoch": 0.5025843341906165, + "grad_norm": 0.09456291794776917, + "learning_rate": 1.4959823775313524e-05, + "loss": 8.8921, + "step": 100640 + }, + { + "epoch": 0.502634273015556, + "grad_norm": 0.09144452959299088, + "learning_rate": 1.4958321860371974e-05, + "loss": 8.8916, + "step": 100650 + }, + { + "epoch": 0.5026842118404954, + "grad_norm": 0.09398289024829865, + "learning_rate": 1.4956819945430424e-05, + "loss": 8.9007, + "step": 100660 + }, + { + "epoch": 0.5027341506654348, + "grad_norm": 0.09931785613298416, + "learning_rate": 1.4955318030488875e-05, + "loss": 8.903, + "step": 100670 + }, + { + "epoch": 0.5027840894903743, + "grad_norm": 0.09348601847887039, + "learning_rate": 1.4953816115547323e-05, + "loss": 8.9051, + "step": 100680 + }, + { + "epoch": 0.5028340283153138, + "grad_norm": 0.08900678902864456, + "learning_rate": 1.4952314200605772e-05, + "loss": 8.9082, + "step": 100690 + }, + { + "epoch": 0.5028839671402532, + "grad_norm": 0.09216514974832535, + "learning_rate": 1.4950812285664222e-05, + "loss": 8.9075, + "step": 100700 + }, + { + "epoch": 0.5029339059651926, + "grad_norm": 0.09171604365110397, + "learning_rate": 1.4949310370722672e-05, + "loss": 8.9241, + "step": 100710 + }, + { + "epoch": 0.5029838447901321, + "grad_norm": 0.08902275562286377, + "learning_rate": 1.4947808455781122e-05, + "loss": 8.9145, + "step": 100720 + }, + { + "epoch": 0.5030337836150716, + "grad_norm": 0.09136269986629486, + "learning_rate": 1.4946306540839572e-05, + "loss": 8.9118, + "step": 100730 + }, + { + "epoch": 0.503083722440011, + "grad_norm": 0.09541922062635422, + "learning_rate": 1.4944804625898019e-05, + "loss": 8.9042, + "step": 100740 + }, + { + "epoch": 0.5031336612649504, + "grad_norm": 0.08974391967058182, + "learning_rate": 1.494330271095647e-05, + "loss": 8.9063, + "step": 100750 + }, + { + "epoch": 0.5031836000898899, + "grad_norm": 0.0941217839717865, + "learning_rate": 1.494180079601492e-05, + "loss": 8.9088, + "step": 100760 + }, + { + "epoch": 0.5032335389148294, + "grad_norm": 0.0899343267083168, + "learning_rate": 1.494029888107337e-05, + "loss": 8.8967, + "step": 100770 + }, + { + "epoch": 0.5032834777397688, + "grad_norm": 0.09517884254455566, + "learning_rate": 1.493879696613182e-05, + "loss": 8.8919, + "step": 100780 + }, + { + "epoch": 0.5033334165647082, + "grad_norm": 0.08698021620512009, + "learning_rate": 1.4937295051190267e-05, + "loss": 8.9165, + "step": 100790 + }, + { + "epoch": 0.5033833553896477, + "grad_norm": 0.09504187107086182, + "learning_rate": 1.4935793136248717e-05, + "loss": 8.9146, + "step": 100800 + }, + { + "epoch": 0.5034332942145872, + "grad_norm": 0.09907368570566177, + "learning_rate": 1.4934291221307167e-05, + "loss": 8.8958, + "step": 100810 + }, + { + "epoch": 0.5034832330395266, + "grad_norm": 0.09145436435937881, + "learning_rate": 1.4932789306365617e-05, + "loss": 8.9008, + "step": 100820 + }, + { + "epoch": 0.503533171864466, + "grad_norm": 0.09592849761247635, + "learning_rate": 1.4931287391424067e-05, + "loss": 8.9033, + "step": 100830 + }, + { + "epoch": 0.5035831106894055, + "grad_norm": 0.0957927331328392, + "learning_rate": 1.4929785476482514e-05, + "loss": 8.91, + "step": 100840 + }, + { + "epoch": 0.503633049514345, + "grad_norm": 0.09893249720335007, + "learning_rate": 1.4928283561540964e-05, + "loss": 8.9059, + "step": 100850 + }, + { + "epoch": 0.5036829883392844, + "grad_norm": 0.0861205905675888, + "learning_rate": 1.4926781646599414e-05, + "loss": 8.9054, + "step": 100860 + }, + { + "epoch": 0.5037329271642238, + "grad_norm": 0.08853903412818909, + "learning_rate": 1.4925279731657865e-05, + "loss": 8.9096, + "step": 100870 + }, + { + "epoch": 0.5037828659891632, + "grad_norm": 0.09132624417543411, + "learning_rate": 1.4923777816716315e-05, + "loss": 8.8916, + "step": 100880 + }, + { + "epoch": 0.5038328048141028, + "grad_norm": 0.08741272985935211, + "learning_rate": 1.4922275901774763e-05, + "loss": 8.8881, + "step": 100890 + }, + { + "epoch": 0.5038827436390422, + "grad_norm": 0.09232524782419205, + "learning_rate": 1.4920773986833212e-05, + "loss": 8.9061, + "step": 100900 + }, + { + "epoch": 0.5039326824639816, + "grad_norm": 0.09237165004014969, + "learning_rate": 1.4919272071891662e-05, + "loss": 8.9059, + "step": 100910 + }, + { + "epoch": 0.503982621288921, + "grad_norm": 0.09467703849077225, + "learning_rate": 1.4917770156950112e-05, + "loss": 8.9001, + "step": 100920 + }, + { + "epoch": 0.5040325601138606, + "grad_norm": 0.09434116631746292, + "learning_rate": 1.4916268242008562e-05, + "loss": 8.8948, + "step": 100930 + }, + { + "epoch": 0.5040824989388, + "grad_norm": 0.0881483182311058, + "learning_rate": 1.491476632706701e-05, + "loss": 8.8947, + "step": 100940 + }, + { + "epoch": 0.5041324377637394, + "grad_norm": 0.09188562631607056, + "learning_rate": 1.491326441212546e-05, + "loss": 8.9034, + "step": 100950 + }, + { + "epoch": 0.5041823765886788, + "grad_norm": 0.09108608961105347, + "learning_rate": 1.491176249718391e-05, + "loss": 8.8958, + "step": 100960 + }, + { + "epoch": 0.5042323154136183, + "grad_norm": 0.09667716175317764, + "learning_rate": 1.491026058224236e-05, + "loss": 8.8876, + "step": 100970 + }, + { + "epoch": 0.5042822542385578, + "grad_norm": 0.0894923135638237, + "learning_rate": 1.490875866730081e-05, + "loss": 8.8906, + "step": 100980 + }, + { + "epoch": 0.5043321930634972, + "grad_norm": 0.08996430784463882, + "learning_rate": 1.4907256752359258e-05, + "loss": 8.8851, + "step": 100990 + }, + { + "epoch": 0.5043821318884366, + "grad_norm": 0.0906483381986618, + "learning_rate": 1.4905754837417707e-05, + "loss": 8.8891, + "step": 101000 + }, + { + "epoch": 0.5044320707133761, + "grad_norm": 0.09572287648916245, + "learning_rate": 1.4904252922476157e-05, + "loss": 8.906, + "step": 101010 + }, + { + "epoch": 0.5044820095383156, + "grad_norm": 0.09146282821893692, + "learning_rate": 1.4902751007534607e-05, + "loss": 8.9083, + "step": 101020 + }, + { + "epoch": 0.504531948363255, + "grad_norm": 0.10019837319850922, + "learning_rate": 1.4901249092593057e-05, + "loss": 8.9084, + "step": 101030 + }, + { + "epoch": 0.5045818871881944, + "grad_norm": 0.09378591179847717, + "learning_rate": 1.4899747177651506e-05, + "loss": 8.899, + "step": 101040 + }, + { + "epoch": 0.5046318260131339, + "grad_norm": 0.0919695645570755, + "learning_rate": 1.4898245262709956e-05, + "loss": 8.899, + "step": 101050 + }, + { + "epoch": 0.5046817648380734, + "grad_norm": 0.09100176393985748, + "learning_rate": 1.4896743347768404e-05, + "loss": 8.9073, + "step": 101060 + }, + { + "epoch": 0.5047317036630128, + "grad_norm": 0.09281841665506363, + "learning_rate": 1.4895241432826855e-05, + "loss": 8.9013, + "step": 101070 + }, + { + "epoch": 0.5047816424879522, + "grad_norm": 0.09110787510871887, + "learning_rate": 1.4893739517885305e-05, + "loss": 8.885, + "step": 101080 + }, + { + "epoch": 0.5048315813128917, + "grad_norm": 0.09434214234352112, + "learning_rate": 1.4892237602943753e-05, + "loss": 8.9101, + "step": 101090 + }, + { + "epoch": 0.5048815201378312, + "grad_norm": 0.09112465381622314, + "learning_rate": 1.4890735688002203e-05, + "loss": 8.8961, + "step": 101100 + }, + { + "epoch": 0.5049314589627706, + "grad_norm": 0.0931396409869194, + "learning_rate": 1.4889233773060652e-05, + "loss": 8.9027, + "step": 101110 + }, + { + "epoch": 0.50498139778771, + "grad_norm": 0.0969899371266365, + "learning_rate": 1.4887731858119102e-05, + "loss": 8.8754, + "step": 101120 + }, + { + "epoch": 0.5050313366126495, + "grad_norm": 0.09737983345985413, + "learning_rate": 1.4886229943177552e-05, + "loss": 8.896, + "step": 101130 + }, + { + "epoch": 0.505081275437589, + "grad_norm": 0.0903887152671814, + "learning_rate": 1.4884728028236e-05, + "loss": 8.8901, + "step": 101140 + }, + { + "epoch": 0.5051312142625284, + "grad_norm": 0.09329243004322052, + "learning_rate": 1.4883226113294451e-05, + "loss": 8.9111, + "step": 101150 + }, + { + "epoch": 0.5051811530874678, + "grad_norm": 0.09709962457418442, + "learning_rate": 1.48817241983529e-05, + "loss": 8.9032, + "step": 101160 + }, + { + "epoch": 0.5052310919124073, + "grad_norm": 0.09550347924232483, + "learning_rate": 1.488022228341135e-05, + "loss": 8.8953, + "step": 101170 + }, + { + "epoch": 0.5052810307373468, + "grad_norm": 0.09034687280654907, + "learning_rate": 1.48787203684698e-05, + "loss": 8.8965, + "step": 101180 + }, + { + "epoch": 0.5053309695622862, + "grad_norm": 0.08858486264944077, + "learning_rate": 1.4877218453528248e-05, + "loss": 8.8872, + "step": 101190 + }, + { + "epoch": 0.5053809083872256, + "grad_norm": 0.09409047663211823, + "learning_rate": 1.4875716538586698e-05, + "loss": 8.8993, + "step": 101200 + }, + { + "epoch": 0.5054308472121651, + "grad_norm": 0.09161051362752914, + "learning_rate": 1.4874214623645149e-05, + "loss": 8.8977, + "step": 101210 + }, + { + "epoch": 0.5054807860371046, + "grad_norm": 0.09429701417684555, + "learning_rate": 1.4872712708703597e-05, + "loss": 8.9064, + "step": 101220 + }, + { + "epoch": 0.505530724862044, + "grad_norm": 0.09554371237754822, + "learning_rate": 1.4871210793762047e-05, + "loss": 8.8981, + "step": 101230 + }, + { + "epoch": 0.5055806636869834, + "grad_norm": 0.10310646891593933, + "learning_rate": 1.4869708878820496e-05, + "loss": 8.899, + "step": 101240 + }, + { + "epoch": 0.5056306025119229, + "grad_norm": 0.09066363424062729, + "learning_rate": 1.4868206963878946e-05, + "loss": 8.9038, + "step": 101250 + }, + { + "epoch": 0.5056805413368624, + "grad_norm": 0.09133642911911011, + "learning_rate": 1.4866705048937396e-05, + "loss": 8.893, + "step": 101260 + }, + { + "epoch": 0.5057304801618018, + "grad_norm": 0.09266974031925201, + "learning_rate": 1.4865203133995845e-05, + "loss": 8.8918, + "step": 101270 + }, + { + "epoch": 0.5057804189867412, + "grad_norm": 0.09828782081604004, + "learning_rate": 1.4863701219054295e-05, + "loss": 8.9079, + "step": 101280 + }, + { + "epoch": 0.5058303578116807, + "grad_norm": 0.09121903032064438, + "learning_rate": 1.4862199304112743e-05, + "loss": 8.9018, + "step": 101290 + }, + { + "epoch": 0.5058802966366202, + "grad_norm": 0.09173334389925003, + "learning_rate": 1.4860697389171193e-05, + "loss": 8.8951, + "step": 101300 + }, + { + "epoch": 0.5059302354615596, + "grad_norm": 0.09970010817050934, + "learning_rate": 1.4859195474229644e-05, + "loss": 8.8949, + "step": 101310 + }, + { + "epoch": 0.505980174286499, + "grad_norm": 0.0909634530544281, + "learning_rate": 1.4857693559288092e-05, + "loss": 8.8874, + "step": 101320 + }, + { + "epoch": 0.5060301131114385, + "grad_norm": 0.09572920203208923, + "learning_rate": 1.4856191644346542e-05, + "loss": 8.882, + "step": 101330 + }, + { + "epoch": 0.506080051936378, + "grad_norm": 0.08662997931241989, + "learning_rate": 1.485468972940499e-05, + "loss": 8.9034, + "step": 101340 + }, + { + "epoch": 0.5061299907613174, + "grad_norm": 0.09258969128131866, + "learning_rate": 1.4853187814463441e-05, + "loss": 8.8739, + "step": 101350 + }, + { + "epoch": 0.5061799295862568, + "grad_norm": 0.08739626407623291, + "learning_rate": 1.4851685899521891e-05, + "loss": 8.8976, + "step": 101360 + }, + { + "epoch": 0.5062298684111963, + "grad_norm": 0.09354405850172043, + "learning_rate": 1.4850183984580341e-05, + "loss": 8.8916, + "step": 101370 + }, + { + "epoch": 0.5062798072361357, + "grad_norm": 0.0959988608956337, + "learning_rate": 1.484868206963879e-05, + "loss": 8.9009, + "step": 101380 + }, + { + "epoch": 0.5063297460610752, + "grad_norm": 0.09055614471435547, + "learning_rate": 1.4847180154697238e-05, + "loss": 8.8916, + "step": 101390 + }, + { + "epoch": 0.5063796848860146, + "grad_norm": 0.09432981163263321, + "learning_rate": 1.4845678239755688e-05, + "loss": 8.9087, + "step": 101400 + }, + { + "epoch": 0.5064296237109541, + "grad_norm": 0.09564248472452164, + "learning_rate": 1.4844176324814139e-05, + "loss": 8.891, + "step": 101410 + }, + { + "epoch": 0.5064795625358935, + "grad_norm": 0.09236615151166916, + "learning_rate": 1.4842674409872589e-05, + "loss": 8.8975, + "step": 101420 + }, + { + "epoch": 0.506529501360833, + "grad_norm": 0.09159623831510544, + "learning_rate": 1.4841172494931037e-05, + "loss": 8.9015, + "step": 101430 + }, + { + "epoch": 0.5065794401857724, + "grad_norm": 0.09691286832094193, + "learning_rate": 1.4839670579989487e-05, + "loss": 8.8919, + "step": 101440 + }, + { + "epoch": 0.5066293790107119, + "grad_norm": 0.09283226728439331, + "learning_rate": 1.4838168665047936e-05, + "loss": 8.8946, + "step": 101450 + }, + { + "epoch": 0.5066793178356513, + "grad_norm": 0.09346681088209152, + "learning_rate": 1.4836666750106386e-05, + "loss": 8.8962, + "step": 101460 + }, + { + "epoch": 0.5067292566605908, + "grad_norm": 0.09086382389068604, + "learning_rate": 1.4835164835164836e-05, + "loss": 8.8942, + "step": 101470 + }, + { + "epoch": 0.5067791954855302, + "grad_norm": 0.09305468201637268, + "learning_rate": 1.4833662920223285e-05, + "loss": 8.8986, + "step": 101480 + }, + { + "epoch": 0.5068291343104697, + "grad_norm": 0.09416045248508453, + "learning_rate": 1.4832161005281735e-05, + "loss": 8.8897, + "step": 101490 + }, + { + "epoch": 0.5068790731354091, + "grad_norm": 0.09527656435966492, + "learning_rate": 1.4830659090340183e-05, + "loss": 8.8883, + "step": 101500 + }, + { + "epoch": 0.5069290119603486, + "grad_norm": 0.09679878503084183, + "learning_rate": 1.4829157175398634e-05, + "loss": 8.8931, + "step": 101510 + }, + { + "epoch": 0.506978950785288, + "grad_norm": 0.09845301508903503, + "learning_rate": 1.4827655260457084e-05, + "loss": 8.8859, + "step": 101520 + }, + { + "epoch": 0.5070288896102275, + "grad_norm": 0.09258875995874405, + "learning_rate": 1.4826153345515534e-05, + "loss": 8.8809, + "step": 101530 + }, + { + "epoch": 0.5070788284351669, + "grad_norm": 0.08814681321382523, + "learning_rate": 1.4824651430573982e-05, + "loss": 8.8913, + "step": 101540 + }, + { + "epoch": 0.5071287672601064, + "grad_norm": 0.09510982036590576, + "learning_rate": 1.4823149515632431e-05, + "loss": 8.9071, + "step": 101550 + }, + { + "epoch": 0.5071787060850458, + "grad_norm": 0.09275613725185394, + "learning_rate": 1.4821647600690881e-05, + "loss": 8.9034, + "step": 101560 + }, + { + "epoch": 0.5072286449099853, + "grad_norm": 0.09569612890481949, + "learning_rate": 1.4820145685749331e-05, + "loss": 8.8981, + "step": 101570 + }, + { + "epoch": 0.5072785837349247, + "grad_norm": 0.10368134826421738, + "learning_rate": 1.4818643770807781e-05, + "loss": 8.8932, + "step": 101580 + }, + { + "epoch": 0.5073285225598642, + "grad_norm": 0.0873536765575409, + "learning_rate": 1.481714185586623e-05, + "loss": 8.8866, + "step": 101590 + }, + { + "epoch": 0.5073784613848036, + "grad_norm": 0.0927792489528656, + "learning_rate": 1.4815639940924678e-05, + "loss": 8.8916, + "step": 101600 + }, + { + "epoch": 0.5074284002097431, + "grad_norm": 0.09179840981960297, + "learning_rate": 1.4814138025983129e-05, + "loss": 8.8925, + "step": 101610 + }, + { + "epoch": 0.5074783390346825, + "grad_norm": 0.09306514263153076, + "learning_rate": 1.4812636111041579e-05, + "loss": 8.9127, + "step": 101620 + }, + { + "epoch": 0.507528277859622, + "grad_norm": 0.09609080106019974, + "learning_rate": 1.4811134196100029e-05, + "loss": 8.8901, + "step": 101630 + }, + { + "epoch": 0.5075782166845614, + "grad_norm": 0.09421303868293762, + "learning_rate": 1.4809632281158477e-05, + "loss": 8.8944, + "step": 101640 + }, + { + "epoch": 0.5076281555095009, + "grad_norm": 0.09363938122987747, + "learning_rate": 1.4808130366216926e-05, + "loss": 8.9101, + "step": 101650 + }, + { + "epoch": 0.5076780943344403, + "grad_norm": 0.09263626486063004, + "learning_rate": 1.4806628451275376e-05, + "loss": 8.8904, + "step": 101660 + }, + { + "epoch": 0.5077280331593798, + "grad_norm": 0.08773566037416458, + "learning_rate": 1.4805126536333826e-05, + "loss": 8.8914, + "step": 101670 + }, + { + "epoch": 0.5077779719843192, + "grad_norm": 0.09525804966688156, + "learning_rate": 1.4803624621392276e-05, + "loss": 8.8931, + "step": 101680 + }, + { + "epoch": 0.5078279108092587, + "grad_norm": 0.09385735541582108, + "learning_rate": 1.4802122706450727e-05, + "loss": 8.904, + "step": 101690 + }, + { + "epoch": 0.5078778496341981, + "grad_norm": 0.0941053256392479, + "learning_rate": 1.4800620791509173e-05, + "loss": 8.8882, + "step": 101700 + }, + { + "epoch": 0.5079277884591376, + "grad_norm": 0.10088019073009491, + "learning_rate": 1.4799118876567624e-05, + "loss": 8.8875, + "step": 101710 + }, + { + "epoch": 0.507977727284077, + "grad_norm": 0.09125470370054245, + "learning_rate": 1.4797616961626074e-05, + "loss": 8.8858, + "step": 101720 + }, + { + "epoch": 0.5080276661090165, + "grad_norm": 0.08886610716581345, + "learning_rate": 1.4796115046684524e-05, + "loss": 8.8908, + "step": 101730 + }, + { + "epoch": 0.5080776049339559, + "grad_norm": 0.09195410460233688, + "learning_rate": 1.4794613131742974e-05, + "loss": 8.8764, + "step": 101740 + }, + { + "epoch": 0.5081275437588954, + "grad_norm": 0.09641274064779282, + "learning_rate": 1.4793111216801421e-05, + "loss": 8.88, + "step": 101750 + }, + { + "epoch": 0.5081774825838348, + "grad_norm": 0.08937504887580872, + "learning_rate": 1.4791609301859871e-05, + "loss": 8.8912, + "step": 101760 + }, + { + "epoch": 0.5082274214087743, + "grad_norm": 0.10038053244352341, + "learning_rate": 1.4790107386918321e-05, + "loss": 8.8898, + "step": 101770 + }, + { + "epoch": 0.5082773602337137, + "grad_norm": 0.08841981738805771, + "learning_rate": 1.4788605471976772e-05, + "loss": 8.902, + "step": 101780 + }, + { + "epoch": 0.5083272990586531, + "grad_norm": 0.09414731711149216, + "learning_rate": 1.4787103557035222e-05, + "loss": 8.8862, + "step": 101790 + }, + { + "epoch": 0.5083772378835926, + "grad_norm": 0.09005799144506454, + "learning_rate": 1.4785601642093668e-05, + "loss": 8.884, + "step": 101800 + }, + { + "epoch": 0.5084271767085321, + "grad_norm": 0.09526651352643967, + "learning_rate": 1.4784099727152119e-05, + "loss": 8.8958, + "step": 101810 + }, + { + "epoch": 0.5084771155334715, + "grad_norm": 0.094487763941288, + "learning_rate": 1.4782597812210569e-05, + "loss": 8.9035, + "step": 101820 + }, + { + "epoch": 0.508527054358411, + "grad_norm": 0.0912264958024025, + "learning_rate": 1.4781095897269019e-05, + "loss": 8.8972, + "step": 101830 + }, + { + "epoch": 0.5085769931833504, + "grad_norm": 0.09153269231319427, + "learning_rate": 1.477959398232747e-05, + "loss": 8.8956, + "step": 101840 + }, + { + "epoch": 0.5086269320082899, + "grad_norm": 0.09265515953302383, + "learning_rate": 1.4778092067385918e-05, + "loss": 8.8747, + "step": 101850 + }, + { + "epoch": 0.5086768708332293, + "grad_norm": 0.09390241652727127, + "learning_rate": 1.4776590152444366e-05, + "loss": 8.9175, + "step": 101860 + }, + { + "epoch": 0.5087268096581687, + "grad_norm": 0.09035301208496094, + "learning_rate": 1.4775088237502816e-05, + "loss": 8.8821, + "step": 101870 + }, + { + "epoch": 0.5087767484831082, + "grad_norm": 0.08922744542360306, + "learning_rate": 1.4773586322561267e-05, + "loss": 8.8883, + "step": 101880 + }, + { + "epoch": 0.5088266873080476, + "grad_norm": 0.09103063493967056, + "learning_rate": 1.4772084407619717e-05, + "loss": 8.8571, + "step": 101890 + }, + { + "epoch": 0.5088766261329871, + "grad_norm": 0.09650906175374985, + "learning_rate": 1.4770582492678165e-05, + "loss": 8.884, + "step": 101900 + }, + { + "epoch": 0.5089265649579265, + "grad_norm": 0.09376221150159836, + "learning_rate": 1.4769080577736614e-05, + "loss": 8.8981, + "step": 101910 + }, + { + "epoch": 0.508976503782866, + "grad_norm": 0.0935838520526886, + "learning_rate": 1.4767578662795064e-05, + "loss": 8.8788, + "step": 101920 + }, + { + "epoch": 0.5090264426078054, + "grad_norm": 0.09474118798971176, + "learning_rate": 1.4766076747853514e-05, + "loss": 8.8921, + "step": 101930 + }, + { + "epoch": 0.5090763814327449, + "grad_norm": 0.0944557636976242, + "learning_rate": 1.4764574832911964e-05, + "loss": 8.9048, + "step": 101940 + }, + { + "epoch": 0.5091263202576843, + "grad_norm": 0.08811389654874802, + "learning_rate": 1.4763072917970413e-05, + "loss": 8.9097, + "step": 101950 + }, + { + "epoch": 0.5091762590826238, + "grad_norm": 0.0878419429063797, + "learning_rate": 1.4761571003028861e-05, + "loss": 8.8932, + "step": 101960 + }, + { + "epoch": 0.5092261979075632, + "grad_norm": 0.09334632754325867, + "learning_rate": 1.4760069088087311e-05, + "loss": 8.8877, + "step": 101970 + }, + { + "epoch": 0.5092761367325027, + "grad_norm": 0.09611135721206665, + "learning_rate": 1.4758567173145762e-05, + "loss": 8.8956, + "step": 101980 + }, + { + "epoch": 0.5093260755574421, + "grad_norm": 0.09940996766090393, + "learning_rate": 1.4757065258204212e-05, + "loss": 8.8933, + "step": 101990 + }, + { + "epoch": 0.5093760143823816, + "grad_norm": 0.08453790843486786, + "learning_rate": 1.475556334326266e-05, + "loss": 8.8861, + "step": 102000 + }, + { + "epoch": 0.509425953207321, + "grad_norm": 0.09275045245885849, + "learning_rate": 1.475406142832111e-05, + "loss": 8.8927, + "step": 102010 + }, + { + "epoch": 0.5094758920322605, + "grad_norm": 0.09172289073467255, + "learning_rate": 1.4752559513379559e-05, + "loss": 8.8762, + "step": 102020 + }, + { + "epoch": 0.5095258308571999, + "grad_norm": 0.09009222686290741, + "learning_rate": 1.4751057598438009e-05, + "loss": 8.9011, + "step": 102030 + }, + { + "epoch": 0.5095757696821394, + "grad_norm": 0.09388335049152374, + "learning_rate": 1.474955568349646e-05, + "loss": 8.8875, + "step": 102040 + }, + { + "epoch": 0.5096257085070788, + "grad_norm": 0.09411590546369553, + "learning_rate": 1.4748053768554908e-05, + "loss": 8.8942, + "step": 102050 + }, + { + "epoch": 0.5096756473320183, + "grad_norm": 0.08968400955200195, + "learning_rate": 1.4746551853613358e-05, + "loss": 8.8953, + "step": 102060 + }, + { + "epoch": 0.5097255861569577, + "grad_norm": 0.09362632036209106, + "learning_rate": 1.4745049938671806e-05, + "loss": 8.9011, + "step": 102070 + }, + { + "epoch": 0.5097755249818972, + "grad_norm": 0.093928761780262, + "learning_rate": 1.4743548023730257e-05, + "loss": 8.8858, + "step": 102080 + }, + { + "epoch": 0.5098254638068366, + "grad_norm": 0.09098640829324722, + "learning_rate": 1.4742046108788707e-05, + "loss": 8.8899, + "step": 102090 + }, + { + "epoch": 0.5098754026317761, + "grad_norm": 0.09484419226646423, + "learning_rate": 1.4740544193847155e-05, + "loss": 8.8725, + "step": 102100 + }, + { + "epoch": 0.5099253414567155, + "grad_norm": 0.0966237410902977, + "learning_rate": 1.4739042278905605e-05, + "loss": 8.8903, + "step": 102110 + }, + { + "epoch": 0.509975280281655, + "grad_norm": 0.09729520231485367, + "learning_rate": 1.4737540363964054e-05, + "loss": 8.8814, + "step": 102120 + }, + { + "epoch": 0.5100252191065944, + "grad_norm": 0.09406805038452148, + "learning_rate": 1.4736038449022504e-05, + "loss": 8.8763, + "step": 102130 + }, + { + "epoch": 0.5100751579315339, + "grad_norm": 0.0967145785689354, + "learning_rate": 1.4734536534080954e-05, + "loss": 8.8686, + "step": 102140 + }, + { + "epoch": 0.5101250967564733, + "grad_norm": 0.10264673829078674, + "learning_rate": 1.4733034619139403e-05, + "loss": 8.8916, + "step": 102150 + }, + { + "epoch": 0.5101750355814128, + "grad_norm": 0.09559258073568344, + "learning_rate": 1.4731532704197853e-05, + "loss": 8.889, + "step": 102160 + }, + { + "epoch": 0.5102249744063522, + "grad_norm": 0.09675032645463943, + "learning_rate": 1.4730030789256303e-05, + "loss": 8.8761, + "step": 102170 + }, + { + "epoch": 0.5102749132312917, + "grad_norm": 0.09106460958719254, + "learning_rate": 1.4728528874314752e-05, + "loss": 8.89, + "step": 102180 + }, + { + "epoch": 0.5103248520562311, + "grad_norm": 0.09537220746278763, + "learning_rate": 1.4727026959373202e-05, + "loss": 8.892, + "step": 102190 + }, + { + "epoch": 0.5103747908811705, + "grad_norm": 0.08975797146558762, + "learning_rate": 1.472552504443165e-05, + "loss": 8.8853, + "step": 102200 + }, + { + "epoch": 0.51042472970611, + "grad_norm": 0.09914405643939972, + "learning_rate": 1.47240231294901e-05, + "loss": 8.8851, + "step": 102210 + }, + { + "epoch": 0.5104746685310495, + "grad_norm": 0.09528080374002457, + "learning_rate": 1.472252121454855e-05, + "loss": 8.9045, + "step": 102220 + }, + { + "epoch": 0.5105246073559889, + "grad_norm": 0.09561271965503693, + "learning_rate": 1.4721019299606999e-05, + "loss": 8.8875, + "step": 102230 + }, + { + "epoch": 0.5105745461809283, + "grad_norm": 0.09324440360069275, + "learning_rate": 1.471951738466545e-05, + "loss": 8.882, + "step": 102240 + }, + { + "epoch": 0.5106244850058678, + "grad_norm": 0.10084079951047897, + "learning_rate": 1.4718015469723898e-05, + "loss": 8.8891, + "step": 102250 + }, + { + "epoch": 0.5106744238308073, + "grad_norm": 0.09249073266983032, + "learning_rate": 1.4716513554782348e-05, + "loss": 8.8773, + "step": 102260 + }, + { + "epoch": 0.5107243626557467, + "grad_norm": 0.09199045598506927, + "learning_rate": 1.4715011639840798e-05, + "loss": 8.8801, + "step": 102270 + }, + { + "epoch": 0.5107743014806861, + "grad_norm": 0.09335954487323761, + "learning_rate": 1.4713509724899247e-05, + "loss": 8.8854, + "step": 102280 + }, + { + "epoch": 0.5108242403056256, + "grad_norm": 0.09267157316207886, + "learning_rate": 1.4712007809957697e-05, + "loss": 8.8834, + "step": 102290 + }, + { + "epoch": 0.5108741791305651, + "grad_norm": 0.0931268110871315, + "learning_rate": 1.4710505895016145e-05, + "loss": 8.8788, + "step": 102300 + }, + { + "epoch": 0.5109241179555045, + "grad_norm": 0.09288343042135239, + "learning_rate": 1.4709003980074595e-05, + "loss": 8.8813, + "step": 102310 + }, + { + "epoch": 0.5109740567804439, + "grad_norm": 0.0869889035820961, + "learning_rate": 1.4707502065133046e-05, + "loss": 8.8893, + "step": 102320 + }, + { + "epoch": 0.5110239956053834, + "grad_norm": 0.0896022841334343, + "learning_rate": 1.4706000150191496e-05, + "loss": 8.8793, + "step": 102330 + }, + { + "epoch": 0.5110739344303229, + "grad_norm": 0.09461673349142075, + "learning_rate": 1.4704498235249944e-05, + "loss": 8.8852, + "step": 102340 + }, + { + "epoch": 0.5111238732552623, + "grad_norm": 0.09535450488328934, + "learning_rate": 1.4702996320308393e-05, + "loss": 8.8921, + "step": 102350 + }, + { + "epoch": 0.5111738120802017, + "grad_norm": 0.09272834658622742, + "learning_rate": 1.4701494405366843e-05, + "loss": 8.9021, + "step": 102360 + }, + { + "epoch": 0.5112237509051412, + "grad_norm": 0.09534373134374619, + "learning_rate": 1.4699992490425293e-05, + "loss": 8.8848, + "step": 102370 + }, + { + "epoch": 0.5112736897300807, + "grad_norm": 0.09193330258131027, + "learning_rate": 1.4698490575483743e-05, + "loss": 8.8843, + "step": 102380 + }, + { + "epoch": 0.5113236285550201, + "grad_norm": 0.09175775945186615, + "learning_rate": 1.4696988660542192e-05, + "loss": 8.895, + "step": 102390 + }, + { + "epoch": 0.5113735673799595, + "grad_norm": 0.09013816714286804, + "learning_rate": 1.469548674560064e-05, + "loss": 8.8983, + "step": 102400 + }, + { + "epoch": 0.511423506204899, + "grad_norm": 0.09730950742959976, + "learning_rate": 1.469398483065909e-05, + "loss": 8.8866, + "step": 102410 + }, + { + "epoch": 0.5114734450298385, + "grad_norm": 0.09352081269025803, + "learning_rate": 1.469248291571754e-05, + "loss": 8.8842, + "step": 102420 + }, + { + "epoch": 0.5115233838547779, + "grad_norm": 0.09047748893499374, + "learning_rate": 1.469098100077599e-05, + "loss": 8.8971, + "step": 102430 + }, + { + "epoch": 0.5115733226797173, + "grad_norm": 0.09623833000659943, + "learning_rate": 1.468947908583444e-05, + "loss": 8.8938, + "step": 102440 + }, + { + "epoch": 0.5116232615046568, + "grad_norm": 0.09208162873983383, + "learning_rate": 1.4687977170892888e-05, + "loss": 8.902, + "step": 102450 + }, + { + "epoch": 0.5116732003295963, + "grad_norm": 0.0962912067770958, + "learning_rate": 1.4686475255951338e-05, + "loss": 8.8727, + "step": 102460 + }, + { + "epoch": 0.5117231391545357, + "grad_norm": 0.0914090946316719, + "learning_rate": 1.4684973341009788e-05, + "loss": 8.8823, + "step": 102470 + }, + { + "epoch": 0.5117730779794751, + "grad_norm": 0.09146260470151901, + "learning_rate": 1.4683471426068238e-05, + "loss": 8.9058, + "step": 102480 + }, + { + "epoch": 0.5118230168044146, + "grad_norm": 0.09242929518222809, + "learning_rate": 1.4681969511126688e-05, + "loss": 8.8762, + "step": 102490 + }, + { + "epoch": 0.5118729556293541, + "grad_norm": 0.09700184315443039, + "learning_rate": 1.4680467596185135e-05, + "loss": 8.8787, + "step": 102500 + }, + { + "epoch": 0.5119228944542935, + "grad_norm": 0.08677946031093597, + "learning_rate": 1.4678965681243585e-05, + "loss": 8.8883, + "step": 102510 + }, + { + "epoch": 0.5119728332792329, + "grad_norm": 0.09197265654802322, + "learning_rate": 1.4677463766302036e-05, + "loss": 8.8903, + "step": 102520 + }, + { + "epoch": 0.5120227721041724, + "grad_norm": 0.08945990353822708, + "learning_rate": 1.4675961851360486e-05, + "loss": 8.8724, + "step": 102530 + }, + { + "epoch": 0.5120727109291119, + "grad_norm": 0.08991879224777222, + "learning_rate": 1.4674459936418936e-05, + "loss": 8.8745, + "step": 102540 + }, + { + "epoch": 0.5121226497540513, + "grad_norm": 0.09267672151327133, + "learning_rate": 1.4672958021477383e-05, + "loss": 8.8855, + "step": 102550 + }, + { + "epoch": 0.5121725885789907, + "grad_norm": 0.09635213762521744, + "learning_rate": 1.4671456106535833e-05, + "loss": 8.8905, + "step": 102560 + }, + { + "epoch": 0.5122225274039302, + "grad_norm": 0.0958060547709465, + "learning_rate": 1.4669954191594283e-05, + "loss": 8.9029, + "step": 102570 + }, + { + "epoch": 0.5122724662288697, + "grad_norm": 0.09407208114862442, + "learning_rate": 1.4668452276652733e-05, + "loss": 8.8742, + "step": 102580 + }, + { + "epoch": 0.5123224050538091, + "grad_norm": 0.09680324047803879, + "learning_rate": 1.4666950361711183e-05, + "loss": 8.8967, + "step": 102590 + }, + { + "epoch": 0.5123723438787485, + "grad_norm": 0.09674806892871857, + "learning_rate": 1.466544844676963e-05, + "loss": 8.8878, + "step": 102600 + }, + { + "epoch": 0.512422282703688, + "grad_norm": 0.08954492211341858, + "learning_rate": 1.466394653182808e-05, + "loss": 8.8912, + "step": 102610 + }, + { + "epoch": 0.5124722215286275, + "grad_norm": 0.08924522995948792, + "learning_rate": 1.466244461688653e-05, + "loss": 8.8811, + "step": 102620 + }, + { + "epoch": 0.5125221603535669, + "grad_norm": 0.09569413959980011, + "learning_rate": 1.466094270194498e-05, + "loss": 8.8828, + "step": 102630 + }, + { + "epoch": 0.5125720991785063, + "grad_norm": 0.09132692217826843, + "learning_rate": 1.4659440787003431e-05, + "loss": 8.8799, + "step": 102640 + }, + { + "epoch": 0.5126220380034457, + "grad_norm": 0.08919345587491989, + "learning_rate": 1.4657938872061878e-05, + "loss": 8.8885, + "step": 102650 + }, + { + "epoch": 0.5126719768283853, + "grad_norm": 0.09804876893758774, + "learning_rate": 1.4656436957120328e-05, + "loss": 8.8798, + "step": 102660 + }, + { + "epoch": 0.5127219156533247, + "grad_norm": 0.10200927406549454, + "learning_rate": 1.4654935042178778e-05, + "loss": 8.8672, + "step": 102670 + }, + { + "epoch": 0.5127718544782641, + "grad_norm": 0.09064894169569016, + "learning_rate": 1.4653433127237228e-05, + "loss": 8.8719, + "step": 102680 + }, + { + "epoch": 0.5128217933032035, + "grad_norm": 0.09494546800851822, + "learning_rate": 1.4651931212295678e-05, + "loss": 8.8996, + "step": 102690 + }, + { + "epoch": 0.5128717321281431, + "grad_norm": 0.09214048832654953, + "learning_rate": 1.4650429297354127e-05, + "loss": 8.8922, + "step": 102700 + }, + { + "epoch": 0.5129216709530825, + "grad_norm": 0.09070252627134323, + "learning_rate": 1.4648927382412575e-05, + "loss": 8.8887, + "step": 102710 + }, + { + "epoch": 0.5129716097780219, + "grad_norm": 0.09582309424877167, + "learning_rate": 1.4647425467471026e-05, + "loss": 8.8841, + "step": 102720 + }, + { + "epoch": 0.5130215486029613, + "grad_norm": 0.0906309261918068, + "learning_rate": 1.4645923552529476e-05, + "loss": 8.8794, + "step": 102730 + }, + { + "epoch": 0.5130714874279009, + "grad_norm": 0.0934000313282013, + "learning_rate": 1.4644421637587926e-05, + "loss": 8.8729, + "step": 102740 + }, + { + "epoch": 0.5131214262528403, + "grad_norm": 0.09085663408041, + "learning_rate": 1.4642919722646374e-05, + "loss": 8.8923, + "step": 102750 + }, + { + "epoch": 0.5131713650777797, + "grad_norm": 0.0955960750579834, + "learning_rate": 1.4641417807704823e-05, + "loss": 8.8932, + "step": 102760 + }, + { + "epoch": 0.5132213039027191, + "grad_norm": 0.09510260075330734, + "learning_rate": 1.4639915892763273e-05, + "loss": 8.8844, + "step": 102770 + }, + { + "epoch": 0.5132712427276587, + "grad_norm": 0.0931367352604866, + "learning_rate": 1.4638413977821723e-05, + "loss": 8.8895, + "step": 102780 + }, + { + "epoch": 0.5133211815525981, + "grad_norm": 0.09127708524465561, + "learning_rate": 1.4636912062880173e-05, + "loss": 8.8706, + "step": 102790 + }, + { + "epoch": 0.5133711203775375, + "grad_norm": 0.09185762703418732, + "learning_rate": 1.4635410147938622e-05, + "loss": 8.8691, + "step": 102800 + }, + { + "epoch": 0.5134210592024769, + "grad_norm": 0.08858191221952438, + "learning_rate": 1.463390823299707e-05, + "loss": 8.8763, + "step": 102810 + }, + { + "epoch": 0.5134709980274165, + "grad_norm": 0.08831470459699631, + "learning_rate": 1.463240631805552e-05, + "loss": 8.8851, + "step": 102820 + }, + { + "epoch": 0.5135209368523559, + "grad_norm": 0.08874993771314621, + "learning_rate": 1.463090440311397e-05, + "loss": 8.8826, + "step": 102830 + }, + { + "epoch": 0.5135708756772953, + "grad_norm": 0.09777554869651794, + "learning_rate": 1.4629402488172421e-05, + "loss": 8.888, + "step": 102840 + }, + { + "epoch": 0.5136208145022347, + "grad_norm": 0.08795005083084106, + "learning_rate": 1.462790057323087e-05, + "loss": 8.8808, + "step": 102850 + }, + { + "epoch": 0.5136707533271742, + "grad_norm": 0.08897390216588974, + "learning_rate": 1.462639865828932e-05, + "loss": 8.8769, + "step": 102860 + }, + { + "epoch": 0.5137206921521137, + "grad_norm": 0.09718848019838333, + "learning_rate": 1.4624896743347768e-05, + "loss": 8.8757, + "step": 102870 + }, + { + "epoch": 0.5137706309770531, + "grad_norm": 0.09486740082502365, + "learning_rate": 1.4623394828406218e-05, + "loss": 8.8906, + "step": 102880 + }, + { + "epoch": 0.5138205698019925, + "grad_norm": 0.09383400529623032, + "learning_rate": 1.4621892913464668e-05, + "loss": 8.8932, + "step": 102890 + }, + { + "epoch": 0.513870508626932, + "grad_norm": 0.09229724854230881, + "learning_rate": 1.4620390998523117e-05, + "loss": 8.8677, + "step": 102900 + }, + { + "epoch": 0.5139204474518715, + "grad_norm": 0.09365616738796234, + "learning_rate": 1.4618889083581567e-05, + "loss": 8.8787, + "step": 102910 + }, + { + "epoch": 0.5139703862768109, + "grad_norm": 0.09437093883752823, + "learning_rate": 1.4617387168640016e-05, + "loss": 8.9, + "step": 102920 + }, + { + "epoch": 0.5140203251017503, + "grad_norm": 0.09300998598337173, + "learning_rate": 1.4615885253698466e-05, + "loss": 8.88, + "step": 102930 + }, + { + "epoch": 0.5140702639266898, + "grad_norm": 0.09167800098657608, + "learning_rate": 1.4614383338756916e-05, + "loss": 8.8927, + "step": 102940 + }, + { + "epoch": 0.5141202027516293, + "grad_norm": 0.09113366156816483, + "learning_rate": 1.4612881423815364e-05, + "loss": 8.8802, + "step": 102950 + }, + { + "epoch": 0.5141701415765687, + "grad_norm": 0.09514357149600983, + "learning_rate": 1.4611379508873815e-05, + "loss": 8.8725, + "step": 102960 + }, + { + "epoch": 0.5142200804015081, + "grad_norm": 0.09392548352479935, + "learning_rate": 1.4609877593932263e-05, + "loss": 8.869, + "step": 102970 + }, + { + "epoch": 0.5142700192264476, + "grad_norm": 0.08608120679855347, + "learning_rate": 1.4608375678990713e-05, + "loss": 8.8806, + "step": 102980 + }, + { + "epoch": 0.5143199580513871, + "grad_norm": 0.09436332434415817, + "learning_rate": 1.4606873764049163e-05, + "loss": 8.8717, + "step": 102990 + }, + { + "epoch": 0.5143698968763265, + "grad_norm": 0.09002230316400528, + "learning_rate": 1.4605371849107612e-05, + "loss": 8.8916, + "step": 103000 + }, + { + "epoch": 0.5144198357012659, + "grad_norm": 0.09780488163232803, + "learning_rate": 1.4603869934166062e-05, + "loss": 8.8725, + "step": 103010 + }, + { + "epoch": 0.5144697745262053, + "grad_norm": 0.09558107703924179, + "learning_rate": 1.4602368019224512e-05, + "loss": 8.8836, + "step": 103020 + }, + { + "epoch": 0.5145197133511449, + "grad_norm": 0.09473099559545517, + "learning_rate": 1.460086610428296e-05, + "loss": 8.8739, + "step": 103030 + }, + { + "epoch": 0.5145696521760843, + "grad_norm": 0.09364248812198639, + "learning_rate": 1.4599364189341411e-05, + "loss": 8.8709, + "step": 103040 + }, + { + "epoch": 0.5146195910010237, + "grad_norm": 0.09198643267154694, + "learning_rate": 1.459786227439986e-05, + "loss": 8.8838, + "step": 103050 + }, + { + "epoch": 0.5146695298259631, + "grad_norm": 0.0934210941195488, + "learning_rate": 1.459636035945831e-05, + "loss": 8.8783, + "step": 103060 + }, + { + "epoch": 0.5147194686509027, + "grad_norm": 0.09671808034181595, + "learning_rate": 1.459485844451676e-05, + "loss": 8.8814, + "step": 103070 + }, + { + "epoch": 0.5147694074758421, + "grad_norm": 0.0925278291106224, + "learning_rate": 1.4593356529575208e-05, + "loss": 8.8924, + "step": 103080 + }, + { + "epoch": 0.5148193463007815, + "grad_norm": 0.09130702912807465, + "learning_rate": 1.4591854614633658e-05, + "loss": 8.8789, + "step": 103090 + }, + { + "epoch": 0.5148692851257209, + "grad_norm": 0.0948934555053711, + "learning_rate": 1.4590352699692107e-05, + "loss": 8.8758, + "step": 103100 + }, + { + "epoch": 0.5149192239506605, + "grad_norm": 0.09707729518413544, + "learning_rate": 1.4588850784750557e-05, + "loss": 8.8773, + "step": 103110 + }, + { + "epoch": 0.5149691627755999, + "grad_norm": 0.09346991032361984, + "learning_rate": 1.4587348869809007e-05, + "loss": 8.8743, + "step": 103120 + }, + { + "epoch": 0.5150191016005393, + "grad_norm": 0.09797349572181702, + "learning_rate": 1.4585846954867456e-05, + "loss": 8.8825, + "step": 103130 + }, + { + "epoch": 0.5150690404254787, + "grad_norm": 0.08969218283891678, + "learning_rate": 1.4584345039925906e-05, + "loss": 8.889, + "step": 103140 + }, + { + "epoch": 0.5151189792504183, + "grad_norm": 0.0924285352230072, + "learning_rate": 1.4582843124984354e-05, + "loss": 8.876, + "step": 103150 + }, + { + "epoch": 0.5151689180753577, + "grad_norm": 0.0896601751446724, + "learning_rate": 1.4581341210042805e-05, + "loss": 8.8611, + "step": 103160 + }, + { + "epoch": 0.5152188569002971, + "grad_norm": 0.08862532675266266, + "learning_rate": 1.4579839295101255e-05, + "loss": 8.8764, + "step": 103170 + }, + { + "epoch": 0.5152687957252365, + "grad_norm": 0.09978237003087997, + "learning_rate": 1.4578337380159705e-05, + "loss": 8.883, + "step": 103180 + }, + { + "epoch": 0.5153187345501761, + "grad_norm": 0.09299599379301071, + "learning_rate": 1.4576835465218153e-05, + "loss": 8.8806, + "step": 103190 + }, + { + "epoch": 0.5153686733751155, + "grad_norm": 0.09699616581201553, + "learning_rate": 1.4575333550276602e-05, + "loss": 8.8871, + "step": 103200 + }, + { + "epoch": 0.5154186122000549, + "grad_norm": 0.09479953348636627, + "learning_rate": 1.4573831635335052e-05, + "loss": 8.8893, + "step": 103210 + }, + { + "epoch": 0.5154685510249943, + "grad_norm": 0.09587261080741882, + "learning_rate": 1.4572329720393502e-05, + "loss": 8.8802, + "step": 103220 + }, + { + "epoch": 0.5155184898499339, + "grad_norm": 0.09195046871900558, + "learning_rate": 1.4570827805451952e-05, + "loss": 8.8887, + "step": 103230 + }, + { + "epoch": 0.5155684286748733, + "grad_norm": 0.09111469984054565, + "learning_rate": 1.4569325890510401e-05, + "loss": 8.868, + "step": 103240 + }, + { + "epoch": 0.5156183674998127, + "grad_norm": 0.09878696501255035, + "learning_rate": 1.456782397556885e-05, + "loss": 8.8859, + "step": 103250 + }, + { + "epoch": 0.5156683063247521, + "grad_norm": 0.09132800251245499, + "learning_rate": 1.45663220606273e-05, + "loss": 8.8826, + "step": 103260 + }, + { + "epoch": 0.5157182451496917, + "grad_norm": 0.0929095596075058, + "learning_rate": 1.456482014568575e-05, + "loss": 8.8912, + "step": 103270 + }, + { + "epoch": 0.5157681839746311, + "grad_norm": 0.0883350670337677, + "learning_rate": 1.45633182307442e-05, + "loss": 8.8899, + "step": 103280 + }, + { + "epoch": 0.5158181227995705, + "grad_norm": 0.0903296247124672, + "learning_rate": 1.4561816315802648e-05, + "loss": 8.8713, + "step": 103290 + }, + { + "epoch": 0.5158680616245099, + "grad_norm": 0.09251569956541061, + "learning_rate": 1.4560314400861097e-05, + "loss": 8.8755, + "step": 103300 + }, + { + "epoch": 0.5159180004494495, + "grad_norm": 0.10021259635686874, + "learning_rate": 1.4558812485919547e-05, + "loss": 8.8717, + "step": 103310 + }, + { + "epoch": 0.5159679392743889, + "grad_norm": 0.09307809174060822, + "learning_rate": 1.4557310570977997e-05, + "loss": 8.861, + "step": 103320 + }, + { + "epoch": 0.5160178780993283, + "grad_norm": 0.09546101093292236, + "learning_rate": 1.4555808656036448e-05, + "loss": 8.8763, + "step": 103330 + }, + { + "epoch": 0.5160678169242677, + "grad_norm": 0.09137708693742752, + "learning_rate": 1.4554306741094898e-05, + "loss": 8.8986, + "step": 103340 + }, + { + "epoch": 0.5161177557492073, + "grad_norm": 0.09687686711549759, + "learning_rate": 1.4552804826153344e-05, + "loss": 8.8711, + "step": 103350 + }, + { + "epoch": 0.5161676945741467, + "grad_norm": 0.09281541407108307, + "learning_rate": 1.4551302911211795e-05, + "loss": 8.8645, + "step": 103360 + }, + { + "epoch": 0.5162176333990861, + "grad_norm": 0.08926443755626678, + "learning_rate": 1.4549800996270245e-05, + "loss": 8.8634, + "step": 103370 + }, + { + "epoch": 0.5162675722240255, + "grad_norm": 0.09436703473329544, + "learning_rate": 1.4548299081328695e-05, + "loss": 8.878, + "step": 103380 + }, + { + "epoch": 0.5163175110489651, + "grad_norm": 0.09582871198654175, + "learning_rate": 1.4546797166387145e-05, + "loss": 8.8757, + "step": 103390 + }, + { + "epoch": 0.5163674498739045, + "grad_norm": 0.09050548076629639, + "learning_rate": 1.4545295251445592e-05, + "loss": 8.8923, + "step": 103400 + }, + { + "epoch": 0.5164173886988439, + "grad_norm": 0.0901259183883667, + "learning_rate": 1.4543793336504042e-05, + "loss": 8.8696, + "step": 103410 + }, + { + "epoch": 0.5164673275237833, + "grad_norm": 0.09428418427705765, + "learning_rate": 1.4542291421562492e-05, + "loss": 8.882, + "step": 103420 + }, + { + "epoch": 0.5165172663487229, + "grad_norm": 0.09372241050004959, + "learning_rate": 1.4540789506620943e-05, + "loss": 8.874, + "step": 103430 + }, + { + "epoch": 0.5165672051736623, + "grad_norm": 0.09623238444328308, + "learning_rate": 1.4539287591679393e-05, + "loss": 8.8679, + "step": 103440 + }, + { + "epoch": 0.5166171439986017, + "grad_norm": 0.09057226777076721, + "learning_rate": 1.453778567673784e-05, + "loss": 8.8789, + "step": 103450 + }, + { + "epoch": 0.5166670828235411, + "grad_norm": 0.09329625219106674, + "learning_rate": 1.453628376179629e-05, + "loss": 8.874, + "step": 103460 + }, + { + "epoch": 0.5167170216484807, + "grad_norm": 0.0941077321767807, + "learning_rate": 1.453478184685474e-05, + "loss": 8.8716, + "step": 103470 + }, + { + "epoch": 0.5167669604734201, + "grad_norm": 0.09424999356269836, + "learning_rate": 1.453327993191319e-05, + "loss": 8.8834, + "step": 103480 + }, + { + "epoch": 0.5168168992983595, + "grad_norm": 0.0937827080488205, + "learning_rate": 1.453177801697164e-05, + "loss": 8.8731, + "step": 103490 + }, + { + "epoch": 0.5168668381232989, + "grad_norm": 0.0896645337343216, + "learning_rate": 1.4530276102030089e-05, + "loss": 8.8669, + "step": 103500 + }, + { + "epoch": 0.5169167769482385, + "grad_norm": 0.08836094290018082, + "learning_rate": 1.4528774187088537e-05, + "loss": 8.8832, + "step": 103510 + }, + { + "epoch": 0.5169667157731779, + "grad_norm": 0.0928412526845932, + "learning_rate": 1.4527272272146987e-05, + "loss": 8.8857, + "step": 103520 + }, + { + "epoch": 0.5170166545981173, + "grad_norm": 0.09539926797151566, + "learning_rate": 1.4525770357205438e-05, + "loss": 8.892, + "step": 103530 + }, + { + "epoch": 0.5170665934230567, + "grad_norm": 0.09582491964101791, + "learning_rate": 1.4524268442263888e-05, + "loss": 8.8688, + "step": 103540 + }, + { + "epoch": 0.5171165322479963, + "grad_norm": 0.09870574623346329, + "learning_rate": 1.4522766527322336e-05, + "loss": 8.8783, + "step": 103550 + }, + { + "epoch": 0.5171664710729357, + "grad_norm": 0.09343771636486053, + "learning_rate": 1.4521264612380785e-05, + "loss": 8.8707, + "step": 103560 + }, + { + "epoch": 0.5172164098978751, + "grad_norm": 0.09553004056215286, + "learning_rate": 1.4519762697439235e-05, + "loss": 8.8827, + "step": 103570 + }, + { + "epoch": 0.5172663487228145, + "grad_norm": 0.09276316314935684, + "learning_rate": 1.4518260782497685e-05, + "loss": 8.8747, + "step": 103580 + }, + { + "epoch": 0.517316287547754, + "grad_norm": 0.09070324152708054, + "learning_rate": 1.4516758867556135e-05, + "loss": 8.876, + "step": 103590 + }, + { + "epoch": 0.5173662263726935, + "grad_norm": 0.09403187781572342, + "learning_rate": 1.4515256952614584e-05, + "loss": 8.8583, + "step": 103600 + }, + { + "epoch": 0.5174161651976329, + "grad_norm": 0.08794444054365158, + "learning_rate": 1.4513755037673032e-05, + "loss": 8.8818, + "step": 103610 + }, + { + "epoch": 0.5174661040225723, + "grad_norm": 0.09099708497524261, + "learning_rate": 1.4512253122731482e-05, + "loss": 8.8783, + "step": 103620 + }, + { + "epoch": 0.5175160428475118, + "grad_norm": 0.09580422937870026, + "learning_rate": 1.4510751207789933e-05, + "loss": 8.8714, + "step": 103630 + }, + { + "epoch": 0.5175659816724513, + "grad_norm": 0.09501136839389801, + "learning_rate": 1.4509249292848383e-05, + "loss": 8.8734, + "step": 103640 + }, + { + "epoch": 0.5176159204973907, + "grad_norm": 0.09438387304544449, + "learning_rate": 1.4507747377906831e-05, + "loss": 8.8821, + "step": 103650 + }, + { + "epoch": 0.5176658593223301, + "grad_norm": 0.09567675739526749, + "learning_rate": 1.4506245462965281e-05, + "loss": 8.8686, + "step": 103660 + }, + { + "epoch": 0.5177157981472696, + "grad_norm": 0.09035927057266235, + "learning_rate": 1.450474354802373e-05, + "loss": 8.8584, + "step": 103670 + }, + { + "epoch": 0.5177657369722091, + "grad_norm": 0.09373793751001358, + "learning_rate": 1.450324163308218e-05, + "loss": 8.8612, + "step": 103680 + }, + { + "epoch": 0.5178156757971485, + "grad_norm": 0.0980074554681778, + "learning_rate": 1.450173971814063e-05, + "loss": 8.8687, + "step": 103690 + }, + { + "epoch": 0.5178656146220879, + "grad_norm": 0.09776370227336884, + "learning_rate": 1.450023780319908e-05, + "loss": 8.8841, + "step": 103700 + }, + { + "epoch": 0.5179155534470274, + "grad_norm": 0.0890578031539917, + "learning_rate": 1.4498735888257529e-05, + "loss": 8.8819, + "step": 103710 + }, + { + "epoch": 0.5179654922719669, + "grad_norm": 0.09932509809732437, + "learning_rate": 1.4497233973315977e-05, + "loss": 8.8684, + "step": 103720 + }, + { + "epoch": 0.5180154310969063, + "grad_norm": 0.09279154241085052, + "learning_rate": 1.4495732058374428e-05, + "loss": 8.8734, + "step": 103730 + }, + { + "epoch": 0.5180653699218457, + "grad_norm": 0.09163205325603485, + "learning_rate": 1.4494230143432878e-05, + "loss": 8.8798, + "step": 103740 + }, + { + "epoch": 0.5181153087467852, + "grad_norm": 0.08860352635383606, + "learning_rate": 1.4492728228491328e-05, + "loss": 8.8876, + "step": 103750 + }, + { + "epoch": 0.5181652475717247, + "grad_norm": 0.08968942612409592, + "learning_rate": 1.4491226313549776e-05, + "loss": 8.8661, + "step": 103760 + }, + { + "epoch": 0.5182151863966641, + "grad_norm": 0.09817136079072952, + "learning_rate": 1.4489724398608225e-05, + "loss": 8.88, + "step": 103770 + }, + { + "epoch": 0.5182651252216035, + "grad_norm": 0.09469393640756607, + "learning_rate": 1.4488222483666675e-05, + "loss": 8.8826, + "step": 103780 + }, + { + "epoch": 0.518315064046543, + "grad_norm": 0.08931383490562439, + "learning_rate": 1.4486720568725125e-05, + "loss": 8.874, + "step": 103790 + }, + { + "epoch": 0.5183650028714825, + "grad_norm": 0.09799165278673172, + "learning_rate": 1.4485218653783575e-05, + "loss": 8.8699, + "step": 103800 + }, + { + "epoch": 0.5184149416964219, + "grad_norm": 0.09098178893327713, + "learning_rate": 1.4483716738842024e-05, + "loss": 8.8739, + "step": 103810 + }, + { + "epoch": 0.5184648805213613, + "grad_norm": 0.0937158390879631, + "learning_rate": 1.4482214823900474e-05, + "loss": 8.8743, + "step": 103820 + }, + { + "epoch": 0.5185148193463008, + "grad_norm": 0.09612367302179337, + "learning_rate": 1.4480712908958923e-05, + "loss": 8.8872, + "step": 103830 + }, + { + "epoch": 0.5185647581712403, + "grad_norm": 0.08803288638591766, + "learning_rate": 1.4479210994017373e-05, + "loss": 8.8751, + "step": 103840 + }, + { + "epoch": 0.5186146969961797, + "grad_norm": 0.09393215924501419, + "learning_rate": 1.4477709079075823e-05, + "loss": 8.8681, + "step": 103850 + }, + { + "epoch": 0.5186646358211191, + "grad_norm": 0.09139379113912582, + "learning_rate": 1.4476207164134271e-05, + "loss": 8.8752, + "step": 103860 + }, + { + "epoch": 0.5187145746460585, + "grad_norm": 0.08722906559705734, + "learning_rate": 1.4474705249192722e-05, + "loss": 8.8791, + "step": 103870 + }, + { + "epoch": 0.5187645134709981, + "grad_norm": 0.09366084635257721, + "learning_rate": 1.447320333425117e-05, + "loss": 8.8777, + "step": 103880 + }, + { + "epoch": 0.5188144522959375, + "grad_norm": 0.08771378546953201, + "learning_rate": 1.447170141930962e-05, + "loss": 8.8769, + "step": 103890 + }, + { + "epoch": 0.5188643911208769, + "grad_norm": 0.09123623371124268, + "learning_rate": 1.447019950436807e-05, + "loss": 8.8813, + "step": 103900 + }, + { + "epoch": 0.5189143299458163, + "grad_norm": 0.0887846052646637, + "learning_rate": 1.4468697589426519e-05, + "loss": 8.8682, + "step": 103910 + }, + { + "epoch": 0.5189642687707559, + "grad_norm": 0.091937355697155, + "learning_rate": 1.4467195674484969e-05, + "loss": 8.8748, + "step": 103920 + }, + { + "epoch": 0.5190142075956953, + "grad_norm": 0.09356644749641418, + "learning_rate": 1.4465693759543418e-05, + "loss": 8.8539, + "step": 103930 + }, + { + "epoch": 0.5190641464206347, + "grad_norm": 0.08848299831151962, + "learning_rate": 1.4464191844601868e-05, + "loss": 8.8697, + "step": 103940 + }, + { + "epoch": 0.5191140852455741, + "grad_norm": 0.0895775780081749, + "learning_rate": 1.4462689929660318e-05, + "loss": 8.8636, + "step": 103950 + }, + { + "epoch": 0.5191640240705137, + "grad_norm": 0.09382645785808563, + "learning_rate": 1.4461188014718766e-05, + "loss": 8.8704, + "step": 103960 + }, + { + "epoch": 0.5192139628954531, + "grad_norm": 0.0946570485830307, + "learning_rate": 1.4459686099777217e-05, + "loss": 8.8538, + "step": 103970 + }, + { + "epoch": 0.5192639017203925, + "grad_norm": 0.09343206882476807, + "learning_rate": 1.4458184184835667e-05, + "loss": 8.8671, + "step": 103980 + }, + { + "epoch": 0.5193138405453319, + "grad_norm": 0.09253936260938644, + "learning_rate": 1.4456682269894115e-05, + "loss": 8.8672, + "step": 103990 + }, + { + "epoch": 0.5193637793702715, + "grad_norm": 0.0950196385383606, + "learning_rate": 1.4455180354952565e-05, + "loss": 8.8815, + "step": 104000 + }, + { + "epoch": 0.5194137181952109, + "grad_norm": 0.09575220942497253, + "learning_rate": 1.4453678440011014e-05, + "loss": 8.8704, + "step": 104010 + }, + { + "epoch": 0.5194636570201503, + "grad_norm": 0.09172710031270981, + "learning_rate": 1.4452176525069464e-05, + "loss": 8.8797, + "step": 104020 + }, + { + "epoch": 0.5195135958450897, + "grad_norm": 0.09384613484144211, + "learning_rate": 1.4450674610127914e-05, + "loss": 8.8628, + "step": 104030 + }, + { + "epoch": 0.5195635346700292, + "grad_norm": 0.09063451737165451, + "learning_rate": 1.4449172695186363e-05, + "loss": 8.8736, + "step": 104040 + }, + { + "epoch": 0.5196134734949687, + "grad_norm": 0.09290114045143127, + "learning_rate": 1.4447670780244813e-05, + "loss": 8.8818, + "step": 104050 + }, + { + "epoch": 0.5196634123199081, + "grad_norm": 0.09309238195419312, + "learning_rate": 1.4446168865303261e-05, + "loss": 8.8818, + "step": 104060 + }, + { + "epoch": 0.5197133511448475, + "grad_norm": 0.09484042227268219, + "learning_rate": 1.4444666950361712e-05, + "loss": 8.882, + "step": 104070 + }, + { + "epoch": 0.519763289969787, + "grad_norm": 0.09208900481462479, + "learning_rate": 1.4443165035420162e-05, + "loss": 8.8948, + "step": 104080 + }, + { + "epoch": 0.5198132287947265, + "grad_norm": 0.09136916697025299, + "learning_rate": 1.444166312047861e-05, + "loss": 8.8841, + "step": 104090 + }, + { + "epoch": 0.5198631676196659, + "grad_norm": 0.09338723123073578, + "learning_rate": 1.444016120553706e-05, + "loss": 8.8688, + "step": 104100 + }, + { + "epoch": 0.5199131064446053, + "grad_norm": 0.09019609540700912, + "learning_rate": 1.4438659290595509e-05, + "loss": 8.8557, + "step": 104110 + }, + { + "epoch": 0.5199630452695448, + "grad_norm": 0.09477616846561432, + "learning_rate": 1.4437157375653959e-05, + "loss": 8.8702, + "step": 104120 + }, + { + "epoch": 0.5200129840944843, + "grad_norm": 0.09462156146764755, + "learning_rate": 1.443565546071241e-05, + "loss": 8.8836, + "step": 104130 + }, + { + "epoch": 0.5200629229194237, + "grad_norm": 0.09140991419553757, + "learning_rate": 1.443415354577086e-05, + "loss": 8.8589, + "step": 104140 + }, + { + "epoch": 0.5201128617443631, + "grad_norm": 0.09621613472700119, + "learning_rate": 1.4432651630829308e-05, + "loss": 8.8712, + "step": 104150 + }, + { + "epoch": 0.5201628005693026, + "grad_norm": 0.09273172914981842, + "learning_rate": 1.4431149715887756e-05, + "loss": 8.8778, + "step": 104160 + }, + { + "epoch": 0.5202127393942421, + "grad_norm": 0.09596338123083115, + "learning_rate": 1.4429647800946207e-05, + "loss": 8.8749, + "step": 104170 + }, + { + "epoch": 0.5202626782191815, + "grad_norm": 0.09606953710317612, + "learning_rate": 1.4428145886004657e-05, + "loss": 8.8644, + "step": 104180 + }, + { + "epoch": 0.5203126170441209, + "grad_norm": 0.08977267146110535, + "learning_rate": 1.4426643971063107e-05, + "loss": 8.8545, + "step": 104190 + }, + { + "epoch": 0.5203625558690604, + "grad_norm": 0.09220623224973679, + "learning_rate": 1.4425142056121555e-05, + "loss": 8.8825, + "step": 104200 + }, + { + "epoch": 0.5204124946939999, + "grad_norm": 0.09000903367996216, + "learning_rate": 1.4423640141180004e-05, + "loss": 8.8679, + "step": 104210 + }, + { + "epoch": 0.5204624335189393, + "grad_norm": 0.09188482165336609, + "learning_rate": 1.4422138226238454e-05, + "loss": 8.8606, + "step": 104220 + }, + { + "epoch": 0.5205123723438787, + "grad_norm": 0.09331043064594269, + "learning_rate": 1.4420636311296904e-05, + "loss": 8.8716, + "step": 104230 + }, + { + "epoch": 0.5205623111688182, + "grad_norm": 0.0948970839381218, + "learning_rate": 1.4419134396355354e-05, + "loss": 8.8613, + "step": 104240 + }, + { + "epoch": 0.5206122499937577, + "grad_norm": 0.09408802539110184, + "learning_rate": 1.4417632481413803e-05, + "loss": 8.8697, + "step": 104250 + }, + { + "epoch": 0.5206621888186971, + "grad_norm": 0.10012494027614594, + "learning_rate": 1.4416130566472251e-05, + "loss": 8.8821, + "step": 104260 + }, + { + "epoch": 0.5207121276436365, + "grad_norm": 0.08951479196548462, + "learning_rate": 1.4414628651530702e-05, + "loss": 8.8749, + "step": 104270 + }, + { + "epoch": 0.520762066468576, + "grad_norm": 0.09365096688270569, + "learning_rate": 1.4413126736589152e-05, + "loss": 8.8729, + "step": 104280 + }, + { + "epoch": 0.5208120052935155, + "grad_norm": 0.08924220502376556, + "learning_rate": 1.4411624821647602e-05, + "loss": 8.8587, + "step": 104290 + }, + { + "epoch": 0.5208619441184549, + "grad_norm": 0.09431713819503784, + "learning_rate": 1.4410122906706052e-05, + "loss": 8.8691, + "step": 104300 + }, + { + "epoch": 0.5209118829433943, + "grad_norm": 0.0971921905875206, + "learning_rate": 1.4408620991764499e-05, + "loss": 8.8626, + "step": 104310 + }, + { + "epoch": 0.5209618217683338, + "grad_norm": 0.09287826716899872, + "learning_rate": 1.4407119076822949e-05, + "loss": 8.8534, + "step": 104320 + }, + { + "epoch": 0.5210117605932733, + "grad_norm": 0.09646140784025192, + "learning_rate": 1.44056171618814e-05, + "loss": 8.8684, + "step": 104330 + }, + { + "epoch": 0.5210616994182127, + "grad_norm": 0.09730952978134155, + "learning_rate": 1.440411524693985e-05, + "loss": 8.8817, + "step": 104340 + }, + { + "epoch": 0.5211116382431521, + "grad_norm": 0.09068258851766586, + "learning_rate": 1.44026133319983e-05, + "loss": 8.8648, + "step": 104350 + }, + { + "epoch": 0.5211615770680916, + "grad_norm": 0.09530194848775864, + "learning_rate": 1.4401111417056746e-05, + "loss": 8.8584, + "step": 104360 + }, + { + "epoch": 0.521211515893031, + "grad_norm": 0.09383071959018707, + "learning_rate": 1.4399609502115197e-05, + "loss": 8.8508, + "step": 104370 + }, + { + "epoch": 0.5212614547179705, + "grad_norm": 0.096278615295887, + "learning_rate": 1.4398107587173647e-05, + "loss": 8.8657, + "step": 104380 + }, + { + "epoch": 0.5213113935429099, + "grad_norm": 0.09329640120267868, + "learning_rate": 1.4396605672232097e-05, + "loss": 8.8752, + "step": 104390 + }, + { + "epoch": 0.5213613323678494, + "grad_norm": 0.0894843116402626, + "learning_rate": 1.4395103757290547e-05, + "loss": 8.8745, + "step": 104400 + }, + { + "epoch": 0.5214112711927888, + "grad_norm": 0.0973014086484909, + "learning_rate": 1.4393601842348994e-05, + "loss": 8.8633, + "step": 104410 + }, + { + "epoch": 0.5214612100177283, + "grad_norm": 0.09077692031860352, + "learning_rate": 1.4392099927407444e-05, + "loss": 8.8766, + "step": 104420 + }, + { + "epoch": 0.5215111488426677, + "grad_norm": 0.09806881844997406, + "learning_rate": 1.4390598012465894e-05, + "loss": 8.868, + "step": 104430 + }, + { + "epoch": 0.5215610876676072, + "grad_norm": 0.09557566046714783, + "learning_rate": 1.4389096097524344e-05, + "loss": 8.8595, + "step": 104440 + }, + { + "epoch": 0.5216110264925466, + "grad_norm": 0.09814813733100891, + "learning_rate": 1.4387594182582795e-05, + "loss": 8.8594, + "step": 104450 + }, + { + "epoch": 0.5216609653174861, + "grad_norm": 0.09561028331518173, + "learning_rate": 1.4386092267641243e-05, + "loss": 8.8795, + "step": 104460 + }, + { + "epoch": 0.5217109041424255, + "grad_norm": 0.0946744978427887, + "learning_rate": 1.4384590352699692e-05, + "loss": 8.8686, + "step": 104470 + }, + { + "epoch": 0.521760842967365, + "grad_norm": 0.08944974094629288, + "learning_rate": 1.4383088437758142e-05, + "loss": 8.851, + "step": 104480 + }, + { + "epoch": 0.5218107817923044, + "grad_norm": 0.09138962626457214, + "learning_rate": 1.4381586522816592e-05, + "loss": 8.8796, + "step": 104490 + }, + { + "epoch": 0.5218607206172439, + "grad_norm": 0.09289447963237762, + "learning_rate": 1.4380084607875042e-05, + "loss": 8.8598, + "step": 104500 + }, + { + "epoch": 0.5219106594421833, + "grad_norm": 0.0965084508061409, + "learning_rate": 1.437858269293349e-05, + "loss": 8.8582, + "step": 104510 + }, + { + "epoch": 0.5219605982671228, + "grad_norm": 0.08889494836330414, + "learning_rate": 1.4377080777991939e-05, + "loss": 8.8847, + "step": 104520 + }, + { + "epoch": 0.5220105370920622, + "grad_norm": 0.09522352367639542, + "learning_rate": 1.437557886305039e-05, + "loss": 8.8416, + "step": 104530 + }, + { + "epoch": 0.5220604759170017, + "grad_norm": 0.09857291728258133, + "learning_rate": 1.437407694810884e-05, + "loss": 8.8689, + "step": 104540 + }, + { + "epoch": 0.5221104147419411, + "grad_norm": 0.0958976224064827, + "learning_rate": 1.437257503316729e-05, + "loss": 8.859, + "step": 104550 + }, + { + "epoch": 0.5221603535668806, + "grad_norm": 0.08890080451965332, + "learning_rate": 1.4371073118225738e-05, + "loss": 8.8703, + "step": 104560 + }, + { + "epoch": 0.52221029239182, + "grad_norm": 0.09524325281381607, + "learning_rate": 1.4369571203284187e-05, + "loss": 8.876, + "step": 104570 + }, + { + "epoch": 0.5222602312167595, + "grad_norm": 0.0954352393746376, + "learning_rate": 1.4368069288342637e-05, + "loss": 8.867, + "step": 104580 + }, + { + "epoch": 0.5223101700416989, + "grad_norm": 0.09157583862543106, + "learning_rate": 1.4366567373401087e-05, + "loss": 8.8728, + "step": 104590 + }, + { + "epoch": 0.5223601088666384, + "grad_norm": 0.08985961973667145, + "learning_rate": 1.4365065458459537e-05, + "loss": 8.8768, + "step": 104600 + }, + { + "epoch": 0.5224100476915778, + "grad_norm": 0.09679489582777023, + "learning_rate": 1.4363563543517986e-05, + "loss": 8.8566, + "step": 104610 + }, + { + "epoch": 0.5224599865165173, + "grad_norm": 0.09118127077817917, + "learning_rate": 1.4362061628576436e-05, + "loss": 8.8611, + "step": 104620 + }, + { + "epoch": 0.5225099253414567, + "grad_norm": 0.09494442492723465, + "learning_rate": 1.4360559713634884e-05, + "loss": 8.8769, + "step": 104630 + }, + { + "epoch": 0.5225598641663962, + "grad_norm": 0.08483417332172394, + "learning_rate": 1.4359057798693334e-05, + "loss": 8.8627, + "step": 104640 + }, + { + "epoch": 0.5226098029913356, + "grad_norm": 0.09283437579870224, + "learning_rate": 1.4357555883751785e-05, + "loss": 8.8751, + "step": 104650 + }, + { + "epoch": 0.5226597418162751, + "grad_norm": 0.09502757340669632, + "learning_rate": 1.4356053968810233e-05, + "loss": 8.8703, + "step": 104660 + }, + { + "epoch": 0.5227096806412145, + "grad_norm": 0.09698113799095154, + "learning_rate": 1.4354552053868683e-05, + "loss": 8.8747, + "step": 104670 + }, + { + "epoch": 0.522759619466154, + "grad_norm": 0.09982874244451523, + "learning_rate": 1.4353050138927132e-05, + "loss": 8.8763, + "step": 104680 + }, + { + "epoch": 0.5228095582910934, + "grad_norm": 0.09431293606758118, + "learning_rate": 1.4351548223985582e-05, + "loss": 8.875, + "step": 104690 + }, + { + "epoch": 0.5228594971160329, + "grad_norm": 0.09267256408929825, + "learning_rate": 1.4350046309044032e-05, + "loss": 8.8649, + "step": 104700 + }, + { + "epoch": 0.5229094359409723, + "grad_norm": 0.09294337034225464, + "learning_rate": 1.434854439410248e-05, + "loss": 8.8886, + "step": 104710 + }, + { + "epoch": 0.5229593747659118, + "grad_norm": 0.09335044771432877, + "learning_rate": 1.434704247916093e-05, + "loss": 8.862, + "step": 104720 + }, + { + "epoch": 0.5230093135908512, + "grad_norm": 0.094884492456913, + "learning_rate": 1.434554056421938e-05, + "loss": 8.8607, + "step": 104730 + }, + { + "epoch": 0.5230592524157907, + "grad_norm": 0.10318083316087723, + "learning_rate": 1.434403864927783e-05, + "loss": 8.8615, + "step": 104740 + }, + { + "epoch": 0.5231091912407301, + "grad_norm": 0.08785218000411987, + "learning_rate": 1.434253673433628e-05, + "loss": 8.8801, + "step": 104750 + }, + { + "epoch": 0.5231591300656696, + "grad_norm": 0.09105756133794785, + "learning_rate": 1.4341034819394728e-05, + "loss": 8.88, + "step": 104760 + }, + { + "epoch": 0.523209068890609, + "grad_norm": 0.09258095175027847, + "learning_rate": 1.4339532904453178e-05, + "loss": 8.8629, + "step": 104770 + }, + { + "epoch": 0.5232590077155485, + "grad_norm": 0.09019656479358673, + "learning_rate": 1.4338030989511628e-05, + "loss": 8.876, + "step": 104780 + }, + { + "epoch": 0.5233089465404879, + "grad_norm": 0.09507337957620621, + "learning_rate": 1.4336529074570077e-05, + "loss": 8.8558, + "step": 104790 + }, + { + "epoch": 0.5233588853654274, + "grad_norm": 0.09072145074605942, + "learning_rate": 1.4335027159628527e-05, + "loss": 8.8707, + "step": 104800 + }, + { + "epoch": 0.5234088241903668, + "grad_norm": 0.08817720413208008, + "learning_rate": 1.4333525244686976e-05, + "loss": 8.8654, + "step": 104810 + }, + { + "epoch": 0.5234587630153062, + "grad_norm": 0.09037192910909653, + "learning_rate": 1.4332023329745426e-05, + "loss": 8.8517, + "step": 104820 + }, + { + "epoch": 0.5235087018402457, + "grad_norm": 0.087149478495121, + "learning_rate": 1.4330521414803876e-05, + "loss": 8.8782, + "step": 104830 + }, + { + "epoch": 0.5235586406651851, + "grad_norm": 0.10001854598522186, + "learning_rate": 1.4329019499862324e-05, + "loss": 8.8626, + "step": 104840 + }, + { + "epoch": 0.5236085794901246, + "grad_norm": 0.09408778697252274, + "learning_rate": 1.4327517584920775e-05, + "loss": 8.8606, + "step": 104850 + }, + { + "epoch": 0.523658518315064, + "grad_norm": 0.092310830950737, + "learning_rate": 1.4326015669979223e-05, + "loss": 8.8692, + "step": 104860 + }, + { + "epoch": 0.5237084571400035, + "grad_norm": 0.0982789471745491, + "learning_rate": 1.4324513755037673e-05, + "loss": 8.8585, + "step": 104870 + }, + { + "epoch": 0.5237583959649429, + "grad_norm": 0.09160380810499191, + "learning_rate": 1.4323011840096124e-05, + "loss": 8.8631, + "step": 104880 + }, + { + "epoch": 0.5238083347898824, + "grad_norm": 0.09572508931159973, + "learning_rate": 1.4321509925154572e-05, + "loss": 8.8558, + "step": 104890 + }, + { + "epoch": 0.5238582736148218, + "grad_norm": 0.09284794330596924, + "learning_rate": 1.4320008010213022e-05, + "loss": 8.8739, + "step": 104900 + }, + { + "epoch": 0.5239082124397613, + "grad_norm": 0.09144847840070724, + "learning_rate": 1.431850609527147e-05, + "loss": 8.8543, + "step": 104910 + }, + { + "epoch": 0.5239581512647007, + "grad_norm": 0.10097398608922958, + "learning_rate": 1.431700418032992e-05, + "loss": 8.8667, + "step": 104920 + }, + { + "epoch": 0.5240080900896402, + "grad_norm": 0.10776495188474655, + "learning_rate": 1.4315502265388371e-05, + "loss": 8.8659, + "step": 104930 + }, + { + "epoch": 0.5240580289145796, + "grad_norm": 0.08633417636156082, + "learning_rate": 1.4314000350446821e-05, + "loss": 8.8753, + "step": 104940 + }, + { + "epoch": 0.5241079677395191, + "grad_norm": 0.08752107620239258, + "learning_rate": 1.431249843550527e-05, + "loss": 8.8829, + "step": 104950 + }, + { + "epoch": 0.5241579065644585, + "grad_norm": 0.08872366696596146, + "learning_rate": 1.4310996520563718e-05, + "loss": 8.862, + "step": 104960 + }, + { + "epoch": 0.524207845389398, + "grad_norm": 0.09275726228952408, + "learning_rate": 1.4309494605622168e-05, + "loss": 8.8588, + "step": 104970 + }, + { + "epoch": 0.5242577842143374, + "grad_norm": 0.09212324768304825, + "learning_rate": 1.4307992690680619e-05, + "loss": 8.8612, + "step": 104980 + }, + { + "epoch": 0.5243077230392769, + "grad_norm": 0.09363345056772232, + "learning_rate": 1.4306490775739069e-05, + "loss": 8.8581, + "step": 104990 + }, + { + "epoch": 0.5243576618642163, + "grad_norm": 0.09286009520292282, + "learning_rate": 1.4304988860797517e-05, + "loss": 8.8635, + "step": 105000 + }, + { + "epoch": 0.5244076006891558, + "grad_norm": 0.0911487564444542, + "learning_rate": 1.4303486945855966e-05, + "loss": 8.8541, + "step": 105010 + }, + { + "epoch": 0.5244575395140952, + "grad_norm": 0.08649508655071259, + "learning_rate": 1.4301985030914416e-05, + "loss": 8.8619, + "step": 105020 + }, + { + "epoch": 0.5245074783390347, + "grad_norm": 0.09261025488376617, + "learning_rate": 1.4300483115972866e-05, + "loss": 8.8467, + "step": 105030 + }, + { + "epoch": 0.5245574171639741, + "grad_norm": 0.0908532589673996, + "learning_rate": 1.4298981201031316e-05, + "loss": 8.8492, + "step": 105040 + }, + { + "epoch": 0.5246073559889136, + "grad_norm": 0.09164856374263763, + "learning_rate": 1.4297479286089765e-05, + "loss": 8.863, + "step": 105050 + }, + { + "epoch": 0.524657294813853, + "grad_norm": 0.09361206740140915, + "learning_rate": 1.4295977371148213e-05, + "loss": 8.8672, + "step": 105060 + }, + { + "epoch": 0.5247072336387925, + "grad_norm": 0.09860822558403015, + "learning_rate": 1.4294475456206663e-05, + "loss": 8.8629, + "step": 105070 + }, + { + "epoch": 0.5247571724637319, + "grad_norm": 0.09220654517412186, + "learning_rate": 1.4292973541265114e-05, + "loss": 8.8683, + "step": 105080 + }, + { + "epoch": 0.5248071112886714, + "grad_norm": 0.09145854413509369, + "learning_rate": 1.4291471626323564e-05, + "loss": 8.8745, + "step": 105090 + }, + { + "epoch": 0.5248570501136108, + "grad_norm": 0.09256047010421753, + "learning_rate": 1.4289969711382014e-05, + "loss": 8.8524, + "step": 105100 + }, + { + "epoch": 0.5249069889385503, + "grad_norm": 0.09105725586414337, + "learning_rate": 1.428846779644046e-05, + "loss": 8.8741, + "step": 105110 + }, + { + "epoch": 0.5249569277634897, + "grad_norm": 0.09319113194942474, + "learning_rate": 1.428696588149891e-05, + "loss": 8.8626, + "step": 105120 + }, + { + "epoch": 0.5250068665884292, + "grad_norm": 0.09418758749961853, + "learning_rate": 1.4285463966557361e-05, + "loss": 8.8647, + "step": 105130 + }, + { + "epoch": 0.5250568054133686, + "grad_norm": 0.09596774727106094, + "learning_rate": 1.4283962051615811e-05, + "loss": 8.8574, + "step": 105140 + }, + { + "epoch": 0.525106744238308, + "grad_norm": 0.09476704150438309, + "learning_rate": 1.4282460136674261e-05, + "loss": 8.8575, + "step": 105150 + }, + { + "epoch": 0.5251566830632475, + "grad_norm": 0.09615634381771088, + "learning_rate": 1.4280958221732708e-05, + "loss": 8.8502, + "step": 105160 + }, + { + "epoch": 0.525206621888187, + "grad_norm": 0.09071250259876251, + "learning_rate": 1.4279456306791158e-05, + "loss": 8.8623, + "step": 105170 + }, + { + "epoch": 0.5252565607131264, + "grad_norm": 0.08948079496622086, + "learning_rate": 1.4277954391849609e-05, + "loss": 8.8616, + "step": 105180 + }, + { + "epoch": 0.5253064995380659, + "grad_norm": 0.09837137907743454, + "learning_rate": 1.4276452476908059e-05, + "loss": 8.856, + "step": 105190 + }, + { + "epoch": 0.5253564383630053, + "grad_norm": 0.09280554950237274, + "learning_rate": 1.4274950561966509e-05, + "loss": 8.8634, + "step": 105200 + }, + { + "epoch": 0.5254063771879448, + "grad_norm": 0.09170987457036972, + "learning_rate": 1.4273448647024956e-05, + "loss": 8.8582, + "step": 105210 + }, + { + "epoch": 0.5254563160128842, + "grad_norm": 0.09427595138549805, + "learning_rate": 1.4271946732083406e-05, + "loss": 8.8591, + "step": 105220 + }, + { + "epoch": 0.5255062548378236, + "grad_norm": 0.08826853334903717, + "learning_rate": 1.4270444817141856e-05, + "loss": 8.8695, + "step": 105230 + }, + { + "epoch": 0.5255561936627631, + "grad_norm": 0.09086894989013672, + "learning_rate": 1.4268942902200306e-05, + "loss": 8.8574, + "step": 105240 + }, + { + "epoch": 0.5256061324877026, + "grad_norm": 0.09478379786014557, + "learning_rate": 1.4267440987258756e-05, + "loss": 8.8617, + "step": 105250 + }, + { + "epoch": 0.525656071312642, + "grad_norm": 0.09793008863925934, + "learning_rate": 1.4265939072317205e-05, + "loss": 8.8654, + "step": 105260 + }, + { + "epoch": 0.5257060101375814, + "grad_norm": 0.09823063015937805, + "learning_rate": 1.4264437157375653e-05, + "loss": 8.8549, + "step": 105270 + }, + { + "epoch": 0.5257559489625209, + "grad_norm": 0.08841749280691147, + "learning_rate": 1.4262935242434104e-05, + "loss": 8.865, + "step": 105280 + }, + { + "epoch": 0.5258058877874604, + "grad_norm": 0.09652768075466156, + "learning_rate": 1.4261433327492554e-05, + "loss": 8.8668, + "step": 105290 + }, + { + "epoch": 0.5258558266123998, + "grad_norm": 0.09202666580677032, + "learning_rate": 1.4259931412551004e-05, + "loss": 8.8702, + "step": 105300 + }, + { + "epoch": 0.5259057654373392, + "grad_norm": 0.09303510934114456, + "learning_rate": 1.4258429497609452e-05, + "loss": 8.8646, + "step": 105310 + }, + { + "epoch": 0.5259557042622787, + "grad_norm": 0.09391511976718903, + "learning_rate": 1.4256927582667901e-05, + "loss": 8.8435, + "step": 105320 + }, + { + "epoch": 0.5260056430872182, + "grad_norm": 0.10001968592405319, + "learning_rate": 1.4255425667726351e-05, + "loss": 8.8473, + "step": 105330 + }, + { + "epoch": 0.5260555819121576, + "grad_norm": 0.09100840985774994, + "learning_rate": 1.4253923752784801e-05, + "loss": 8.8602, + "step": 105340 + }, + { + "epoch": 0.526105520737097, + "grad_norm": 0.0969325453042984, + "learning_rate": 1.4252421837843251e-05, + "loss": 8.8614, + "step": 105350 + }, + { + "epoch": 0.5261554595620365, + "grad_norm": 0.09638604521751404, + "learning_rate": 1.42509199229017e-05, + "loss": 8.8666, + "step": 105360 + }, + { + "epoch": 0.526205398386976, + "grad_norm": 0.09278382360935211, + "learning_rate": 1.4249418007960148e-05, + "loss": 8.8661, + "step": 105370 + }, + { + "epoch": 0.5262553372119154, + "grad_norm": 0.09172243624925613, + "learning_rate": 1.4247916093018599e-05, + "loss": 8.8472, + "step": 105380 + }, + { + "epoch": 0.5263052760368548, + "grad_norm": 0.09195327013731003, + "learning_rate": 1.4246414178077049e-05, + "loss": 8.8642, + "step": 105390 + }, + { + "epoch": 0.5263552148617943, + "grad_norm": 0.0913047194480896, + "learning_rate": 1.4244912263135499e-05, + "loss": 8.8626, + "step": 105400 + }, + { + "epoch": 0.5264051536867338, + "grad_norm": 0.09170885384082794, + "learning_rate": 1.4243410348193947e-05, + "loss": 8.8508, + "step": 105410 + }, + { + "epoch": 0.5264550925116732, + "grad_norm": 0.0922761857509613, + "learning_rate": 1.4241908433252398e-05, + "loss": 8.8671, + "step": 105420 + }, + { + "epoch": 0.5265050313366126, + "grad_norm": 0.0899137631058693, + "learning_rate": 1.4240406518310846e-05, + "loss": 8.866, + "step": 105430 + }, + { + "epoch": 0.5265549701615521, + "grad_norm": 0.10037946701049805, + "learning_rate": 1.4238904603369296e-05, + "loss": 8.8532, + "step": 105440 + }, + { + "epoch": 0.5266049089864916, + "grad_norm": 0.08979129791259766, + "learning_rate": 1.4237402688427746e-05, + "loss": 8.8657, + "step": 105450 + }, + { + "epoch": 0.526654847811431, + "grad_norm": 0.08900034427642822, + "learning_rate": 1.4235900773486195e-05, + "loss": 8.8669, + "step": 105460 + }, + { + "epoch": 0.5267047866363704, + "grad_norm": 0.08913334459066391, + "learning_rate": 1.4234398858544645e-05, + "loss": 8.8633, + "step": 105470 + }, + { + "epoch": 0.5267547254613099, + "grad_norm": 0.09597323089838028, + "learning_rate": 1.4232896943603094e-05, + "loss": 8.8568, + "step": 105480 + }, + { + "epoch": 0.5268046642862494, + "grad_norm": 0.09855466336011887, + "learning_rate": 1.4231395028661544e-05, + "loss": 8.8733, + "step": 105490 + }, + { + "epoch": 0.5268546031111888, + "grad_norm": 0.096231609582901, + "learning_rate": 1.4229893113719994e-05, + "loss": 8.8701, + "step": 105500 + }, + { + "epoch": 0.5269045419361282, + "grad_norm": 0.09357398748397827, + "learning_rate": 1.4228391198778442e-05, + "loss": 8.8504, + "step": 105510 + }, + { + "epoch": 0.5269544807610677, + "grad_norm": 0.09753704071044922, + "learning_rate": 1.4226889283836893e-05, + "loss": 8.8658, + "step": 105520 + }, + { + "epoch": 0.5270044195860072, + "grad_norm": 0.08956428617238998, + "learning_rate": 1.4225387368895341e-05, + "loss": 8.8621, + "step": 105530 + }, + { + "epoch": 0.5270543584109466, + "grad_norm": 0.10666527599096298, + "learning_rate": 1.4223885453953791e-05, + "loss": 8.8537, + "step": 105540 + }, + { + "epoch": 0.527104297235886, + "grad_norm": 0.0884803831577301, + "learning_rate": 1.4222383539012241e-05, + "loss": 8.8577, + "step": 105550 + }, + { + "epoch": 0.5271542360608255, + "grad_norm": 0.0955420434474945, + "learning_rate": 1.422088162407069e-05, + "loss": 8.8548, + "step": 105560 + }, + { + "epoch": 0.527204174885765, + "grad_norm": 0.09400387853384018, + "learning_rate": 1.421937970912914e-05, + "loss": 8.8536, + "step": 105570 + }, + { + "epoch": 0.5272541137107044, + "grad_norm": 0.09268182516098022, + "learning_rate": 1.4217877794187589e-05, + "loss": 8.8589, + "step": 105580 + }, + { + "epoch": 0.5273040525356438, + "grad_norm": 0.09028761833906174, + "learning_rate": 1.4216375879246039e-05, + "loss": 8.8849, + "step": 105590 + }, + { + "epoch": 0.5273539913605833, + "grad_norm": 0.09481693059206009, + "learning_rate": 1.4214873964304489e-05, + "loss": 8.8536, + "step": 105600 + }, + { + "epoch": 0.5274039301855228, + "grad_norm": 0.09226229786872864, + "learning_rate": 1.4213372049362937e-05, + "loss": 8.8569, + "step": 105610 + }, + { + "epoch": 0.5274538690104622, + "grad_norm": 0.0896633192896843, + "learning_rate": 1.4211870134421388e-05, + "loss": 8.8696, + "step": 105620 + }, + { + "epoch": 0.5275038078354016, + "grad_norm": 0.09037711471319199, + "learning_rate": 1.4210368219479838e-05, + "loss": 8.8657, + "step": 105630 + }, + { + "epoch": 0.527553746660341, + "grad_norm": 0.09311304241418839, + "learning_rate": 1.4208866304538286e-05, + "loss": 8.8558, + "step": 105640 + }, + { + "epoch": 0.5276036854852806, + "grad_norm": 0.09499665349721909, + "learning_rate": 1.4207364389596736e-05, + "loss": 8.8671, + "step": 105650 + }, + { + "epoch": 0.52765362431022, + "grad_norm": 0.09343437850475311, + "learning_rate": 1.4205862474655185e-05, + "loss": 8.8469, + "step": 105660 + }, + { + "epoch": 0.5277035631351594, + "grad_norm": 0.09089982509613037, + "learning_rate": 1.4204360559713635e-05, + "loss": 8.8575, + "step": 105670 + }, + { + "epoch": 0.5277535019600988, + "grad_norm": 0.09557026624679565, + "learning_rate": 1.4202858644772085e-05, + "loss": 8.8554, + "step": 105680 + }, + { + "epoch": 0.5278034407850384, + "grad_norm": 0.09490370750427246, + "learning_rate": 1.4201356729830534e-05, + "loss": 8.8517, + "step": 105690 + }, + { + "epoch": 0.5278533796099778, + "grad_norm": 0.09626539051532745, + "learning_rate": 1.4199854814888984e-05, + "loss": 8.8529, + "step": 105700 + }, + { + "epoch": 0.5279033184349172, + "grad_norm": 0.09501594305038452, + "learning_rate": 1.4198352899947432e-05, + "loss": 8.8488, + "step": 105710 + }, + { + "epoch": 0.5279532572598566, + "grad_norm": 0.09240279346704483, + "learning_rate": 1.4196850985005883e-05, + "loss": 8.855, + "step": 105720 + }, + { + "epoch": 0.5280031960847962, + "grad_norm": 0.09040988236665726, + "learning_rate": 1.4195349070064333e-05, + "loss": 8.8664, + "step": 105730 + }, + { + "epoch": 0.5280531349097356, + "grad_norm": 0.0923992171883583, + "learning_rate": 1.4193847155122781e-05, + "loss": 8.8492, + "step": 105740 + }, + { + "epoch": 0.528103073734675, + "grad_norm": 0.09252621978521347, + "learning_rate": 1.4192345240181231e-05, + "loss": 8.8525, + "step": 105750 + }, + { + "epoch": 0.5281530125596144, + "grad_norm": 0.09642771631479263, + "learning_rate": 1.419084332523968e-05, + "loss": 8.8541, + "step": 105760 + }, + { + "epoch": 0.528202951384554, + "grad_norm": 0.09207062423229218, + "learning_rate": 1.418934141029813e-05, + "loss": 8.8631, + "step": 105770 + }, + { + "epoch": 0.5282528902094934, + "grad_norm": 0.09391027688980103, + "learning_rate": 1.418783949535658e-05, + "loss": 8.8586, + "step": 105780 + }, + { + "epoch": 0.5283028290344328, + "grad_norm": 0.09609290212392807, + "learning_rate": 1.418633758041503e-05, + "loss": 8.8679, + "step": 105790 + }, + { + "epoch": 0.5283527678593722, + "grad_norm": 0.0963875949382782, + "learning_rate": 1.4184835665473479e-05, + "loss": 8.8622, + "step": 105800 + }, + { + "epoch": 0.5284027066843117, + "grad_norm": 0.08848047256469727, + "learning_rate": 1.4183333750531929e-05, + "loss": 8.8517, + "step": 105810 + }, + { + "epoch": 0.5284526455092512, + "grad_norm": 0.08758027851581573, + "learning_rate": 1.4181831835590378e-05, + "loss": 8.8608, + "step": 105820 + }, + { + "epoch": 0.5285025843341906, + "grad_norm": 0.09200868755578995, + "learning_rate": 1.4180329920648828e-05, + "loss": 8.8544, + "step": 105830 + }, + { + "epoch": 0.52855252315913, + "grad_norm": 0.09613315761089325, + "learning_rate": 1.4178828005707278e-05, + "loss": 8.8563, + "step": 105840 + }, + { + "epoch": 0.5286024619840695, + "grad_norm": 0.09787869453430176, + "learning_rate": 1.4177326090765726e-05, + "loss": 8.8435, + "step": 105850 + }, + { + "epoch": 0.528652400809009, + "grad_norm": 0.09556427597999573, + "learning_rate": 1.4175824175824177e-05, + "loss": 8.861, + "step": 105860 + }, + { + "epoch": 0.5287023396339484, + "grad_norm": 0.09230974316596985, + "learning_rate": 1.4174322260882625e-05, + "loss": 8.8409, + "step": 105870 + }, + { + "epoch": 0.5287522784588878, + "grad_norm": 0.0904126837849617, + "learning_rate": 1.4172820345941075e-05, + "loss": 8.8483, + "step": 105880 + }, + { + "epoch": 0.5288022172838273, + "grad_norm": 0.09018577635288239, + "learning_rate": 1.4171318430999525e-05, + "loss": 8.8603, + "step": 105890 + }, + { + "epoch": 0.5288521561087668, + "grad_norm": 0.09044212102890015, + "learning_rate": 1.4169816516057974e-05, + "loss": 8.845, + "step": 105900 + }, + { + "epoch": 0.5289020949337062, + "grad_norm": 0.08735643327236176, + "learning_rate": 1.4168314601116424e-05, + "loss": 8.8518, + "step": 105910 + }, + { + "epoch": 0.5289520337586456, + "grad_norm": 0.08877028524875641, + "learning_rate": 1.4166812686174873e-05, + "loss": 8.8487, + "step": 105920 + }, + { + "epoch": 0.5290019725835851, + "grad_norm": 0.09317787736654282, + "learning_rate": 1.4165310771233323e-05, + "loss": 8.8551, + "step": 105930 + }, + { + "epoch": 0.5290519114085246, + "grad_norm": 0.09516378492116928, + "learning_rate": 1.4163808856291773e-05, + "loss": 8.8644, + "step": 105940 + }, + { + "epoch": 0.529101850233464, + "grad_norm": 0.08973415195941925, + "learning_rate": 1.4162306941350223e-05, + "loss": 8.876, + "step": 105950 + }, + { + "epoch": 0.5291517890584034, + "grad_norm": 0.09267198294401169, + "learning_rate": 1.4160805026408672e-05, + "loss": 8.8482, + "step": 105960 + }, + { + "epoch": 0.5292017278833429, + "grad_norm": 0.0925462618470192, + "learning_rate": 1.415930311146712e-05, + "loss": 8.8538, + "step": 105970 + }, + { + "epoch": 0.5292516667082824, + "grad_norm": 0.09187739342451096, + "learning_rate": 1.415780119652557e-05, + "loss": 8.8542, + "step": 105980 + }, + { + "epoch": 0.5293016055332218, + "grad_norm": 0.08892559260129929, + "learning_rate": 1.415629928158402e-05, + "loss": 8.8422, + "step": 105990 + }, + { + "epoch": 0.5293515443581612, + "grad_norm": 0.09179945290088654, + "learning_rate": 1.415479736664247e-05, + "loss": 8.8534, + "step": 106000 + }, + { + "epoch": 0.5294014831831007, + "grad_norm": 0.09214461594820023, + "learning_rate": 1.4153295451700919e-05, + "loss": 8.8695, + "step": 106010 + }, + { + "epoch": 0.5294514220080402, + "grad_norm": 0.09491454809904099, + "learning_rate": 1.4151793536759368e-05, + "loss": 8.8725, + "step": 106020 + }, + { + "epoch": 0.5295013608329796, + "grad_norm": 0.09679799526929855, + "learning_rate": 1.4150291621817818e-05, + "loss": 8.845, + "step": 106030 + }, + { + "epoch": 0.529551299657919, + "grad_norm": 0.08733250945806503, + "learning_rate": 1.4148789706876268e-05, + "loss": 8.8541, + "step": 106040 + }, + { + "epoch": 0.5296012384828584, + "grad_norm": 0.09057143330574036, + "learning_rate": 1.4147287791934718e-05, + "loss": 8.8618, + "step": 106050 + }, + { + "epoch": 0.529651177307798, + "grad_norm": 0.09323775768280029, + "learning_rate": 1.4145785876993167e-05, + "loss": 8.8594, + "step": 106060 + }, + { + "epoch": 0.5297011161327374, + "grad_norm": 0.09307857602834702, + "learning_rate": 1.4144283962051615e-05, + "loss": 8.8627, + "step": 106070 + }, + { + "epoch": 0.5297510549576768, + "grad_norm": 0.09620136767625809, + "learning_rate": 1.4142782047110065e-05, + "loss": 8.8487, + "step": 106080 + }, + { + "epoch": 0.5298009937826162, + "grad_norm": 0.08902984112501144, + "learning_rate": 1.4141280132168515e-05, + "loss": 8.866, + "step": 106090 + }, + { + "epoch": 0.5298509326075558, + "grad_norm": 0.09401166439056396, + "learning_rate": 1.4139778217226966e-05, + "loss": 8.8448, + "step": 106100 + }, + { + "epoch": 0.5299008714324952, + "grad_norm": 0.09507770091295242, + "learning_rate": 1.4138276302285416e-05, + "loss": 8.852, + "step": 106110 + }, + { + "epoch": 0.5299508102574346, + "grad_norm": 0.09667683392763138, + "learning_rate": 1.4136774387343863e-05, + "loss": 8.8424, + "step": 106120 + }, + { + "epoch": 0.530000749082374, + "grad_norm": 0.09889981150627136, + "learning_rate": 1.4135272472402313e-05, + "loss": 8.8661, + "step": 106130 + }, + { + "epoch": 0.5300506879073136, + "grad_norm": 0.09269087016582489, + "learning_rate": 1.4133770557460763e-05, + "loss": 8.8558, + "step": 106140 + }, + { + "epoch": 0.530100626732253, + "grad_norm": 0.09363644570112228, + "learning_rate": 1.4132268642519213e-05, + "loss": 8.8576, + "step": 106150 + }, + { + "epoch": 0.5301505655571924, + "grad_norm": 0.09286314994096756, + "learning_rate": 1.4130766727577663e-05, + "loss": 8.8647, + "step": 106160 + }, + { + "epoch": 0.5302005043821318, + "grad_norm": 0.09268610924482346, + "learning_rate": 1.412926481263611e-05, + "loss": 8.8578, + "step": 106170 + }, + { + "epoch": 0.5302504432070714, + "grad_norm": 0.09902572631835938, + "learning_rate": 1.412776289769456e-05, + "loss": 8.8533, + "step": 106180 + }, + { + "epoch": 0.5303003820320108, + "grad_norm": 0.093776173889637, + "learning_rate": 1.412626098275301e-05, + "loss": 8.8647, + "step": 106190 + }, + { + "epoch": 0.5303503208569502, + "grad_norm": 0.0955788791179657, + "learning_rate": 1.412475906781146e-05, + "loss": 8.8529, + "step": 106200 + }, + { + "epoch": 0.5304002596818896, + "grad_norm": 0.09192150086164474, + "learning_rate": 1.412325715286991e-05, + "loss": 8.8548, + "step": 106210 + }, + { + "epoch": 0.5304501985068292, + "grad_norm": 0.09892907738685608, + "learning_rate": 1.4121755237928358e-05, + "loss": 8.8589, + "step": 106220 + }, + { + "epoch": 0.5305001373317686, + "grad_norm": 0.09268810600042343, + "learning_rate": 1.4120253322986808e-05, + "loss": 8.8518, + "step": 106230 + }, + { + "epoch": 0.530550076156708, + "grad_norm": 0.09231319278478622, + "learning_rate": 1.4118751408045258e-05, + "loss": 8.8329, + "step": 106240 + }, + { + "epoch": 0.5306000149816474, + "grad_norm": 0.08841013163328171, + "learning_rate": 1.4117249493103708e-05, + "loss": 8.8476, + "step": 106250 + }, + { + "epoch": 0.530649953806587, + "grad_norm": 0.09667849540710449, + "learning_rate": 1.4115747578162158e-05, + "loss": 8.8692, + "step": 106260 + }, + { + "epoch": 0.5306998926315264, + "grad_norm": 0.0873061865568161, + "learning_rate": 1.4114245663220607e-05, + "loss": 8.8684, + "step": 106270 + }, + { + "epoch": 0.5307498314564658, + "grad_norm": 0.0922415629029274, + "learning_rate": 1.4112743748279055e-05, + "loss": 8.8375, + "step": 106280 + }, + { + "epoch": 0.5307997702814052, + "grad_norm": 0.09075237065553665, + "learning_rate": 1.4111241833337505e-05, + "loss": 8.8494, + "step": 106290 + }, + { + "epoch": 0.5308497091063448, + "grad_norm": 0.09417222440242767, + "learning_rate": 1.4109739918395956e-05, + "loss": 8.8416, + "step": 106300 + }, + { + "epoch": 0.5308996479312842, + "grad_norm": 0.09558428823947906, + "learning_rate": 1.4108238003454406e-05, + "loss": 8.8383, + "step": 106310 + }, + { + "epoch": 0.5309495867562236, + "grad_norm": 0.10185667872428894, + "learning_rate": 1.4106736088512854e-05, + "loss": 8.8451, + "step": 106320 + }, + { + "epoch": 0.530999525581163, + "grad_norm": 0.09136248379945755, + "learning_rate": 1.4105234173571303e-05, + "loss": 8.8492, + "step": 106330 + }, + { + "epoch": 0.5310494644061026, + "grad_norm": 0.09055881202220917, + "learning_rate": 1.4103732258629753e-05, + "loss": 8.8517, + "step": 106340 + }, + { + "epoch": 0.531099403231042, + "grad_norm": 0.08744243532419205, + "learning_rate": 1.4102230343688203e-05, + "loss": 8.8688, + "step": 106350 + }, + { + "epoch": 0.5311493420559814, + "grad_norm": 0.0952538251876831, + "learning_rate": 1.4100728428746653e-05, + "loss": 8.8506, + "step": 106360 + }, + { + "epoch": 0.5311992808809208, + "grad_norm": 0.0906098335981369, + "learning_rate": 1.4099226513805102e-05, + "loss": 8.8518, + "step": 106370 + }, + { + "epoch": 0.5312492197058604, + "grad_norm": 0.0909523218870163, + "learning_rate": 1.409772459886355e-05, + "loss": 8.8542, + "step": 106380 + }, + { + "epoch": 0.5312991585307998, + "grad_norm": 0.0970349833369255, + "learning_rate": 1.4096222683922e-05, + "loss": 8.8624, + "step": 106390 + }, + { + "epoch": 0.5313490973557392, + "grad_norm": 0.09205695241689682, + "learning_rate": 1.409472076898045e-05, + "loss": 8.8506, + "step": 106400 + }, + { + "epoch": 0.5313990361806786, + "grad_norm": 0.09964334964752197, + "learning_rate": 1.40932188540389e-05, + "loss": 8.8655, + "step": 106410 + }, + { + "epoch": 0.5314489750056182, + "grad_norm": 0.0929349958896637, + "learning_rate": 1.409171693909735e-05, + "loss": 8.8504, + "step": 106420 + }, + { + "epoch": 0.5314989138305576, + "grad_norm": 0.08960101008415222, + "learning_rate": 1.40902150241558e-05, + "loss": 8.8597, + "step": 106430 + }, + { + "epoch": 0.531548852655497, + "grad_norm": 0.09919091314077377, + "learning_rate": 1.4088713109214248e-05, + "loss": 8.8413, + "step": 106440 + }, + { + "epoch": 0.5315987914804364, + "grad_norm": 0.10037015378475189, + "learning_rate": 1.4087211194272698e-05, + "loss": 8.8489, + "step": 106450 + }, + { + "epoch": 0.531648730305376, + "grad_norm": 0.09269418567419052, + "learning_rate": 1.4085709279331148e-05, + "loss": 8.8392, + "step": 106460 + }, + { + "epoch": 0.5316986691303154, + "grad_norm": 0.09641455113887787, + "learning_rate": 1.4084207364389597e-05, + "loss": 8.8553, + "step": 106470 + }, + { + "epoch": 0.5317486079552548, + "grad_norm": 0.09411516040563583, + "learning_rate": 1.4082705449448047e-05, + "loss": 8.845, + "step": 106480 + }, + { + "epoch": 0.5317985467801942, + "grad_norm": 0.08999986946582794, + "learning_rate": 1.4081203534506495e-05, + "loss": 8.8416, + "step": 106490 + }, + { + "epoch": 0.5318484856051338, + "grad_norm": 0.0922091007232666, + "learning_rate": 1.4079701619564946e-05, + "loss": 8.8469, + "step": 106500 + }, + { + "epoch": 0.5318984244300732, + "grad_norm": 0.09420941770076752, + "learning_rate": 1.4078199704623396e-05, + "loss": 8.862, + "step": 106510 + }, + { + "epoch": 0.5319483632550126, + "grad_norm": 0.09016049653291702, + "learning_rate": 1.4076697789681844e-05, + "loss": 8.8476, + "step": 106520 + }, + { + "epoch": 0.531998302079952, + "grad_norm": 0.104548379778862, + "learning_rate": 1.4075195874740295e-05, + "loss": 8.8476, + "step": 106530 + }, + { + "epoch": 0.5320482409048916, + "grad_norm": 0.08957797288894653, + "learning_rate": 1.4073693959798743e-05, + "loss": 8.856, + "step": 106540 + }, + { + "epoch": 0.532098179729831, + "grad_norm": 0.09814672917127609, + "learning_rate": 1.4072192044857193e-05, + "loss": 8.8517, + "step": 106550 + }, + { + "epoch": 0.5321481185547704, + "grad_norm": 0.09064110368490219, + "learning_rate": 1.4070690129915643e-05, + "loss": 8.8456, + "step": 106560 + }, + { + "epoch": 0.5321980573797098, + "grad_norm": 0.09723041951656342, + "learning_rate": 1.4069188214974092e-05, + "loss": 8.8518, + "step": 106570 + }, + { + "epoch": 0.5322479962046494, + "grad_norm": 0.09290885925292969, + "learning_rate": 1.4067686300032542e-05, + "loss": 8.8574, + "step": 106580 + }, + { + "epoch": 0.5322979350295888, + "grad_norm": 0.09443046897649765, + "learning_rate": 1.4066184385090992e-05, + "loss": 8.8631, + "step": 106590 + }, + { + "epoch": 0.5323478738545282, + "grad_norm": 0.09200415760278702, + "learning_rate": 1.406468247014944e-05, + "loss": 8.8491, + "step": 106600 + }, + { + "epoch": 0.5323978126794676, + "grad_norm": 0.09204615652561188, + "learning_rate": 1.4063180555207891e-05, + "loss": 8.8424, + "step": 106610 + }, + { + "epoch": 0.5324477515044072, + "grad_norm": 0.0978984385728836, + "learning_rate": 1.406167864026634e-05, + "loss": 8.8551, + "step": 106620 + }, + { + "epoch": 0.5324976903293466, + "grad_norm": 0.10067642480134964, + "learning_rate": 1.406017672532479e-05, + "loss": 8.8473, + "step": 106630 + }, + { + "epoch": 0.532547629154286, + "grad_norm": 0.09151472896337509, + "learning_rate": 1.405867481038324e-05, + "loss": 8.8594, + "step": 106640 + }, + { + "epoch": 0.5325975679792254, + "grad_norm": 0.09709201753139496, + "learning_rate": 1.4057172895441688e-05, + "loss": 8.8443, + "step": 106650 + }, + { + "epoch": 0.532647506804165, + "grad_norm": 0.09483177214860916, + "learning_rate": 1.4055670980500138e-05, + "loss": 8.8499, + "step": 106660 + }, + { + "epoch": 0.5326974456291044, + "grad_norm": 0.09062449634075165, + "learning_rate": 1.4054169065558587e-05, + "loss": 8.8465, + "step": 106670 + }, + { + "epoch": 0.5327473844540438, + "grad_norm": 0.08802146464586258, + "learning_rate": 1.4052667150617037e-05, + "loss": 8.8475, + "step": 106680 + }, + { + "epoch": 0.5327973232789832, + "grad_norm": 0.09027039259672165, + "learning_rate": 1.4051165235675487e-05, + "loss": 8.8396, + "step": 106690 + }, + { + "epoch": 0.5328472621039227, + "grad_norm": 0.08948853611946106, + "learning_rate": 1.4049663320733936e-05, + "loss": 8.855, + "step": 106700 + }, + { + "epoch": 0.5328972009288622, + "grad_norm": 0.09329523891210556, + "learning_rate": 1.4048161405792386e-05, + "loss": 8.8439, + "step": 106710 + }, + { + "epoch": 0.5329471397538016, + "grad_norm": 0.09256135672330856, + "learning_rate": 1.4046659490850834e-05, + "loss": 8.8441, + "step": 106720 + }, + { + "epoch": 0.532997078578741, + "grad_norm": 0.0897359699010849, + "learning_rate": 1.4045157575909285e-05, + "loss": 8.8476, + "step": 106730 + }, + { + "epoch": 0.5330470174036805, + "grad_norm": 0.10047424584627151, + "learning_rate": 1.4043655660967735e-05, + "loss": 8.858, + "step": 106740 + }, + { + "epoch": 0.53309695622862, + "grad_norm": 0.09750477969646454, + "learning_rate": 1.4042153746026185e-05, + "loss": 8.8484, + "step": 106750 + }, + { + "epoch": 0.5331468950535594, + "grad_norm": 0.09380090236663818, + "learning_rate": 1.4040651831084633e-05, + "loss": 8.8272, + "step": 106760 + }, + { + "epoch": 0.5331968338784988, + "grad_norm": 0.09145848453044891, + "learning_rate": 1.4039149916143082e-05, + "loss": 8.8505, + "step": 106770 + }, + { + "epoch": 0.5332467727034383, + "grad_norm": 0.09653259068727493, + "learning_rate": 1.4037648001201532e-05, + "loss": 8.837, + "step": 106780 + }, + { + "epoch": 0.5332967115283778, + "grad_norm": 0.09636209160089493, + "learning_rate": 1.4036146086259982e-05, + "loss": 8.8567, + "step": 106790 + }, + { + "epoch": 0.5333466503533172, + "grad_norm": 0.09500285983085632, + "learning_rate": 1.4034644171318432e-05, + "loss": 8.8523, + "step": 106800 + }, + { + "epoch": 0.5333965891782566, + "grad_norm": 0.09158430993556976, + "learning_rate": 1.4033142256376881e-05, + "loss": 8.8477, + "step": 106810 + }, + { + "epoch": 0.533446528003196, + "grad_norm": 0.09533253312110901, + "learning_rate": 1.403164034143533e-05, + "loss": 8.8366, + "step": 106820 + }, + { + "epoch": 0.5334964668281356, + "grad_norm": 0.09380104392766953, + "learning_rate": 1.403013842649378e-05, + "loss": 8.8238, + "step": 106830 + }, + { + "epoch": 0.533546405653075, + "grad_norm": 0.09858004003763199, + "learning_rate": 1.402863651155223e-05, + "loss": 8.8324, + "step": 106840 + }, + { + "epoch": 0.5335963444780144, + "grad_norm": 0.09267483651638031, + "learning_rate": 1.402713459661068e-05, + "loss": 8.8385, + "step": 106850 + }, + { + "epoch": 0.5336462833029538, + "grad_norm": 0.0965743437409401, + "learning_rate": 1.4025632681669128e-05, + "loss": 8.8602, + "step": 106860 + }, + { + "epoch": 0.5336962221278934, + "grad_norm": 0.09583596140146255, + "learning_rate": 1.4024130766727577e-05, + "loss": 8.8519, + "step": 106870 + }, + { + "epoch": 0.5337461609528328, + "grad_norm": 0.095204658806324, + "learning_rate": 1.4022628851786027e-05, + "loss": 8.8489, + "step": 106880 + }, + { + "epoch": 0.5337960997777722, + "grad_norm": 0.09433889389038086, + "learning_rate": 1.4021126936844477e-05, + "loss": 8.8466, + "step": 106890 + }, + { + "epoch": 0.5338460386027116, + "grad_norm": 0.09470517933368683, + "learning_rate": 1.4019625021902927e-05, + "loss": 8.8563, + "step": 106900 + }, + { + "epoch": 0.5338959774276512, + "grad_norm": 0.09337898343801498, + "learning_rate": 1.4018123106961378e-05, + "loss": 8.842, + "step": 106910 + }, + { + "epoch": 0.5339459162525906, + "grad_norm": 0.09626980870962143, + "learning_rate": 1.4016621192019824e-05, + "loss": 8.8568, + "step": 106920 + }, + { + "epoch": 0.53399585507753, + "grad_norm": 0.09709392488002777, + "learning_rate": 1.4015119277078275e-05, + "loss": 8.8477, + "step": 106930 + }, + { + "epoch": 0.5340457939024694, + "grad_norm": 0.09491003304719925, + "learning_rate": 1.4013617362136725e-05, + "loss": 8.8506, + "step": 106940 + }, + { + "epoch": 0.534095732727409, + "grad_norm": 0.09251262992620468, + "learning_rate": 1.4012115447195175e-05, + "loss": 8.8449, + "step": 106950 + }, + { + "epoch": 0.5341456715523484, + "grad_norm": 0.0920642837882042, + "learning_rate": 1.4010613532253625e-05, + "loss": 8.8396, + "step": 106960 + }, + { + "epoch": 0.5341956103772878, + "grad_norm": 0.09323357790708542, + "learning_rate": 1.4009111617312072e-05, + "loss": 8.8498, + "step": 106970 + }, + { + "epoch": 0.5342455492022272, + "grad_norm": 0.09278106689453125, + "learning_rate": 1.4007609702370522e-05, + "loss": 8.8429, + "step": 106980 + }, + { + "epoch": 0.5342954880271668, + "grad_norm": 0.09746766835451126, + "learning_rate": 1.4006107787428972e-05, + "loss": 8.8352, + "step": 106990 + }, + { + "epoch": 0.5343454268521062, + "grad_norm": 0.10587667673826218, + "learning_rate": 1.4004605872487422e-05, + "loss": 8.8457, + "step": 107000 + }, + { + "epoch": 0.5343953656770456, + "grad_norm": 0.09702330827713013, + "learning_rate": 1.4003103957545873e-05, + "loss": 8.8515, + "step": 107010 + }, + { + "epoch": 0.534445304501985, + "grad_norm": 0.09104038029909134, + "learning_rate": 1.400160204260432e-05, + "loss": 8.847, + "step": 107020 + }, + { + "epoch": 0.5344952433269246, + "grad_norm": 0.08740893006324768, + "learning_rate": 1.400010012766277e-05, + "loss": 8.856, + "step": 107030 + }, + { + "epoch": 0.534545182151864, + "grad_norm": 0.09178484231233597, + "learning_rate": 1.399859821272122e-05, + "loss": 8.8457, + "step": 107040 + }, + { + "epoch": 0.5345951209768034, + "grad_norm": 0.09630772471427917, + "learning_rate": 1.399709629777967e-05, + "loss": 8.8488, + "step": 107050 + }, + { + "epoch": 0.5346450598017428, + "grad_norm": 0.09321489930152893, + "learning_rate": 1.399559438283812e-05, + "loss": 8.845, + "step": 107060 + }, + { + "epoch": 0.5346949986266823, + "grad_norm": 0.09223134070634842, + "learning_rate": 1.3994092467896569e-05, + "loss": 8.8505, + "step": 107070 + }, + { + "epoch": 0.5347449374516218, + "grad_norm": 0.09391648322343826, + "learning_rate": 1.3992590552955017e-05, + "loss": 8.8556, + "step": 107080 + }, + { + "epoch": 0.5347948762765612, + "grad_norm": 0.08899156749248505, + "learning_rate": 1.3991088638013467e-05, + "loss": 8.8542, + "step": 107090 + }, + { + "epoch": 0.5348448151015006, + "grad_norm": 0.09277118742465973, + "learning_rate": 1.3989586723071917e-05, + "loss": 8.8285, + "step": 107100 + }, + { + "epoch": 0.5348947539264401, + "grad_norm": 0.09124419838190079, + "learning_rate": 1.3988084808130368e-05, + "loss": 8.8385, + "step": 107110 + }, + { + "epoch": 0.5349446927513796, + "grad_norm": 0.08537127077579498, + "learning_rate": 1.3986582893188816e-05, + "loss": 8.8404, + "step": 107120 + }, + { + "epoch": 0.534994631576319, + "grad_norm": 0.09226098656654358, + "learning_rate": 1.3985080978247265e-05, + "loss": 8.8535, + "step": 107130 + }, + { + "epoch": 0.5350445704012584, + "grad_norm": 0.09150153398513794, + "learning_rate": 1.3983579063305715e-05, + "loss": 8.8599, + "step": 107140 + }, + { + "epoch": 0.535094509226198, + "grad_norm": 0.09004461765289307, + "learning_rate": 1.3982077148364165e-05, + "loss": 8.836, + "step": 107150 + }, + { + "epoch": 0.5351444480511374, + "grad_norm": 0.08848071843385696, + "learning_rate": 1.3980575233422615e-05, + "loss": 8.84, + "step": 107160 + }, + { + "epoch": 0.5351943868760768, + "grad_norm": 0.09761686623096466, + "learning_rate": 1.3979073318481064e-05, + "loss": 8.8489, + "step": 107170 + }, + { + "epoch": 0.5352443257010162, + "grad_norm": 0.09823184460401535, + "learning_rate": 1.3977571403539512e-05, + "loss": 8.8443, + "step": 107180 + }, + { + "epoch": 0.5352942645259557, + "grad_norm": 0.10020698606967926, + "learning_rate": 1.3976069488597962e-05, + "loss": 8.8401, + "step": 107190 + }, + { + "epoch": 0.5353442033508952, + "grad_norm": 0.08891124278306961, + "learning_rate": 1.3974567573656412e-05, + "loss": 8.8526, + "step": 107200 + }, + { + "epoch": 0.5353941421758346, + "grad_norm": 0.10143271088600159, + "learning_rate": 1.3973065658714863e-05, + "loss": 8.8288, + "step": 107210 + }, + { + "epoch": 0.535444081000774, + "grad_norm": 0.09501948952674866, + "learning_rate": 1.3971563743773311e-05, + "loss": 8.8283, + "step": 107220 + }, + { + "epoch": 0.5354940198257135, + "grad_norm": 0.09250184148550034, + "learning_rate": 1.3970061828831761e-05, + "loss": 8.8371, + "step": 107230 + }, + { + "epoch": 0.535543958650653, + "grad_norm": 0.10043402016162872, + "learning_rate": 1.396855991389021e-05, + "loss": 8.8402, + "step": 107240 + }, + { + "epoch": 0.5355938974755924, + "grad_norm": 0.0956840068101883, + "learning_rate": 1.396705799894866e-05, + "loss": 8.8494, + "step": 107250 + }, + { + "epoch": 0.5356438363005318, + "grad_norm": 0.09393565356731415, + "learning_rate": 1.396555608400711e-05, + "loss": 8.8335, + "step": 107260 + }, + { + "epoch": 0.5356937751254713, + "grad_norm": 0.09771479666233063, + "learning_rate": 1.3964054169065559e-05, + "loss": 8.8376, + "step": 107270 + }, + { + "epoch": 0.5357437139504108, + "grad_norm": 0.08908728510141373, + "learning_rate": 1.3962552254124009e-05, + "loss": 8.8401, + "step": 107280 + }, + { + "epoch": 0.5357936527753502, + "grad_norm": 0.08806288987398148, + "learning_rate": 1.3961050339182457e-05, + "loss": 8.8491, + "step": 107290 + }, + { + "epoch": 0.5358435916002896, + "grad_norm": 0.09544865041971207, + "learning_rate": 1.3959548424240907e-05, + "loss": 8.8399, + "step": 107300 + }, + { + "epoch": 0.5358935304252291, + "grad_norm": 0.0903419628739357, + "learning_rate": 1.3958046509299358e-05, + "loss": 8.8217, + "step": 107310 + }, + { + "epoch": 0.5359434692501686, + "grad_norm": 0.09364979714155197, + "learning_rate": 1.3956544594357806e-05, + "loss": 8.8447, + "step": 107320 + }, + { + "epoch": 0.535993408075108, + "grad_norm": 0.09100334346294403, + "learning_rate": 1.3955042679416256e-05, + "loss": 8.8389, + "step": 107330 + }, + { + "epoch": 0.5360433469000474, + "grad_norm": 0.09545484930276871, + "learning_rate": 1.3953540764474705e-05, + "loss": 8.8371, + "step": 107340 + }, + { + "epoch": 0.5360932857249869, + "grad_norm": 0.09141393005847931, + "learning_rate": 1.3952038849533155e-05, + "loss": 8.8562, + "step": 107350 + }, + { + "epoch": 0.5361432245499264, + "grad_norm": 0.09346693754196167, + "learning_rate": 1.3950536934591605e-05, + "loss": 8.8436, + "step": 107360 + }, + { + "epoch": 0.5361931633748658, + "grad_norm": 0.09371886402368546, + "learning_rate": 1.3949035019650054e-05, + "loss": 8.8336, + "step": 107370 + }, + { + "epoch": 0.5362431021998052, + "grad_norm": 0.08820048719644547, + "learning_rate": 1.3947533104708504e-05, + "loss": 8.8447, + "step": 107380 + }, + { + "epoch": 0.5362930410247447, + "grad_norm": 0.09496960043907166, + "learning_rate": 1.3946031189766954e-05, + "loss": 8.8354, + "step": 107390 + }, + { + "epoch": 0.5363429798496842, + "grad_norm": 0.09885689616203308, + "learning_rate": 1.3944529274825402e-05, + "loss": 8.8273, + "step": 107400 + }, + { + "epoch": 0.5363929186746236, + "grad_norm": 0.09907709807157516, + "learning_rate": 1.3943027359883853e-05, + "loss": 8.8314, + "step": 107410 + }, + { + "epoch": 0.536442857499563, + "grad_norm": 0.08787566423416138, + "learning_rate": 1.3941525444942301e-05, + "loss": 8.8396, + "step": 107420 + }, + { + "epoch": 0.5364927963245025, + "grad_norm": 0.0915251299738884, + "learning_rate": 1.3940023530000751e-05, + "loss": 8.8471, + "step": 107430 + }, + { + "epoch": 0.536542735149442, + "grad_norm": 0.09245522320270538, + "learning_rate": 1.3938521615059201e-05, + "loss": 8.8446, + "step": 107440 + }, + { + "epoch": 0.5365926739743814, + "grad_norm": 0.09490872919559479, + "learning_rate": 1.393701970011765e-05, + "loss": 8.8361, + "step": 107450 + }, + { + "epoch": 0.5366426127993208, + "grad_norm": 0.1009124219417572, + "learning_rate": 1.39355177851761e-05, + "loss": 8.8215, + "step": 107460 + }, + { + "epoch": 0.5366925516242603, + "grad_norm": 0.10078973323106766, + "learning_rate": 1.3934015870234549e-05, + "loss": 8.8342, + "step": 107470 + }, + { + "epoch": 0.5367424904491997, + "grad_norm": 0.09087687730789185, + "learning_rate": 1.3932513955292999e-05, + "loss": 8.8498, + "step": 107480 + }, + { + "epoch": 0.5367924292741392, + "grad_norm": 0.09109964966773987, + "learning_rate": 1.3931012040351449e-05, + "loss": 8.8493, + "step": 107490 + }, + { + "epoch": 0.5368423680990786, + "grad_norm": 0.09533953666687012, + "learning_rate": 1.3929510125409897e-05, + "loss": 8.8549, + "step": 107500 + }, + { + "epoch": 0.5368923069240181, + "grad_norm": 0.09448252618312836, + "learning_rate": 1.3928008210468348e-05, + "loss": 8.8328, + "step": 107510 + }, + { + "epoch": 0.5369422457489575, + "grad_norm": 0.0955210030078888, + "learning_rate": 1.3926506295526796e-05, + "loss": 8.8328, + "step": 107520 + }, + { + "epoch": 0.536992184573897, + "grad_norm": 0.10150817781686783, + "learning_rate": 1.3925004380585246e-05, + "loss": 8.8467, + "step": 107530 + }, + { + "epoch": 0.5370421233988364, + "grad_norm": 0.09742490202188492, + "learning_rate": 1.3923502465643696e-05, + "loss": 8.8382, + "step": 107540 + }, + { + "epoch": 0.5370920622237759, + "grad_norm": 0.09283208101987839, + "learning_rate": 1.3922000550702147e-05, + "loss": 8.8348, + "step": 107550 + }, + { + "epoch": 0.5371420010487153, + "grad_norm": 0.09196760505437851, + "learning_rate": 1.3920498635760595e-05, + "loss": 8.8501, + "step": 107560 + }, + { + "epoch": 0.5371919398736548, + "grad_norm": 0.09395366907119751, + "learning_rate": 1.3918996720819044e-05, + "loss": 8.8297, + "step": 107570 + }, + { + "epoch": 0.5372418786985942, + "grad_norm": 0.09952932596206665, + "learning_rate": 1.3917494805877494e-05, + "loss": 8.8295, + "step": 107580 + }, + { + "epoch": 0.5372918175235337, + "grad_norm": 0.10399974137544632, + "learning_rate": 1.3915992890935944e-05, + "loss": 8.8323, + "step": 107590 + }, + { + "epoch": 0.5373417563484731, + "grad_norm": 0.08807992935180664, + "learning_rate": 1.3914490975994394e-05, + "loss": 8.8514, + "step": 107600 + }, + { + "epoch": 0.5373916951734126, + "grad_norm": 0.09528198093175888, + "learning_rate": 1.3912989061052843e-05, + "loss": 8.8302, + "step": 107610 + }, + { + "epoch": 0.537441633998352, + "grad_norm": 0.09758550673723221, + "learning_rate": 1.3911487146111291e-05, + "loss": 8.8455, + "step": 107620 + }, + { + "epoch": 0.5374915728232915, + "grad_norm": 0.0921962559223175, + "learning_rate": 1.3909985231169741e-05, + "loss": 8.8425, + "step": 107630 + }, + { + "epoch": 0.5375415116482309, + "grad_norm": 0.09394194185733795, + "learning_rate": 1.3908483316228191e-05, + "loss": 8.8426, + "step": 107640 + }, + { + "epoch": 0.5375914504731704, + "grad_norm": 0.0899246484041214, + "learning_rate": 1.3906981401286642e-05, + "loss": 8.8407, + "step": 107650 + }, + { + "epoch": 0.5376413892981098, + "grad_norm": 0.09396055340766907, + "learning_rate": 1.390547948634509e-05, + "loss": 8.8325, + "step": 107660 + }, + { + "epoch": 0.5376913281230493, + "grad_norm": 0.09703001379966736, + "learning_rate": 1.3903977571403539e-05, + "loss": 8.8322, + "step": 107670 + }, + { + "epoch": 0.5377412669479887, + "grad_norm": 0.08996451646089554, + "learning_rate": 1.3902475656461989e-05, + "loss": 8.8422, + "step": 107680 + }, + { + "epoch": 0.5377912057729282, + "grad_norm": 0.09080103039741516, + "learning_rate": 1.3900973741520439e-05, + "loss": 8.8472, + "step": 107690 + }, + { + "epoch": 0.5378411445978676, + "grad_norm": 0.09670452773571014, + "learning_rate": 1.3899471826578889e-05, + "loss": 8.8514, + "step": 107700 + }, + { + "epoch": 0.5378910834228071, + "grad_norm": 0.0983625054359436, + "learning_rate": 1.389796991163734e-05, + "loss": 8.8386, + "step": 107710 + }, + { + "epoch": 0.5379410222477465, + "grad_norm": 0.09571649879217148, + "learning_rate": 1.3896467996695786e-05, + "loss": 8.8353, + "step": 107720 + }, + { + "epoch": 0.537990961072686, + "grad_norm": 0.0922119989991188, + "learning_rate": 1.3894966081754236e-05, + "loss": 8.8401, + "step": 107730 + }, + { + "epoch": 0.5380408998976254, + "grad_norm": 0.08857207745313644, + "learning_rate": 1.3893464166812686e-05, + "loss": 8.842, + "step": 107740 + }, + { + "epoch": 0.5380908387225649, + "grad_norm": 0.10182986408472061, + "learning_rate": 1.3891962251871137e-05, + "loss": 8.8435, + "step": 107750 + }, + { + "epoch": 0.5381407775475043, + "grad_norm": 0.09429343789815903, + "learning_rate": 1.3890460336929587e-05, + "loss": 8.8431, + "step": 107760 + }, + { + "epoch": 0.5381907163724438, + "grad_norm": 0.09139162302017212, + "learning_rate": 1.3888958421988034e-05, + "loss": 8.8568, + "step": 107770 + }, + { + "epoch": 0.5382406551973832, + "grad_norm": 0.08870648592710495, + "learning_rate": 1.3887456507046484e-05, + "loss": 8.8555, + "step": 107780 + }, + { + "epoch": 0.5382905940223226, + "grad_norm": 0.09010998159646988, + "learning_rate": 1.3885954592104934e-05, + "loss": 8.8466, + "step": 107790 + }, + { + "epoch": 0.5383405328472621, + "grad_norm": 0.09655839204788208, + "learning_rate": 1.3884452677163384e-05, + "loss": 8.8326, + "step": 107800 + }, + { + "epoch": 0.5383904716722016, + "grad_norm": 0.09276573359966278, + "learning_rate": 1.3882950762221834e-05, + "loss": 8.8574, + "step": 107810 + }, + { + "epoch": 0.538440410497141, + "grad_norm": 0.0921722874045372, + "learning_rate": 1.3881448847280281e-05, + "loss": 8.8477, + "step": 107820 + }, + { + "epoch": 0.5384903493220804, + "grad_norm": 0.0946490541100502, + "learning_rate": 1.3879946932338731e-05, + "loss": 8.8401, + "step": 107830 + }, + { + "epoch": 0.5385402881470199, + "grad_norm": 0.0921718031167984, + "learning_rate": 1.3878445017397181e-05, + "loss": 8.8461, + "step": 107840 + }, + { + "epoch": 0.5385902269719594, + "grad_norm": 0.08847850561141968, + "learning_rate": 1.3876943102455632e-05, + "loss": 8.8462, + "step": 107850 + }, + { + "epoch": 0.5386401657968988, + "grad_norm": 0.09322067350149155, + "learning_rate": 1.3875441187514082e-05, + "loss": 8.8422, + "step": 107860 + }, + { + "epoch": 0.5386901046218382, + "grad_norm": 0.09097786247730255, + "learning_rate": 1.387393927257253e-05, + "loss": 8.8435, + "step": 107870 + }, + { + "epoch": 0.5387400434467777, + "grad_norm": 0.09428925067186356, + "learning_rate": 1.3872437357630979e-05, + "loss": 8.8407, + "step": 107880 + }, + { + "epoch": 0.5387899822717171, + "grad_norm": 0.09316663444042206, + "learning_rate": 1.3870935442689429e-05, + "loss": 8.8444, + "step": 107890 + }, + { + "epoch": 0.5388399210966566, + "grad_norm": 0.08978422731161118, + "learning_rate": 1.386943352774788e-05, + "loss": 8.839, + "step": 107900 + }, + { + "epoch": 0.538889859921596, + "grad_norm": 0.09495079517364502, + "learning_rate": 1.386793161280633e-05, + "loss": 8.8695, + "step": 107910 + }, + { + "epoch": 0.5389397987465355, + "grad_norm": 0.09116814285516739, + "learning_rate": 1.3866429697864778e-05, + "loss": 8.8255, + "step": 107920 + }, + { + "epoch": 0.538989737571475, + "grad_norm": 0.0922946184873581, + "learning_rate": 1.3864927782923226e-05, + "loss": 8.8413, + "step": 107930 + }, + { + "epoch": 0.5390396763964144, + "grad_norm": 0.09681878238916397, + "learning_rate": 1.3863425867981676e-05, + "loss": 8.8529, + "step": 107940 + }, + { + "epoch": 0.5390896152213538, + "grad_norm": 0.0935935452580452, + "learning_rate": 1.3861923953040127e-05, + "loss": 8.8531, + "step": 107950 + }, + { + "epoch": 0.5391395540462933, + "grad_norm": 0.09184682369232178, + "learning_rate": 1.3860422038098577e-05, + "loss": 8.8414, + "step": 107960 + }, + { + "epoch": 0.5391894928712327, + "grad_norm": 0.10028470307588577, + "learning_rate": 1.3858920123157025e-05, + "loss": 8.8435, + "step": 107970 + }, + { + "epoch": 0.5392394316961722, + "grad_norm": 0.09753447771072388, + "learning_rate": 1.3857418208215474e-05, + "loss": 8.8459, + "step": 107980 + }, + { + "epoch": 0.5392893705211116, + "grad_norm": 0.10108836740255356, + "learning_rate": 1.3855916293273924e-05, + "loss": 8.8328, + "step": 107990 + }, + { + "epoch": 0.5393393093460511, + "grad_norm": 0.08985279500484467, + "learning_rate": 1.3854414378332374e-05, + "loss": 8.8288, + "step": 108000 + }, + { + "epoch": 0.5393892481709905, + "grad_norm": 0.08839620649814606, + "learning_rate": 1.3852912463390824e-05, + "loss": 8.8446, + "step": 108010 + }, + { + "epoch": 0.53943918699593, + "grad_norm": 0.09369973093271255, + "learning_rate": 1.3851410548449273e-05, + "loss": 8.8246, + "step": 108020 + }, + { + "epoch": 0.5394891258208694, + "grad_norm": 0.09340804815292358, + "learning_rate": 1.3849908633507723e-05, + "loss": 8.8316, + "step": 108030 + }, + { + "epoch": 0.5395390646458089, + "grad_norm": 0.09237315505743027, + "learning_rate": 1.3848406718566171e-05, + "loss": 8.8389, + "step": 108040 + }, + { + "epoch": 0.5395890034707483, + "grad_norm": 0.09002888947725296, + "learning_rate": 1.3846904803624622e-05, + "loss": 8.8388, + "step": 108050 + }, + { + "epoch": 0.5396389422956878, + "grad_norm": 0.0946909487247467, + "learning_rate": 1.3845402888683072e-05, + "loss": 8.8354, + "step": 108060 + }, + { + "epoch": 0.5396888811206272, + "grad_norm": 0.09334079921245575, + "learning_rate": 1.3843900973741522e-05, + "loss": 8.8316, + "step": 108070 + }, + { + "epoch": 0.5397388199455667, + "grad_norm": 0.09467971324920654, + "learning_rate": 1.384239905879997e-05, + "loss": 8.8425, + "step": 108080 + }, + { + "epoch": 0.5397887587705061, + "grad_norm": 0.09707115590572357, + "learning_rate": 1.3840897143858419e-05, + "loss": 8.8327, + "step": 108090 + }, + { + "epoch": 0.5398386975954456, + "grad_norm": 0.09300867468118668, + "learning_rate": 1.383939522891687e-05, + "loss": 8.8463, + "step": 108100 + }, + { + "epoch": 0.539888636420385, + "grad_norm": 0.09528551995754242, + "learning_rate": 1.383789331397532e-05, + "loss": 8.8379, + "step": 108110 + }, + { + "epoch": 0.5399385752453245, + "grad_norm": 0.09320961683988571, + "learning_rate": 1.383639139903377e-05, + "loss": 8.8346, + "step": 108120 + }, + { + "epoch": 0.5399885140702639, + "grad_norm": 0.09055766463279724, + "learning_rate": 1.3834889484092218e-05, + "loss": 8.8463, + "step": 108130 + }, + { + "epoch": 0.5400384528952034, + "grad_norm": 0.09693289548158646, + "learning_rate": 1.3833387569150667e-05, + "loss": 8.8457, + "step": 108140 + }, + { + "epoch": 0.5400883917201428, + "grad_norm": 0.10044632852077484, + "learning_rate": 1.3831885654209117e-05, + "loss": 8.8156, + "step": 108150 + }, + { + "epoch": 0.5401383305450823, + "grad_norm": 0.09970605373382568, + "learning_rate": 1.3830383739267567e-05, + "loss": 8.8469, + "step": 108160 + }, + { + "epoch": 0.5401882693700217, + "grad_norm": 0.09658732265233994, + "learning_rate": 1.3828881824326017e-05, + "loss": 8.8184, + "step": 108170 + }, + { + "epoch": 0.5402382081949612, + "grad_norm": 0.0948546975851059, + "learning_rate": 1.3827379909384466e-05, + "loss": 8.8466, + "step": 108180 + }, + { + "epoch": 0.5402881470199006, + "grad_norm": 0.09518400579690933, + "learning_rate": 1.3825877994442916e-05, + "loss": 8.8373, + "step": 108190 + }, + { + "epoch": 0.5403380858448401, + "grad_norm": 0.09109831601381302, + "learning_rate": 1.3824376079501364e-05, + "loss": 8.8272, + "step": 108200 + }, + { + "epoch": 0.5403880246697795, + "grad_norm": 0.08981281518936157, + "learning_rate": 1.3822874164559814e-05, + "loss": 8.8456, + "step": 108210 + }, + { + "epoch": 0.540437963494719, + "grad_norm": 0.10105878114700317, + "learning_rate": 1.3821372249618265e-05, + "loss": 8.8423, + "step": 108220 + }, + { + "epoch": 0.5404879023196584, + "grad_norm": 0.08803227543830872, + "learning_rate": 1.3819870334676713e-05, + "loss": 8.8416, + "step": 108230 + }, + { + "epoch": 0.5405378411445979, + "grad_norm": 0.09488827735185623, + "learning_rate": 1.3818368419735163e-05, + "loss": 8.8317, + "step": 108240 + }, + { + "epoch": 0.5405877799695373, + "grad_norm": 0.0969134047627449, + "learning_rate": 1.3816866504793612e-05, + "loss": 8.837, + "step": 108250 + }, + { + "epoch": 0.5406377187944768, + "grad_norm": 0.09399174153804779, + "learning_rate": 1.3815364589852062e-05, + "loss": 8.8314, + "step": 108260 + }, + { + "epoch": 0.5406876576194162, + "grad_norm": 0.08893413096666336, + "learning_rate": 1.3813862674910512e-05, + "loss": 8.8467, + "step": 108270 + }, + { + "epoch": 0.5407375964443557, + "grad_norm": 0.09072422981262207, + "learning_rate": 1.381236075996896e-05, + "loss": 8.8399, + "step": 108280 + }, + { + "epoch": 0.5407875352692951, + "grad_norm": 0.09168332815170288, + "learning_rate": 1.381085884502741e-05, + "loss": 8.8355, + "step": 108290 + }, + { + "epoch": 0.5408374740942345, + "grad_norm": 0.09577570855617523, + "learning_rate": 1.380935693008586e-05, + "loss": 8.8358, + "step": 108300 + }, + { + "epoch": 0.540887412919174, + "grad_norm": 0.09399871528148651, + "learning_rate": 1.380785501514431e-05, + "loss": 8.8198, + "step": 108310 + }, + { + "epoch": 0.5409373517441135, + "grad_norm": 0.09029237180948257, + "learning_rate": 1.380635310020276e-05, + "loss": 8.8317, + "step": 108320 + }, + { + "epoch": 0.5409872905690529, + "grad_norm": 0.09074974060058594, + "learning_rate": 1.3804851185261208e-05, + "loss": 8.8355, + "step": 108330 + }, + { + "epoch": 0.5410372293939923, + "grad_norm": 0.08936341851949692, + "learning_rate": 1.3803349270319658e-05, + "loss": 8.8351, + "step": 108340 + }, + { + "epoch": 0.5410871682189318, + "grad_norm": 0.08986354619264603, + "learning_rate": 1.3801847355378108e-05, + "loss": 8.8396, + "step": 108350 + }, + { + "epoch": 0.5411371070438713, + "grad_norm": 0.0931684747338295, + "learning_rate": 1.3800345440436557e-05, + "loss": 8.8502, + "step": 108360 + }, + { + "epoch": 0.5411870458688107, + "grad_norm": 0.09337104111909866, + "learning_rate": 1.3798843525495007e-05, + "loss": 8.841, + "step": 108370 + }, + { + "epoch": 0.5412369846937501, + "grad_norm": 0.09039416164159775, + "learning_rate": 1.3797341610553456e-05, + "loss": 8.8413, + "step": 108380 + }, + { + "epoch": 0.5412869235186896, + "grad_norm": 0.09265271574258804, + "learning_rate": 1.3795839695611906e-05, + "loss": 8.8387, + "step": 108390 + }, + { + "epoch": 0.5413368623436291, + "grad_norm": 0.09323190897703171, + "learning_rate": 1.3794337780670356e-05, + "loss": 8.8338, + "step": 108400 + }, + { + "epoch": 0.5413868011685685, + "grad_norm": 0.0942569449543953, + "learning_rate": 1.3792835865728804e-05, + "loss": 8.829, + "step": 108410 + }, + { + "epoch": 0.5414367399935079, + "grad_norm": 0.09136468917131424, + "learning_rate": 1.3791333950787255e-05, + "loss": 8.8375, + "step": 108420 + }, + { + "epoch": 0.5414866788184474, + "grad_norm": 0.09518197923898697, + "learning_rate": 1.3789832035845703e-05, + "loss": 8.8347, + "step": 108430 + }, + { + "epoch": 0.5415366176433869, + "grad_norm": 0.09513925015926361, + "learning_rate": 1.3788330120904153e-05, + "loss": 8.8337, + "step": 108440 + }, + { + "epoch": 0.5415865564683263, + "grad_norm": 0.09242982417345047, + "learning_rate": 1.3786828205962603e-05, + "loss": 8.8264, + "step": 108450 + }, + { + "epoch": 0.5416364952932657, + "grad_norm": 0.09404724091291428, + "learning_rate": 1.3785326291021052e-05, + "loss": 8.8328, + "step": 108460 + }, + { + "epoch": 0.5416864341182052, + "grad_norm": 0.09275107085704803, + "learning_rate": 1.3783824376079502e-05, + "loss": 8.8397, + "step": 108470 + }, + { + "epoch": 0.5417363729431447, + "grad_norm": 0.09128131717443466, + "learning_rate": 1.378232246113795e-05, + "loss": 8.8265, + "step": 108480 + }, + { + "epoch": 0.5417863117680841, + "grad_norm": 0.09248005598783493, + "learning_rate": 1.37808205461964e-05, + "loss": 8.8399, + "step": 108490 + }, + { + "epoch": 0.5418362505930235, + "grad_norm": 0.09171829372644424, + "learning_rate": 1.3779318631254851e-05, + "loss": 8.8332, + "step": 108500 + }, + { + "epoch": 0.541886189417963, + "grad_norm": 0.09409584105014801, + "learning_rate": 1.37778167163133e-05, + "loss": 8.8347, + "step": 108510 + }, + { + "epoch": 0.5419361282429025, + "grad_norm": 0.09212516993284225, + "learning_rate": 1.377631480137175e-05, + "loss": 8.839, + "step": 108520 + }, + { + "epoch": 0.5419860670678419, + "grad_norm": 0.0963674783706665, + "learning_rate": 1.3774812886430198e-05, + "loss": 8.8263, + "step": 108530 + }, + { + "epoch": 0.5420360058927813, + "grad_norm": 0.09333939105272293, + "learning_rate": 1.3773310971488648e-05, + "loss": 8.8339, + "step": 108540 + }, + { + "epoch": 0.5420859447177208, + "grad_norm": 0.0930102989077568, + "learning_rate": 1.3771809056547098e-05, + "loss": 8.8331, + "step": 108550 + }, + { + "epoch": 0.5421358835426603, + "grad_norm": 0.09327764064073563, + "learning_rate": 1.3770307141605549e-05, + "loss": 8.8169, + "step": 108560 + }, + { + "epoch": 0.5421858223675997, + "grad_norm": 0.09788885712623596, + "learning_rate": 1.3768805226663997e-05, + "loss": 8.8428, + "step": 108570 + }, + { + "epoch": 0.5422357611925391, + "grad_norm": 0.08719142526388168, + "learning_rate": 1.3767303311722446e-05, + "loss": 8.835, + "step": 108580 + }, + { + "epoch": 0.5422857000174786, + "grad_norm": 0.08899932354688644, + "learning_rate": 1.3765801396780896e-05, + "loss": 8.8379, + "step": 108590 + }, + { + "epoch": 0.5423356388424181, + "grad_norm": 0.09376303106546402, + "learning_rate": 1.3764299481839346e-05, + "loss": 8.8337, + "step": 108600 + }, + { + "epoch": 0.5423855776673575, + "grad_norm": 0.09086206555366516, + "learning_rate": 1.3762797566897796e-05, + "loss": 8.8522, + "step": 108610 + }, + { + "epoch": 0.5424355164922969, + "grad_norm": 0.09616357088088989, + "learning_rate": 1.3761295651956245e-05, + "loss": 8.8464, + "step": 108620 + }, + { + "epoch": 0.5424854553172364, + "grad_norm": 0.09458029270172119, + "learning_rate": 1.3759793737014693e-05, + "loss": 8.8334, + "step": 108630 + }, + { + "epoch": 0.5425353941421759, + "grad_norm": 0.0973440557718277, + "learning_rate": 1.3758291822073143e-05, + "loss": 8.8272, + "step": 108640 + }, + { + "epoch": 0.5425853329671153, + "grad_norm": 0.09475944191217422, + "learning_rate": 1.3756789907131593e-05, + "loss": 8.828, + "step": 108650 + }, + { + "epoch": 0.5426352717920547, + "grad_norm": 0.09611568599939346, + "learning_rate": 1.3755287992190044e-05, + "loss": 8.8075, + "step": 108660 + }, + { + "epoch": 0.5426852106169942, + "grad_norm": 0.09558840841054916, + "learning_rate": 1.3753786077248492e-05, + "loss": 8.8357, + "step": 108670 + }, + { + "epoch": 0.5427351494419337, + "grad_norm": 0.09221713989973068, + "learning_rate": 1.375228416230694e-05, + "loss": 8.8487, + "step": 108680 + }, + { + "epoch": 0.5427850882668731, + "grad_norm": 0.09657151997089386, + "learning_rate": 1.375078224736539e-05, + "loss": 8.8293, + "step": 108690 + }, + { + "epoch": 0.5428350270918125, + "grad_norm": 0.09163092821836472, + "learning_rate": 1.3749280332423841e-05, + "loss": 8.8348, + "step": 108700 + }, + { + "epoch": 0.542884965916752, + "grad_norm": 0.09311132878065109, + "learning_rate": 1.3747778417482291e-05, + "loss": 8.837, + "step": 108710 + }, + { + "epoch": 0.5429349047416915, + "grad_norm": 0.08675689995288849, + "learning_rate": 1.3746276502540741e-05, + "loss": 8.8421, + "step": 108720 + }, + { + "epoch": 0.5429848435666309, + "grad_norm": 0.08896356076002121, + "learning_rate": 1.3744774587599188e-05, + "loss": 8.8465, + "step": 108730 + }, + { + "epoch": 0.5430347823915703, + "grad_norm": 0.09236080199480057, + "learning_rate": 1.3743272672657638e-05, + "loss": 8.8397, + "step": 108740 + }, + { + "epoch": 0.5430847212165097, + "grad_norm": 0.09245762974023819, + "learning_rate": 1.3741770757716088e-05, + "loss": 8.835, + "step": 108750 + }, + { + "epoch": 0.5431346600414493, + "grad_norm": 0.09627767652273178, + "learning_rate": 1.3740268842774539e-05, + "loss": 8.8153, + "step": 108760 + }, + { + "epoch": 0.5431845988663887, + "grad_norm": 0.09583393484354019, + "learning_rate": 1.3738766927832989e-05, + "loss": 8.8408, + "step": 108770 + }, + { + "epoch": 0.5432345376913281, + "grad_norm": 0.10037499666213989, + "learning_rate": 1.3737265012891436e-05, + "loss": 8.8385, + "step": 108780 + }, + { + "epoch": 0.5432844765162675, + "grad_norm": 0.08847088366746902, + "learning_rate": 1.3735763097949886e-05, + "loss": 8.8244, + "step": 108790 + }, + { + "epoch": 0.543334415341207, + "grad_norm": 0.0916975736618042, + "learning_rate": 1.3734261183008336e-05, + "loss": 8.8241, + "step": 108800 + }, + { + "epoch": 0.5433843541661465, + "grad_norm": 0.08768840879201889, + "learning_rate": 1.3732759268066786e-05, + "loss": 8.84, + "step": 108810 + }, + { + "epoch": 0.5434342929910859, + "grad_norm": 0.09283578395843506, + "learning_rate": 1.3731257353125236e-05, + "loss": 8.8298, + "step": 108820 + }, + { + "epoch": 0.5434842318160253, + "grad_norm": 0.09757833927869797, + "learning_rate": 1.3729755438183683e-05, + "loss": 8.8251, + "step": 108830 + }, + { + "epoch": 0.5435341706409648, + "grad_norm": 0.10008548200130463, + "learning_rate": 1.3728253523242133e-05, + "loss": 8.8276, + "step": 108840 + }, + { + "epoch": 0.5435841094659043, + "grad_norm": 0.09164326637983322, + "learning_rate": 1.3726751608300583e-05, + "loss": 8.8318, + "step": 108850 + }, + { + "epoch": 0.5436340482908437, + "grad_norm": 0.09030098468065262, + "learning_rate": 1.3725249693359034e-05, + "loss": 8.8277, + "step": 108860 + }, + { + "epoch": 0.5436839871157831, + "grad_norm": 0.08942774683237076, + "learning_rate": 1.3723747778417484e-05, + "loss": 8.8314, + "step": 108870 + }, + { + "epoch": 0.5437339259407226, + "grad_norm": 0.10168751329183578, + "learning_rate": 1.3722245863475932e-05, + "loss": 8.8215, + "step": 108880 + }, + { + "epoch": 0.5437838647656621, + "grad_norm": 0.09753987193107605, + "learning_rate": 1.372074394853438e-05, + "loss": 8.8198, + "step": 108890 + }, + { + "epoch": 0.5438338035906015, + "grad_norm": 0.10058535635471344, + "learning_rate": 1.3719242033592831e-05, + "loss": 8.8108, + "step": 108900 + }, + { + "epoch": 0.5438837424155409, + "grad_norm": 0.09578339755535126, + "learning_rate": 1.3717740118651281e-05, + "loss": 8.8362, + "step": 108910 + }, + { + "epoch": 0.5439336812404804, + "grad_norm": 0.08314099907875061, + "learning_rate": 1.3716238203709731e-05, + "loss": 8.8229, + "step": 108920 + }, + { + "epoch": 0.5439836200654199, + "grad_norm": 0.08905912190675735, + "learning_rate": 1.371473628876818e-05, + "loss": 8.8382, + "step": 108930 + }, + { + "epoch": 0.5440335588903593, + "grad_norm": 0.09270746260881424, + "learning_rate": 1.3713234373826628e-05, + "loss": 8.8224, + "step": 108940 + }, + { + "epoch": 0.5440834977152987, + "grad_norm": 0.09719360619783401, + "learning_rate": 1.3711732458885078e-05, + "loss": 8.8304, + "step": 108950 + }, + { + "epoch": 0.5441334365402382, + "grad_norm": 0.08986958116292953, + "learning_rate": 1.3710230543943529e-05, + "loss": 8.8256, + "step": 108960 + }, + { + "epoch": 0.5441833753651777, + "grad_norm": 0.09314291179180145, + "learning_rate": 1.3708728629001979e-05, + "loss": 8.8262, + "step": 108970 + }, + { + "epoch": 0.5442333141901171, + "grad_norm": 0.09784272313117981, + "learning_rate": 1.3707226714060427e-05, + "loss": 8.8172, + "step": 108980 + }, + { + "epoch": 0.5442832530150565, + "grad_norm": 0.09231096506118774, + "learning_rate": 1.3705724799118876e-05, + "loss": 8.8399, + "step": 108990 + }, + { + "epoch": 0.544333191839996, + "grad_norm": 0.09249245375394821, + "learning_rate": 1.3704222884177326e-05, + "loss": 8.8255, + "step": 109000 + }, + { + "epoch": 0.5443831306649355, + "grad_norm": 0.09249752014875412, + "learning_rate": 1.3702720969235776e-05, + "loss": 8.8309, + "step": 109010 + }, + { + "epoch": 0.5444330694898749, + "grad_norm": 0.09660474956035614, + "learning_rate": 1.3701219054294226e-05, + "loss": 8.8297, + "step": 109020 + }, + { + "epoch": 0.5444830083148143, + "grad_norm": 0.09552586078643799, + "learning_rate": 1.3699717139352675e-05, + "loss": 8.8145, + "step": 109030 + }, + { + "epoch": 0.5445329471397538, + "grad_norm": 0.09530143439769745, + "learning_rate": 1.3698215224411125e-05, + "loss": 8.82, + "step": 109040 + }, + { + "epoch": 0.5445828859646933, + "grad_norm": 0.10076169669628143, + "learning_rate": 1.3696713309469573e-05, + "loss": 8.8208, + "step": 109050 + }, + { + "epoch": 0.5446328247896327, + "grad_norm": 0.09382011741399765, + "learning_rate": 1.3695211394528024e-05, + "loss": 8.8229, + "step": 109060 + }, + { + "epoch": 0.5446827636145721, + "grad_norm": 0.09154395759105682, + "learning_rate": 1.3693709479586474e-05, + "loss": 8.833, + "step": 109070 + }, + { + "epoch": 0.5447327024395116, + "grad_norm": 0.0922236368060112, + "learning_rate": 1.3692207564644922e-05, + "loss": 8.8162, + "step": 109080 + }, + { + "epoch": 0.5447826412644511, + "grad_norm": 0.09294591844081879, + "learning_rate": 1.3690705649703372e-05, + "loss": 8.8393, + "step": 109090 + }, + { + "epoch": 0.5448325800893905, + "grad_norm": 0.09426495432853699, + "learning_rate": 1.3689203734761821e-05, + "loss": 8.8455, + "step": 109100 + }, + { + "epoch": 0.5448825189143299, + "grad_norm": 0.09277299046516418, + "learning_rate": 1.3687701819820271e-05, + "loss": 8.8368, + "step": 109110 + }, + { + "epoch": 0.5449324577392693, + "grad_norm": 0.09092520922422409, + "learning_rate": 1.3686199904878721e-05, + "loss": 8.8391, + "step": 109120 + }, + { + "epoch": 0.5449823965642089, + "grad_norm": 0.0939490795135498, + "learning_rate": 1.368469798993717e-05, + "loss": 8.832, + "step": 109130 + }, + { + "epoch": 0.5450323353891483, + "grad_norm": 0.09599693864583969, + "learning_rate": 1.368319607499562e-05, + "loss": 8.8155, + "step": 109140 + }, + { + "epoch": 0.5450822742140877, + "grad_norm": 0.10206658393144608, + "learning_rate": 1.3681694160054068e-05, + "loss": 8.8281, + "step": 109150 + }, + { + "epoch": 0.5451322130390271, + "grad_norm": 0.10047373175621033, + "learning_rate": 1.3680192245112519e-05, + "loss": 8.8277, + "step": 109160 + }, + { + "epoch": 0.5451821518639667, + "grad_norm": 0.09348422288894653, + "learning_rate": 1.3678690330170969e-05, + "loss": 8.8309, + "step": 109170 + }, + { + "epoch": 0.5452320906889061, + "grad_norm": 0.09665758162736893, + "learning_rate": 1.3677188415229417e-05, + "loss": 8.8489, + "step": 109180 + }, + { + "epoch": 0.5452820295138455, + "grad_norm": 0.09331992268562317, + "learning_rate": 1.3675686500287867e-05, + "loss": 8.8213, + "step": 109190 + }, + { + "epoch": 0.545331968338785, + "grad_norm": 0.08620268851518631, + "learning_rate": 1.3674184585346318e-05, + "loss": 8.8371, + "step": 109200 + }, + { + "epoch": 0.5453819071637245, + "grad_norm": 0.09555631130933762, + "learning_rate": 1.3672682670404766e-05, + "loss": 8.819, + "step": 109210 + }, + { + "epoch": 0.5454318459886639, + "grad_norm": 0.09607243537902832, + "learning_rate": 1.3671180755463216e-05, + "loss": 8.8082, + "step": 109220 + }, + { + "epoch": 0.5454817848136033, + "grad_norm": 0.09805332869291306, + "learning_rate": 1.3669678840521665e-05, + "loss": 8.8269, + "step": 109230 + }, + { + "epoch": 0.5455317236385427, + "grad_norm": 0.09287256002426147, + "learning_rate": 1.3668176925580115e-05, + "loss": 8.8131, + "step": 109240 + }, + { + "epoch": 0.5455816624634823, + "grad_norm": 0.08924273401498795, + "learning_rate": 1.3666675010638565e-05, + "loss": 8.8197, + "step": 109250 + }, + { + "epoch": 0.5456316012884217, + "grad_norm": 0.09110093861818314, + "learning_rate": 1.3665173095697014e-05, + "loss": 8.8382, + "step": 109260 + }, + { + "epoch": 0.5456815401133611, + "grad_norm": 0.09054429084062576, + "learning_rate": 1.3663671180755464e-05, + "loss": 8.8259, + "step": 109270 + }, + { + "epoch": 0.5457314789383005, + "grad_norm": 0.09228412806987762, + "learning_rate": 1.3662169265813912e-05, + "loss": 8.8154, + "step": 109280 + }, + { + "epoch": 0.5457814177632401, + "grad_norm": 0.09101639688014984, + "learning_rate": 1.3660667350872362e-05, + "loss": 8.8172, + "step": 109290 + }, + { + "epoch": 0.5458313565881795, + "grad_norm": 0.09964627027511597, + "learning_rate": 1.3659165435930813e-05, + "loss": 8.8302, + "step": 109300 + }, + { + "epoch": 0.5458812954131189, + "grad_norm": 0.0952049046754837, + "learning_rate": 1.3657663520989261e-05, + "loss": 8.8477, + "step": 109310 + }, + { + "epoch": 0.5459312342380583, + "grad_norm": 0.094510518014431, + "learning_rate": 1.3656161606047711e-05, + "loss": 8.8274, + "step": 109320 + }, + { + "epoch": 0.5459811730629979, + "grad_norm": 0.09812169522047043, + "learning_rate": 1.365465969110616e-05, + "loss": 8.8186, + "step": 109330 + }, + { + "epoch": 0.5460311118879373, + "grad_norm": 0.09070379287004471, + "learning_rate": 1.365315777616461e-05, + "loss": 8.8214, + "step": 109340 + }, + { + "epoch": 0.5460810507128767, + "grad_norm": 0.09096910059452057, + "learning_rate": 1.365165586122306e-05, + "loss": 8.8309, + "step": 109350 + }, + { + "epoch": 0.5461309895378161, + "grad_norm": 0.0929383784532547, + "learning_rate": 1.365015394628151e-05, + "loss": 8.8311, + "step": 109360 + }, + { + "epoch": 0.5461809283627557, + "grad_norm": 0.09064075350761414, + "learning_rate": 1.3648652031339959e-05, + "loss": 8.8385, + "step": 109370 + }, + { + "epoch": 0.5462308671876951, + "grad_norm": 0.09253614395856857, + "learning_rate": 1.3647150116398407e-05, + "loss": 8.8222, + "step": 109380 + }, + { + "epoch": 0.5462808060126345, + "grad_norm": 0.09299398958683014, + "learning_rate": 1.3645648201456857e-05, + "loss": 8.8109, + "step": 109390 + }, + { + "epoch": 0.5463307448375739, + "grad_norm": 0.09167538583278656, + "learning_rate": 1.3644146286515308e-05, + "loss": 8.8226, + "step": 109400 + }, + { + "epoch": 0.5463806836625135, + "grad_norm": 0.09108640998601913, + "learning_rate": 1.3642644371573758e-05, + "loss": 8.823, + "step": 109410 + }, + { + "epoch": 0.5464306224874529, + "grad_norm": 0.09583522379398346, + "learning_rate": 1.3641142456632206e-05, + "loss": 8.83, + "step": 109420 + }, + { + "epoch": 0.5464805613123923, + "grad_norm": 0.08716997504234314, + "learning_rate": 1.3639640541690655e-05, + "loss": 8.8318, + "step": 109430 + }, + { + "epoch": 0.5465305001373317, + "grad_norm": 0.08698141574859619, + "learning_rate": 1.3638138626749105e-05, + "loss": 8.8372, + "step": 109440 + }, + { + "epoch": 0.5465804389622713, + "grad_norm": 0.08734197914600372, + "learning_rate": 1.3636636711807555e-05, + "loss": 8.8252, + "step": 109450 + }, + { + "epoch": 0.5466303777872107, + "grad_norm": 0.08939076960086823, + "learning_rate": 1.3635134796866005e-05, + "loss": 8.8243, + "step": 109460 + }, + { + "epoch": 0.5466803166121501, + "grad_norm": 0.09037525951862335, + "learning_rate": 1.3633632881924454e-05, + "loss": 8.8242, + "step": 109470 + }, + { + "epoch": 0.5467302554370895, + "grad_norm": 0.09184879809617996, + "learning_rate": 1.3632130966982902e-05, + "loss": 8.8219, + "step": 109480 + }, + { + "epoch": 0.5467801942620291, + "grad_norm": 0.09089411050081253, + "learning_rate": 1.3630629052041352e-05, + "loss": 8.8242, + "step": 109490 + }, + { + "epoch": 0.5468301330869685, + "grad_norm": 0.09041132777929306, + "learning_rate": 1.3629127137099803e-05, + "loss": 8.8208, + "step": 109500 + }, + { + "epoch": 0.5468800719119079, + "grad_norm": 0.08768140524625778, + "learning_rate": 1.3627625222158253e-05, + "loss": 8.8216, + "step": 109510 + }, + { + "epoch": 0.5469300107368473, + "grad_norm": 0.08897078037261963, + "learning_rate": 1.3626123307216703e-05, + "loss": 8.8137, + "step": 109520 + }, + { + "epoch": 0.5469799495617869, + "grad_norm": 0.0939832553267479, + "learning_rate": 1.362462139227515e-05, + "loss": 8.8204, + "step": 109530 + }, + { + "epoch": 0.5470298883867263, + "grad_norm": 0.09440961480140686, + "learning_rate": 1.36231194773336e-05, + "loss": 8.8241, + "step": 109540 + }, + { + "epoch": 0.5470798272116657, + "grad_norm": 0.08818075060844421, + "learning_rate": 1.362161756239205e-05, + "loss": 8.8393, + "step": 109550 + }, + { + "epoch": 0.5471297660366051, + "grad_norm": 0.09029530733823776, + "learning_rate": 1.36201156474505e-05, + "loss": 8.8109, + "step": 109560 + }, + { + "epoch": 0.5471797048615447, + "grad_norm": 0.09235231578350067, + "learning_rate": 1.361861373250895e-05, + "loss": 8.8183, + "step": 109570 + }, + { + "epoch": 0.5472296436864841, + "grad_norm": 0.09568846970796585, + "learning_rate": 1.3617111817567397e-05, + "loss": 8.8189, + "step": 109580 + }, + { + "epoch": 0.5472795825114235, + "grad_norm": 0.09308406710624695, + "learning_rate": 1.3615609902625847e-05, + "loss": 8.827, + "step": 109590 + }, + { + "epoch": 0.5473295213363629, + "grad_norm": 0.09407919645309448, + "learning_rate": 1.3614107987684298e-05, + "loss": 8.8238, + "step": 109600 + }, + { + "epoch": 0.5473794601613025, + "grad_norm": 0.09674926102161407, + "learning_rate": 1.3612606072742748e-05, + "loss": 8.8228, + "step": 109610 + }, + { + "epoch": 0.5474293989862419, + "grad_norm": 0.09146232157945633, + "learning_rate": 1.3611104157801198e-05, + "loss": 8.8094, + "step": 109620 + }, + { + "epoch": 0.5474793378111813, + "grad_norm": 0.09639618545770645, + "learning_rate": 1.3609602242859645e-05, + "loss": 8.8284, + "step": 109630 + }, + { + "epoch": 0.5475292766361207, + "grad_norm": 0.09209806472063065, + "learning_rate": 1.3608100327918095e-05, + "loss": 8.8338, + "step": 109640 + }, + { + "epoch": 0.5475792154610603, + "grad_norm": 0.09675581008195877, + "learning_rate": 1.3606598412976545e-05, + "loss": 8.8283, + "step": 109650 + }, + { + "epoch": 0.5476291542859997, + "grad_norm": 0.09263081103563309, + "learning_rate": 1.3605096498034995e-05, + "loss": 8.8038, + "step": 109660 + }, + { + "epoch": 0.5476790931109391, + "grad_norm": 0.09105950593948364, + "learning_rate": 1.3603594583093446e-05, + "loss": 8.8312, + "step": 109670 + }, + { + "epoch": 0.5477290319358785, + "grad_norm": 0.09509497880935669, + "learning_rate": 1.3602092668151894e-05, + "loss": 8.831, + "step": 109680 + }, + { + "epoch": 0.547778970760818, + "grad_norm": 0.09219329804182053, + "learning_rate": 1.3600590753210343e-05, + "loss": 8.8016, + "step": 109690 + }, + { + "epoch": 0.5478289095857575, + "grad_norm": 0.09572474658489227, + "learning_rate": 1.3599088838268793e-05, + "loss": 8.8219, + "step": 109700 + }, + { + "epoch": 0.5478788484106969, + "grad_norm": 0.10046906024217606, + "learning_rate": 1.3597586923327243e-05, + "loss": 8.8214, + "step": 109710 + }, + { + "epoch": 0.5479287872356363, + "grad_norm": 0.09304700791835785, + "learning_rate": 1.3596085008385693e-05, + "loss": 8.8302, + "step": 109720 + }, + { + "epoch": 0.5479787260605758, + "grad_norm": 0.09092891216278076, + "learning_rate": 1.3594583093444142e-05, + "loss": 8.8251, + "step": 109730 + }, + { + "epoch": 0.5480286648855153, + "grad_norm": 0.09128131717443466, + "learning_rate": 1.359308117850259e-05, + "loss": 8.829, + "step": 109740 + }, + { + "epoch": 0.5480786037104547, + "grad_norm": 0.08997046947479248, + "learning_rate": 1.359157926356104e-05, + "loss": 8.8308, + "step": 109750 + }, + { + "epoch": 0.5481285425353941, + "grad_norm": 0.09499360620975494, + "learning_rate": 1.359007734861949e-05, + "loss": 8.8291, + "step": 109760 + }, + { + "epoch": 0.5481784813603335, + "grad_norm": 0.09103532880544662, + "learning_rate": 1.358857543367794e-05, + "loss": 8.8168, + "step": 109770 + }, + { + "epoch": 0.5482284201852731, + "grad_norm": 0.08995458483695984, + "learning_rate": 1.3587073518736389e-05, + "loss": 8.8261, + "step": 109780 + }, + { + "epoch": 0.5482783590102125, + "grad_norm": 0.09766585379838943, + "learning_rate": 1.3585571603794838e-05, + "loss": 8.8222, + "step": 109790 + }, + { + "epoch": 0.5483282978351519, + "grad_norm": 0.09343060851097107, + "learning_rate": 1.3584069688853288e-05, + "loss": 8.817, + "step": 109800 + }, + { + "epoch": 0.5483782366600913, + "grad_norm": 0.09289675951004028, + "learning_rate": 1.3582567773911738e-05, + "loss": 8.8271, + "step": 109810 + }, + { + "epoch": 0.5484281754850309, + "grad_norm": 0.0947795957326889, + "learning_rate": 1.3581065858970188e-05, + "loss": 8.8265, + "step": 109820 + }, + { + "epoch": 0.5484781143099703, + "grad_norm": 0.09564196318387985, + "learning_rate": 1.3579563944028637e-05, + "loss": 8.823, + "step": 109830 + }, + { + "epoch": 0.5485280531349097, + "grad_norm": 0.08977336436510086, + "learning_rate": 1.3578062029087087e-05, + "loss": 8.8181, + "step": 109840 + }, + { + "epoch": 0.5485779919598491, + "grad_norm": 0.0958486795425415, + "learning_rate": 1.3576560114145535e-05, + "loss": 8.8321, + "step": 109850 + }, + { + "epoch": 0.5486279307847887, + "grad_norm": 0.09179677069187164, + "learning_rate": 1.3575058199203985e-05, + "loss": 8.8342, + "step": 109860 + }, + { + "epoch": 0.5486778696097281, + "grad_norm": 0.09745611250400543, + "learning_rate": 1.3573556284262436e-05, + "loss": 8.8287, + "step": 109870 + }, + { + "epoch": 0.5487278084346675, + "grad_norm": 0.09767097979784012, + "learning_rate": 1.3572054369320884e-05, + "loss": 8.8168, + "step": 109880 + }, + { + "epoch": 0.5487777472596069, + "grad_norm": 0.0993611216545105, + "learning_rate": 1.3570552454379334e-05, + "loss": 8.8124, + "step": 109890 + }, + { + "epoch": 0.5488276860845465, + "grad_norm": 0.09202627837657928, + "learning_rate": 1.3569050539437783e-05, + "loss": 8.8306, + "step": 109900 + }, + { + "epoch": 0.5488776249094859, + "grad_norm": 0.09640886634588242, + "learning_rate": 1.3567548624496233e-05, + "loss": 8.8248, + "step": 109910 + }, + { + "epoch": 0.5489275637344253, + "grad_norm": 0.09196940064430237, + "learning_rate": 1.3566046709554683e-05, + "loss": 8.8267, + "step": 109920 + }, + { + "epoch": 0.5489775025593647, + "grad_norm": 0.09446310251951218, + "learning_rate": 1.3564544794613132e-05, + "loss": 8.8257, + "step": 109930 + }, + { + "epoch": 0.5490274413843043, + "grad_norm": 0.0930735394358635, + "learning_rate": 1.3563042879671582e-05, + "loss": 8.8324, + "step": 109940 + }, + { + "epoch": 0.5490773802092437, + "grad_norm": 0.0945110097527504, + "learning_rate": 1.356154096473003e-05, + "loss": 8.8224, + "step": 109950 + }, + { + "epoch": 0.5491273190341831, + "grad_norm": 0.09478355199098587, + "learning_rate": 1.356003904978848e-05, + "loss": 8.8194, + "step": 109960 + }, + { + "epoch": 0.5491772578591225, + "grad_norm": 0.09091106802225113, + "learning_rate": 1.355853713484693e-05, + "loss": 8.8413, + "step": 109970 + }, + { + "epoch": 0.5492271966840621, + "grad_norm": 0.0927327498793602, + "learning_rate": 1.3557035219905379e-05, + "loss": 8.8189, + "step": 109980 + }, + { + "epoch": 0.5492771355090015, + "grad_norm": 0.08513586223125458, + "learning_rate": 1.355553330496383e-05, + "loss": 8.8279, + "step": 109990 + }, + { + "epoch": 0.5493270743339409, + "grad_norm": 0.08400266617536545, + "learning_rate": 1.355403139002228e-05, + "loss": 8.8285, + "step": 110000 + }, + { + "epoch": 0.5493770131588803, + "grad_norm": 0.09037226438522339, + "learning_rate": 1.3552529475080728e-05, + "loss": 8.8035, + "step": 110010 + }, + { + "epoch": 0.5494269519838199, + "grad_norm": 0.09207148849964142, + "learning_rate": 1.3551027560139178e-05, + "loss": 8.8098, + "step": 110020 + }, + { + "epoch": 0.5494768908087593, + "grad_norm": 0.09380757808685303, + "learning_rate": 1.3549525645197627e-05, + "loss": 8.8365, + "step": 110030 + }, + { + "epoch": 0.5495268296336987, + "grad_norm": 0.10270896553993225, + "learning_rate": 1.3548023730256077e-05, + "loss": 8.8368, + "step": 110040 + }, + { + "epoch": 0.5495767684586381, + "grad_norm": 0.09271609783172607, + "learning_rate": 1.3546521815314527e-05, + "loss": 8.8293, + "step": 110050 + }, + { + "epoch": 0.5496267072835777, + "grad_norm": 0.0941663458943367, + "learning_rate": 1.3545019900372975e-05, + "loss": 8.8156, + "step": 110060 + }, + { + "epoch": 0.5496766461085171, + "grad_norm": 0.09754271805286407, + "learning_rate": 1.3543517985431426e-05, + "loss": 8.8233, + "step": 110070 + }, + { + "epoch": 0.5497265849334565, + "grad_norm": 0.09003855288028717, + "learning_rate": 1.3542016070489874e-05, + "loss": 8.8332, + "step": 110080 + }, + { + "epoch": 0.5497765237583959, + "grad_norm": 0.08914513885974884, + "learning_rate": 1.3540514155548324e-05, + "loss": 8.8161, + "step": 110090 + }, + { + "epoch": 0.5498264625833355, + "grad_norm": 0.09949744492769241, + "learning_rate": 1.3539012240606774e-05, + "loss": 8.8143, + "step": 110100 + }, + { + "epoch": 0.5498764014082749, + "grad_norm": 0.0968717560172081, + "learning_rate": 1.3537510325665223e-05, + "loss": 8.8253, + "step": 110110 + }, + { + "epoch": 0.5499263402332143, + "grad_norm": 0.09448512643575668, + "learning_rate": 1.3536008410723673e-05, + "loss": 8.8268, + "step": 110120 + }, + { + "epoch": 0.5499762790581537, + "grad_norm": 0.09558204561471939, + "learning_rate": 1.3534506495782123e-05, + "loss": 8.8139, + "step": 110130 + }, + { + "epoch": 0.5500262178830932, + "grad_norm": 0.09001177549362183, + "learning_rate": 1.3533004580840572e-05, + "loss": 8.8172, + "step": 110140 + }, + { + "epoch": 0.5500761567080327, + "grad_norm": 0.09885483235120773, + "learning_rate": 1.3531502665899022e-05, + "loss": 8.8299, + "step": 110150 + }, + { + "epoch": 0.5501260955329721, + "grad_norm": 0.09453386068344116, + "learning_rate": 1.3530000750957472e-05, + "loss": 8.8231, + "step": 110160 + }, + { + "epoch": 0.5501760343579115, + "grad_norm": 0.09200888872146606, + "learning_rate": 1.352849883601592e-05, + "loss": 8.8221, + "step": 110170 + }, + { + "epoch": 0.550225973182851, + "grad_norm": 0.09591957181692123, + "learning_rate": 1.352699692107437e-05, + "loss": 8.8092, + "step": 110180 + }, + { + "epoch": 0.5502759120077905, + "grad_norm": 0.09270251542329788, + "learning_rate": 1.352549500613282e-05, + "loss": 8.824, + "step": 110190 + }, + { + "epoch": 0.5503258508327299, + "grad_norm": 0.0944395586848259, + "learning_rate": 1.352399309119127e-05, + "loss": 8.8054, + "step": 110200 + }, + { + "epoch": 0.5503757896576693, + "grad_norm": 0.09260264784097672, + "learning_rate": 1.352249117624972e-05, + "loss": 8.8058, + "step": 110210 + }, + { + "epoch": 0.5504257284826088, + "grad_norm": 0.09659282863140106, + "learning_rate": 1.3520989261308168e-05, + "loss": 8.8116, + "step": 110220 + }, + { + "epoch": 0.5504756673075483, + "grad_norm": 0.10288282483816147, + "learning_rate": 1.3519487346366618e-05, + "loss": 8.8174, + "step": 110230 + }, + { + "epoch": 0.5505256061324877, + "grad_norm": 0.08334256708621979, + "learning_rate": 1.3517985431425067e-05, + "loss": 8.8262, + "step": 110240 + }, + { + "epoch": 0.5505755449574271, + "grad_norm": 0.0915384441614151, + "learning_rate": 1.3516483516483517e-05, + "loss": 8.8317, + "step": 110250 + }, + { + "epoch": 0.5506254837823666, + "grad_norm": 0.09057852625846863, + "learning_rate": 1.3514981601541967e-05, + "loss": 8.8207, + "step": 110260 + }, + { + "epoch": 0.5506754226073061, + "grad_norm": 0.09614181518554688, + "learning_rate": 1.3513479686600416e-05, + "loss": 8.8095, + "step": 110270 + }, + { + "epoch": 0.5507253614322455, + "grad_norm": 0.09297794103622437, + "learning_rate": 1.3511977771658866e-05, + "loss": 8.8024, + "step": 110280 + }, + { + "epoch": 0.5507753002571849, + "grad_norm": 0.09804685413837433, + "learning_rate": 1.3510475856717314e-05, + "loss": 8.8088, + "step": 110290 + }, + { + "epoch": 0.5508252390821244, + "grad_norm": 0.09318984299898148, + "learning_rate": 1.3508973941775764e-05, + "loss": 8.8085, + "step": 110300 + }, + { + "epoch": 0.5508751779070639, + "grad_norm": 0.09332066029310226, + "learning_rate": 1.3507472026834215e-05, + "loss": 8.809, + "step": 110310 + }, + { + "epoch": 0.5509251167320033, + "grad_norm": 0.0949983298778534, + "learning_rate": 1.3505970111892665e-05, + "loss": 8.8238, + "step": 110320 + }, + { + "epoch": 0.5509750555569427, + "grad_norm": 0.09308378398418427, + "learning_rate": 1.3504468196951113e-05, + "loss": 8.844, + "step": 110330 + }, + { + "epoch": 0.5510249943818822, + "grad_norm": 0.09120608866214752, + "learning_rate": 1.3502966282009562e-05, + "loss": 8.8099, + "step": 110340 + }, + { + "epoch": 0.5510749332068217, + "grad_norm": 0.09323354065418243, + "learning_rate": 1.3501464367068012e-05, + "loss": 8.8136, + "step": 110350 + }, + { + "epoch": 0.5511248720317611, + "grad_norm": 0.0914924144744873, + "learning_rate": 1.3499962452126462e-05, + "loss": 8.8114, + "step": 110360 + }, + { + "epoch": 0.5511748108567005, + "grad_norm": 0.09144311398267746, + "learning_rate": 1.3498460537184912e-05, + "loss": 8.8016, + "step": 110370 + }, + { + "epoch": 0.55122474968164, + "grad_norm": 0.0942022055387497, + "learning_rate": 1.349695862224336e-05, + "loss": 8.8147, + "step": 110380 + }, + { + "epoch": 0.5512746885065795, + "grad_norm": 0.09916293621063232, + "learning_rate": 1.349545670730181e-05, + "loss": 8.8379, + "step": 110390 + }, + { + "epoch": 0.5513246273315189, + "grad_norm": 0.09357352554798126, + "learning_rate": 1.349395479236026e-05, + "loss": 8.8308, + "step": 110400 + }, + { + "epoch": 0.5513745661564583, + "grad_norm": 0.09357526153326035, + "learning_rate": 1.349245287741871e-05, + "loss": 8.8173, + "step": 110410 + }, + { + "epoch": 0.5514245049813978, + "grad_norm": 0.08764225989580154, + "learning_rate": 1.349095096247716e-05, + "loss": 8.8153, + "step": 110420 + }, + { + "epoch": 0.5514744438063373, + "grad_norm": 0.09387609362602234, + "learning_rate": 1.3489449047535608e-05, + "loss": 8.8175, + "step": 110430 + }, + { + "epoch": 0.5515243826312767, + "grad_norm": 0.09247896075248718, + "learning_rate": 1.3487947132594057e-05, + "loss": 8.809, + "step": 110440 + }, + { + "epoch": 0.5515743214562161, + "grad_norm": 0.09111214429140091, + "learning_rate": 1.3486445217652507e-05, + "loss": 8.8131, + "step": 110450 + }, + { + "epoch": 0.5516242602811556, + "grad_norm": 0.09693354368209839, + "learning_rate": 1.3484943302710957e-05, + "loss": 8.8086, + "step": 110460 + }, + { + "epoch": 0.551674199106095, + "grad_norm": 0.09754697233438492, + "learning_rate": 1.3483441387769407e-05, + "loss": 8.8201, + "step": 110470 + }, + { + "epoch": 0.5517241379310345, + "grad_norm": 0.09200212359428406, + "learning_rate": 1.3481939472827857e-05, + "loss": 8.8175, + "step": 110480 + }, + { + "epoch": 0.5517740767559739, + "grad_norm": 0.10172619670629501, + "learning_rate": 1.3480437557886304e-05, + "loss": 8.8139, + "step": 110490 + }, + { + "epoch": 0.5518240155809134, + "grad_norm": 0.09638337045907974, + "learning_rate": 1.3478935642944754e-05, + "loss": 8.8247, + "step": 110500 + }, + { + "epoch": 0.5518739544058529, + "grad_norm": 0.09530887752771378, + "learning_rate": 1.3477433728003205e-05, + "loss": 8.8224, + "step": 110510 + }, + { + "epoch": 0.5519238932307923, + "grad_norm": 0.09654687345027924, + "learning_rate": 1.3475931813061655e-05, + "loss": 8.8239, + "step": 110520 + }, + { + "epoch": 0.5519738320557317, + "grad_norm": 0.0979146957397461, + "learning_rate": 1.3474429898120105e-05, + "loss": 8.827, + "step": 110530 + }, + { + "epoch": 0.5520237708806712, + "grad_norm": 0.09774132072925568, + "learning_rate": 1.3472927983178552e-05, + "loss": 8.7931, + "step": 110540 + }, + { + "epoch": 0.5520737097056106, + "grad_norm": 0.09289306402206421, + "learning_rate": 1.3471426068237002e-05, + "loss": 8.8123, + "step": 110550 + }, + { + "epoch": 0.5521236485305501, + "grad_norm": 0.08744509518146515, + "learning_rate": 1.3469924153295452e-05, + "loss": 8.8039, + "step": 110560 + }, + { + "epoch": 0.5521735873554895, + "grad_norm": 0.08979026228189468, + "learning_rate": 1.3468422238353902e-05, + "loss": 8.8056, + "step": 110570 + }, + { + "epoch": 0.552223526180429, + "grad_norm": 0.09543202817440033, + "learning_rate": 1.3466920323412352e-05, + "loss": 8.8093, + "step": 110580 + }, + { + "epoch": 0.5522734650053684, + "grad_norm": 0.09224719554185867, + "learning_rate": 1.34654184084708e-05, + "loss": 8.8181, + "step": 110590 + }, + { + "epoch": 0.5523234038303079, + "grad_norm": 0.0893092080950737, + "learning_rate": 1.346391649352925e-05, + "loss": 8.8243, + "step": 110600 + }, + { + "epoch": 0.5523733426552473, + "grad_norm": 0.09298286586999893, + "learning_rate": 1.34624145785877e-05, + "loss": 8.8224, + "step": 110610 + }, + { + "epoch": 0.5524232814801868, + "grad_norm": 0.09143733233213425, + "learning_rate": 1.346091266364615e-05, + "loss": 8.8141, + "step": 110620 + }, + { + "epoch": 0.5524732203051262, + "grad_norm": 0.09493601322174072, + "learning_rate": 1.34594107487046e-05, + "loss": 8.8273, + "step": 110630 + }, + { + "epoch": 0.5525231591300657, + "grad_norm": 0.09791026264429092, + "learning_rate": 1.3457908833763048e-05, + "loss": 8.801, + "step": 110640 + }, + { + "epoch": 0.5525730979550051, + "grad_norm": 0.08950292319059372, + "learning_rate": 1.3456406918821497e-05, + "loss": 8.8103, + "step": 110650 + }, + { + "epoch": 0.5526230367799446, + "grad_norm": 0.09202896058559418, + "learning_rate": 1.3454905003879947e-05, + "loss": 8.8157, + "step": 110660 + }, + { + "epoch": 0.552672975604884, + "grad_norm": 0.09055034816265106, + "learning_rate": 1.3453403088938397e-05, + "loss": 8.8016, + "step": 110670 + }, + { + "epoch": 0.5527229144298235, + "grad_norm": 0.09382042288780212, + "learning_rate": 1.3451901173996847e-05, + "loss": 8.8212, + "step": 110680 + }, + { + "epoch": 0.5527728532547629, + "grad_norm": 0.09259592741727829, + "learning_rate": 1.3450399259055296e-05, + "loss": 8.799, + "step": 110690 + }, + { + "epoch": 0.5528227920797024, + "grad_norm": 0.09515759348869324, + "learning_rate": 1.3448897344113744e-05, + "loss": 8.8096, + "step": 110700 + }, + { + "epoch": 0.5528727309046418, + "grad_norm": 0.09235917776823044, + "learning_rate": 1.3447395429172195e-05, + "loss": 8.8192, + "step": 110710 + }, + { + "epoch": 0.5529226697295813, + "grad_norm": 0.09815475344657898, + "learning_rate": 1.3445893514230645e-05, + "loss": 8.8107, + "step": 110720 + }, + { + "epoch": 0.5529726085545207, + "grad_norm": 0.09027951210737228, + "learning_rate": 1.3444391599289095e-05, + "loss": 8.8183, + "step": 110730 + }, + { + "epoch": 0.5530225473794602, + "grad_norm": 0.09175124019384384, + "learning_rate": 1.3442889684347543e-05, + "loss": 8.8093, + "step": 110740 + }, + { + "epoch": 0.5530724862043996, + "grad_norm": 0.0899006649851799, + "learning_rate": 1.3441387769405992e-05, + "loss": 8.8136, + "step": 110750 + }, + { + "epoch": 0.5531224250293391, + "grad_norm": 0.0890357568860054, + "learning_rate": 1.3439885854464442e-05, + "loss": 8.8141, + "step": 110760 + }, + { + "epoch": 0.5531723638542785, + "grad_norm": 0.09818465262651443, + "learning_rate": 1.3438383939522892e-05, + "loss": 8.8089, + "step": 110770 + }, + { + "epoch": 0.5532223026792179, + "grad_norm": 0.09356721490621567, + "learning_rate": 1.3436882024581342e-05, + "loss": 8.8082, + "step": 110780 + }, + { + "epoch": 0.5532722415041574, + "grad_norm": 0.09822935611009598, + "learning_rate": 1.3435380109639791e-05, + "loss": 8.8022, + "step": 110790 + }, + { + "epoch": 0.5533221803290969, + "grad_norm": 0.09416577219963074, + "learning_rate": 1.3433878194698241e-05, + "loss": 8.8152, + "step": 110800 + }, + { + "epoch": 0.5533721191540363, + "grad_norm": 0.08785787969827652, + "learning_rate": 1.343237627975669e-05, + "loss": 8.8243, + "step": 110810 + }, + { + "epoch": 0.5534220579789757, + "grad_norm": 0.09121017158031464, + "learning_rate": 1.343087436481514e-05, + "loss": 8.8282, + "step": 110820 + }, + { + "epoch": 0.5534719968039152, + "grad_norm": 0.09230327606201172, + "learning_rate": 1.342937244987359e-05, + "loss": 8.807, + "step": 110830 + }, + { + "epoch": 0.5535219356288547, + "grad_norm": 0.09492280334234238, + "learning_rate": 1.3427870534932038e-05, + "loss": 8.805, + "step": 110840 + }, + { + "epoch": 0.5535718744537941, + "grad_norm": 0.09432464092969894, + "learning_rate": 1.3426368619990489e-05, + "loss": 8.8028, + "step": 110850 + }, + { + "epoch": 0.5536218132787335, + "grad_norm": 0.08931145071983337, + "learning_rate": 1.3424866705048937e-05, + "loss": 8.8191, + "step": 110860 + }, + { + "epoch": 0.553671752103673, + "grad_norm": 0.08781544119119644, + "learning_rate": 1.3423364790107387e-05, + "loss": 8.8121, + "step": 110870 + }, + { + "epoch": 0.5537216909286125, + "grad_norm": 0.09263685345649719, + "learning_rate": 1.3421862875165837e-05, + "loss": 8.8321, + "step": 110880 + }, + { + "epoch": 0.5537716297535519, + "grad_norm": 0.0927627757191658, + "learning_rate": 1.3420360960224286e-05, + "loss": 8.8092, + "step": 110890 + }, + { + "epoch": 0.5538215685784913, + "grad_norm": 0.09033826738595963, + "learning_rate": 1.3418859045282736e-05, + "loss": 8.8183, + "step": 110900 + }, + { + "epoch": 0.5538715074034308, + "grad_norm": 0.09372294694185257, + "learning_rate": 1.3417357130341185e-05, + "loss": 8.807, + "step": 110910 + }, + { + "epoch": 0.5539214462283703, + "grad_norm": 0.09317562729120255, + "learning_rate": 1.3415855215399635e-05, + "loss": 8.8221, + "step": 110920 + }, + { + "epoch": 0.5539713850533097, + "grad_norm": 0.09567903727293015, + "learning_rate": 1.3414353300458085e-05, + "loss": 8.8016, + "step": 110930 + }, + { + "epoch": 0.5540213238782491, + "grad_norm": 0.09196696430444717, + "learning_rate": 1.3412851385516533e-05, + "loss": 8.7924, + "step": 110940 + }, + { + "epoch": 0.5540712627031886, + "grad_norm": 0.09338729828596115, + "learning_rate": 1.3411349470574984e-05, + "loss": 8.8085, + "step": 110950 + }, + { + "epoch": 0.554121201528128, + "grad_norm": 0.08997715264558792, + "learning_rate": 1.3409847555633434e-05, + "loss": 8.8152, + "step": 110960 + }, + { + "epoch": 0.5541711403530675, + "grad_norm": 0.09378908574581146, + "learning_rate": 1.3408345640691882e-05, + "loss": 8.8156, + "step": 110970 + }, + { + "epoch": 0.5542210791780069, + "grad_norm": 0.09587135910987854, + "learning_rate": 1.3406843725750333e-05, + "loss": 8.8153, + "step": 110980 + }, + { + "epoch": 0.5542710180029464, + "grad_norm": 0.09000777453184128, + "learning_rate": 1.3405341810808781e-05, + "loss": 8.8142, + "step": 110990 + }, + { + "epoch": 0.5543209568278858, + "grad_norm": 0.09232640266418457, + "learning_rate": 1.3403839895867231e-05, + "loss": 8.8225, + "step": 111000 + }, + { + "epoch": 0.5543708956528253, + "grad_norm": 0.0918559655547142, + "learning_rate": 1.3402337980925681e-05, + "loss": 8.8173, + "step": 111010 + }, + { + "epoch": 0.5544208344777647, + "grad_norm": 0.09692919254302979, + "learning_rate": 1.340083606598413e-05, + "loss": 8.8041, + "step": 111020 + }, + { + "epoch": 0.5544707733027042, + "grad_norm": 0.09214792400598526, + "learning_rate": 1.339933415104258e-05, + "loss": 8.8139, + "step": 111030 + }, + { + "epoch": 0.5545207121276436, + "grad_norm": 0.10509046912193298, + "learning_rate": 1.3397832236101028e-05, + "loss": 8.8006, + "step": 111040 + }, + { + "epoch": 0.5545706509525831, + "grad_norm": 0.09465457499027252, + "learning_rate": 1.3396330321159479e-05, + "loss": 8.8227, + "step": 111050 + }, + { + "epoch": 0.5546205897775225, + "grad_norm": 0.09613688290119171, + "learning_rate": 1.3394828406217929e-05, + "loss": 8.8222, + "step": 111060 + }, + { + "epoch": 0.554670528602462, + "grad_norm": 0.09685207903385162, + "learning_rate": 1.3393326491276377e-05, + "loss": 8.805, + "step": 111070 + }, + { + "epoch": 0.5547204674274014, + "grad_norm": 0.09501661360263824, + "learning_rate": 1.3391824576334828e-05, + "loss": 8.8135, + "step": 111080 + }, + { + "epoch": 0.5547704062523409, + "grad_norm": 0.08681835234165192, + "learning_rate": 1.3390322661393276e-05, + "loss": 8.8151, + "step": 111090 + }, + { + "epoch": 0.5548203450772803, + "grad_norm": 0.09274887293577194, + "learning_rate": 1.3388820746451726e-05, + "loss": 8.8216, + "step": 111100 + }, + { + "epoch": 0.5548702839022198, + "grad_norm": 0.08929115533828735, + "learning_rate": 1.3387318831510176e-05, + "loss": 8.8071, + "step": 111110 + }, + { + "epoch": 0.5549202227271592, + "grad_norm": 0.0890386626124382, + "learning_rate": 1.3385816916568627e-05, + "loss": 8.8029, + "step": 111120 + }, + { + "epoch": 0.5549701615520987, + "grad_norm": 0.09131958335638046, + "learning_rate": 1.3384315001627075e-05, + "loss": 8.8071, + "step": 111130 + }, + { + "epoch": 0.5550201003770381, + "grad_norm": 0.09491490572690964, + "learning_rate": 1.3382813086685524e-05, + "loss": 8.8114, + "step": 111140 + }, + { + "epoch": 0.5550700392019776, + "grad_norm": 0.09056004881858826, + "learning_rate": 1.3381311171743974e-05, + "loss": 8.8142, + "step": 111150 + }, + { + "epoch": 0.555119978026917, + "grad_norm": 0.09362094849348068, + "learning_rate": 1.3379809256802424e-05, + "loss": 8.8108, + "step": 111160 + }, + { + "epoch": 0.5551699168518565, + "grad_norm": 0.08976966887712479, + "learning_rate": 1.3378307341860874e-05, + "loss": 8.8087, + "step": 111170 + }, + { + "epoch": 0.5552198556767959, + "grad_norm": 0.0920504778623581, + "learning_rate": 1.3376805426919323e-05, + "loss": 8.8133, + "step": 111180 + }, + { + "epoch": 0.5552697945017354, + "grad_norm": 0.09543319791555405, + "learning_rate": 1.3375303511977771e-05, + "loss": 8.8236, + "step": 111190 + }, + { + "epoch": 0.5553197333266748, + "grad_norm": 0.08917569369077682, + "learning_rate": 1.3373801597036221e-05, + "loss": 8.8106, + "step": 111200 + }, + { + "epoch": 0.5553696721516143, + "grad_norm": 0.0944250300526619, + "learning_rate": 1.3372299682094671e-05, + "loss": 8.8051, + "step": 111210 + }, + { + "epoch": 0.5554196109765537, + "grad_norm": 0.09428086876869202, + "learning_rate": 1.3370797767153122e-05, + "loss": 8.8179, + "step": 111220 + }, + { + "epoch": 0.5554695498014932, + "grad_norm": 0.09255554527044296, + "learning_rate": 1.336929585221157e-05, + "loss": 8.8145, + "step": 111230 + }, + { + "epoch": 0.5555194886264326, + "grad_norm": 0.09141411632299423, + "learning_rate": 1.3367793937270019e-05, + "loss": 8.8118, + "step": 111240 + }, + { + "epoch": 0.555569427451372, + "grad_norm": 0.0931018739938736, + "learning_rate": 1.3366292022328469e-05, + "loss": 8.8022, + "step": 111250 + }, + { + "epoch": 0.5556193662763115, + "grad_norm": 0.09240280836820602, + "learning_rate": 1.3364790107386919e-05, + "loss": 8.8012, + "step": 111260 + }, + { + "epoch": 0.555669305101251, + "grad_norm": 0.09618735313415527, + "learning_rate": 1.3363288192445369e-05, + "loss": 8.8128, + "step": 111270 + }, + { + "epoch": 0.5557192439261904, + "grad_norm": 0.097829669713974, + "learning_rate": 1.336178627750382e-05, + "loss": 8.8086, + "step": 111280 + }, + { + "epoch": 0.5557691827511299, + "grad_norm": 0.09861978143453598, + "learning_rate": 1.3360284362562266e-05, + "loss": 8.8114, + "step": 111290 + }, + { + "epoch": 0.5558191215760693, + "grad_norm": 0.08871912211179733, + "learning_rate": 1.3358782447620716e-05, + "loss": 8.8007, + "step": 111300 + }, + { + "epoch": 0.5558690604010088, + "grad_norm": 0.09760592132806778, + "learning_rate": 1.3357280532679166e-05, + "loss": 8.8187, + "step": 111310 + }, + { + "epoch": 0.5559189992259482, + "grad_norm": 0.09599092602729797, + "learning_rate": 1.3355778617737617e-05, + "loss": 8.8205, + "step": 111320 + }, + { + "epoch": 0.5559689380508877, + "grad_norm": 0.0927274227142334, + "learning_rate": 1.3354276702796067e-05, + "loss": 8.8211, + "step": 111330 + }, + { + "epoch": 0.5560188768758271, + "grad_norm": 0.0984937772154808, + "learning_rate": 1.3352774787854514e-05, + "loss": 8.8142, + "step": 111340 + }, + { + "epoch": 0.5560688157007666, + "grad_norm": 0.09656939655542374, + "learning_rate": 1.3351272872912964e-05, + "loss": 8.8249, + "step": 111350 + }, + { + "epoch": 0.556118754525706, + "grad_norm": 0.0973270907998085, + "learning_rate": 1.3349770957971414e-05, + "loss": 8.8217, + "step": 111360 + }, + { + "epoch": 0.5561686933506454, + "grad_norm": 0.09051787108182907, + "learning_rate": 1.3348269043029864e-05, + "loss": 8.791, + "step": 111370 + }, + { + "epoch": 0.5562186321755849, + "grad_norm": 0.09165889024734497, + "learning_rate": 1.3346767128088314e-05, + "loss": 8.8026, + "step": 111380 + }, + { + "epoch": 0.5562685710005244, + "grad_norm": 0.09260290116071701, + "learning_rate": 1.3345265213146761e-05, + "loss": 8.8246, + "step": 111390 + }, + { + "epoch": 0.5563185098254638, + "grad_norm": 0.08657737821340561, + "learning_rate": 1.3343763298205211e-05, + "loss": 8.8278, + "step": 111400 + }, + { + "epoch": 0.5563684486504032, + "grad_norm": 0.09201980382204056, + "learning_rate": 1.3342261383263661e-05, + "loss": 8.8152, + "step": 111410 + }, + { + "epoch": 0.5564183874753427, + "grad_norm": 0.09139979630708694, + "learning_rate": 1.3340759468322112e-05, + "loss": 8.8084, + "step": 111420 + }, + { + "epoch": 0.5564683263002822, + "grad_norm": 0.08889581263065338, + "learning_rate": 1.3339257553380562e-05, + "loss": 8.8078, + "step": 111430 + }, + { + "epoch": 0.5565182651252216, + "grad_norm": 0.09374164789915085, + "learning_rate": 1.3337755638439009e-05, + "loss": 8.8154, + "step": 111440 + }, + { + "epoch": 0.556568203950161, + "grad_norm": 0.08838196843862534, + "learning_rate": 1.3336253723497459e-05, + "loss": 8.8182, + "step": 111450 + }, + { + "epoch": 0.5566181427751005, + "grad_norm": 0.09526786208152771, + "learning_rate": 1.3334751808555909e-05, + "loss": 8.8142, + "step": 111460 + }, + { + "epoch": 0.55666808160004, + "grad_norm": 0.09561596810817719, + "learning_rate": 1.3333249893614359e-05, + "loss": 8.809, + "step": 111470 + }, + { + "epoch": 0.5567180204249794, + "grad_norm": 0.09850651025772095, + "learning_rate": 1.333174797867281e-05, + "loss": 8.8064, + "step": 111480 + }, + { + "epoch": 0.5567679592499188, + "grad_norm": 0.09541216492652893, + "learning_rate": 1.3330246063731258e-05, + "loss": 8.8111, + "step": 111490 + }, + { + "epoch": 0.5568178980748583, + "grad_norm": 0.08966288715600967, + "learning_rate": 1.3328744148789706e-05, + "loss": 8.8197, + "step": 111500 + }, + { + "epoch": 0.5568678368997978, + "grad_norm": 0.09588926285505295, + "learning_rate": 1.3327242233848156e-05, + "loss": 8.81, + "step": 111510 + }, + { + "epoch": 0.5569177757247372, + "grad_norm": 0.09180673956871033, + "learning_rate": 1.3325740318906607e-05, + "loss": 8.7976, + "step": 111520 + }, + { + "epoch": 0.5569677145496766, + "grad_norm": 0.09179525822401047, + "learning_rate": 1.3324238403965057e-05, + "loss": 8.8198, + "step": 111530 + }, + { + "epoch": 0.5570176533746161, + "grad_norm": 0.09658701717853546, + "learning_rate": 1.3322736489023505e-05, + "loss": 8.8084, + "step": 111540 + }, + { + "epoch": 0.5570675921995556, + "grad_norm": 0.09038843214511871, + "learning_rate": 1.3321234574081954e-05, + "loss": 8.8118, + "step": 111550 + }, + { + "epoch": 0.557117531024495, + "grad_norm": 0.09037397056818008, + "learning_rate": 1.3319732659140404e-05, + "loss": 8.8033, + "step": 111560 + }, + { + "epoch": 0.5571674698494344, + "grad_norm": 0.09661097079515457, + "learning_rate": 1.3318230744198854e-05, + "loss": 8.8129, + "step": 111570 + }, + { + "epoch": 0.5572174086743739, + "grad_norm": 0.09827481955289841, + "learning_rate": 1.3316728829257304e-05, + "loss": 8.8086, + "step": 111580 + }, + { + "epoch": 0.5572673474993134, + "grad_norm": 0.09280990809202194, + "learning_rate": 1.3315226914315753e-05, + "loss": 8.822, + "step": 111590 + }, + { + "epoch": 0.5573172863242528, + "grad_norm": 0.09101619571447372, + "learning_rate": 1.3313724999374201e-05, + "loss": 8.8055, + "step": 111600 + }, + { + "epoch": 0.5573672251491922, + "grad_norm": 0.09069830179214478, + "learning_rate": 1.3312223084432651e-05, + "loss": 8.8176, + "step": 111610 + }, + { + "epoch": 0.5574171639741317, + "grad_norm": 0.08932122588157654, + "learning_rate": 1.3310721169491102e-05, + "loss": 8.8185, + "step": 111620 + }, + { + "epoch": 0.5574671027990712, + "grad_norm": 0.08982568234205246, + "learning_rate": 1.3309219254549552e-05, + "loss": 8.8209, + "step": 111630 + }, + { + "epoch": 0.5575170416240106, + "grad_norm": 0.09528286010026932, + "learning_rate": 1.3307717339608e-05, + "loss": 8.8101, + "step": 111640 + }, + { + "epoch": 0.55756698044895, + "grad_norm": 0.09759854525327682, + "learning_rate": 1.330621542466645e-05, + "loss": 8.7972, + "step": 111650 + }, + { + "epoch": 0.5576169192738895, + "grad_norm": 0.09867121279239655, + "learning_rate": 1.3304713509724899e-05, + "loss": 8.817, + "step": 111660 + }, + { + "epoch": 0.557666858098829, + "grad_norm": 0.09287247806787491, + "learning_rate": 1.3303211594783349e-05, + "loss": 8.8053, + "step": 111670 + }, + { + "epoch": 0.5577167969237684, + "grad_norm": 0.09172237664461136, + "learning_rate": 1.33017096798418e-05, + "loss": 8.8155, + "step": 111680 + }, + { + "epoch": 0.5577667357487078, + "grad_norm": 0.08916451781988144, + "learning_rate": 1.3300207764900248e-05, + "loss": 8.8164, + "step": 111690 + }, + { + "epoch": 0.5578166745736473, + "grad_norm": 0.08944099396467209, + "learning_rate": 1.3298705849958698e-05, + "loss": 8.8193, + "step": 111700 + }, + { + "epoch": 0.5578666133985868, + "grad_norm": 0.09073956310749054, + "learning_rate": 1.3297203935017146e-05, + "loss": 8.8127, + "step": 111710 + }, + { + "epoch": 0.5579165522235262, + "grad_norm": 0.09659486263990402, + "learning_rate": 1.3295702020075597e-05, + "loss": 8.7983, + "step": 111720 + }, + { + "epoch": 0.5579664910484656, + "grad_norm": 0.09616279602050781, + "learning_rate": 1.3294200105134047e-05, + "loss": 8.8022, + "step": 111730 + }, + { + "epoch": 0.558016429873405, + "grad_norm": 0.09376160800457001, + "learning_rate": 1.3292698190192495e-05, + "loss": 8.8009, + "step": 111740 + }, + { + "epoch": 0.5580663686983445, + "grad_norm": 0.10133592039346695, + "learning_rate": 1.3291196275250945e-05, + "loss": 8.8166, + "step": 111750 + }, + { + "epoch": 0.558116307523284, + "grad_norm": 0.09426382184028625, + "learning_rate": 1.3289694360309394e-05, + "loss": 8.8035, + "step": 111760 + }, + { + "epoch": 0.5581662463482234, + "grad_norm": 0.09278400242328644, + "learning_rate": 1.3288192445367844e-05, + "loss": 8.8152, + "step": 111770 + }, + { + "epoch": 0.5582161851731628, + "grad_norm": 0.09196425974369049, + "learning_rate": 1.3286690530426294e-05, + "loss": 8.8105, + "step": 111780 + }, + { + "epoch": 0.5582661239981023, + "grad_norm": 0.09005366265773773, + "learning_rate": 1.3285188615484743e-05, + "loss": 8.8056, + "step": 111790 + }, + { + "epoch": 0.5583160628230418, + "grad_norm": 0.09192261844873428, + "learning_rate": 1.3283686700543193e-05, + "loss": 8.8059, + "step": 111800 + }, + { + "epoch": 0.5583660016479812, + "grad_norm": 0.09564708918333054, + "learning_rate": 1.3282184785601643e-05, + "loss": 8.8168, + "step": 111810 + }, + { + "epoch": 0.5584159404729206, + "grad_norm": 0.09438828378915787, + "learning_rate": 1.3280682870660092e-05, + "loss": 8.8086, + "step": 111820 + }, + { + "epoch": 0.5584658792978601, + "grad_norm": 0.0901927575469017, + "learning_rate": 1.3279180955718542e-05, + "loss": 8.8193, + "step": 111830 + }, + { + "epoch": 0.5585158181227996, + "grad_norm": 0.0903163030743599, + "learning_rate": 1.327767904077699e-05, + "loss": 8.8052, + "step": 111840 + }, + { + "epoch": 0.558565756947739, + "grad_norm": 0.08931218832731247, + "learning_rate": 1.327617712583544e-05, + "loss": 8.8079, + "step": 111850 + }, + { + "epoch": 0.5586156957726784, + "grad_norm": 0.0938708558678627, + "learning_rate": 1.327467521089389e-05, + "loss": 8.7983, + "step": 111860 + }, + { + "epoch": 0.5586656345976179, + "grad_norm": 0.09172417223453522, + "learning_rate": 1.3273173295952339e-05, + "loss": 8.8037, + "step": 111870 + }, + { + "epoch": 0.5587155734225574, + "grad_norm": 0.0920783281326294, + "learning_rate": 1.327167138101079e-05, + "loss": 8.7911, + "step": 111880 + }, + { + "epoch": 0.5587655122474968, + "grad_norm": 0.09476281702518463, + "learning_rate": 1.3270169466069238e-05, + "loss": 8.7965, + "step": 111890 + }, + { + "epoch": 0.5588154510724362, + "grad_norm": 0.09168004244565964, + "learning_rate": 1.3268667551127688e-05, + "loss": 8.8016, + "step": 111900 + }, + { + "epoch": 0.5588653898973757, + "grad_norm": 0.09263158589601517, + "learning_rate": 1.3267165636186138e-05, + "loss": 8.8066, + "step": 111910 + }, + { + "epoch": 0.5589153287223152, + "grad_norm": 0.09056047350168228, + "learning_rate": 1.3265663721244587e-05, + "loss": 8.8106, + "step": 111920 + }, + { + "epoch": 0.5589652675472546, + "grad_norm": 0.09256269037723541, + "learning_rate": 1.3264161806303037e-05, + "loss": 8.8149, + "step": 111930 + }, + { + "epoch": 0.559015206372194, + "grad_norm": 0.09468802064657211, + "learning_rate": 1.3262659891361485e-05, + "loss": 8.7959, + "step": 111940 + }, + { + "epoch": 0.5590651451971335, + "grad_norm": 0.09123507142066956, + "learning_rate": 1.3261157976419935e-05, + "loss": 8.7775, + "step": 111950 + }, + { + "epoch": 0.559115084022073, + "grad_norm": 0.08695657551288605, + "learning_rate": 1.3259656061478386e-05, + "loss": 8.8059, + "step": 111960 + }, + { + "epoch": 0.5591650228470124, + "grad_norm": 0.09323490411043167, + "learning_rate": 1.3258154146536836e-05, + "loss": 8.8008, + "step": 111970 + }, + { + "epoch": 0.5592149616719518, + "grad_norm": 0.09762334078550339, + "learning_rate": 1.3256652231595284e-05, + "loss": 8.8023, + "step": 111980 + }, + { + "epoch": 0.5592649004968913, + "grad_norm": 0.0962090715765953, + "learning_rate": 1.3255150316653733e-05, + "loss": 8.8002, + "step": 111990 + }, + { + "epoch": 0.5593148393218308, + "grad_norm": 0.09336007386445999, + "learning_rate": 1.3253648401712183e-05, + "loss": 8.8044, + "step": 112000 + }, + { + "epoch": 0.5593647781467702, + "grad_norm": 0.09324468672275543, + "learning_rate": 1.3252146486770633e-05, + "loss": 8.7984, + "step": 112010 + }, + { + "epoch": 0.5594147169717096, + "grad_norm": 0.09045629948377609, + "learning_rate": 1.3250644571829083e-05, + "loss": 8.7981, + "step": 112020 + }, + { + "epoch": 0.5594646557966491, + "grad_norm": 0.09312763065099716, + "learning_rate": 1.3249142656887532e-05, + "loss": 8.7998, + "step": 112030 + }, + { + "epoch": 0.5595145946215886, + "grad_norm": 0.09190090000629425, + "learning_rate": 1.324764074194598e-05, + "loss": 8.8192, + "step": 112040 + }, + { + "epoch": 0.559564533446528, + "grad_norm": 0.09083940088748932, + "learning_rate": 1.324613882700443e-05, + "loss": 8.8084, + "step": 112050 + }, + { + "epoch": 0.5596144722714674, + "grad_norm": 0.09351202100515366, + "learning_rate": 1.324463691206288e-05, + "loss": 8.8105, + "step": 112060 + }, + { + "epoch": 0.5596644110964069, + "grad_norm": 0.09421859681606293, + "learning_rate": 1.324313499712133e-05, + "loss": 8.7967, + "step": 112070 + }, + { + "epoch": 0.5597143499213464, + "grad_norm": 0.09057319909334183, + "learning_rate": 1.324163308217978e-05, + "loss": 8.8044, + "step": 112080 + }, + { + "epoch": 0.5597642887462858, + "grad_norm": 0.09181645512580872, + "learning_rate": 1.3240131167238228e-05, + "loss": 8.798, + "step": 112090 + }, + { + "epoch": 0.5598142275712252, + "grad_norm": 0.08992860466241837, + "learning_rate": 1.3238629252296678e-05, + "loss": 8.7979, + "step": 112100 + }, + { + "epoch": 0.5598641663961647, + "grad_norm": 0.09091092646121979, + "learning_rate": 1.3237127337355128e-05, + "loss": 8.8105, + "step": 112110 + }, + { + "epoch": 0.5599141052211042, + "grad_norm": 0.0932513177394867, + "learning_rate": 1.3235625422413578e-05, + "loss": 8.7957, + "step": 112120 + }, + { + "epoch": 0.5599640440460436, + "grad_norm": 0.08964350819587708, + "learning_rate": 1.3234123507472028e-05, + "loss": 8.8212, + "step": 112130 + }, + { + "epoch": 0.560013982870983, + "grad_norm": 0.09178796410560608, + "learning_rate": 1.3232621592530475e-05, + "loss": 8.7955, + "step": 112140 + }, + { + "epoch": 0.5600639216959225, + "grad_norm": 0.09413108229637146, + "learning_rate": 1.3231119677588925e-05, + "loss": 8.799, + "step": 112150 + }, + { + "epoch": 0.560113860520862, + "grad_norm": 0.09713994711637497, + "learning_rate": 1.3229617762647376e-05, + "loss": 8.8062, + "step": 112160 + }, + { + "epoch": 0.5601637993458014, + "grad_norm": 0.09629582613706589, + "learning_rate": 1.3228115847705826e-05, + "loss": 8.8064, + "step": 112170 + }, + { + "epoch": 0.5602137381707408, + "grad_norm": 0.09238281100988388, + "learning_rate": 1.3226613932764276e-05, + "loss": 8.8076, + "step": 112180 + }, + { + "epoch": 0.5602636769956802, + "grad_norm": 0.0891227275133133, + "learning_rate": 1.3225112017822723e-05, + "loss": 8.8078, + "step": 112190 + }, + { + "epoch": 0.5603136158206198, + "grad_norm": 0.09361886978149414, + "learning_rate": 1.3223610102881173e-05, + "loss": 8.8078, + "step": 112200 + }, + { + "epoch": 0.5603635546455592, + "grad_norm": 0.09347978234291077, + "learning_rate": 1.3222108187939623e-05, + "loss": 8.7925, + "step": 112210 + }, + { + "epoch": 0.5604134934704986, + "grad_norm": 0.09022147953510284, + "learning_rate": 1.3220606272998073e-05, + "loss": 8.7998, + "step": 112220 + }, + { + "epoch": 0.560463432295438, + "grad_norm": 0.09901180863380432, + "learning_rate": 1.3219104358056523e-05, + "loss": 8.7882, + "step": 112230 + }, + { + "epoch": 0.5605133711203776, + "grad_norm": 0.09106682986021042, + "learning_rate": 1.3217602443114972e-05, + "loss": 8.7916, + "step": 112240 + }, + { + "epoch": 0.560563309945317, + "grad_norm": 0.09734653681516647, + "learning_rate": 1.321610052817342e-05, + "loss": 8.7859, + "step": 112250 + }, + { + "epoch": 0.5606132487702564, + "grad_norm": 0.09442363679409027, + "learning_rate": 1.321459861323187e-05, + "loss": 8.7861, + "step": 112260 + }, + { + "epoch": 0.5606631875951958, + "grad_norm": 0.09389246255159378, + "learning_rate": 1.321309669829032e-05, + "loss": 8.8063, + "step": 112270 + }, + { + "epoch": 0.5607131264201354, + "grad_norm": 0.09472633898258209, + "learning_rate": 1.3211594783348771e-05, + "loss": 8.7963, + "step": 112280 + }, + { + "epoch": 0.5607630652450748, + "grad_norm": 0.0962495282292366, + "learning_rate": 1.321009286840722e-05, + "loss": 8.8155, + "step": 112290 + }, + { + "epoch": 0.5608130040700142, + "grad_norm": 0.09074840694665909, + "learning_rate": 1.3208590953465668e-05, + "loss": 8.8103, + "step": 112300 + }, + { + "epoch": 0.5608629428949536, + "grad_norm": 0.09516070038080215, + "learning_rate": 1.3207089038524118e-05, + "loss": 8.7933, + "step": 112310 + }, + { + "epoch": 0.5609128817198932, + "grad_norm": 0.09915341436862946, + "learning_rate": 1.3205587123582568e-05, + "loss": 8.8079, + "step": 112320 + }, + { + "epoch": 0.5609628205448326, + "grad_norm": 0.09369552135467529, + "learning_rate": 1.3204085208641018e-05, + "loss": 8.8061, + "step": 112330 + }, + { + "epoch": 0.561012759369772, + "grad_norm": 0.09473207592964172, + "learning_rate": 1.3202583293699467e-05, + "loss": 8.8014, + "step": 112340 + }, + { + "epoch": 0.5610626981947114, + "grad_norm": 0.09303644299507141, + "learning_rate": 1.3201081378757915e-05, + "loss": 8.8019, + "step": 112350 + }, + { + "epoch": 0.561112637019651, + "grad_norm": 0.09537196904420853, + "learning_rate": 1.3199579463816366e-05, + "loss": 8.8123, + "step": 112360 + }, + { + "epoch": 0.5611625758445904, + "grad_norm": 0.08959188312292099, + "learning_rate": 1.3198077548874816e-05, + "loss": 8.7895, + "step": 112370 + }, + { + "epoch": 0.5612125146695298, + "grad_norm": 0.0941653922200203, + "learning_rate": 1.3196575633933266e-05, + "loss": 8.7977, + "step": 112380 + }, + { + "epoch": 0.5612624534944692, + "grad_norm": 0.09285327792167664, + "learning_rate": 1.3195073718991716e-05, + "loss": 8.8058, + "step": 112390 + }, + { + "epoch": 0.5613123923194088, + "grad_norm": 0.09470658004283905, + "learning_rate": 1.3193571804050163e-05, + "loss": 8.7981, + "step": 112400 + }, + { + "epoch": 0.5613623311443482, + "grad_norm": 0.0920797735452652, + "learning_rate": 1.3192069889108613e-05, + "loss": 8.8114, + "step": 112410 + }, + { + "epoch": 0.5614122699692876, + "grad_norm": 0.09267135709524155, + "learning_rate": 1.3190567974167063e-05, + "loss": 8.8038, + "step": 112420 + }, + { + "epoch": 0.561462208794227, + "grad_norm": 0.0882105603814125, + "learning_rate": 1.3189066059225513e-05, + "loss": 8.804, + "step": 112430 + }, + { + "epoch": 0.5615121476191666, + "grad_norm": 0.09088388085365295, + "learning_rate": 1.3187564144283964e-05, + "loss": 8.7979, + "step": 112440 + }, + { + "epoch": 0.561562086444106, + "grad_norm": 0.09451723098754883, + "learning_rate": 1.3186062229342412e-05, + "loss": 8.8055, + "step": 112450 + }, + { + "epoch": 0.5616120252690454, + "grad_norm": 0.08706700801849365, + "learning_rate": 1.318456031440086e-05, + "loss": 8.8008, + "step": 112460 + }, + { + "epoch": 0.5616619640939848, + "grad_norm": 0.09282489866018295, + "learning_rate": 1.318305839945931e-05, + "loss": 8.7917, + "step": 112470 + }, + { + "epoch": 0.5617119029189244, + "grad_norm": 0.08970408886671066, + "learning_rate": 1.3181556484517761e-05, + "loss": 8.8134, + "step": 112480 + }, + { + "epoch": 0.5617618417438638, + "grad_norm": 0.09117847681045532, + "learning_rate": 1.3180054569576211e-05, + "loss": 8.8164, + "step": 112490 + }, + { + "epoch": 0.5618117805688032, + "grad_norm": 0.0940316915512085, + "learning_rate": 1.317855265463466e-05, + "loss": 8.81, + "step": 112500 + }, + { + "epoch": 0.5618617193937426, + "grad_norm": 0.08806363493204117, + "learning_rate": 1.3177050739693108e-05, + "loss": 8.8093, + "step": 112510 + }, + { + "epoch": 0.5619116582186822, + "grad_norm": 0.0929834246635437, + "learning_rate": 1.3175548824751558e-05, + "loss": 8.79, + "step": 112520 + }, + { + "epoch": 0.5619615970436216, + "grad_norm": 0.09379177540540695, + "learning_rate": 1.3174046909810009e-05, + "loss": 8.7881, + "step": 112530 + }, + { + "epoch": 0.562011535868561, + "grad_norm": 0.0958777591586113, + "learning_rate": 1.3172544994868459e-05, + "loss": 8.7917, + "step": 112540 + }, + { + "epoch": 0.5620614746935004, + "grad_norm": 0.09718500822782516, + "learning_rate": 1.3171043079926907e-05, + "loss": 8.807, + "step": 112550 + }, + { + "epoch": 0.56211141351844, + "grad_norm": 0.09176292270421982, + "learning_rate": 1.3169541164985356e-05, + "loss": 8.8189, + "step": 112560 + }, + { + "epoch": 0.5621613523433794, + "grad_norm": 0.09247379750013351, + "learning_rate": 1.3168039250043806e-05, + "loss": 8.798, + "step": 112570 + }, + { + "epoch": 0.5622112911683188, + "grad_norm": 0.10087962448596954, + "learning_rate": 1.3166537335102256e-05, + "loss": 8.7897, + "step": 112580 + }, + { + "epoch": 0.5622612299932582, + "grad_norm": 0.08978106826543808, + "learning_rate": 1.3165035420160706e-05, + "loss": 8.801, + "step": 112590 + }, + { + "epoch": 0.5623111688181978, + "grad_norm": 0.09266199916601181, + "learning_rate": 1.3163533505219155e-05, + "loss": 8.7918, + "step": 112600 + }, + { + "epoch": 0.5623611076431372, + "grad_norm": 0.087232805788517, + "learning_rate": 1.3162031590277605e-05, + "loss": 8.8059, + "step": 112610 + }, + { + "epoch": 0.5624110464680766, + "grad_norm": 0.09095864742994308, + "learning_rate": 1.3160529675336053e-05, + "loss": 8.8065, + "step": 112620 + }, + { + "epoch": 0.562460985293016, + "grad_norm": 0.09693886339664459, + "learning_rate": 1.3159027760394504e-05, + "loss": 8.7969, + "step": 112630 + }, + { + "epoch": 0.5625109241179556, + "grad_norm": 0.09412913024425507, + "learning_rate": 1.3157525845452954e-05, + "loss": 8.8179, + "step": 112640 + }, + { + "epoch": 0.562560862942895, + "grad_norm": 0.0964449867606163, + "learning_rate": 1.3156023930511402e-05, + "loss": 8.7957, + "step": 112650 + }, + { + "epoch": 0.5626108017678344, + "grad_norm": 0.09539590030908585, + "learning_rate": 1.3154522015569852e-05, + "loss": 8.7885, + "step": 112660 + }, + { + "epoch": 0.5626607405927738, + "grad_norm": 0.0972166359424591, + "learning_rate": 1.31530201006283e-05, + "loss": 8.8137, + "step": 112670 + }, + { + "epoch": 0.5627106794177134, + "grad_norm": 0.092460498213768, + "learning_rate": 1.3151518185686751e-05, + "loss": 8.8034, + "step": 112680 + }, + { + "epoch": 0.5627606182426528, + "grad_norm": 0.0895271971821785, + "learning_rate": 1.3150016270745201e-05, + "loss": 8.8099, + "step": 112690 + }, + { + "epoch": 0.5628105570675922, + "grad_norm": 0.09291508048772812, + "learning_rate": 1.314851435580365e-05, + "loss": 8.802, + "step": 112700 + }, + { + "epoch": 0.5628604958925316, + "grad_norm": 0.08741452544927597, + "learning_rate": 1.31470124408621e-05, + "loss": 8.7987, + "step": 112710 + }, + { + "epoch": 0.5629104347174712, + "grad_norm": 0.09074393659830093, + "learning_rate": 1.3145510525920548e-05, + "loss": 8.7936, + "step": 112720 + }, + { + "epoch": 0.5629603735424106, + "grad_norm": 0.09287057816982269, + "learning_rate": 1.3144008610978999e-05, + "loss": 8.7991, + "step": 112730 + }, + { + "epoch": 0.56301031236735, + "grad_norm": 0.09430692344903946, + "learning_rate": 1.3142506696037449e-05, + "loss": 8.8014, + "step": 112740 + }, + { + "epoch": 0.5630602511922894, + "grad_norm": 0.09498278051614761, + "learning_rate": 1.3141004781095897e-05, + "loss": 8.8221, + "step": 112750 + }, + { + "epoch": 0.5631101900172288, + "grad_norm": 0.09470874071121216, + "learning_rate": 1.3139502866154347e-05, + "loss": 8.7972, + "step": 112760 + }, + { + "epoch": 0.5631601288421684, + "grad_norm": 0.09391231089830399, + "learning_rate": 1.3138000951212798e-05, + "loss": 8.781, + "step": 112770 + }, + { + "epoch": 0.5632100676671078, + "grad_norm": 0.09245941042900085, + "learning_rate": 1.3136499036271246e-05, + "loss": 8.8051, + "step": 112780 + }, + { + "epoch": 0.5632600064920472, + "grad_norm": 0.09180227667093277, + "learning_rate": 1.3134997121329696e-05, + "loss": 8.7889, + "step": 112790 + }, + { + "epoch": 0.5633099453169866, + "grad_norm": 0.10003501176834106, + "learning_rate": 1.3133495206388145e-05, + "loss": 8.7907, + "step": 112800 + }, + { + "epoch": 0.5633598841419262, + "grad_norm": 0.09212306886911392, + "learning_rate": 1.3131993291446595e-05, + "loss": 8.7978, + "step": 112810 + }, + { + "epoch": 0.5634098229668656, + "grad_norm": 0.0921301320195198, + "learning_rate": 1.3130491376505045e-05, + "loss": 8.797, + "step": 112820 + }, + { + "epoch": 0.563459761791805, + "grad_norm": 0.09204480797052383, + "learning_rate": 1.3128989461563494e-05, + "loss": 8.7892, + "step": 112830 + }, + { + "epoch": 0.5635097006167444, + "grad_norm": 0.09469878673553467, + "learning_rate": 1.3127487546621944e-05, + "loss": 8.8042, + "step": 112840 + }, + { + "epoch": 0.563559639441684, + "grad_norm": 0.09218306094408035, + "learning_rate": 1.3125985631680392e-05, + "loss": 8.8014, + "step": 112850 + }, + { + "epoch": 0.5636095782666234, + "grad_norm": 0.09181991964578629, + "learning_rate": 1.3124483716738842e-05, + "loss": 8.7919, + "step": 112860 + }, + { + "epoch": 0.5636595170915628, + "grad_norm": 0.09692985564470291, + "learning_rate": 1.3122981801797293e-05, + "loss": 8.8035, + "step": 112870 + }, + { + "epoch": 0.5637094559165022, + "grad_norm": 0.09414488822221756, + "learning_rate": 1.3121479886855741e-05, + "loss": 8.7791, + "step": 112880 + }, + { + "epoch": 0.5637593947414418, + "grad_norm": 0.09031105786561966, + "learning_rate": 1.3119977971914191e-05, + "loss": 8.7972, + "step": 112890 + }, + { + "epoch": 0.5638093335663812, + "grad_norm": 0.08820466697216034, + "learning_rate": 1.311847605697264e-05, + "loss": 8.8065, + "step": 112900 + }, + { + "epoch": 0.5638592723913206, + "grad_norm": 0.09765250235795975, + "learning_rate": 1.311697414203109e-05, + "loss": 8.8143, + "step": 112910 + }, + { + "epoch": 0.56390921121626, + "grad_norm": 0.09003455191850662, + "learning_rate": 1.311547222708954e-05, + "loss": 8.7868, + "step": 112920 + }, + { + "epoch": 0.5639591500411996, + "grad_norm": 0.09266629070043564, + "learning_rate": 1.311397031214799e-05, + "loss": 8.8078, + "step": 112930 + }, + { + "epoch": 0.564009088866139, + "grad_norm": 0.08923901617527008, + "learning_rate": 1.3112468397206439e-05, + "loss": 8.8024, + "step": 112940 + }, + { + "epoch": 0.5640590276910784, + "grad_norm": 0.09165368974208832, + "learning_rate": 1.3110966482264887e-05, + "loss": 8.8027, + "step": 112950 + }, + { + "epoch": 0.5641089665160178, + "grad_norm": 0.09243005514144897, + "learning_rate": 1.3109464567323337e-05, + "loss": 8.8092, + "step": 112960 + }, + { + "epoch": 0.5641589053409574, + "grad_norm": 0.08696895092725754, + "learning_rate": 1.3107962652381788e-05, + "loss": 8.8019, + "step": 112970 + }, + { + "epoch": 0.5642088441658968, + "grad_norm": 0.0982133075594902, + "learning_rate": 1.3106460737440238e-05, + "loss": 8.7784, + "step": 112980 + }, + { + "epoch": 0.5642587829908362, + "grad_norm": 0.09230280667543411, + "learning_rate": 1.3104958822498686e-05, + "loss": 8.8033, + "step": 112990 + }, + { + "epoch": 0.5643087218157756, + "grad_norm": 0.09832031279802322, + "learning_rate": 1.3103456907557135e-05, + "loss": 8.8019, + "step": 113000 + }, + { + "epoch": 0.5643586606407152, + "grad_norm": 0.09023145586252213, + "learning_rate": 1.3101954992615585e-05, + "loss": 8.8122, + "step": 113010 + }, + { + "epoch": 0.5644085994656546, + "grad_norm": 0.09258860349655151, + "learning_rate": 1.3100453077674035e-05, + "loss": 8.8005, + "step": 113020 + }, + { + "epoch": 0.564458538290594, + "grad_norm": 0.09020715951919556, + "learning_rate": 1.3098951162732485e-05, + "loss": 8.8047, + "step": 113030 + }, + { + "epoch": 0.5645084771155334, + "grad_norm": 0.09244858473539352, + "learning_rate": 1.3097449247790934e-05, + "loss": 8.7822, + "step": 113040 + }, + { + "epoch": 0.564558415940473, + "grad_norm": 0.09436847269535065, + "learning_rate": 1.3095947332849382e-05, + "loss": 8.8044, + "step": 113050 + }, + { + "epoch": 0.5646083547654124, + "grad_norm": 0.0903688296675682, + "learning_rate": 1.3094445417907832e-05, + "loss": 8.7983, + "step": 113060 + }, + { + "epoch": 0.5646582935903518, + "grad_norm": 0.08826575428247452, + "learning_rate": 1.3092943502966283e-05, + "loss": 8.7907, + "step": 113070 + }, + { + "epoch": 0.5647082324152912, + "grad_norm": 0.09253015369176865, + "learning_rate": 1.3091441588024733e-05, + "loss": 8.7995, + "step": 113080 + }, + { + "epoch": 0.5647581712402308, + "grad_norm": 0.08847915381193161, + "learning_rate": 1.3089939673083183e-05, + "loss": 8.7927, + "step": 113090 + }, + { + "epoch": 0.5648081100651702, + "grad_norm": 0.09462785720825195, + "learning_rate": 1.308843775814163e-05, + "loss": 8.7956, + "step": 113100 + }, + { + "epoch": 0.5648580488901096, + "grad_norm": 0.09631918370723724, + "learning_rate": 1.308693584320008e-05, + "loss": 8.8033, + "step": 113110 + }, + { + "epoch": 0.564907987715049, + "grad_norm": 0.09395527094602585, + "learning_rate": 1.308543392825853e-05, + "loss": 8.7929, + "step": 113120 + }, + { + "epoch": 0.5649579265399886, + "grad_norm": 0.09398861974477768, + "learning_rate": 1.308393201331698e-05, + "loss": 8.7886, + "step": 113130 + }, + { + "epoch": 0.565007865364928, + "grad_norm": 0.09166686236858368, + "learning_rate": 1.308243009837543e-05, + "loss": 8.7877, + "step": 113140 + }, + { + "epoch": 0.5650578041898674, + "grad_norm": 0.09073825925588608, + "learning_rate": 1.3080928183433877e-05, + "loss": 8.8058, + "step": 113150 + }, + { + "epoch": 0.5651077430148068, + "grad_norm": 0.09182006120681763, + "learning_rate": 1.3079426268492327e-05, + "loss": 8.7976, + "step": 113160 + }, + { + "epoch": 0.5651576818397464, + "grad_norm": 0.09310693293809891, + "learning_rate": 1.3077924353550778e-05, + "loss": 8.7859, + "step": 113170 + }, + { + "epoch": 0.5652076206646858, + "grad_norm": 0.09456516802310944, + "learning_rate": 1.3076422438609228e-05, + "loss": 8.8033, + "step": 113180 + }, + { + "epoch": 0.5652575594896252, + "grad_norm": 0.08986683934926987, + "learning_rate": 1.3074920523667678e-05, + "loss": 8.808, + "step": 113190 + }, + { + "epoch": 0.5653074983145646, + "grad_norm": 0.09818089008331299, + "learning_rate": 1.3073418608726125e-05, + "loss": 8.7929, + "step": 113200 + }, + { + "epoch": 0.5653574371395041, + "grad_norm": 0.09592483192682266, + "learning_rate": 1.3071916693784575e-05, + "loss": 8.8035, + "step": 113210 + }, + { + "epoch": 0.5654073759644436, + "grad_norm": 0.09427493065595627, + "learning_rate": 1.3070414778843025e-05, + "loss": 8.7978, + "step": 113220 + }, + { + "epoch": 0.565457314789383, + "grad_norm": 0.09667732566595078, + "learning_rate": 1.3068912863901475e-05, + "loss": 8.7764, + "step": 113230 + }, + { + "epoch": 0.5655072536143224, + "grad_norm": 0.10168339312076569, + "learning_rate": 1.3067410948959925e-05, + "loss": 8.7989, + "step": 113240 + }, + { + "epoch": 0.565557192439262, + "grad_norm": 0.09616504609584808, + "learning_rate": 1.3065909034018374e-05, + "loss": 8.8068, + "step": 113250 + }, + { + "epoch": 0.5656071312642014, + "grad_norm": 0.09338529407978058, + "learning_rate": 1.3064407119076822e-05, + "loss": 8.7902, + "step": 113260 + }, + { + "epoch": 0.5656570700891408, + "grad_norm": 0.09293079376220703, + "learning_rate": 1.3062905204135273e-05, + "loss": 8.7938, + "step": 113270 + }, + { + "epoch": 0.5657070089140802, + "grad_norm": 0.09134235978126526, + "learning_rate": 1.3061403289193723e-05, + "loss": 8.7961, + "step": 113280 + }, + { + "epoch": 0.5657569477390197, + "grad_norm": 0.0925818383693695, + "learning_rate": 1.3059901374252173e-05, + "loss": 8.7874, + "step": 113290 + }, + { + "epoch": 0.5658068865639592, + "grad_norm": 0.09122247248888016, + "learning_rate": 1.3058399459310621e-05, + "loss": 8.807, + "step": 113300 + }, + { + "epoch": 0.5658568253888986, + "grad_norm": 0.08886589109897614, + "learning_rate": 1.305689754436907e-05, + "loss": 8.792, + "step": 113310 + }, + { + "epoch": 0.565906764213838, + "grad_norm": 0.09335944801568985, + "learning_rate": 1.305539562942752e-05, + "loss": 8.7786, + "step": 113320 + }, + { + "epoch": 0.5659567030387775, + "grad_norm": 0.088760145008564, + "learning_rate": 1.305389371448597e-05, + "loss": 8.791, + "step": 113330 + }, + { + "epoch": 0.566006641863717, + "grad_norm": 0.09438331425189972, + "learning_rate": 1.305239179954442e-05, + "loss": 8.7911, + "step": 113340 + }, + { + "epoch": 0.5660565806886564, + "grad_norm": 0.09462893754243851, + "learning_rate": 1.3050889884602869e-05, + "loss": 8.7829, + "step": 113350 + }, + { + "epoch": 0.5661065195135958, + "grad_norm": 0.09259051084518433, + "learning_rate": 1.3049387969661317e-05, + "loss": 8.8002, + "step": 113360 + }, + { + "epoch": 0.5661564583385353, + "grad_norm": 0.09230745583772659, + "learning_rate": 1.3047886054719768e-05, + "loss": 8.7846, + "step": 113370 + }, + { + "epoch": 0.5662063971634748, + "grad_norm": 0.09083818644285202, + "learning_rate": 1.3046384139778218e-05, + "loss": 8.7832, + "step": 113380 + }, + { + "epoch": 0.5662563359884142, + "grad_norm": 0.09257438033819199, + "learning_rate": 1.3044882224836668e-05, + "loss": 8.7789, + "step": 113390 + }, + { + "epoch": 0.5663062748133536, + "grad_norm": 0.09438567608594894, + "learning_rate": 1.3043380309895116e-05, + "loss": 8.7963, + "step": 113400 + }, + { + "epoch": 0.5663562136382931, + "grad_norm": 0.09249507635831833, + "learning_rate": 1.3041878394953567e-05, + "loss": 8.7973, + "step": 113410 + }, + { + "epoch": 0.5664061524632326, + "grad_norm": 0.09157245606184006, + "learning_rate": 1.3040376480012015e-05, + "loss": 8.7952, + "step": 113420 + }, + { + "epoch": 0.566456091288172, + "grad_norm": 0.09889290481805801, + "learning_rate": 1.3038874565070465e-05, + "loss": 8.8033, + "step": 113430 + }, + { + "epoch": 0.5665060301131114, + "grad_norm": 0.09277231991291046, + "learning_rate": 1.3037372650128915e-05, + "loss": 8.8005, + "step": 113440 + }, + { + "epoch": 0.5665559689380509, + "grad_norm": 0.09674065560102463, + "learning_rate": 1.3035870735187364e-05, + "loss": 8.7933, + "step": 113450 + }, + { + "epoch": 0.5666059077629904, + "grad_norm": 0.09464483708143234, + "learning_rate": 1.3034368820245814e-05, + "loss": 8.7983, + "step": 113460 + }, + { + "epoch": 0.5666558465879298, + "grad_norm": 0.09485206753015518, + "learning_rate": 1.3032866905304263e-05, + "loss": 8.797, + "step": 113470 + }, + { + "epoch": 0.5667057854128692, + "grad_norm": 0.08628489077091217, + "learning_rate": 1.3031364990362713e-05, + "loss": 8.8084, + "step": 113480 + }, + { + "epoch": 0.5667557242378087, + "grad_norm": 0.09606333822011948, + "learning_rate": 1.3029863075421163e-05, + "loss": 8.7985, + "step": 113490 + }, + { + "epoch": 0.5668056630627482, + "grad_norm": 0.09314040094614029, + "learning_rate": 1.3028361160479611e-05, + "loss": 8.8068, + "step": 113500 + }, + { + "epoch": 0.5668556018876876, + "grad_norm": 0.09132982790470123, + "learning_rate": 1.3026859245538062e-05, + "loss": 8.7969, + "step": 113510 + }, + { + "epoch": 0.566905540712627, + "grad_norm": 0.09420787543058395, + "learning_rate": 1.302535733059651e-05, + "loss": 8.7799, + "step": 113520 + }, + { + "epoch": 0.5669554795375665, + "grad_norm": 0.09258543699979782, + "learning_rate": 1.302385541565496e-05, + "loss": 8.8007, + "step": 113530 + }, + { + "epoch": 0.567005418362506, + "grad_norm": 0.09835248440504074, + "learning_rate": 1.302235350071341e-05, + "loss": 8.8051, + "step": 113540 + }, + { + "epoch": 0.5670553571874454, + "grad_norm": 0.093469999730587, + "learning_rate": 1.3020851585771859e-05, + "loss": 8.81, + "step": 113550 + }, + { + "epoch": 0.5671052960123848, + "grad_norm": 0.09576337039470673, + "learning_rate": 1.3019349670830309e-05, + "loss": 8.7956, + "step": 113560 + }, + { + "epoch": 0.5671552348373243, + "grad_norm": 0.09052767604589462, + "learning_rate": 1.301784775588876e-05, + "loss": 8.7867, + "step": 113570 + }, + { + "epoch": 0.5672051736622638, + "grad_norm": 0.09173665940761566, + "learning_rate": 1.3016345840947208e-05, + "loss": 8.7889, + "step": 113580 + }, + { + "epoch": 0.5672551124872032, + "grad_norm": 0.09585422277450562, + "learning_rate": 1.3014843926005658e-05, + "loss": 8.799, + "step": 113590 + }, + { + "epoch": 0.5673050513121426, + "grad_norm": 0.09513553977012634, + "learning_rate": 1.3013342011064106e-05, + "loss": 8.767, + "step": 113600 + }, + { + "epoch": 0.5673549901370821, + "grad_norm": 0.08924262225627899, + "learning_rate": 1.3011840096122557e-05, + "loss": 8.8002, + "step": 113610 + }, + { + "epoch": 0.5674049289620215, + "grad_norm": 0.0911087617278099, + "learning_rate": 1.3010338181181007e-05, + "loss": 8.794, + "step": 113620 + }, + { + "epoch": 0.567454867786961, + "grad_norm": 0.09030284732580185, + "learning_rate": 1.3008836266239455e-05, + "loss": 8.8027, + "step": 113630 + }, + { + "epoch": 0.5675048066119004, + "grad_norm": 0.09411875158548355, + "learning_rate": 1.3007334351297905e-05, + "loss": 8.7871, + "step": 113640 + }, + { + "epoch": 0.5675547454368399, + "grad_norm": 0.09722502529621124, + "learning_rate": 1.3005832436356354e-05, + "loss": 8.7956, + "step": 113650 + }, + { + "epoch": 0.5676046842617793, + "grad_norm": 0.09281644970178604, + "learning_rate": 1.3004330521414804e-05, + "loss": 8.787, + "step": 113660 + }, + { + "epoch": 0.5676546230867188, + "grad_norm": 0.0939922034740448, + "learning_rate": 1.3002828606473254e-05, + "loss": 8.7992, + "step": 113670 + }, + { + "epoch": 0.5677045619116582, + "grad_norm": 0.08864696323871613, + "learning_rate": 1.3001326691531703e-05, + "loss": 8.7967, + "step": 113680 + }, + { + "epoch": 0.5677545007365977, + "grad_norm": 0.09028666466474533, + "learning_rate": 1.2999824776590153e-05, + "loss": 8.792, + "step": 113690 + }, + { + "epoch": 0.5678044395615371, + "grad_norm": 0.09309808909893036, + "learning_rate": 1.2998322861648601e-05, + "loss": 8.7893, + "step": 113700 + }, + { + "epoch": 0.5678543783864766, + "grad_norm": 0.08978434652090073, + "learning_rate": 1.2996820946707052e-05, + "loss": 8.7902, + "step": 113710 + }, + { + "epoch": 0.567904317211416, + "grad_norm": 0.09246104210615158, + "learning_rate": 1.2995319031765502e-05, + "loss": 8.7838, + "step": 113720 + }, + { + "epoch": 0.5679542560363554, + "grad_norm": 0.0925283282995224, + "learning_rate": 1.2993817116823952e-05, + "loss": 8.7888, + "step": 113730 + }, + { + "epoch": 0.5680041948612949, + "grad_norm": 0.09267081320285797, + "learning_rate": 1.29923152018824e-05, + "loss": 8.7942, + "step": 113740 + }, + { + "epoch": 0.5680541336862344, + "grad_norm": 0.09131681174039841, + "learning_rate": 1.2990813286940849e-05, + "loss": 8.7979, + "step": 113750 + }, + { + "epoch": 0.5681040725111738, + "grad_norm": 0.09731698036193848, + "learning_rate": 1.2989311371999299e-05, + "loss": 8.7936, + "step": 113760 + }, + { + "epoch": 0.5681540113361132, + "grad_norm": 0.09286783635616302, + "learning_rate": 1.298780945705775e-05, + "loss": 8.8072, + "step": 113770 + }, + { + "epoch": 0.5682039501610527, + "grad_norm": 0.08999673277139664, + "learning_rate": 1.29863075421162e-05, + "loss": 8.7972, + "step": 113780 + }, + { + "epoch": 0.5682538889859922, + "grad_norm": 0.09515384584665298, + "learning_rate": 1.2984805627174648e-05, + "loss": 8.8005, + "step": 113790 + }, + { + "epoch": 0.5683038278109316, + "grad_norm": 0.09427673369646072, + "learning_rate": 1.2983303712233096e-05, + "loss": 8.7889, + "step": 113800 + }, + { + "epoch": 0.568353766635871, + "grad_norm": 0.09467284381389618, + "learning_rate": 1.2981801797291547e-05, + "loss": 8.8057, + "step": 113810 + }, + { + "epoch": 0.5684037054608105, + "grad_norm": 0.09350381791591644, + "learning_rate": 1.2980299882349997e-05, + "loss": 8.7858, + "step": 113820 + }, + { + "epoch": 0.56845364428575, + "grad_norm": 0.08991306275129318, + "learning_rate": 1.2978797967408447e-05, + "loss": 8.7852, + "step": 113830 + }, + { + "epoch": 0.5685035831106894, + "grad_norm": 0.08979171514511108, + "learning_rate": 1.2977296052466895e-05, + "loss": 8.7917, + "step": 113840 + }, + { + "epoch": 0.5685535219356288, + "grad_norm": 0.09261246025562286, + "learning_rate": 1.2975794137525344e-05, + "loss": 8.8051, + "step": 113850 + }, + { + "epoch": 0.5686034607605683, + "grad_norm": 0.09397750347852707, + "learning_rate": 1.2974292222583794e-05, + "loss": 8.7922, + "step": 113860 + }, + { + "epoch": 0.5686533995855078, + "grad_norm": 0.08991680294275284, + "learning_rate": 1.2972790307642244e-05, + "loss": 8.8009, + "step": 113870 + }, + { + "epoch": 0.5687033384104472, + "grad_norm": 0.08592606335878372, + "learning_rate": 1.2971288392700694e-05, + "loss": 8.7829, + "step": 113880 + }, + { + "epoch": 0.5687532772353866, + "grad_norm": 0.08787564933300018, + "learning_rate": 1.2969786477759145e-05, + "loss": 8.7925, + "step": 113890 + }, + { + "epoch": 0.5688032160603261, + "grad_norm": 0.09271685034036636, + "learning_rate": 1.2968284562817591e-05, + "loss": 8.7928, + "step": 113900 + }, + { + "epoch": 0.5688531548852656, + "grad_norm": 0.09563078731298447, + "learning_rate": 1.2966782647876042e-05, + "loss": 8.7901, + "step": 113910 + }, + { + "epoch": 0.568903093710205, + "grad_norm": 0.08803986757993698, + "learning_rate": 1.2965280732934492e-05, + "loss": 8.7871, + "step": 113920 + }, + { + "epoch": 0.5689530325351444, + "grad_norm": 0.09317762404680252, + "learning_rate": 1.2963778817992942e-05, + "loss": 8.7962, + "step": 113930 + }, + { + "epoch": 0.5690029713600839, + "grad_norm": 0.09467476606369019, + "learning_rate": 1.2962276903051392e-05, + "loss": 8.8057, + "step": 113940 + }, + { + "epoch": 0.5690529101850234, + "grad_norm": 0.0917622447013855, + "learning_rate": 1.2960774988109839e-05, + "loss": 8.7854, + "step": 113950 + }, + { + "epoch": 0.5691028490099628, + "grad_norm": 0.09286537766456604, + "learning_rate": 1.2959273073168289e-05, + "loss": 8.7928, + "step": 113960 + }, + { + "epoch": 0.5691527878349022, + "grad_norm": 0.09457229822874069, + "learning_rate": 1.295777115822674e-05, + "loss": 8.8018, + "step": 113970 + }, + { + "epoch": 0.5692027266598417, + "grad_norm": 0.09735032916069031, + "learning_rate": 1.295626924328519e-05, + "loss": 8.7764, + "step": 113980 + }, + { + "epoch": 0.5692526654847812, + "grad_norm": 0.09656371176242828, + "learning_rate": 1.295476732834364e-05, + "loss": 8.7861, + "step": 113990 + }, + { + "epoch": 0.5693026043097206, + "grad_norm": 0.0926496610045433, + "learning_rate": 1.2953265413402086e-05, + "loss": 8.7779, + "step": 114000 + }, + { + "epoch": 0.56935254313466, + "grad_norm": 0.09257472306489944, + "learning_rate": 1.2951763498460537e-05, + "loss": 8.7978, + "step": 114010 + }, + { + "epoch": 0.5694024819595995, + "grad_norm": 0.09308171272277832, + "learning_rate": 1.2950261583518987e-05, + "loss": 8.7984, + "step": 114020 + }, + { + "epoch": 0.569452420784539, + "grad_norm": 0.09056945145130157, + "learning_rate": 1.2948759668577437e-05, + "loss": 8.8057, + "step": 114030 + }, + { + "epoch": 0.5695023596094784, + "grad_norm": 0.09501245617866516, + "learning_rate": 1.2947257753635887e-05, + "loss": 8.7859, + "step": 114040 + }, + { + "epoch": 0.5695522984344178, + "grad_norm": 0.08686729520559311, + "learning_rate": 1.2945755838694336e-05, + "loss": 8.7834, + "step": 114050 + }, + { + "epoch": 0.5696022372593573, + "grad_norm": 0.08709965646266937, + "learning_rate": 1.2944253923752784e-05, + "loss": 8.7863, + "step": 114060 + }, + { + "epoch": 0.5696521760842967, + "grad_norm": 0.09584898501634598, + "learning_rate": 1.2942752008811234e-05, + "loss": 8.7893, + "step": 114070 + }, + { + "epoch": 0.5697021149092362, + "grad_norm": 0.09185926616191864, + "learning_rate": 1.2941250093869685e-05, + "loss": 8.7821, + "step": 114080 + }, + { + "epoch": 0.5697520537341756, + "grad_norm": 0.09461195766925812, + "learning_rate": 1.2939748178928135e-05, + "loss": 8.7804, + "step": 114090 + }, + { + "epoch": 0.5698019925591151, + "grad_norm": 0.09803737699985504, + "learning_rate": 1.2938246263986583e-05, + "loss": 8.7888, + "step": 114100 + }, + { + "epoch": 0.5698519313840545, + "grad_norm": 0.08924354612827301, + "learning_rate": 1.2936744349045032e-05, + "loss": 8.7978, + "step": 114110 + }, + { + "epoch": 0.569901870208994, + "grad_norm": 0.0952063798904419, + "learning_rate": 1.2935242434103482e-05, + "loss": 8.8019, + "step": 114120 + }, + { + "epoch": 0.5699518090339334, + "grad_norm": 0.09857337176799774, + "learning_rate": 1.2933740519161932e-05, + "loss": 8.7956, + "step": 114130 + }, + { + "epoch": 0.5700017478588729, + "grad_norm": 0.09166187793016434, + "learning_rate": 1.2932238604220382e-05, + "loss": 8.7832, + "step": 114140 + }, + { + "epoch": 0.5700516866838123, + "grad_norm": 0.09211006760597229, + "learning_rate": 1.293073668927883e-05, + "loss": 8.7998, + "step": 114150 + }, + { + "epoch": 0.5701016255087518, + "grad_norm": 0.09411497414112091, + "learning_rate": 1.292923477433728e-05, + "loss": 8.7912, + "step": 114160 + }, + { + "epoch": 0.5701515643336912, + "grad_norm": 0.09173117578029633, + "learning_rate": 1.292773285939573e-05, + "loss": 8.8004, + "step": 114170 + }, + { + "epoch": 0.5702015031586307, + "grad_norm": 0.09168483316898346, + "learning_rate": 1.292623094445418e-05, + "loss": 8.7954, + "step": 114180 + }, + { + "epoch": 0.5702514419835701, + "grad_norm": 0.09110462665557861, + "learning_rate": 1.292472902951263e-05, + "loss": 8.7839, + "step": 114190 + }, + { + "epoch": 0.5703013808085096, + "grad_norm": 0.0920116975903511, + "learning_rate": 1.2923227114571078e-05, + "loss": 8.7785, + "step": 114200 + }, + { + "epoch": 0.570351319633449, + "grad_norm": 0.08953049778938293, + "learning_rate": 1.2921725199629528e-05, + "loss": 8.7941, + "step": 114210 + }, + { + "epoch": 0.5704012584583885, + "grad_norm": 0.09635299444198608, + "learning_rate": 1.2920223284687977e-05, + "loss": 8.7874, + "step": 114220 + }, + { + "epoch": 0.5704511972833279, + "grad_norm": 0.09404581040143967, + "learning_rate": 1.2918721369746427e-05, + "loss": 8.7777, + "step": 114230 + }, + { + "epoch": 0.5705011361082674, + "grad_norm": 0.08653539419174194, + "learning_rate": 1.2917219454804877e-05, + "loss": 8.7868, + "step": 114240 + }, + { + "epoch": 0.5705510749332068, + "grad_norm": 0.08841050416231155, + "learning_rate": 1.2915717539863326e-05, + "loss": 8.7813, + "step": 114250 + }, + { + "epoch": 0.5706010137581463, + "grad_norm": 0.0912218913435936, + "learning_rate": 1.2914215624921776e-05, + "loss": 8.8012, + "step": 114260 + }, + { + "epoch": 0.5706509525830857, + "grad_norm": 0.09305412322282791, + "learning_rate": 1.2912713709980224e-05, + "loss": 8.8028, + "step": 114270 + }, + { + "epoch": 0.5707008914080252, + "grad_norm": 0.09391267597675323, + "learning_rate": 1.2911211795038675e-05, + "loss": 8.7863, + "step": 114280 + }, + { + "epoch": 0.5707508302329646, + "grad_norm": 0.09395688027143478, + "learning_rate": 1.2909709880097125e-05, + "loss": 8.796, + "step": 114290 + }, + { + "epoch": 0.5708007690579041, + "grad_norm": 0.09076563268899918, + "learning_rate": 1.2908207965155573e-05, + "loss": 8.7863, + "step": 114300 + }, + { + "epoch": 0.5708507078828435, + "grad_norm": 0.09484805911779404, + "learning_rate": 1.2906706050214023e-05, + "loss": 8.7788, + "step": 114310 + }, + { + "epoch": 0.570900646707783, + "grad_norm": 0.0906333476305008, + "learning_rate": 1.2905204135272472e-05, + "loss": 8.7787, + "step": 114320 + }, + { + "epoch": 0.5709505855327224, + "grad_norm": 0.0936955064535141, + "learning_rate": 1.2903702220330922e-05, + "loss": 8.7984, + "step": 114330 + }, + { + "epoch": 0.5710005243576619, + "grad_norm": 0.09082095324993134, + "learning_rate": 1.2902200305389372e-05, + "loss": 8.7958, + "step": 114340 + }, + { + "epoch": 0.5710504631826013, + "grad_norm": 0.096087247133255, + "learning_rate": 1.290069839044782e-05, + "loss": 8.7895, + "step": 114350 + }, + { + "epoch": 0.5711004020075408, + "grad_norm": 0.09378685802221298, + "learning_rate": 1.2899196475506271e-05, + "loss": 8.7897, + "step": 114360 + }, + { + "epoch": 0.5711503408324802, + "grad_norm": 0.09574133902788162, + "learning_rate": 1.289769456056472e-05, + "loss": 8.7893, + "step": 114370 + }, + { + "epoch": 0.5712002796574197, + "grad_norm": 0.09573321789503098, + "learning_rate": 1.289619264562317e-05, + "loss": 8.7822, + "step": 114380 + }, + { + "epoch": 0.5712502184823591, + "grad_norm": 0.0985926166176796, + "learning_rate": 1.289469073068162e-05, + "loss": 8.7745, + "step": 114390 + }, + { + "epoch": 0.5713001573072986, + "grad_norm": 0.09592541307210922, + "learning_rate": 1.2893188815740068e-05, + "loss": 8.7653, + "step": 114400 + }, + { + "epoch": 0.571350096132238, + "grad_norm": 0.09706228971481323, + "learning_rate": 1.2891686900798518e-05, + "loss": 8.787, + "step": 114410 + }, + { + "epoch": 0.5714000349571775, + "grad_norm": 0.09211396425962448, + "learning_rate": 1.2890184985856969e-05, + "loss": 8.7994, + "step": 114420 + }, + { + "epoch": 0.5714499737821169, + "grad_norm": 0.08897708356380463, + "learning_rate": 1.2888683070915417e-05, + "loss": 8.7914, + "step": 114430 + }, + { + "epoch": 0.5714999126070563, + "grad_norm": 0.09851206839084625, + "learning_rate": 1.2887181155973867e-05, + "loss": 8.7865, + "step": 114440 + }, + { + "epoch": 0.5715498514319958, + "grad_norm": 0.0946231335401535, + "learning_rate": 1.2885679241032316e-05, + "loss": 8.7873, + "step": 114450 + }, + { + "epoch": 0.5715997902569353, + "grad_norm": 0.096876360476017, + "learning_rate": 1.2884177326090766e-05, + "loss": 8.7855, + "step": 114460 + }, + { + "epoch": 0.5716497290818747, + "grad_norm": 0.09491303563117981, + "learning_rate": 1.2882675411149216e-05, + "loss": 8.806, + "step": 114470 + }, + { + "epoch": 0.5716996679068141, + "grad_norm": 0.08952588587999344, + "learning_rate": 1.2881173496207665e-05, + "loss": 8.7896, + "step": 114480 + }, + { + "epoch": 0.5717496067317536, + "grad_norm": 0.08914480358362198, + "learning_rate": 1.2879671581266115e-05, + "loss": 8.7765, + "step": 114490 + }, + { + "epoch": 0.5717995455566931, + "grad_norm": 0.09196481853723526, + "learning_rate": 1.2878169666324565e-05, + "loss": 8.7767, + "step": 114500 + }, + { + "epoch": 0.5718494843816325, + "grad_norm": 0.08663848787546158, + "learning_rate": 1.2876667751383013e-05, + "loss": 8.7991, + "step": 114510 + }, + { + "epoch": 0.5718994232065719, + "grad_norm": 0.08811726421117783, + "learning_rate": 1.2875165836441464e-05, + "loss": 8.786, + "step": 114520 + }, + { + "epoch": 0.5719493620315114, + "grad_norm": 0.09745927155017853, + "learning_rate": 1.2873663921499912e-05, + "loss": 8.7802, + "step": 114530 + }, + { + "epoch": 0.5719993008564509, + "grad_norm": 0.09472652524709702, + "learning_rate": 1.2872162006558362e-05, + "loss": 8.7909, + "step": 114540 + }, + { + "epoch": 0.5720492396813903, + "grad_norm": 0.10268702358007431, + "learning_rate": 1.2870660091616812e-05, + "loss": 8.7917, + "step": 114550 + }, + { + "epoch": 0.5720991785063297, + "grad_norm": 0.09564602375030518, + "learning_rate": 1.2869158176675261e-05, + "loss": 8.7963, + "step": 114560 + }, + { + "epoch": 0.5721491173312692, + "grad_norm": 0.08768244832754135, + "learning_rate": 1.2867656261733711e-05, + "loss": 8.7989, + "step": 114570 + }, + { + "epoch": 0.5721990561562087, + "grad_norm": 0.09054630994796753, + "learning_rate": 1.2866154346792161e-05, + "loss": 8.7797, + "step": 114580 + }, + { + "epoch": 0.5722489949811481, + "grad_norm": 0.09199676662683487, + "learning_rate": 1.286465243185061e-05, + "loss": 8.7763, + "step": 114590 + }, + { + "epoch": 0.5722989338060875, + "grad_norm": 0.09266217797994614, + "learning_rate": 1.286315051690906e-05, + "loss": 8.793, + "step": 114600 + }, + { + "epoch": 0.572348872631027, + "grad_norm": 0.09249533712863922, + "learning_rate": 1.2861648601967508e-05, + "loss": 8.7705, + "step": 114610 + }, + { + "epoch": 0.5723988114559665, + "grad_norm": 0.0948820635676384, + "learning_rate": 1.2860146687025959e-05, + "loss": 8.7812, + "step": 114620 + }, + { + "epoch": 0.5724487502809059, + "grad_norm": 0.08931874483823776, + "learning_rate": 1.2858644772084409e-05, + "loss": 8.782, + "step": 114630 + }, + { + "epoch": 0.5724986891058453, + "grad_norm": 0.09403346478939056, + "learning_rate": 1.2857142857142857e-05, + "loss": 8.7976, + "step": 114640 + }, + { + "epoch": 0.5725486279307848, + "grad_norm": 0.0919768437743187, + "learning_rate": 1.2855640942201307e-05, + "loss": 8.7873, + "step": 114650 + }, + { + "epoch": 0.5725985667557243, + "grad_norm": 0.09091559052467346, + "learning_rate": 1.2854139027259756e-05, + "loss": 8.8047, + "step": 114660 + }, + { + "epoch": 0.5726485055806637, + "grad_norm": 0.09633230417966843, + "learning_rate": 1.2852637112318206e-05, + "loss": 8.7847, + "step": 114670 + }, + { + "epoch": 0.5726984444056031, + "grad_norm": 0.0949721410870552, + "learning_rate": 1.2851135197376656e-05, + "loss": 8.8071, + "step": 114680 + }, + { + "epoch": 0.5727483832305426, + "grad_norm": 0.09185748547315598, + "learning_rate": 1.2849633282435105e-05, + "loss": 8.7908, + "step": 114690 + }, + { + "epoch": 0.5727983220554821, + "grad_norm": 0.0928587019443512, + "learning_rate": 1.2848131367493555e-05, + "loss": 8.787, + "step": 114700 + }, + { + "epoch": 0.5728482608804215, + "grad_norm": 0.09270650893449783, + "learning_rate": 1.2846629452552003e-05, + "loss": 8.7901, + "step": 114710 + }, + { + "epoch": 0.5728981997053609, + "grad_norm": 0.09454198181629181, + "learning_rate": 1.2845127537610454e-05, + "loss": 8.7726, + "step": 114720 + }, + { + "epoch": 0.5729481385303004, + "grad_norm": 0.09954354166984558, + "learning_rate": 1.2843625622668904e-05, + "loss": 8.7879, + "step": 114730 + }, + { + "epoch": 0.5729980773552398, + "grad_norm": 0.09377159178256989, + "learning_rate": 1.2842123707727354e-05, + "loss": 8.7744, + "step": 114740 + }, + { + "epoch": 0.5730480161801793, + "grad_norm": 0.08639419823884964, + "learning_rate": 1.2840621792785802e-05, + "loss": 8.7816, + "step": 114750 + }, + { + "epoch": 0.5730979550051187, + "grad_norm": 0.0929587110877037, + "learning_rate": 1.2839119877844251e-05, + "loss": 8.7803, + "step": 114760 + }, + { + "epoch": 0.5731478938300582, + "grad_norm": 0.09413690865039825, + "learning_rate": 1.2837617962902701e-05, + "loss": 8.7931, + "step": 114770 + }, + { + "epoch": 0.5731978326549976, + "grad_norm": 0.09625568985939026, + "learning_rate": 1.2836116047961151e-05, + "loss": 8.774, + "step": 114780 + }, + { + "epoch": 0.5732477714799371, + "grad_norm": 0.09371865540742874, + "learning_rate": 1.2834614133019601e-05, + "loss": 8.797, + "step": 114790 + }, + { + "epoch": 0.5732977103048765, + "grad_norm": 0.09570909291505814, + "learning_rate": 1.283311221807805e-05, + "loss": 8.7847, + "step": 114800 + }, + { + "epoch": 0.573347649129816, + "grad_norm": 0.09836754947900772, + "learning_rate": 1.2831610303136498e-05, + "loss": 8.7969, + "step": 114810 + }, + { + "epoch": 0.5733975879547554, + "grad_norm": 0.08818056434392929, + "learning_rate": 1.2830108388194949e-05, + "loss": 8.7888, + "step": 114820 + }, + { + "epoch": 0.5734475267796949, + "grad_norm": 0.09481636434793472, + "learning_rate": 1.2828606473253399e-05, + "loss": 8.7942, + "step": 114830 + }, + { + "epoch": 0.5734974656046343, + "grad_norm": 0.09127473831176758, + "learning_rate": 1.2827104558311849e-05, + "loss": 8.7794, + "step": 114840 + }, + { + "epoch": 0.5735474044295737, + "grad_norm": 0.08786597847938538, + "learning_rate": 1.2825602643370297e-05, + "loss": 8.7874, + "step": 114850 + }, + { + "epoch": 0.5735973432545132, + "grad_norm": 0.10287149995565414, + "learning_rate": 1.2824100728428746e-05, + "loss": 8.7911, + "step": 114860 + }, + { + "epoch": 0.5736472820794527, + "grad_norm": 0.09321471303701401, + "learning_rate": 1.2822598813487196e-05, + "loss": 8.7907, + "step": 114870 + }, + { + "epoch": 0.5736972209043921, + "grad_norm": 0.08879370242357254, + "learning_rate": 1.2821096898545646e-05, + "loss": 8.7877, + "step": 114880 + }, + { + "epoch": 0.5737471597293315, + "grad_norm": 0.0937252789735794, + "learning_rate": 1.2819594983604096e-05, + "loss": 8.7674, + "step": 114890 + }, + { + "epoch": 0.573797098554271, + "grad_norm": 0.09370169043540955, + "learning_rate": 1.2818093068662547e-05, + "loss": 8.7896, + "step": 114900 + }, + { + "epoch": 0.5738470373792105, + "grad_norm": 0.09470123052597046, + "learning_rate": 1.2816591153720993e-05, + "loss": 8.7953, + "step": 114910 + }, + { + "epoch": 0.5738969762041499, + "grad_norm": 0.09248079359531403, + "learning_rate": 1.2815089238779444e-05, + "loss": 8.79, + "step": 114920 + }, + { + "epoch": 0.5739469150290893, + "grad_norm": 0.09325503557920456, + "learning_rate": 1.2813587323837894e-05, + "loss": 8.7838, + "step": 114930 + }, + { + "epoch": 0.5739968538540288, + "grad_norm": 0.0931696742773056, + "learning_rate": 1.2812085408896344e-05, + "loss": 8.7742, + "step": 114940 + }, + { + "epoch": 0.5740467926789683, + "grad_norm": 0.09374930709600449, + "learning_rate": 1.2810583493954794e-05, + "loss": 8.7925, + "step": 114950 + }, + { + "epoch": 0.5740967315039077, + "grad_norm": 0.08893626183271408, + "learning_rate": 1.2809081579013241e-05, + "loss": 8.7882, + "step": 114960 + }, + { + "epoch": 0.5741466703288471, + "grad_norm": 0.08865047246217728, + "learning_rate": 1.2807579664071691e-05, + "loss": 8.7788, + "step": 114970 + }, + { + "epoch": 0.5741966091537866, + "grad_norm": 0.09421852231025696, + "learning_rate": 1.2806077749130141e-05, + "loss": 8.7768, + "step": 114980 + }, + { + "epoch": 0.5742465479787261, + "grad_norm": 0.08916622400283813, + "learning_rate": 1.2804575834188591e-05, + "loss": 8.785, + "step": 114990 + }, + { + "epoch": 0.5742964868036655, + "grad_norm": 0.08971542119979858, + "learning_rate": 1.2803073919247042e-05, + "loss": 8.7801, + "step": 115000 + }, + { + "epoch": 0.5743464256286049, + "grad_norm": 0.09803227335214615, + "learning_rate": 1.2801572004305488e-05, + "loss": 8.7843, + "step": 115010 + }, + { + "epoch": 0.5743963644535444, + "grad_norm": 0.0927937850356102, + "learning_rate": 1.2800070089363939e-05, + "loss": 8.7938, + "step": 115020 + }, + { + "epoch": 0.5744463032784839, + "grad_norm": 0.09266925603151321, + "learning_rate": 1.2798568174422389e-05, + "loss": 8.7783, + "step": 115030 + }, + { + "epoch": 0.5744962421034233, + "grad_norm": 0.09375444054603577, + "learning_rate": 1.2797066259480839e-05, + "loss": 8.7916, + "step": 115040 + }, + { + "epoch": 0.5745461809283627, + "grad_norm": 0.09235719591379166, + "learning_rate": 1.2795564344539289e-05, + "loss": 8.79, + "step": 115050 + }, + { + "epoch": 0.5745961197533022, + "grad_norm": 0.09506914019584656, + "learning_rate": 1.2794062429597738e-05, + "loss": 8.7758, + "step": 115060 + }, + { + "epoch": 0.5746460585782417, + "grad_norm": 0.09061712771654129, + "learning_rate": 1.2792560514656186e-05, + "loss": 8.785, + "step": 115070 + }, + { + "epoch": 0.5746959974031811, + "grad_norm": 0.09398726373910904, + "learning_rate": 1.2791058599714636e-05, + "loss": 8.7835, + "step": 115080 + }, + { + "epoch": 0.5747459362281205, + "grad_norm": 0.09454075247049332, + "learning_rate": 1.2789556684773086e-05, + "loss": 8.7984, + "step": 115090 + }, + { + "epoch": 0.57479587505306, + "grad_norm": 0.09011261910200119, + "learning_rate": 1.2788054769831537e-05, + "loss": 8.7811, + "step": 115100 + }, + { + "epoch": 0.5748458138779995, + "grad_norm": 0.09368349611759186, + "learning_rate": 1.2786552854889985e-05, + "loss": 8.7701, + "step": 115110 + }, + { + "epoch": 0.5748957527029389, + "grad_norm": 0.09060084819793701, + "learning_rate": 1.2785050939948434e-05, + "loss": 8.78, + "step": 115120 + }, + { + "epoch": 0.5749456915278783, + "grad_norm": 0.09300053119659424, + "learning_rate": 1.2783549025006884e-05, + "loss": 8.7671, + "step": 115130 + }, + { + "epoch": 0.5749956303528178, + "grad_norm": 0.09098996222019196, + "learning_rate": 1.2782047110065334e-05, + "loss": 8.7889, + "step": 115140 + }, + { + "epoch": 0.5750455691777573, + "grad_norm": 0.09513608366250992, + "learning_rate": 1.2780545195123784e-05, + "loss": 8.7721, + "step": 115150 + }, + { + "epoch": 0.5750955080026967, + "grad_norm": 0.09404340386390686, + "learning_rate": 1.2779043280182233e-05, + "loss": 8.7911, + "step": 115160 + }, + { + "epoch": 0.5751454468276361, + "grad_norm": 0.0925978347659111, + "learning_rate": 1.2777541365240681e-05, + "loss": 8.784, + "step": 115170 + }, + { + "epoch": 0.5751953856525756, + "grad_norm": 0.09595629572868347, + "learning_rate": 1.2776039450299131e-05, + "loss": 8.7783, + "step": 115180 + }, + { + "epoch": 0.5752453244775151, + "grad_norm": 0.09918168187141418, + "learning_rate": 1.2774537535357581e-05, + "loss": 8.7753, + "step": 115190 + }, + { + "epoch": 0.5752952633024545, + "grad_norm": 0.09095029532909393, + "learning_rate": 1.2773035620416032e-05, + "loss": 8.7743, + "step": 115200 + }, + { + "epoch": 0.5753452021273939, + "grad_norm": 0.09520848840475082, + "learning_rate": 1.277153370547448e-05, + "loss": 8.7882, + "step": 115210 + }, + { + "epoch": 0.5753951409523334, + "grad_norm": 0.0901302695274353, + "learning_rate": 1.277003179053293e-05, + "loss": 8.7796, + "step": 115220 + }, + { + "epoch": 0.5754450797772729, + "grad_norm": 0.08923668414354324, + "learning_rate": 1.2768529875591379e-05, + "loss": 8.7896, + "step": 115230 + }, + { + "epoch": 0.5754950186022123, + "grad_norm": 0.09207792580127716, + "learning_rate": 1.2767027960649829e-05, + "loss": 8.7796, + "step": 115240 + }, + { + "epoch": 0.5755449574271517, + "grad_norm": 0.09478235244750977, + "learning_rate": 1.2765526045708279e-05, + "loss": 8.7792, + "step": 115250 + }, + { + "epoch": 0.5755948962520911, + "grad_norm": 0.09395542740821838, + "learning_rate": 1.2764024130766728e-05, + "loss": 8.791, + "step": 115260 + }, + { + "epoch": 0.5756448350770307, + "grad_norm": 0.09286878257989883, + "learning_rate": 1.2762522215825178e-05, + "loss": 8.7711, + "step": 115270 + }, + { + "epoch": 0.5756947739019701, + "grad_norm": 0.09151309728622437, + "learning_rate": 1.2761020300883626e-05, + "loss": 8.785, + "step": 115280 + }, + { + "epoch": 0.5757447127269095, + "grad_norm": 0.09942897409200668, + "learning_rate": 1.2759518385942076e-05, + "loss": 8.7799, + "step": 115290 + }, + { + "epoch": 0.575794651551849, + "grad_norm": 0.09082534909248352, + "learning_rate": 1.2758016471000527e-05, + "loss": 8.7904, + "step": 115300 + }, + { + "epoch": 0.5758445903767885, + "grad_norm": 0.09576486796140671, + "learning_rate": 1.2756514556058975e-05, + "loss": 8.7816, + "step": 115310 + }, + { + "epoch": 0.5758945292017279, + "grad_norm": 0.09835550934076309, + "learning_rate": 1.2755012641117425e-05, + "loss": 8.7852, + "step": 115320 + }, + { + "epoch": 0.5759444680266673, + "grad_norm": 0.09621113538742065, + "learning_rate": 1.2753510726175874e-05, + "loss": 8.7841, + "step": 115330 + }, + { + "epoch": 0.5759944068516067, + "grad_norm": 0.09935037046670914, + "learning_rate": 1.2752008811234324e-05, + "loss": 8.7874, + "step": 115340 + }, + { + "epoch": 0.5760443456765463, + "grad_norm": 0.09516027569770813, + "learning_rate": 1.2750506896292774e-05, + "loss": 8.7866, + "step": 115350 + }, + { + "epoch": 0.5760942845014857, + "grad_norm": 0.09592787176370621, + "learning_rate": 1.2749004981351223e-05, + "loss": 8.7759, + "step": 115360 + }, + { + "epoch": 0.5761442233264251, + "grad_norm": 0.09324520081281662, + "learning_rate": 1.2747503066409673e-05, + "loss": 8.7756, + "step": 115370 + }, + { + "epoch": 0.5761941621513645, + "grad_norm": 0.09475477784872055, + "learning_rate": 1.2746001151468123e-05, + "loss": 8.7751, + "step": 115380 + }, + { + "epoch": 0.5762441009763041, + "grad_norm": 0.09500320255756378, + "learning_rate": 1.2744499236526571e-05, + "loss": 8.7876, + "step": 115390 + }, + { + "epoch": 0.5762940398012435, + "grad_norm": 0.09369548410177231, + "learning_rate": 1.2742997321585022e-05, + "loss": 8.7883, + "step": 115400 + }, + { + "epoch": 0.5763439786261829, + "grad_norm": 0.0879288762807846, + "learning_rate": 1.274149540664347e-05, + "loss": 8.7739, + "step": 115410 + }, + { + "epoch": 0.5763939174511223, + "grad_norm": 0.09155362099409103, + "learning_rate": 1.273999349170192e-05, + "loss": 8.7865, + "step": 115420 + }, + { + "epoch": 0.5764438562760619, + "grad_norm": 0.09589493274688721, + "learning_rate": 1.273849157676037e-05, + "loss": 8.7779, + "step": 115430 + }, + { + "epoch": 0.5764937951010013, + "grad_norm": 0.09291618317365646, + "learning_rate": 1.2736989661818819e-05, + "loss": 8.7973, + "step": 115440 + }, + { + "epoch": 0.5765437339259407, + "grad_norm": 0.08438339829444885, + "learning_rate": 1.273548774687727e-05, + "loss": 8.779, + "step": 115450 + }, + { + "epoch": 0.5765936727508801, + "grad_norm": 0.09597896039485931, + "learning_rate": 1.2733985831935718e-05, + "loss": 8.7717, + "step": 115460 + }, + { + "epoch": 0.5766436115758197, + "grad_norm": 0.09120340645313263, + "learning_rate": 1.2732483916994168e-05, + "loss": 8.7793, + "step": 115470 + }, + { + "epoch": 0.5766935504007591, + "grad_norm": 0.09236761182546616, + "learning_rate": 1.2730982002052618e-05, + "loss": 8.7749, + "step": 115480 + }, + { + "epoch": 0.5767434892256985, + "grad_norm": 0.09631557762622833, + "learning_rate": 1.2729480087111066e-05, + "loss": 8.7847, + "step": 115490 + }, + { + "epoch": 0.5767934280506379, + "grad_norm": 0.09939119219779968, + "learning_rate": 1.2727978172169517e-05, + "loss": 8.7732, + "step": 115500 + }, + { + "epoch": 0.5768433668755775, + "grad_norm": 0.08828873187303543, + "learning_rate": 1.2726476257227965e-05, + "loss": 8.7684, + "step": 115510 + }, + { + "epoch": 0.5768933057005169, + "grad_norm": 0.09669400006532669, + "learning_rate": 1.2724974342286415e-05, + "loss": 8.782, + "step": 115520 + }, + { + "epoch": 0.5769432445254563, + "grad_norm": 0.08688271045684814, + "learning_rate": 1.2723472427344866e-05, + "loss": 8.7748, + "step": 115530 + }, + { + "epoch": 0.5769931833503957, + "grad_norm": 0.09424971789121628, + "learning_rate": 1.2721970512403316e-05, + "loss": 8.7726, + "step": 115540 + }, + { + "epoch": 0.5770431221753353, + "grad_norm": 0.09156131744384766, + "learning_rate": 1.2720468597461764e-05, + "loss": 8.7737, + "step": 115550 + }, + { + "epoch": 0.5770930610002747, + "grad_norm": 0.09321281313896179, + "learning_rate": 1.2718966682520213e-05, + "loss": 8.7779, + "step": 115560 + }, + { + "epoch": 0.5771429998252141, + "grad_norm": 0.09447400271892548, + "learning_rate": 1.2717464767578663e-05, + "loss": 8.7736, + "step": 115570 + }, + { + "epoch": 0.5771929386501535, + "grad_norm": 0.09802529215812683, + "learning_rate": 1.2715962852637113e-05, + "loss": 8.7908, + "step": 115580 + }, + { + "epoch": 0.5772428774750931, + "grad_norm": 0.09059146791696548, + "learning_rate": 1.2714460937695563e-05, + "loss": 8.7868, + "step": 115590 + }, + { + "epoch": 0.5772928163000325, + "grad_norm": 0.09354998171329498, + "learning_rate": 1.2712959022754012e-05, + "loss": 8.7936, + "step": 115600 + }, + { + "epoch": 0.5773427551249719, + "grad_norm": 0.09096765518188477, + "learning_rate": 1.271145710781246e-05, + "loss": 8.7762, + "step": 115610 + }, + { + "epoch": 0.5773926939499113, + "grad_norm": 0.09621896594762802, + "learning_rate": 1.270995519287091e-05, + "loss": 8.7725, + "step": 115620 + }, + { + "epoch": 0.5774426327748509, + "grad_norm": 0.09030432254076004, + "learning_rate": 1.270845327792936e-05, + "loss": 8.7869, + "step": 115630 + }, + { + "epoch": 0.5774925715997903, + "grad_norm": 0.0910486951470375, + "learning_rate": 1.270695136298781e-05, + "loss": 8.7925, + "step": 115640 + }, + { + "epoch": 0.5775425104247297, + "grad_norm": 0.09086786955595016, + "learning_rate": 1.270544944804626e-05, + "loss": 8.7813, + "step": 115650 + }, + { + "epoch": 0.5775924492496691, + "grad_norm": 0.08619526028633118, + "learning_rate": 1.2703947533104708e-05, + "loss": 8.7701, + "step": 115660 + }, + { + "epoch": 0.5776423880746087, + "grad_norm": 0.09702730923891068, + "learning_rate": 1.2702445618163158e-05, + "loss": 8.78, + "step": 115670 + }, + { + "epoch": 0.5776923268995481, + "grad_norm": 0.0912908986210823, + "learning_rate": 1.2700943703221608e-05, + "loss": 8.7743, + "step": 115680 + }, + { + "epoch": 0.5777422657244875, + "grad_norm": 0.09520822018384933, + "learning_rate": 1.2699441788280058e-05, + "loss": 8.7819, + "step": 115690 + }, + { + "epoch": 0.5777922045494269, + "grad_norm": 0.0918937474489212, + "learning_rate": 1.2697939873338508e-05, + "loss": 8.7656, + "step": 115700 + }, + { + "epoch": 0.5778421433743663, + "grad_norm": 0.09397950768470764, + "learning_rate": 1.2696437958396955e-05, + "loss": 8.7783, + "step": 115710 + }, + { + "epoch": 0.5778920821993059, + "grad_norm": 0.09476295858621597, + "learning_rate": 1.2694936043455405e-05, + "loss": 8.7867, + "step": 115720 + }, + { + "epoch": 0.5779420210242453, + "grad_norm": 0.09248529374599457, + "learning_rate": 1.2693434128513856e-05, + "loss": 8.7845, + "step": 115730 + }, + { + "epoch": 0.5779919598491847, + "grad_norm": 0.0914454385638237, + "learning_rate": 1.2691932213572306e-05, + "loss": 8.7718, + "step": 115740 + }, + { + "epoch": 0.5780418986741241, + "grad_norm": 0.09722574055194855, + "learning_rate": 1.2690430298630756e-05, + "loss": 8.7653, + "step": 115750 + }, + { + "epoch": 0.5780918374990637, + "grad_norm": 0.09682019799947739, + "learning_rate": 1.2688928383689203e-05, + "loss": 8.7623, + "step": 115760 + }, + { + "epoch": 0.5781417763240031, + "grad_norm": 0.08837232738733292, + "learning_rate": 1.2687426468747653e-05, + "loss": 8.78, + "step": 115770 + }, + { + "epoch": 0.5781917151489425, + "grad_norm": 0.0944056510925293, + "learning_rate": 1.2685924553806103e-05, + "loss": 8.7849, + "step": 115780 + }, + { + "epoch": 0.5782416539738819, + "grad_norm": 0.0868343710899353, + "learning_rate": 1.2684422638864553e-05, + "loss": 8.7678, + "step": 115790 + }, + { + "epoch": 0.5782915927988215, + "grad_norm": 0.09442039579153061, + "learning_rate": 1.2682920723923003e-05, + "loss": 8.7778, + "step": 115800 + }, + { + "epoch": 0.5783415316237609, + "grad_norm": 0.08963579684495926, + "learning_rate": 1.268141880898145e-05, + "loss": 8.7754, + "step": 115810 + }, + { + "epoch": 0.5783914704487003, + "grad_norm": 0.09150784462690353, + "learning_rate": 1.26799168940399e-05, + "loss": 8.7636, + "step": 115820 + }, + { + "epoch": 0.5784414092736397, + "grad_norm": 0.09078077971935272, + "learning_rate": 1.267841497909835e-05, + "loss": 8.7658, + "step": 115830 + }, + { + "epoch": 0.5784913480985793, + "grad_norm": 0.09242097288370132, + "learning_rate": 1.26769130641568e-05, + "loss": 8.7743, + "step": 115840 + }, + { + "epoch": 0.5785412869235187, + "grad_norm": 0.09405604004859924, + "learning_rate": 1.2675411149215251e-05, + "loss": 8.7668, + "step": 115850 + }, + { + "epoch": 0.5785912257484581, + "grad_norm": 0.09389903396368027, + "learning_rate": 1.26739092342737e-05, + "loss": 8.7732, + "step": 115860 + }, + { + "epoch": 0.5786411645733975, + "grad_norm": 0.08914590626955032, + "learning_rate": 1.2672407319332148e-05, + "loss": 8.7808, + "step": 115870 + }, + { + "epoch": 0.5786911033983371, + "grad_norm": 0.09185650944709778, + "learning_rate": 1.2670905404390598e-05, + "loss": 8.779, + "step": 115880 + }, + { + "epoch": 0.5787410422232765, + "grad_norm": 0.091721311211586, + "learning_rate": 1.2669403489449048e-05, + "loss": 8.778, + "step": 115890 + }, + { + "epoch": 0.5787909810482159, + "grad_norm": 0.08753747493028641, + "learning_rate": 1.2667901574507498e-05, + "loss": 8.7874, + "step": 115900 + }, + { + "epoch": 0.5788409198731553, + "grad_norm": 0.09685467183589935, + "learning_rate": 1.2666399659565947e-05, + "loss": 8.7786, + "step": 115910 + }, + { + "epoch": 0.5788908586980949, + "grad_norm": 0.09479455649852753, + "learning_rate": 1.2664897744624395e-05, + "loss": 8.7859, + "step": 115920 + }, + { + "epoch": 0.5789407975230343, + "grad_norm": 0.09902866929769516, + "learning_rate": 1.2663395829682846e-05, + "loss": 8.7763, + "step": 115930 + }, + { + "epoch": 0.5789907363479737, + "grad_norm": 0.09214528650045395, + "learning_rate": 1.2661893914741296e-05, + "loss": 8.7802, + "step": 115940 + }, + { + "epoch": 0.5790406751729131, + "grad_norm": 0.09406938403844833, + "learning_rate": 1.2660391999799746e-05, + "loss": 8.7633, + "step": 115950 + }, + { + "epoch": 0.5790906139978527, + "grad_norm": 0.0909062996506691, + "learning_rate": 1.2658890084858194e-05, + "loss": 8.7809, + "step": 115960 + }, + { + "epoch": 0.5791405528227921, + "grad_norm": 0.08931753784418106, + "learning_rate": 1.2657388169916643e-05, + "loss": 8.781, + "step": 115970 + }, + { + "epoch": 0.5791904916477315, + "grad_norm": 0.100962333381176, + "learning_rate": 1.2655886254975093e-05, + "loss": 8.7796, + "step": 115980 + }, + { + "epoch": 0.5792404304726709, + "grad_norm": 0.09826654195785522, + "learning_rate": 1.2654384340033543e-05, + "loss": 8.7885, + "step": 115990 + }, + { + "epoch": 0.5792903692976105, + "grad_norm": 0.09005758911371231, + "learning_rate": 1.2652882425091993e-05, + "loss": 8.7879, + "step": 116000 + }, + { + "epoch": 0.5793403081225499, + "grad_norm": 0.09193582087755203, + "learning_rate": 1.2651380510150442e-05, + "loss": 8.7698, + "step": 116010 + }, + { + "epoch": 0.5793902469474893, + "grad_norm": 0.09053417295217514, + "learning_rate": 1.2649878595208892e-05, + "loss": 8.7836, + "step": 116020 + }, + { + "epoch": 0.5794401857724287, + "grad_norm": 0.09887261688709259, + "learning_rate": 1.264837668026734e-05, + "loss": 8.7667, + "step": 116030 + }, + { + "epoch": 0.5794901245973683, + "grad_norm": 0.09462026506662369, + "learning_rate": 1.264687476532579e-05, + "loss": 8.7674, + "step": 116040 + }, + { + "epoch": 0.5795400634223077, + "grad_norm": 0.0957246646285057, + "learning_rate": 1.2645372850384241e-05, + "loss": 8.7798, + "step": 116050 + }, + { + "epoch": 0.5795900022472471, + "grad_norm": 0.10332443565130234, + "learning_rate": 1.264387093544269e-05, + "loss": 8.7828, + "step": 116060 + }, + { + "epoch": 0.5796399410721865, + "grad_norm": 0.08929960429668427, + "learning_rate": 1.264236902050114e-05, + "loss": 8.7821, + "step": 116070 + }, + { + "epoch": 0.5796898798971261, + "grad_norm": 0.09699279814958572, + "learning_rate": 1.2640867105559588e-05, + "loss": 8.7763, + "step": 116080 + }, + { + "epoch": 0.5797398187220655, + "grad_norm": 0.0911206379532814, + "learning_rate": 1.2639365190618038e-05, + "loss": 8.7705, + "step": 116090 + }, + { + "epoch": 0.5797897575470049, + "grad_norm": 0.09146827459335327, + "learning_rate": 1.2637863275676488e-05, + "loss": 8.7882, + "step": 116100 + }, + { + "epoch": 0.5798396963719443, + "grad_norm": 0.09372538328170776, + "learning_rate": 1.2636361360734937e-05, + "loss": 8.7599, + "step": 116110 + }, + { + "epoch": 0.5798896351968839, + "grad_norm": 0.09762945026159286, + "learning_rate": 1.2634859445793387e-05, + "loss": 8.7786, + "step": 116120 + }, + { + "epoch": 0.5799395740218233, + "grad_norm": 0.09360945969820023, + "learning_rate": 1.2633357530851836e-05, + "loss": 8.7724, + "step": 116130 + }, + { + "epoch": 0.5799895128467627, + "grad_norm": 0.09288088977336884, + "learning_rate": 1.2631855615910286e-05, + "loss": 8.7698, + "step": 116140 + }, + { + "epoch": 0.5800394516717021, + "grad_norm": 0.09539160132408142, + "learning_rate": 1.2630353700968736e-05, + "loss": 8.7872, + "step": 116150 + }, + { + "epoch": 0.5800893904966417, + "grad_norm": 0.08673470467329025, + "learning_rate": 1.2628851786027184e-05, + "loss": 8.7901, + "step": 116160 + }, + { + "epoch": 0.5801393293215811, + "grad_norm": 0.09566935151815414, + "learning_rate": 1.2627349871085635e-05, + "loss": 8.7816, + "step": 116170 + }, + { + "epoch": 0.5801892681465205, + "grad_norm": 0.09150587022304535, + "learning_rate": 1.2625847956144085e-05, + "loss": 8.7651, + "step": 116180 + }, + { + "epoch": 0.5802392069714599, + "grad_norm": 0.08590695261955261, + "learning_rate": 1.2624346041202533e-05, + "loss": 8.7845, + "step": 116190 + }, + { + "epoch": 0.5802891457963995, + "grad_norm": 0.09772205352783203, + "learning_rate": 1.2622844126260983e-05, + "loss": 8.7651, + "step": 116200 + }, + { + "epoch": 0.5803390846213389, + "grad_norm": 0.09127950668334961, + "learning_rate": 1.2621342211319432e-05, + "loss": 8.7905, + "step": 116210 + }, + { + "epoch": 0.5803890234462783, + "grad_norm": 0.09437844902276993, + "learning_rate": 1.2619840296377882e-05, + "loss": 8.7782, + "step": 116220 + }, + { + "epoch": 0.5804389622712177, + "grad_norm": 0.08986880630254745, + "learning_rate": 1.2618338381436332e-05, + "loss": 8.7814, + "step": 116230 + }, + { + "epoch": 0.5804889010961573, + "grad_norm": 0.09137626737356186, + "learning_rate": 1.261683646649478e-05, + "loss": 8.7681, + "step": 116240 + }, + { + "epoch": 0.5805388399210967, + "grad_norm": 0.08857541531324387, + "learning_rate": 1.2615334551553231e-05, + "loss": 8.776, + "step": 116250 + }, + { + "epoch": 0.5805887787460361, + "grad_norm": 0.0896126851439476, + "learning_rate": 1.261383263661168e-05, + "loss": 8.7803, + "step": 116260 + }, + { + "epoch": 0.5806387175709755, + "grad_norm": 0.08985196799039841, + "learning_rate": 1.261233072167013e-05, + "loss": 8.7829, + "step": 116270 + }, + { + "epoch": 0.580688656395915, + "grad_norm": 0.09273276478052139, + "learning_rate": 1.261082880672858e-05, + "loss": 8.7826, + "step": 116280 + }, + { + "epoch": 0.5807385952208545, + "grad_norm": 0.08667043596506119, + "learning_rate": 1.2609326891787028e-05, + "loss": 8.7681, + "step": 116290 + }, + { + "epoch": 0.5807885340457939, + "grad_norm": 0.09057953208684921, + "learning_rate": 1.2607824976845478e-05, + "loss": 8.7694, + "step": 116300 + }, + { + "epoch": 0.5808384728707333, + "grad_norm": 0.09501492232084274, + "learning_rate": 1.2606323061903927e-05, + "loss": 8.7712, + "step": 116310 + }, + { + "epoch": 0.5808884116956728, + "grad_norm": 0.08906516432762146, + "learning_rate": 1.2604821146962377e-05, + "loss": 8.788, + "step": 116320 + }, + { + "epoch": 0.5809383505206123, + "grad_norm": 0.09163739532232285, + "learning_rate": 1.2603319232020827e-05, + "loss": 8.7738, + "step": 116330 + }, + { + "epoch": 0.5809882893455517, + "grad_norm": 0.08950494229793549, + "learning_rate": 1.2601817317079277e-05, + "loss": 8.7851, + "step": 116340 + }, + { + "epoch": 0.5810382281704911, + "grad_norm": 0.09091368317604065, + "learning_rate": 1.2600315402137726e-05, + "loss": 8.7618, + "step": 116350 + }, + { + "epoch": 0.5810881669954306, + "grad_norm": 0.09785959124565125, + "learning_rate": 1.2598813487196174e-05, + "loss": 8.7833, + "step": 116360 + }, + { + "epoch": 0.5811381058203701, + "grad_norm": 0.09358986467123032, + "learning_rate": 1.2597311572254625e-05, + "loss": 8.7742, + "step": 116370 + }, + { + "epoch": 0.5811880446453095, + "grad_norm": 0.09452328830957413, + "learning_rate": 1.2595809657313075e-05, + "loss": 8.7679, + "step": 116380 + }, + { + "epoch": 0.5812379834702489, + "grad_norm": 0.09855862706899643, + "learning_rate": 1.2594307742371525e-05, + "loss": 8.7724, + "step": 116390 + }, + { + "epoch": 0.5812879222951884, + "grad_norm": 0.09987233579158783, + "learning_rate": 1.2592805827429973e-05, + "loss": 8.7737, + "step": 116400 + }, + { + "epoch": 0.5813378611201279, + "grad_norm": 0.09587401151657104, + "learning_rate": 1.2591303912488422e-05, + "loss": 8.7721, + "step": 116410 + }, + { + "epoch": 0.5813877999450673, + "grad_norm": 0.08716830611228943, + "learning_rate": 1.2589801997546872e-05, + "loss": 8.7768, + "step": 116420 + }, + { + "epoch": 0.5814377387700067, + "grad_norm": 0.09757670015096664, + "learning_rate": 1.2588300082605322e-05, + "loss": 8.7556, + "step": 116430 + }, + { + "epoch": 0.5814876775949462, + "grad_norm": 0.09444671124219894, + "learning_rate": 1.2586798167663772e-05, + "loss": 8.763, + "step": 116440 + }, + { + "epoch": 0.5815376164198857, + "grad_norm": 0.091017946600914, + "learning_rate": 1.2585296252722221e-05, + "loss": 8.7753, + "step": 116450 + }, + { + "epoch": 0.5815875552448251, + "grad_norm": 0.09647946804761887, + "learning_rate": 1.258379433778067e-05, + "loss": 8.7778, + "step": 116460 + }, + { + "epoch": 0.5816374940697645, + "grad_norm": 0.09835417568683624, + "learning_rate": 1.258229242283912e-05, + "loss": 8.7821, + "step": 116470 + }, + { + "epoch": 0.581687432894704, + "grad_norm": 0.09176066517829895, + "learning_rate": 1.258079050789757e-05, + "loss": 8.7742, + "step": 116480 + }, + { + "epoch": 0.5817373717196435, + "grad_norm": 0.09349832683801651, + "learning_rate": 1.257928859295602e-05, + "loss": 8.7629, + "step": 116490 + }, + { + "epoch": 0.5817873105445829, + "grad_norm": 0.0905727818608284, + "learning_rate": 1.257778667801447e-05, + "loss": 8.7742, + "step": 116500 + }, + { + "epoch": 0.5818372493695223, + "grad_norm": 0.09379737824201584, + "learning_rate": 1.2576284763072917e-05, + "loss": 8.7643, + "step": 116510 + }, + { + "epoch": 0.5818871881944618, + "grad_norm": 0.09791654348373413, + "learning_rate": 1.2574782848131367e-05, + "loss": 8.7811, + "step": 116520 + }, + { + "epoch": 0.5819371270194013, + "grad_norm": 0.09698036313056946, + "learning_rate": 1.2573280933189817e-05, + "loss": 8.7858, + "step": 116530 + }, + { + "epoch": 0.5819870658443407, + "grad_norm": 0.09158887714147568, + "learning_rate": 1.2571779018248267e-05, + "loss": 8.7815, + "step": 116540 + }, + { + "epoch": 0.5820370046692801, + "grad_norm": 0.08619420230388641, + "learning_rate": 1.2570277103306718e-05, + "loss": 8.7721, + "step": 116550 + }, + { + "epoch": 0.5820869434942196, + "grad_norm": 0.09012347459793091, + "learning_rate": 1.2568775188365164e-05, + "loss": 8.7651, + "step": 116560 + }, + { + "epoch": 0.582136882319159, + "grad_norm": 0.0954323559999466, + "learning_rate": 1.2567273273423615e-05, + "loss": 8.7664, + "step": 116570 + }, + { + "epoch": 0.5821868211440985, + "grad_norm": 0.09117588400840759, + "learning_rate": 1.2565771358482065e-05, + "loss": 8.7764, + "step": 116580 + }, + { + "epoch": 0.5822367599690379, + "grad_norm": 0.09088102728128433, + "learning_rate": 1.2564269443540515e-05, + "loss": 8.7799, + "step": 116590 + }, + { + "epoch": 0.5822866987939774, + "grad_norm": 0.09504615515470505, + "learning_rate": 1.2562767528598965e-05, + "loss": 8.7671, + "step": 116600 + }, + { + "epoch": 0.5823366376189169, + "grad_norm": 0.09126445651054382, + "learning_rate": 1.2561265613657414e-05, + "loss": 8.7732, + "step": 116610 + }, + { + "epoch": 0.5823865764438563, + "grad_norm": 0.0928245261311531, + "learning_rate": 1.2559763698715862e-05, + "loss": 8.7712, + "step": 116620 + }, + { + "epoch": 0.5824365152687957, + "grad_norm": 0.09340432286262512, + "learning_rate": 1.2558261783774312e-05, + "loss": 8.7847, + "step": 116630 + }, + { + "epoch": 0.5824864540937352, + "grad_norm": 0.09035646170377731, + "learning_rate": 1.2556759868832762e-05, + "loss": 8.7678, + "step": 116640 + }, + { + "epoch": 0.5825363929186747, + "grad_norm": 0.09360393136739731, + "learning_rate": 1.2555257953891213e-05, + "loss": 8.7652, + "step": 116650 + }, + { + "epoch": 0.5825863317436141, + "grad_norm": 0.09272781759500504, + "learning_rate": 1.2553756038949661e-05, + "loss": 8.7708, + "step": 116660 + }, + { + "epoch": 0.5826362705685535, + "grad_norm": 0.09432695060968399, + "learning_rate": 1.255225412400811e-05, + "loss": 8.7648, + "step": 116670 + }, + { + "epoch": 0.582686209393493, + "grad_norm": 0.10040241479873657, + "learning_rate": 1.255075220906656e-05, + "loss": 8.7673, + "step": 116680 + }, + { + "epoch": 0.5827361482184324, + "grad_norm": 0.09128203243017197, + "learning_rate": 1.254925029412501e-05, + "loss": 8.7639, + "step": 116690 + }, + { + "epoch": 0.5827860870433719, + "grad_norm": 0.08589877933263779, + "learning_rate": 1.254774837918346e-05, + "loss": 8.7859, + "step": 116700 + }, + { + "epoch": 0.5828360258683113, + "grad_norm": 0.09354274719953537, + "learning_rate": 1.2546246464241909e-05, + "loss": 8.7649, + "step": 116710 + }, + { + "epoch": 0.5828859646932507, + "grad_norm": 0.09442020207643509, + "learning_rate": 1.2544744549300357e-05, + "loss": 8.7609, + "step": 116720 + }, + { + "epoch": 0.5829359035181902, + "grad_norm": 0.0930701419711113, + "learning_rate": 1.2543242634358807e-05, + "loss": 8.7645, + "step": 116730 + }, + { + "epoch": 0.5829858423431297, + "grad_norm": 0.09396909922361374, + "learning_rate": 1.2541740719417257e-05, + "loss": 8.7725, + "step": 116740 + }, + { + "epoch": 0.5830357811680691, + "grad_norm": 0.09508174657821655, + "learning_rate": 1.2540238804475708e-05, + "loss": 8.7659, + "step": 116750 + }, + { + "epoch": 0.5830857199930085, + "grad_norm": 0.09655360132455826, + "learning_rate": 1.2538736889534158e-05, + "loss": 8.7918, + "step": 116760 + }, + { + "epoch": 0.583135658817948, + "grad_norm": 0.09458701312541962, + "learning_rate": 1.2537234974592605e-05, + "loss": 8.7626, + "step": 116770 + }, + { + "epoch": 0.5831855976428875, + "grad_norm": 0.09564093500375748, + "learning_rate": 1.2535733059651055e-05, + "loss": 8.77, + "step": 116780 + }, + { + "epoch": 0.5832355364678269, + "grad_norm": 0.09138777107000351, + "learning_rate": 1.2534231144709505e-05, + "loss": 8.7656, + "step": 116790 + }, + { + "epoch": 0.5832854752927663, + "grad_norm": 0.09551417827606201, + "learning_rate": 1.2532729229767955e-05, + "loss": 8.755, + "step": 116800 + }, + { + "epoch": 0.5833354141177058, + "grad_norm": 0.09259354323148727, + "learning_rate": 1.2531227314826405e-05, + "loss": 8.7714, + "step": 116810 + }, + { + "epoch": 0.5833853529426453, + "grad_norm": 0.09137726575136185, + "learning_rate": 1.2529725399884854e-05, + "loss": 8.7712, + "step": 116820 + }, + { + "epoch": 0.5834352917675847, + "grad_norm": 0.08896147459745407, + "learning_rate": 1.2528223484943302e-05, + "loss": 8.7618, + "step": 116830 + }, + { + "epoch": 0.5834852305925241, + "grad_norm": 0.09602434188127518, + "learning_rate": 1.2526721570001752e-05, + "loss": 8.7643, + "step": 116840 + }, + { + "epoch": 0.5835351694174636, + "grad_norm": 0.0915607139468193, + "learning_rate": 1.2525219655060203e-05, + "loss": 8.7753, + "step": 116850 + }, + { + "epoch": 0.5835851082424031, + "grad_norm": 0.09016492962837219, + "learning_rate": 1.2523717740118653e-05, + "loss": 8.7609, + "step": 116860 + }, + { + "epoch": 0.5836350470673425, + "grad_norm": 0.08857151120901108, + "learning_rate": 1.2522215825177101e-05, + "loss": 8.7689, + "step": 116870 + }, + { + "epoch": 0.5836849858922819, + "grad_norm": 0.09395987540483475, + "learning_rate": 1.252071391023555e-05, + "loss": 8.765, + "step": 116880 + }, + { + "epoch": 0.5837349247172214, + "grad_norm": 0.09694436937570572, + "learning_rate": 1.2519211995294e-05, + "loss": 8.7747, + "step": 116890 + }, + { + "epoch": 0.5837848635421609, + "grad_norm": 0.0934179425239563, + "learning_rate": 1.251771008035245e-05, + "loss": 8.7531, + "step": 116900 + }, + { + "epoch": 0.5838348023671003, + "grad_norm": 0.08705223351716995, + "learning_rate": 1.25162081654109e-05, + "loss": 8.7724, + "step": 116910 + }, + { + "epoch": 0.5838847411920397, + "grad_norm": 0.09162668138742447, + "learning_rate": 1.2514706250469349e-05, + "loss": 8.7788, + "step": 116920 + }, + { + "epoch": 0.5839346800169792, + "grad_norm": 0.08836338669061661, + "learning_rate": 1.2513204335527797e-05, + "loss": 8.7742, + "step": 116930 + }, + { + "epoch": 0.5839846188419187, + "grad_norm": 0.08714136481285095, + "learning_rate": 1.2511702420586247e-05, + "loss": 8.7651, + "step": 116940 + }, + { + "epoch": 0.5840345576668581, + "grad_norm": 0.09328541159629822, + "learning_rate": 1.2510200505644698e-05, + "loss": 8.7717, + "step": 116950 + }, + { + "epoch": 0.5840844964917975, + "grad_norm": 0.09145410358905792, + "learning_rate": 1.2508698590703148e-05, + "loss": 8.77, + "step": 116960 + }, + { + "epoch": 0.584134435316737, + "grad_norm": 0.08732394874095917, + "learning_rate": 1.2507196675761596e-05, + "loss": 8.7813, + "step": 116970 + }, + { + "epoch": 0.5841843741416765, + "grad_norm": 0.09267169237136841, + "learning_rate": 1.2505694760820046e-05, + "loss": 8.7717, + "step": 116980 + }, + { + "epoch": 0.5842343129666159, + "grad_norm": 0.08930670469999313, + "learning_rate": 1.2504192845878495e-05, + "loss": 8.7648, + "step": 116990 + }, + { + "epoch": 0.5842842517915553, + "grad_norm": 0.09315798431634903, + "learning_rate": 1.2502690930936945e-05, + "loss": 8.7702, + "step": 117000 + }, + { + "epoch": 0.5843341906164948, + "grad_norm": 0.0956578180193901, + "learning_rate": 1.2501189015995395e-05, + "loss": 8.7584, + "step": 117010 + }, + { + "epoch": 0.5843841294414343, + "grad_norm": 0.0876522958278656, + "learning_rate": 1.2499687101053844e-05, + "loss": 8.7719, + "step": 117020 + }, + { + "epoch": 0.5844340682663737, + "grad_norm": 0.09839426726102829, + "learning_rate": 1.2498185186112294e-05, + "loss": 8.7733, + "step": 117030 + }, + { + "epoch": 0.5844840070913131, + "grad_norm": 0.09518906474113464, + "learning_rate": 1.2496683271170742e-05, + "loss": 8.7664, + "step": 117040 + }, + { + "epoch": 0.5845339459162526, + "grad_norm": 0.09161265939474106, + "learning_rate": 1.2495181356229193e-05, + "loss": 8.7651, + "step": 117050 + }, + { + "epoch": 0.584583884741192, + "grad_norm": 0.08892537653446198, + "learning_rate": 1.2493679441287643e-05, + "loss": 8.7663, + "step": 117060 + }, + { + "epoch": 0.5846338235661315, + "grad_norm": 0.09384747594594955, + "learning_rate": 1.2492177526346091e-05, + "loss": 8.7786, + "step": 117070 + }, + { + "epoch": 0.5846837623910709, + "grad_norm": 0.09078042209148407, + "learning_rate": 1.2490675611404542e-05, + "loss": 8.7735, + "step": 117080 + }, + { + "epoch": 0.5847337012160104, + "grad_norm": 0.09043184667825699, + "learning_rate": 1.248917369646299e-05, + "loss": 8.7555, + "step": 117090 + }, + { + "epoch": 0.5847836400409498, + "grad_norm": 0.09139489382505417, + "learning_rate": 1.248767178152144e-05, + "loss": 8.7699, + "step": 117100 + }, + { + "epoch": 0.5848335788658893, + "grad_norm": 0.09362711757421494, + "learning_rate": 1.248616986657989e-05, + "loss": 8.7641, + "step": 117110 + }, + { + "epoch": 0.5848835176908287, + "grad_norm": 0.09047945588827133, + "learning_rate": 1.2484667951638339e-05, + "loss": 8.766, + "step": 117120 + }, + { + "epoch": 0.5849334565157682, + "grad_norm": 0.09149231016635895, + "learning_rate": 1.2483166036696789e-05, + "loss": 8.7654, + "step": 117130 + }, + { + "epoch": 0.5849833953407076, + "grad_norm": 0.09510745108127594, + "learning_rate": 1.2481664121755237e-05, + "loss": 8.7704, + "step": 117140 + }, + { + "epoch": 0.5850333341656471, + "grad_norm": 0.09530346840620041, + "learning_rate": 1.2480162206813688e-05, + "loss": 8.777, + "step": 117150 + }, + { + "epoch": 0.5850832729905865, + "grad_norm": 0.09704668074846268, + "learning_rate": 1.2478660291872138e-05, + "loss": 8.7655, + "step": 117160 + }, + { + "epoch": 0.585133211815526, + "grad_norm": 0.095854252576828, + "learning_rate": 1.2477158376930586e-05, + "loss": 8.7692, + "step": 117170 + }, + { + "epoch": 0.5851831506404654, + "grad_norm": 0.09275175631046295, + "learning_rate": 1.2475656461989037e-05, + "loss": 8.7775, + "step": 117180 + }, + { + "epoch": 0.5852330894654049, + "grad_norm": 0.09659481048583984, + "learning_rate": 1.2474154547047487e-05, + "loss": 8.7822, + "step": 117190 + }, + { + "epoch": 0.5852830282903443, + "grad_norm": 0.08653581887483597, + "learning_rate": 1.2472652632105935e-05, + "loss": 8.774, + "step": 117200 + }, + { + "epoch": 0.5853329671152838, + "grad_norm": 0.0911843553185463, + "learning_rate": 1.2471150717164385e-05, + "loss": 8.7828, + "step": 117210 + }, + { + "epoch": 0.5853829059402232, + "grad_norm": 0.09394507855176926, + "learning_rate": 1.2469648802222834e-05, + "loss": 8.7798, + "step": 117220 + }, + { + "epoch": 0.5854328447651627, + "grad_norm": 0.0936194658279419, + "learning_rate": 1.2468146887281284e-05, + "loss": 8.7758, + "step": 117230 + }, + { + "epoch": 0.5854827835901021, + "grad_norm": 0.09510088711977005, + "learning_rate": 1.2466644972339734e-05, + "loss": 8.7771, + "step": 117240 + }, + { + "epoch": 0.5855327224150416, + "grad_norm": 0.09112706780433655, + "learning_rate": 1.2465143057398183e-05, + "loss": 8.7511, + "step": 117250 + }, + { + "epoch": 0.585582661239981, + "grad_norm": 0.08992750197649002, + "learning_rate": 1.2463641142456633e-05, + "loss": 8.766, + "step": 117260 + }, + { + "epoch": 0.5856326000649205, + "grad_norm": 0.09287506341934204, + "learning_rate": 1.2462139227515081e-05, + "loss": 8.7652, + "step": 117270 + }, + { + "epoch": 0.5856825388898599, + "grad_norm": 0.09044741094112396, + "learning_rate": 1.2460637312573532e-05, + "loss": 8.7573, + "step": 117280 + }, + { + "epoch": 0.5857324777147994, + "grad_norm": 0.09210161119699478, + "learning_rate": 1.2459135397631982e-05, + "loss": 8.7738, + "step": 117290 + }, + { + "epoch": 0.5857824165397388, + "grad_norm": 0.08944103866815567, + "learning_rate": 1.245763348269043e-05, + "loss": 8.766, + "step": 117300 + }, + { + "epoch": 0.5858323553646783, + "grad_norm": 0.09100234508514404, + "learning_rate": 1.245613156774888e-05, + "loss": 8.78, + "step": 117310 + }, + { + "epoch": 0.5858822941896177, + "grad_norm": 0.09310677647590637, + "learning_rate": 1.2454629652807329e-05, + "loss": 8.7623, + "step": 117320 + }, + { + "epoch": 0.5859322330145572, + "grad_norm": 0.09582335501909256, + "learning_rate": 1.2453127737865779e-05, + "loss": 8.7645, + "step": 117330 + }, + { + "epoch": 0.5859821718394966, + "grad_norm": 0.09254664927721024, + "learning_rate": 1.245162582292423e-05, + "loss": 8.7731, + "step": 117340 + }, + { + "epoch": 0.5860321106644361, + "grad_norm": 0.09486523270606995, + "learning_rate": 1.245012390798268e-05, + "loss": 8.7579, + "step": 117350 + }, + { + "epoch": 0.5860820494893755, + "grad_norm": 0.101737841963768, + "learning_rate": 1.2448621993041128e-05, + "loss": 8.7666, + "step": 117360 + }, + { + "epoch": 0.586131988314315, + "grad_norm": 0.08502329140901566, + "learning_rate": 1.2447120078099576e-05, + "loss": 8.7802, + "step": 117370 + }, + { + "epoch": 0.5861819271392544, + "grad_norm": 0.09242632240056992, + "learning_rate": 1.2445618163158027e-05, + "loss": 8.7565, + "step": 117380 + }, + { + "epoch": 0.5862318659641939, + "grad_norm": 0.08945038169622421, + "learning_rate": 1.2444116248216477e-05, + "loss": 8.7817, + "step": 117390 + }, + { + "epoch": 0.5862818047891333, + "grad_norm": 0.09282662719488144, + "learning_rate": 1.2442614333274927e-05, + "loss": 8.7616, + "step": 117400 + }, + { + "epoch": 0.5863317436140728, + "grad_norm": 0.08877874165773392, + "learning_rate": 1.2441112418333375e-05, + "loss": 8.7676, + "step": 117410 + }, + { + "epoch": 0.5863816824390122, + "grad_norm": 0.09669318050146103, + "learning_rate": 1.2439610503391824e-05, + "loss": 8.7787, + "step": 117420 + }, + { + "epoch": 0.5864316212639517, + "grad_norm": 0.08970874547958374, + "learning_rate": 1.2438108588450274e-05, + "loss": 8.7688, + "step": 117430 + }, + { + "epoch": 0.5864815600888911, + "grad_norm": 0.09568735212087631, + "learning_rate": 1.2436606673508724e-05, + "loss": 8.7597, + "step": 117440 + }, + { + "epoch": 0.5865314989138306, + "grad_norm": 0.09088582545518875, + "learning_rate": 1.2435104758567174e-05, + "loss": 8.7621, + "step": 117450 + }, + { + "epoch": 0.58658143773877, + "grad_norm": 0.08822001516819, + "learning_rate": 1.2433602843625623e-05, + "loss": 8.7695, + "step": 117460 + }, + { + "epoch": 0.5866313765637095, + "grad_norm": 0.09296117722988129, + "learning_rate": 1.2432100928684071e-05, + "loss": 8.7744, + "step": 117470 + }, + { + "epoch": 0.5866813153886489, + "grad_norm": 0.09976507723331451, + "learning_rate": 1.2430599013742522e-05, + "loss": 8.7743, + "step": 117480 + }, + { + "epoch": 0.5867312542135884, + "grad_norm": 0.09362848848104477, + "learning_rate": 1.2429097098800972e-05, + "loss": 8.7735, + "step": 117490 + }, + { + "epoch": 0.5867811930385278, + "grad_norm": 0.08755030483007431, + "learning_rate": 1.2427595183859422e-05, + "loss": 8.772, + "step": 117500 + }, + { + "epoch": 0.5868311318634672, + "grad_norm": 0.09280279278755188, + "learning_rate": 1.2426093268917872e-05, + "loss": 8.7612, + "step": 117510 + }, + { + "epoch": 0.5868810706884067, + "grad_norm": 0.08822941035032272, + "learning_rate": 1.2424591353976319e-05, + "loss": 8.7682, + "step": 117520 + }, + { + "epoch": 0.5869310095133462, + "grad_norm": 0.09698329120874405, + "learning_rate": 1.2423089439034769e-05, + "loss": 8.753, + "step": 117530 + }, + { + "epoch": 0.5869809483382856, + "grad_norm": 0.09743697941303253, + "learning_rate": 1.242158752409322e-05, + "loss": 8.7608, + "step": 117540 + }, + { + "epoch": 0.587030887163225, + "grad_norm": 0.09233416616916656, + "learning_rate": 1.242008560915167e-05, + "loss": 8.7681, + "step": 117550 + }, + { + "epoch": 0.5870808259881645, + "grad_norm": 0.09132979065179825, + "learning_rate": 1.241858369421012e-05, + "loss": 8.7779, + "step": 117560 + }, + { + "epoch": 0.587130764813104, + "grad_norm": 0.08776623755693436, + "learning_rate": 1.2417081779268566e-05, + "loss": 8.7762, + "step": 117570 + }, + { + "epoch": 0.5871807036380434, + "grad_norm": 0.09042856097221375, + "learning_rate": 1.2415579864327017e-05, + "loss": 8.7604, + "step": 117580 + }, + { + "epoch": 0.5872306424629828, + "grad_norm": 0.09197208285331726, + "learning_rate": 1.2414077949385467e-05, + "loss": 8.7629, + "step": 117590 + }, + { + "epoch": 0.5872805812879223, + "grad_norm": 0.09105636179447174, + "learning_rate": 1.2412576034443917e-05, + "loss": 8.7535, + "step": 117600 + }, + { + "epoch": 0.5873305201128618, + "grad_norm": 0.0907004103064537, + "learning_rate": 1.2411074119502367e-05, + "loss": 8.7604, + "step": 117610 + }, + { + "epoch": 0.5873804589378012, + "grad_norm": 0.09452099353075027, + "learning_rate": 1.2409572204560814e-05, + "loss": 8.757, + "step": 117620 + }, + { + "epoch": 0.5874303977627406, + "grad_norm": 0.09557855874300003, + "learning_rate": 1.2408070289619264e-05, + "loss": 8.7649, + "step": 117630 + }, + { + "epoch": 0.5874803365876801, + "grad_norm": 0.09412212669849396, + "learning_rate": 1.2406568374677714e-05, + "loss": 8.7602, + "step": 117640 + }, + { + "epoch": 0.5875302754126196, + "grad_norm": 0.09182430058717728, + "learning_rate": 1.2405066459736164e-05, + "loss": 8.7597, + "step": 117650 + }, + { + "epoch": 0.587580214237559, + "grad_norm": 0.08901126682758331, + "learning_rate": 1.2403564544794615e-05, + "loss": 8.7648, + "step": 117660 + }, + { + "epoch": 0.5876301530624984, + "grad_norm": 0.09120889008045197, + "learning_rate": 1.2402062629853063e-05, + "loss": 8.752, + "step": 117670 + }, + { + "epoch": 0.5876800918874379, + "grad_norm": 0.09408021718263626, + "learning_rate": 1.2400560714911512e-05, + "loss": 8.7826, + "step": 117680 + }, + { + "epoch": 0.5877300307123773, + "grad_norm": 0.0881025642156601, + "learning_rate": 1.2399058799969962e-05, + "loss": 8.7617, + "step": 117690 + }, + { + "epoch": 0.5877799695373168, + "grad_norm": 0.09191694110631943, + "learning_rate": 1.2397556885028412e-05, + "loss": 8.7643, + "step": 117700 + }, + { + "epoch": 0.5878299083622562, + "grad_norm": 0.08953646570444107, + "learning_rate": 1.2396054970086862e-05, + "loss": 8.7694, + "step": 117710 + }, + { + "epoch": 0.5878798471871957, + "grad_norm": 0.08859028667211533, + "learning_rate": 1.239455305514531e-05, + "loss": 8.773, + "step": 117720 + }, + { + "epoch": 0.5879297860121351, + "grad_norm": 0.09517339617013931, + "learning_rate": 1.2393051140203759e-05, + "loss": 8.7629, + "step": 117730 + }, + { + "epoch": 0.5879797248370746, + "grad_norm": 0.09675818681716919, + "learning_rate": 1.239154922526221e-05, + "loss": 8.7473, + "step": 117740 + }, + { + "epoch": 0.588029663662014, + "grad_norm": 0.09151841700077057, + "learning_rate": 1.239004731032066e-05, + "loss": 8.7576, + "step": 117750 + }, + { + "epoch": 0.5880796024869535, + "grad_norm": 0.09673187136650085, + "learning_rate": 1.238854539537911e-05, + "loss": 8.7646, + "step": 117760 + }, + { + "epoch": 0.5881295413118929, + "grad_norm": 0.0907597616314888, + "learning_rate": 1.2387043480437558e-05, + "loss": 8.7586, + "step": 117770 + }, + { + "epoch": 0.5881794801368324, + "grad_norm": 0.09877236932516098, + "learning_rate": 1.2385541565496007e-05, + "loss": 8.759, + "step": 117780 + }, + { + "epoch": 0.5882294189617718, + "grad_norm": 0.09087306261062622, + "learning_rate": 1.2384039650554457e-05, + "loss": 8.7683, + "step": 117790 + }, + { + "epoch": 0.5882793577867113, + "grad_norm": 0.09113863110542297, + "learning_rate": 1.2382537735612907e-05, + "loss": 8.7498, + "step": 117800 + }, + { + "epoch": 0.5883292966116507, + "grad_norm": 0.09708728641271591, + "learning_rate": 1.2381035820671357e-05, + "loss": 8.7612, + "step": 117810 + }, + { + "epoch": 0.5883792354365902, + "grad_norm": 0.09207798540592194, + "learning_rate": 1.2379533905729806e-05, + "loss": 8.771, + "step": 117820 + }, + { + "epoch": 0.5884291742615296, + "grad_norm": 0.09187018126249313, + "learning_rate": 1.2378031990788256e-05, + "loss": 8.7732, + "step": 117830 + }, + { + "epoch": 0.588479113086469, + "grad_norm": 0.09126574546098709, + "learning_rate": 1.2376530075846704e-05, + "loss": 8.7423, + "step": 117840 + }, + { + "epoch": 0.5885290519114085, + "grad_norm": 0.09289278835058212, + "learning_rate": 1.2375028160905154e-05, + "loss": 8.7448, + "step": 117850 + }, + { + "epoch": 0.588578990736348, + "grad_norm": 0.09014803171157837, + "learning_rate": 1.2373526245963605e-05, + "loss": 8.7721, + "step": 117860 + }, + { + "epoch": 0.5886289295612874, + "grad_norm": 0.09293773025274277, + "learning_rate": 1.2372024331022053e-05, + "loss": 8.7588, + "step": 117870 + }, + { + "epoch": 0.5886788683862269, + "grad_norm": 0.08857384324073792, + "learning_rate": 1.2370522416080503e-05, + "loss": 8.7656, + "step": 117880 + }, + { + "epoch": 0.5887288072111663, + "grad_norm": 0.09308899194002151, + "learning_rate": 1.2369020501138952e-05, + "loss": 8.7587, + "step": 117890 + }, + { + "epoch": 0.5887787460361058, + "grad_norm": 0.09509921073913574, + "learning_rate": 1.2367518586197402e-05, + "loss": 8.7589, + "step": 117900 + }, + { + "epoch": 0.5888286848610452, + "grad_norm": 0.090652696788311, + "learning_rate": 1.2366016671255852e-05, + "loss": 8.7635, + "step": 117910 + }, + { + "epoch": 0.5888786236859846, + "grad_norm": 0.08929238468408585, + "learning_rate": 1.23645147563143e-05, + "loss": 8.7458, + "step": 117920 + }, + { + "epoch": 0.5889285625109241, + "grad_norm": 0.09252835810184479, + "learning_rate": 1.236301284137275e-05, + "loss": 8.7504, + "step": 117930 + }, + { + "epoch": 0.5889785013358636, + "grad_norm": 0.09552726149559021, + "learning_rate": 1.23615109264312e-05, + "loss": 8.7493, + "step": 117940 + }, + { + "epoch": 0.589028440160803, + "grad_norm": 0.08871292322874069, + "learning_rate": 1.236000901148965e-05, + "loss": 8.7727, + "step": 117950 + }, + { + "epoch": 0.5890783789857424, + "grad_norm": 0.09849249571561813, + "learning_rate": 1.23585070965481e-05, + "loss": 8.7701, + "step": 117960 + }, + { + "epoch": 0.5891283178106819, + "grad_norm": 0.09064419567584991, + "learning_rate": 1.2357005181606548e-05, + "loss": 8.7677, + "step": 117970 + }, + { + "epoch": 0.5891782566356214, + "grad_norm": 0.096795454621315, + "learning_rate": 1.2355503266664998e-05, + "loss": 8.7601, + "step": 117980 + }, + { + "epoch": 0.5892281954605608, + "grad_norm": 0.09287415444850922, + "learning_rate": 1.2354001351723448e-05, + "loss": 8.7754, + "step": 117990 + }, + { + "epoch": 0.5892781342855002, + "grad_norm": 0.09310322254896164, + "learning_rate": 1.2352499436781897e-05, + "loss": 8.7775, + "step": 118000 + }, + { + "epoch": 0.5893280731104397, + "grad_norm": 0.09387247264385223, + "learning_rate": 1.2350997521840347e-05, + "loss": 8.7668, + "step": 118010 + }, + { + "epoch": 0.5893780119353792, + "grad_norm": 0.09476472437381744, + "learning_rate": 1.2349495606898796e-05, + "loss": 8.7517, + "step": 118020 + }, + { + "epoch": 0.5894279507603186, + "grad_norm": 0.09105034172534943, + "learning_rate": 1.2347993691957246e-05, + "loss": 8.7663, + "step": 118030 + }, + { + "epoch": 0.589477889585258, + "grad_norm": 0.09070300310850143, + "learning_rate": 1.2346491777015696e-05, + "loss": 8.7565, + "step": 118040 + }, + { + "epoch": 0.5895278284101975, + "grad_norm": 0.09422162175178528, + "learning_rate": 1.2344989862074144e-05, + "loss": 8.7575, + "step": 118050 + }, + { + "epoch": 0.589577767235137, + "grad_norm": 0.08815252780914307, + "learning_rate": 1.2343487947132595e-05, + "loss": 8.7475, + "step": 118060 + }, + { + "epoch": 0.5896277060600764, + "grad_norm": 0.08904334157705307, + "learning_rate": 1.2341986032191043e-05, + "loss": 8.7518, + "step": 118070 + }, + { + "epoch": 0.5896776448850158, + "grad_norm": 0.09929101914167404, + "learning_rate": 1.2340484117249493e-05, + "loss": 8.7426, + "step": 118080 + }, + { + "epoch": 0.5897275837099553, + "grad_norm": 0.09539781510829926, + "learning_rate": 1.2338982202307943e-05, + "loss": 8.7484, + "step": 118090 + }, + { + "epoch": 0.5897775225348948, + "grad_norm": 0.09773596376180649, + "learning_rate": 1.2337480287366392e-05, + "loss": 8.7371, + "step": 118100 + }, + { + "epoch": 0.5898274613598342, + "grad_norm": 0.09675204008817673, + "learning_rate": 1.2335978372424842e-05, + "loss": 8.7507, + "step": 118110 + }, + { + "epoch": 0.5898774001847736, + "grad_norm": 0.09443136304616928, + "learning_rate": 1.233447645748329e-05, + "loss": 8.7688, + "step": 118120 + }, + { + "epoch": 0.5899273390097131, + "grad_norm": 0.09123744815587997, + "learning_rate": 1.233297454254174e-05, + "loss": 8.7677, + "step": 118130 + }, + { + "epoch": 0.5899772778346526, + "grad_norm": 0.09106560051441193, + "learning_rate": 1.2331472627600191e-05, + "loss": 8.7569, + "step": 118140 + }, + { + "epoch": 0.590027216659592, + "grad_norm": 0.08924289792776108, + "learning_rate": 1.2329970712658641e-05, + "loss": 8.7577, + "step": 118150 + }, + { + "epoch": 0.5900771554845314, + "grad_norm": 0.08827322721481323, + "learning_rate": 1.232846879771709e-05, + "loss": 8.7717, + "step": 118160 + }, + { + "epoch": 0.5901270943094709, + "grad_norm": 0.08779691904783249, + "learning_rate": 1.2326966882775538e-05, + "loss": 8.7642, + "step": 118170 + }, + { + "epoch": 0.5901770331344104, + "grad_norm": 0.09139509499073029, + "learning_rate": 1.2325464967833988e-05, + "loss": 8.777, + "step": 118180 + }, + { + "epoch": 0.5902269719593498, + "grad_norm": 0.0951077789068222, + "learning_rate": 1.2323963052892438e-05, + "loss": 8.7487, + "step": 118190 + }, + { + "epoch": 0.5902769107842892, + "grad_norm": 0.09444812685251236, + "learning_rate": 1.2322461137950889e-05, + "loss": 8.752, + "step": 118200 + }, + { + "epoch": 0.5903268496092287, + "grad_norm": 0.0971173644065857, + "learning_rate": 1.2320959223009337e-05, + "loss": 8.7693, + "step": 118210 + }, + { + "epoch": 0.5903767884341682, + "grad_norm": 0.08963154256343842, + "learning_rate": 1.2319457308067786e-05, + "loss": 8.7772, + "step": 118220 + }, + { + "epoch": 0.5904267272591076, + "grad_norm": 0.09564877301454544, + "learning_rate": 1.2317955393126236e-05, + "loss": 8.771, + "step": 118230 + }, + { + "epoch": 0.590476666084047, + "grad_norm": 0.09680573642253876, + "learning_rate": 1.2316453478184686e-05, + "loss": 8.7641, + "step": 118240 + }, + { + "epoch": 0.5905266049089865, + "grad_norm": 0.09979761391878128, + "learning_rate": 1.2314951563243136e-05, + "loss": 8.7556, + "step": 118250 + }, + { + "epoch": 0.590576543733926, + "grad_norm": 0.0942818745970726, + "learning_rate": 1.2313449648301585e-05, + "loss": 8.7516, + "step": 118260 + }, + { + "epoch": 0.5906264825588654, + "grad_norm": 0.0858750268816948, + "learning_rate": 1.2311947733360033e-05, + "loss": 8.7616, + "step": 118270 + }, + { + "epoch": 0.5906764213838048, + "grad_norm": 0.09308760613203049, + "learning_rate": 1.2310445818418483e-05, + "loss": 8.7686, + "step": 118280 + }, + { + "epoch": 0.5907263602087443, + "grad_norm": 0.09751647710800171, + "learning_rate": 1.2308943903476933e-05, + "loss": 8.7575, + "step": 118290 + }, + { + "epoch": 0.5907762990336838, + "grad_norm": 0.09501097351312637, + "learning_rate": 1.2307441988535384e-05, + "loss": 8.7436, + "step": 118300 + }, + { + "epoch": 0.5908262378586232, + "grad_norm": 0.09237045794725418, + "learning_rate": 1.2305940073593834e-05, + "loss": 8.7537, + "step": 118310 + }, + { + "epoch": 0.5908761766835626, + "grad_norm": 0.08817274123430252, + "learning_rate": 1.230443815865228e-05, + "loss": 8.7618, + "step": 118320 + }, + { + "epoch": 0.590926115508502, + "grad_norm": 0.08590534329414368, + "learning_rate": 1.230293624371073e-05, + "loss": 8.7611, + "step": 118330 + }, + { + "epoch": 0.5909760543334416, + "grad_norm": 0.09068264812231064, + "learning_rate": 1.2301434328769181e-05, + "loss": 8.7521, + "step": 118340 + }, + { + "epoch": 0.591025993158381, + "grad_norm": 0.08929526805877686, + "learning_rate": 1.2299932413827631e-05, + "loss": 8.7686, + "step": 118350 + }, + { + "epoch": 0.5910759319833204, + "grad_norm": 0.09027723968029022, + "learning_rate": 1.2298430498886081e-05, + "loss": 8.7556, + "step": 118360 + }, + { + "epoch": 0.5911258708082598, + "grad_norm": 0.08829135447740555, + "learning_rate": 1.2296928583944528e-05, + "loss": 8.7631, + "step": 118370 + }, + { + "epoch": 0.5911758096331994, + "grad_norm": 0.08966977894306183, + "learning_rate": 1.2295426669002978e-05, + "loss": 8.7739, + "step": 118380 + }, + { + "epoch": 0.5912257484581388, + "grad_norm": 0.09342916309833527, + "learning_rate": 1.2293924754061428e-05, + "loss": 8.76, + "step": 118390 + }, + { + "epoch": 0.5912756872830782, + "grad_norm": 0.09105119109153748, + "learning_rate": 1.2292422839119879e-05, + "loss": 8.7592, + "step": 118400 + }, + { + "epoch": 0.5913256261080176, + "grad_norm": 0.09159651398658752, + "learning_rate": 1.2290920924178329e-05, + "loss": 8.7641, + "step": 118410 + }, + { + "epoch": 0.5913755649329572, + "grad_norm": 0.08399590849876404, + "learning_rate": 1.2289419009236776e-05, + "loss": 8.7847, + "step": 118420 + }, + { + "epoch": 0.5914255037578966, + "grad_norm": 0.08947955816984177, + "learning_rate": 1.2287917094295226e-05, + "loss": 8.7599, + "step": 118430 + }, + { + "epoch": 0.591475442582836, + "grad_norm": 0.08784180134534836, + "learning_rate": 1.2286415179353676e-05, + "loss": 8.7619, + "step": 118440 + }, + { + "epoch": 0.5915253814077754, + "grad_norm": 0.09851372241973877, + "learning_rate": 1.2284913264412126e-05, + "loss": 8.7478, + "step": 118450 + }, + { + "epoch": 0.591575320232715, + "grad_norm": 0.09141961485147476, + "learning_rate": 1.2283411349470576e-05, + "loss": 8.7752, + "step": 118460 + }, + { + "epoch": 0.5916252590576544, + "grad_norm": 0.09081381559371948, + "learning_rate": 1.2281909434529025e-05, + "loss": 8.7681, + "step": 118470 + }, + { + "epoch": 0.5916751978825938, + "grad_norm": 0.0934201031923294, + "learning_rate": 1.2280407519587473e-05, + "loss": 8.7523, + "step": 118480 + }, + { + "epoch": 0.5917251367075332, + "grad_norm": 0.09097079187631607, + "learning_rate": 1.2278905604645923e-05, + "loss": 8.7642, + "step": 118490 + }, + { + "epoch": 0.5917750755324728, + "grad_norm": 0.09448742121458054, + "learning_rate": 1.2277403689704374e-05, + "loss": 8.7639, + "step": 118500 + }, + { + "epoch": 0.5918250143574122, + "grad_norm": 0.09550192207098007, + "learning_rate": 1.2275901774762824e-05, + "loss": 8.7621, + "step": 118510 + }, + { + "epoch": 0.5918749531823516, + "grad_norm": 0.0902007445693016, + "learning_rate": 1.2274399859821272e-05, + "loss": 8.7534, + "step": 118520 + }, + { + "epoch": 0.591924892007291, + "grad_norm": 0.09537496417760849, + "learning_rate": 1.227289794487972e-05, + "loss": 8.7706, + "step": 118530 + }, + { + "epoch": 0.5919748308322306, + "grad_norm": 0.09085094183683395, + "learning_rate": 1.2271396029938171e-05, + "loss": 8.755, + "step": 118540 + }, + { + "epoch": 0.59202476965717, + "grad_norm": 0.09145759046077728, + "learning_rate": 1.2269894114996621e-05, + "loss": 8.7584, + "step": 118550 + }, + { + "epoch": 0.5920747084821094, + "grad_norm": 0.08843749761581421, + "learning_rate": 1.2268392200055071e-05, + "loss": 8.7606, + "step": 118560 + }, + { + "epoch": 0.5921246473070488, + "grad_norm": 0.08855432271957397, + "learning_rate": 1.226689028511352e-05, + "loss": 8.7522, + "step": 118570 + }, + { + "epoch": 0.5921745861319884, + "grad_norm": 0.09247451275587082, + "learning_rate": 1.2265388370171968e-05, + "loss": 8.756, + "step": 118580 + }, + { + "epoch": 0.5922245249569278, + "grad_norm": 0.09341256320476532, + "learning_rate": 1.2263886455230418e-05, + "loss": 8.7578, + "step": 118590 + }, + { + "epoch": 0.5922744637818672, + "grad_norm": 0.08816874027252197, + "learning_rate": 1.2262384540288869e-05, + "loss": 8.7486, + "step": 118600 + }, + { + "epoch": 0.5923244026068066, + "grad_norm": 0.08888726681470871, + "learning_rate": 1.2260882625347319e-05, + "loss": 8.7589, + "step": 118610 + }, + { + "epoch": 0.5923743414317462, + "grad_norm": 0.08801273256540298, + "learning_rate": 1.2259380710405767e-05, + "loss": 8.7634, + "step": 118620 + }, + { + "epoch": 0.5924242802566856, + "grad_norm": 0.09357626736164093, + "learning_rate": 1.2257878795464218e-05, + "loss": 8.7542, + "step": 118630 + }, + { + "epoch": 0.592474219081625, + "grad_norm": 0.09778086096048355, + "learning_rate": 1.2256376880522666e-05, + "loss": 8.7537, + "step": 118640 + }, + { + "epoch": 0.5925241579065644, + "grad_norm": 0.09161866456270218, + "learning_rate": 1.2254874965581116e-05, + "loss": 8.7466, + "step": 118650 + }, + { + "epoch": 0.592574096731504, + "grad_norm": 0.09025179594755173, + "learning_rate": 1.2253373050639566e-05, + "loss": 8.7628, + "step": 118660 + }, + { + "epoch": 0.5926240355564434, + "grad_norm": 0.09360655397176743, + "learning_rate": 1.2251871135698015e-05, + "loss": 8.7526, + "step": 118670 + }, + { + "epoch": 0.5926739743813828, + "grad_norm": 0.09320328384637833, + "learning_rate": 1.2250369220756465e-05, + "loss": 8.7654, + "step": 118680 + }, + { + "epoch": 0.5927239132063222, + "grad_norm": 0.08667218685150146, + "learning_rate": 1.2248867305814913e-05, + "loss": 8.763, + "step": 118690 + }, + { + "epoch": 0.5927738520312616, + "grad_norm": 0.08786769956350327, + "learning_rate": 1.2247365390873364e-05, + "loss": 8.7849, + "step": 118700 + }, + { + "epoch": 0.5928237908562012, + "grad_norm": 0.09707433730363846, + "learning_rate": 1.2245863475931814e-05, + "loss": 8.75, + "step": 118710 + }, + { + "epoch": 0.5928737296811406, + "grad_norm": 0.09028863161802292, + "learning_rate": 1.2244361560990262e-05, + "loss": 8.7534, + "step": 118720 + }, + { + "epoch": 0.59292366850608, + "grad_norm": 0.08839409798383713, + "learning_rate": 1.2242859646048713e-05, + "loss": 8.7801, + "step": 118730 + }, + { + "epoch": 0.5929736073310194, + "grad_norm": 0.09288914501667023, + "learning_rate": 1.2241357731107161e-05, + "loss": 8.7674, + "step": 118740 + }, + { + "epoch": 0.593023546155959, + "grad_norm": 0.08941594511270523, + "learning_rate": 1.2239855816165611e-05, + "loss": 8.7506, + "step": 118750 + }, + { + "epoch": 0.5930734849808984, + "grad_norm": 0.08502016216516495, + "learning_rate": 1.2238353901224061e-05, + "loss": 8.7585, + "step": 118760 + }, + { + "epoch": 0.5931234238058378, + "grad_norm": 0.09299997240304947, + "learning_rate": 1.223685198628251e-05, + "loss": 8.7511, + "step": 118770 + }, + { + "epoch": 0.5931733626307772, + "grad_norm": 0.09595504403114319, + "learning_rate": 1.223535007134096e-05, + "loss": 8.7624, + "step": 118780 + }, + { + "epoch": 0.5932233014557168, + "grad_norm": 0.09487087279558182, + "learning_rate": 1.223384815639941e-05, + "loss": 8.7711, + "step": 118790 + }, + { + "epoch": 0.5932732402806562, + "grad_norm": 0.09738994389772415, + "learning_rate": 1.2232346241457859e-05, + "loss": 8.7638, + "step": 118800 + }, + { + "epoch": 0.5933231791055956, + "grad_norm": 0.09411431103944778, + "learning_rate": 1.2230844326516309e-05, + "loss": 8.7554, + "step": 118810 + }, + { + "epoch": 0.593373117930535, + "grad_norm": 0.09381046146154404, + "learning_rate": 1.2229342411574757e-05, + "loss": 8.7498, + "step": 118820 + }, + { + "epoch": 0.5934230567554746, + "grad_norm": 0.08876951038837433, + "learning_rate": 1.2227840496633208e-05, + "loss": 8.7571, + "step": 118830 + }, + { + "epoch": 0.593472995580414, + "grad_norm": 0.08969464153051376, + "learning_rate": 1.2226338581691658e-05, + "loss": 8.7615, + "step": 118840 + }, + { + "epoch": 0.5935229344053534, + "grad_norm": 0.09235970675945282, + "learning_rate": 1.2224836666750106e-05, + "loss": 8.7604, + "step": 118850 + }, + { + "epoch": 0.5935728732302928, + "grad_norm": 0.09277419745922089, + "learning_rate": 1.2223334751808556e-05, + "loss": 8.7511, + "step": 118860 + }, + { + "epoch": 0.5936228120552324, + "grad_norm": 0.08957847207784653, + "learning_rate": 1.2221832836867007e-05, + "loss": 8.7614, + "step": 118870 + }, + { + "epoch": 0.5936727508801718, + "grad_norm": 0.09086732566356659, + "learning_rate": 1.2220330921925455e-05, + "loss": 8.7601, + "step": 118880 + }, + { + "epoch": 0.5937226897051112, + "grad_norm": 0.09175397455692291, + "learning_rate": 1.2218829006983905e-05, + "loss": 8.7584, + "step": 118890 + }, + { + "epoch": 0.5937726285300506, + "grad_norm": 0.09422596544027328, + "learning_rate": 1.2217327092042354e-05, + "loss": 8.7508, + "step": 118900 + }, + { + "epoch": 0.5938225673549902, + "grad_norm": 0.09395137429237366, + "learning_rate": 1.2215825177100804e-05, + "loss": 8.753, + "step": 118910 + }, + { + "epoch": 0.5938725061799296, + "grad_norm": 0.08856262266635895, + "learning_rate": 1.2214323262159254e-05, + "loss": 8.7564, + "step": 118920 + }, + { + "epoch": 0.593922445004869, + "grad_norm": 0.08992553502321243, + "learning_rate": 1.2212821347217703e-05, + "loss": 8.7466, + "step": 118930 + }, + { + "epoch": 0.5939723838298084, + "grad_norm": 0.094720758497715, + "learning_rate": 1.2211319432276153e-05, + "loss": 8.7473, + "step": 118940 + }, + { + "epoch": 0.594022322654748, + "grad_norm": 0.09277505427598953, + "learning_rate": 1.2209817517334603e-05, + "loss": 8.7496, + "step": 118950 + }, + { + "epoch": 0.5940722614796874, + "grad_norm": 0.09039611369371414, + "learning_rate": 1.2208315602393051e-05, + "loss": 8.746, + "step": 118960 + }, + { + "epoch": 0.5941222003046268, + "grad_norm": 0.09134664386510849, + "learning_rate": 1.2206813687451502e-05, + "loss": 8.7584, + "step": 118970 + }, + { + "epoch": 0.5941721391295662, + "grad_norm": 0.09162914752960205, + "learning_rate": 1.220531177250995e-05, + "loss": 8.754, + "step": 118980 + }, + { + "epoch": 0.5942220779545058, + "grad_norm": 0.0885622650384903, + "learning_rate": 1.22038098575684e-05, + "loss": 8.7476, + "step": 118990 + }, + { + "epoch": 0.5942720167794452, + "grad_norm": 0.09717550873756409, + "learning_rate": 1.220230794262685e-05, + "loss": 8.759, + "step": 119000 + }, + { + "epoch": 0.5943219556043846, + "grad_norm": 0.0922437235713005, + "learning_rate": 1.2200806027685299e-05, + "loss": 8.7528, + "step": 119010 + }, + { + "epoch": 0.594371894429324, + "grad_norm": 0.08450714498758316, + "learning_rate": 1.2199304112743749e-05, + "loss": 8.7721, + "step": 119020 + }, + { + "epoch": 0.5944218332542636, + "grad_norm": 0.09239663183689117, + "learning_rate": 1.2197802197802198e-05, + "loss": 8.7442, + "step": 119030 + }, + { + "epoch": 0.594471772079203, + "grad_norm": 0.09686051309108734, + "learning_rate": 1.2196300282860648e-05, + "loss": 8.7684, + "step": 119040 + }, + { + "epoch": 0.5945217109041424, + "grad_norm": 0.0940653383731842, + "learning_rate": 1.2194798367919098e-05, + "loss": 8.7579, + "step": 119050 + }, + { + "epoch": 0.5945716497290818, + "grad_norm": 0.08960103243589401, + "learning_rate": 1.2193296452977546e-05, + "loss": 8.7545, + "step": 119060 + }, + { + "epoch": 0.5946215885540214, + "grad_norm": 0.09707477688789368, + "learning_rate": 1.2191794538035997e-05, + "loss": 8.7575, + "step": 119070 + }, + { + "epoch": 0.5946715273789608, + "grad_norm": 0.09536729753017426, + "learning_rate": 1.2190292623094445e-05, + "loss": 8.7516, + "step": 119080 + }, + { + "epoch": 0.5947214662039002, + "grad_norm": 0.09189159423112869, + "learning_rate": 1.2188790708152895e-05, + "loss": 8.7709, + "step": 119090 + }, + { + "epoch": 0.5947714050288396, + "grad_norm": 0.09311476349830627, + "learning_rate": 1.2187288793211345e-05, + "loss": 8.7534, + "step": 119100 + }, + { + "epoch": 0.5948213438537792, + "grad_norm": 0.09297338128089905, + "learning_rate": 1.2185786878269796e-05, + "loss": 8.7442, + "step": 119110 + }, + { + "epoch": 0.5948712826787186, + "grad_norm": 0.09508251398801804, + "learning_rate": 1.2184284963328244e-05, + "loss": 8.7543, + "step": 119120 + }, + { + "epoch": 0.594921221503658, + "grad_norm": 0.09149755537509918, + "learning_rate": 1.2182783048386693e-05, + "loss": 8.741, + "step": 119130 + }, + { + "epoch": 0.5949711603285974, + "grad_norm": 0.09345537424087524, + "learning_rate": 1.2181281133445143e-05, + "loss": 8.7353, + "step": 119140 + }, + { + "epoch": 0.595021099153537, + "grad_norm": 0.08903185278177261, + "learning_rate": 1.2179779218503593e-05, + "loss": 8.7532, + "step": 119150 + }, + { + "epoch": 0.5950710379784764, + "grad_norm": 0.08982034772634506, + "learning_rate": 1.2178277303562043e-05, + "loss": 8.7417, + "step": 119160 + }, + { + "epoch": 0.5951209768034158, + "grad_norm": 0.09356318414211273, + "learning_rate": 1.2176775388620492e-05, + "loss": 8.7377, + "step": 119170 + }, + { + "epoch": 0.5951709156283552, + "grad_norm": 0.09444409608840942, + "learning_rate": 1.217527347367894e-05, + "loss": 8.7521, + "step": 119180 + }, + { + "epoch": 0.5952208544532948, + "grad_norm": 0.09872134029865265, + "learning_rate": 1.217377155873739e-05, + "loss": 8.7463, + "step": 119190 + }, + { + "epoch": 0.5952707932782342, + "grad_norm": 0.09588941931724548, + "learning_rate": 1.217226964379584e-05, + "loss": 8.7578, + "step": 119200 + }, + { + "epoch": 0.5953207321031736, + "grad_norm": 0.09109771251678467, + "learning_rate": 1.217076772885429e-05, + "loss": 8.7556, + "step": 119210 + }, + { + "epoch": 0.595370670928113, + "grad_norm": 0.09148535132408142, + "learning_rate": 1.2169265813912739e-05, + "loss": 8.7584, + "step": 119220 + }, + { + "epoch": 0.5954206097530526, + "grad_norm": 0.09875836223363876, + "learning_rate": 1.2167763898971188e-05, + "loss": 8.7525, + "step": 119230 + }, + { + "epoch": 0.595470548577992, + "grad_norm": 0.09247757494449615, + "learning_rate": 1.2166261984029638e-05, + "loss": 8.7402, + "step": 119240 + }, + { + "epoch": 0.5955204874029314, + "grad_norm": 0.09525955468416214, + "learning_rate": 1.2164760069088088e-05, + "loss": 8.7735, + "step": 119250 + }, + { + "epoch": 0.5955704262278708, + "grad_norm": 0.09496864676475525, + "learning_rate": 1.2163258154146538e-05, + "loss": 8.7534, + "step": 119260 + }, + { + "epoch": 0.5956203650528104, + "grad_norm": 0.09750999510288239, + "learning_rate": 1.2161756239204988e-05, + "loss": 8.7551, + "step": 119270 + }, + { + "epoch": 0.5956703038777498, + "grad_norm": 0.0901816189289093, + "learning_rate": 1.2160254324263435e-05, + "loss": 8.7569, + "step": 119280 + }, + { + "epoch": 0.5957202427026892, + "grad_norm": 0.09008877724409103, + "learning_rate": 1.2158752409321885e-05, + "loss": 8.7433, + "step": 119290 + }, + { + "epoch": 0.5957701815276286, + "grad_norm": 0.08970709890127182, + "learning_rate": 1.2157250494380335e-05, + "loss": 8.7651, + "step": 119300 + }, + { + "epoch": 0.5958201203525681, + "grad_norm": 0.09239272028207779, + "learning_rate": 1.2155748579438786e-05, + "loss": 8.746, + "step": 119310 + }, + { + "epoch": 0.5958700591775076, + "grad_norm": 0.0948515385389328, + "learning_rate": 1.2154246664497236e-05, + "loss": 8.7497, + "step": 119320 + }, + { + "epoch": 0.595919998002447, + "grad_norm": 0.09544006735086441, + "learning_rate": 1.2152744749555683e-05, + "loss": 8.7772, + "step": 119330 + }, + { + "epoch": 0.5959699368273864, + "grad_norm": 0.092537522315979, + "learning_rate": 1.2151242834614133e-05, + "loss": 8.7515, + "step": 119340 + }, + { + "epoch": 0.596019875652326, + "grad_norm": 0.09258683770895004, + "learning_rate": 1.2149740919672583e-05, + "loss": 8.7646, + "step": 119350 + }, + { + "epoch": 0.5960698144772654, + "grad_norm": 0.09671740233898163, + "learning_rate": 1.2148239004731033e-05, + "loss": 8.7454, + "step": 119360 + }, + { + "epoch": 0.5961197533022048, + "grad_norm": 0.0934329703450203, + "learning_rate": 1.2146737089789483e-05, + "loss": 8.7486, + "step": 119370 + }, + { + "epoch": 0.5961696921271442, + "grad_norm": 0.09332381933927536, + "learning_rate": 1.214523517484793e-05, + "loss": 8.7586, + "step": 119380 + }, + { + "epoch": 0.5962196309520837, + "grad_norm": 0.09403794258832932, + "learning_rate": 1.214373325990638e-05, + "loss": 8.7459, + "step": 119390 + }, + { + "epoch": 0.5962695697770232, + "grad_norm": 0.097443126142025, + "learning_rate": 1.214223134496483e-05, + "loss": 8.7482, + "step": 119400 + }, + { + "epoch": 0.5963195086019626, + "grad_norm": 0.09268827736377716, + "learning_rate": 1.214072943002328e-05, + "loss": 8.75, + "step": 119410 + }, + { + "epoch": 0.596369447426902, + "grad_norm": 0.09062253683805466, + "learning_rate": 1.213922751508173e-05, + "loss": 8.7481, + "step": 119420 + }, + { + "epoch": 0.5964193862518415, + "grad_norm": 0.08736632764339447, + "learning_rate": 1.213772560014018e-05, + "loss": 8.7588, + "step": 119430 + }, + { + "epoch": 0.596469325076781, + "grad_norm": 0.09273076057434082, + "learning_rate": 1.2136223685198628e-05, + "loss": 8.7602, + "step": 119440 + }, + { + "epoch": 0.5965192639017204, + "grad_norm": 0.0936063900589943, + "learning_rate": 1.2134721770257078e-05, + "loss": 8.7539, + "step": 119450 + }, + { + "epoch": 0.5965692027266598, + "grad_norm": 0.09075089544057846, + "learning_rate": 1.2133219855315528e-05, + "loss": 8.7523, + "step": 119460 + }, + { + "epoch": 0.5966191415515993, + "grad_norm": 0.09237466752529144, + "learning_rate": 1.2131717940373978e-05, + "loss": 8.7609, + "step": 119470 + }, + { + "epoch": 0.5966690803765388, + "grad_norm": 0.09360762685537338, + "learning_rate": 1.2130216025432427e-05, + "loss": 8.7495, + "step": 119480 + }, + { + "epoch": 0.5967190192014782, + "grad_norm": 0.09769395738840103, + "learning_rate": 1.2128714110490875e-05, + "loss": 8.7676, + "step": 119490 + }, + { + "epoch": 0.5967689580264176, + "grad_norm": 0.0937671959400177, + "learning_rate": 1.2127212195549325e-05, + "loss": 8.7536, + "step": 119500 + }, + { + "epoch": 0.5968188968513571, + "grad_norm": 0.09220635890960693, + "learning_rate": 1.2125710280607776e-05, + "loss": 8.7522, + "step": 119510 + }, + { + "epoch": 0.5968688356762966, + "grad_norm": 0.0912616029381752, + "learning_rate": 1.2124208365666226e-05, + "loss": 8.7607, + "step": 119520 + }, + { + "epoch": 0.596918774501236, + "grad_norm": 0.09352380782365799, + "learning_rate": 1.2122706450724674e-05, + "loss": 8.7563, + "step": 119530 + }, + { + "epoch": 0.5969687133261754, + "grad_norm": 0.09166228026151657, + "learning_rate": 1.2121204535783123e-05, + "loss": 8.7546, + "step": 119540 + }, + { + "epoch": 0.5970186521511149, + "grad_norm": 0.09042517840862274, + "learning_rate": 1.2119702620841573e-05, + "loss": 8.7494, + "step": 119550 + }, + { + "epoch": 0.5970685909760544, + "grad_norm": 0.09637751430273056, + "learning_rate": 1.2118200705900023e-05, + "loss": 8.7369, + "step": 119560 + }, + { + "epoch": 0.5971185298009938, + "grad_norm": 0.09652339667081833, + "learning_rate": 1.2116698790958473e-05, + "loss": 8.7499, + "step": 119570 + }, + { + "epoch": 0.5971684686259332, + "grad_norm": 0.08974555879831314, + "learning_rate": 1.2115196876016922e-05, + "loss": 8.7598, + "step": 119580 + }, + { + "epoch": 0.5972184074508727, + "grad_norm": 0.09256293624639511, + "learning_rate": 1.2113694961075372e-05, + "loss": 8.7288, + "step": 119590 + }, + { + "epoch": 0.5972683462758122, + "grad_norm": 0.09114140272140503, + "learning_rate": 1.211219304613382e-05, + "loss": 8.77, + "step": 119600 + }, + { + "epoch": 0.5973182851007516, + "grad_norm": 0.09448247402906418, + "learning_rate": 1.211069113119227e-05, + "loss": 8.7269, + "step": 119610 + }, + { + "epoch": 0.597368223925691, + "grad_norm": 0.09690418839454651, + "learning_rate": 1.210918921625072e-05, + "loss": 8.7554, + "step": 119620 + }, + { + "epoch": 0.5974181627506305, + "grad_norm": 0.09861066192388535, + "learning_rate": 1.210768730130917e-05, + "loss": 8.76, + "step": 119630 + }, + { + "epoch": 0.59746810157557, + "grad_norm": 0.09121115505695343, + "learning_rate": 1.210618538636762e-05, + "loss": 8.7617, + "step": 119640 + }, + { + "epoch": 0.5975180404005094, + "grad_norm": 0.09317606687545776, + "learning_rate": 1.2104683471426068e-05, + "loss": 8.7548, + "step": 119650 + }, + { + "epoch": 0.5975679792254488, + "grad_norm": 0.08856779336929321, + "learning_rate": 1.2103181556484518e-05, + "loss": 8.7495, + "step": 119660 + }, + { + "epoch": 0.5976179180503882, + "grad_norm": 0.09526504576206207, + "learning_rate": 1.2101679641542968e-05, + "loss": 8.7478, + "step": 119670 + }, + { + "epoch": 0.5976678568753278, + "grad_norm": 0.0859864354133606, + "learning_rate": 1.2100177726601417e-05, + "loss": 8.7426, + "step": 119680 + }, + { + "epoch": 0.5977177957002672, + "grad_norm": 0.08864925801753998, + "learning_rate": 1.2098675811659867e-05, + "loss": 8.7371, + "step": 119690 + }, + { + "epoch": 0.5977677345252066, + "grad_norm": 0.08991964906454086, + "learning_rate": 1.2097173896718315e-05, + "loss": 8.7372, + "step": 119700 + }, + { + "epoch": 0.597817673350146, + "grad_norm": 0.09090539813041687, + "learning_rate": 1.2095671981776766e-05, + "loss": 8.7734, + "step": 119710 + }, + { + "epoch": 0.5978676121750855, + "grad_norm": 0.09099775552749634, + "learning_rate": 1.2094170066835216e-05, + "loss": 8.7409, + "step": 119720 + }, + { + "epoch": 0.597917551000025, + "grad_norm": 0.08712445944547653, + "learning_rate": 1.2092668151893664e-05, + "loss": 8.7475, + "step": 119730 + }, + { + "epoch": 0.5979674898249644, + "grad_norm": 0.08881444483995438, + "learning_rate": 1.2091166236952114e-05, + "loss": 8.7383, + "step": 119740 + }, + { + "epoch": 0.5980174286499038, + "grad_norm": 0.0887836366891861, + "learning_rate": 1.2089664322010565e-05, + "loss": 8.7615, + "step": 119750 + }, + { + "epoch": 0.5980673674748433, + "grad_norm": 0.09183062613010406, + "learning_rate": 1.2088162407069013e-05, + "loss": 8.7624, + "step": 119760 + }, + { + "epoch": 0.5981173062997828, + "grad_norm": 0.09438557177782059, + "learning_rate": 1.2086660492127463e-05, + "loss": 8.7546, + "step": 119770 + }, + { + "epoch": 0.5981672451247222, + "grad_norm": 0.09165972471237183, + "learning_rate": 1.2085158577185912e-05, + "loss": 8.7515, + "step": 119780 + }, + { + "epoch": 0.5982171839496616, + "grad_norm": 0.0887795090675354, + "learning_rate": 1.2083656662244362e-05, + "loss": 8.7527, + "step": 119790 + }, + { + "epoch": 0.5982671227746011, + "grad_norm": 0.09268735349178314, + "learning_rate": 1.2082154747302812e-05, + "loss": 8.7633, + "step": 119800 + }, + { + "epoch": 0.5983170615995406, + "grad_norm": 0.09194294363260269, + "learning_rate": 1.208065283236126e-05, + "loss": 8.7406, + "step": 119810 + }, + { + "epoch": 0.59836700042448, + "grad_norm": 0.09086401760578156, + "learning_rate": 1.207915091741971e-05, + "loss": 8.7622, + "step": 119820 + }, + { + "epoch": 0.5984169392494194, + "grad_norm": 0.0923188179731369, + "learning_rate": 1.207764900247816e-05, + "loss": 8.7406, + "step": 119830 + }, + { + "epoch": 0.5984668780743589, + "grad_norm": 0.09538555890321732, + "learning_rate": 1.207614708753661e-05, + "loss": 8.7567, + "step": 119840 + }, + { + "epoch": 0.5985168168992984, + "grad_norm": 0.09424526989459991, + "learning_rate": 1.207464517259506e-05, + "loss": 8.7516, + "step": 119850 + }, + { + "epoch": 0.5985667557242378, + "grad_norm": 0.09127725660800934, + "learning_rate": 1.2073143257653508e-05, + "loss": 8.762, + "step": 119860 + }, + { + "epoch": 0.5986166945491772, + "grad_norm": 0.09290522336959839, + "learning_rate": 1.2071641342711958e-05, + "loss": 8.752, + "step": 119870 + }, + { + "epoch": 0.5986666333741167, + "grad_norm": 0.09055527299642563, + "learning_rate": 1.2070139427770407e-05, + "loss": 8.7407, + "step": 119880 + }, + { + "epoch": 0.5987165721990562, + "grad_norm": 0.09295734018087387, + "learning_rate": 1.2068637512828857e-05, + "loss": 8.7368, + "step": 119890 + }, + { + "epoch": 0.5987665110239956, + "grad_norm": 0.09094783663749695, + "learning_rate": 1.2067135597887307e-05, + "loss": 8.7513, + "step": 119900 + }, + { + "epoch": 0.598816449848935, + "grad_norm": 0.09828267991542816, + "learning_rate": 1.2065633682945757e-05, + "loss": 8.7433, + "step": 119910 + }, + { + "epoch": 0.5988663886738745, + "grad_norm": 0.10480784624814987, + "learning_rate": 1.2064131768004206e-05, + "loss": 8.729, + "step": 119920 + }, + { + "epoch": 0.598916327498814, + "grad_norm": 0.09408173710107803, + "learning_rate": 1.2062629853062654e-05, + "loss": 8.748, + "step": 119930 + }, + { + "epoch": 0.5989662663237534, + "grad_norm": 0.09466306120157242, + "learning_rate": 1.2061127938121104e-05, + "loss": 8.739, + "step": 119940 + }, + { + "epoch": 0.5990162051486928, + "grad_norm": 0.09082759916782379, + "learning_rate": 1.2059626023179555e-05, + "loss": 8.751, + "step": 119950 + }, + { + "epoch": 0.5990661439736323, + "grad_norm": 0.09120157361030579, + "learning_rate": 1.2058124108238005e-05, + "loss": 8.7516, + "step": 119960 + }, + { + "epoch": 0.5991160827985718, + "grad_norm": 0.08926384896039963, + "learning_rate": 1.2056622193296453e-05, + "loss": 8.7449, + "step": 119970 + }, + { + "epoch": 0.5991660216235112, + "grad_norm": 0.09408469498157501, + "learning_rate": 1.2055120278354902e-05, + "loss": 8.7444, + "step": 119980 + }, + { + "epoch": 0.5992159604484506, + "grad_norm": 0.09376782178878784, + "learning_rate": 1.2053618363413352e-05, + "loss": 8.7396, + "step": 119990 + }, + { + "epoch": 0.5992658992733901, + "grad_norm": 0.0928616002202034, + "learning_rate": 1.2052116448471802e-05, + "loss": 8.7482, + "step": 120000 + }, + { + "epoch": 0.5993158380983296, + "grad_norm": 0.09431707859039307, + "learning_rate": 1.2050614533530252e-05, + "loss": 8.7567, + "step": 120010 + }, + { + "epoch": 0.599365776923269, + "grad_norm": 0.08612378686666489, + "learning_rate": 1.20491126185887e-05, + "loss": 8.7719, + "step": 120020 + }, + { + "epoch": 0.5994157157482084, + "grad_norm": 0.08852338045835495, + "learning_rate": 1.204761070364715e-05, + "loss": 8.7456, + "step": 120030 + }, + { + "epoch": 0.5994656545731479, + "grad_norm": 0.09709616750478745, + "learning_rate": 1.20461087887056e-05, + "loss": 8.7524, + "step": 120040 + }, + { + "epoch": 0.5995155933980874, + "grad_norm": 0.08951878547668457, + "learning_rate": 1.204460687376405e-05, + "loss": 8.7406, + "step": 120050 + }, + { + "epoch": 0.5995655322230268, + "grad_norm": 0.08870140463113785, + "learning_rate": 1.20431049588225e-05, + "loss": 8.746, + "step": 120060 + }, + { + "epoch": 0.5996154710479662, + "grad_norm": 0.0954098254442215, + "learning_rate": 1.2041603043880948e-05, + "loss": 8.7421, + "step": 120070 + }, + { + "epoch": 0.5996654098729057, + "grad_norm": 0.09251780807971954, + "learning_rate": 1.2040101128939397e-05, + "loss": 8.726, + "step": 120080 + }, + { + "epoch": 0.5997153486978452, + "grad_norm": 0.09680180251598358, + "learning_rate": 1.2038599213997847e-05, + "loss": 8.7528, + "step": 120090 + }, + { + "epoch": 0.5997652875227846, + "grad_norm": 0.08500494062900543, + "learning_rate": 1.2037097299056297e-05, + "loss": 8.7571, + "step": 120100 + }, + { + "epoch": 0.599815226347724, + "grad_norm": 0.08889224380254745, + "learning_rate": 1.2035595384114747e-05, + "loss": 8.758, + "step": 120110 + }, + { + "epoch": 0.5998651651726635, + "grad_norm": 0.09139248728752136, + "learning_rate": 1.2034093469173198e-05, + "loss": 8.7463, + "step": 120120 + }, + { + "epoch": 0.599915103997603, + "grad_norm": 0.09338033199310303, + "learning_rate": 1.2032591554231644e-05, + "loss": 8.7608, + "step": 120130 + }, + { + "epoch": 0.5999650428225424, + "grad_norm": 0.09576421976089478, + "learning_rate": 1.2031089639290094e-05, + "loss": 8.7344, + "step": 120140 + }, + { + "epoch": 0.6000149816474818, + "grad_norm": 0.0958356186747551, + "learning_rate": 1.2029587724348545e-05, + "loss": 8.7332, + "step": 120150 + }, + { + "epoch": 0.6000649204724213, + "grad_norm": 0.10234443098306656, + "learning_rate": 1.2028085809406995e-05, + "loss": 8.7325, + "step": 120160 + }, + { + "epoch": 0.6001148592973607, + "grad_norm": 0.08857684582471848, + "learning_rate": 1.2026583894465445e-05, + "loss": 8.7573, + "step": 120170 + }, + { + "epoch": 0.6001647981223002, + "grad_norm": 0.09901963919401169, + "learning_rate": 1.2025081979523892e-05, + "loss": 8.7471, + "step": 120180 + }, + { + "epoch": 0.6002147369472396, + "grad_norm": 0.09748201072216034, + "learning_rate": 1.2023580064582342e-05, + "loss": 8.7285, + "step": 120190 + }, + { + "epoch": 0.6002646757721791, + "grad_norm": 0.09091214835643768, + "learning_rate": 1.2022078149640792e-05, + "loss": 8.7214, + "step": 120200 + }, + { + "epoch": 0.6003146145971185, + "grad_norm": 0.09007634967565536, + "learning_rate": 1.2020576234699242e-05, + "loss": 8.7256, + "step": 120210 + }, + { + "epoch": 0.600364553422058, + "grad_norm": 0.0863836258649826, + "learning_rate": 1.2019074319757693e-05, + "loss": 8.7418, + "step": 120220 + }, + { + "epoch": 0.6004144922469974, + "grad_norm": 0.09045583009719849, + "learning_rate": 1.201757240481614e-05, + "loss": 8.7637, + "step": 120230 + }, + { + "epoch": 0.6004644310719369, + "grad_norm": 0.09015191346406937, + "learning_rate": 1.201607048987459e-05, + "loss": 8.7327, + "step": 120240 + }, + { + "epoch": 0.6005143698968763, + "grad_norm": 0.09545261412858963, + "learning_rate": 1.201456857493304e-05, + "loss": 8.7351, + "step": 120250 + }, + { + "epoch": 0.6005643087218158, + "grad_norm": 0.09164721518754959, + "learning_rate": 1.201306665999149e-05, + "loss": 8.7474, + "step": 120260 + }, + { + "epoch": 0.6006142475467552, + "grad_norm": 0.09745185077190399, + "learning_rate": 1.201156474504994e-05, + "loss": 8.7403, + "step": 120270 + }, + { + "epoch": 0.6006641863716947, + "grad_norm": 0.08840557932853699, + "learning_rate": 1.2010062830108389e-05, + "loss": 8.7636, + "step": 120280 + }, + { + "epoch": 0.6007141251966341, + "grad_norm": 0.10116366297006607, + "learning_rate": 1.2008560915166837e-05, + "loss": 8.7441, + "step": 120290 + }, + { + "epoch": 0.6007640640215736, + "grad_norm": 0.09359109401702881, + "learning_rate": 1.2007059000225287e-05, + "loss": 8.7407, + "step": 120300 + }, + { + "epoch": 0.600814002846513, + "grad_norm": 0.09176816046237946, + "learning_rate": 1.2005557085283737e-05, + "loss": 8.7537, + "step": 120310 + }, + { + "epoch": 0.6008639416714525, + "grad_norm": 0.09760214388370514, + "learning_rate": 1.2004055170342188e-05, + "loss": 8.7465, + "step": 120320 + }, + { + "epoch": 0.6009138804963919, + "grad_norm": 0.09278165549039841, + "learning_rate": 1.2002553255400636e-05, + "loss": 8.737, + "step": 120330 + }, + { + "epoch": 0.6009638193213314, + "grad_norm": 0.09701333940029144, + "learning_rate": 1.2001051340459085e-05, + "loss": 8.7367, + "step": 120340 + }, + { + "epoch": 0.6010137581462708, + "grad_norm": 0.09638296812772751, + "learning_rate": 1.1999549425517535e-05, + "loss": 8.7463, + "step": 120350 + }, + { + "epoch": 0.6010636969712103, + "grad_norm": 0.09861765056848526, + "learning_rate": 1.1998047510575985e-05, + "loss": 8.7523, + "step": 120360 + }, + { + "epoch": 0.6011136357961497, + "grad_norm": 0.08915422856807709, + "learning_rate": 1.1996545595634435e-05, + "loss": 8.7447, + "step": 120370 + }, + { + "epoch": 0.6011635746210892, + "grad_norm": 0.09036835283041, + "learning_rate": 1.1995043680692884e-05, + "loss": 8.7562, + "step": 120380 + }, + { + "epoch": 0.6012135134460286, + "grad_norm": 0.08992304652929306, + "learning_rate": 1.1993541765751332e-05, + "loss": 8.7521, + "step": 120390 + }, + { + "epoch": 0.6012634522709681, + "grad_norm": 0.0943126529455185, + "learning_rate": 1.1992039850809782e-05, + "loss": 8.7471, + "step": 120400 + }, + { + "epoch": 0.6013133910959075, + "grad_norm": 0.09551472961902618, + "learning_rate": 1.1990537935868232e-05, + "loss": 8.7448, + "step": 120410 + }, + { + "epoch": 0.601363329920847, + "grad_norm": 0.08826518803834915, + "learning_rate": 1.1989036020926683e-05, + "loss": 8.7382, + "step": 120420 + }, + { + "epoch": 0.6014132687457864, + "grad_norm": 0.09639231860637665, + "learning_rate": 1.1987534105985131e-05, + "loss": 8.7435, + "step": 120430 + }, + { + "epoch": 0.6014632075707259, + "grad_norm": 0.09125574678182602, + "learning_rate": 1.1986032191043581e-05, + "loss": 8.7593, + "step": 120440 + }, + { + "epoch": 0.6015131463956653, + "grad_norm": 0.08899689465761185, + "learning_rate": 1.198453027610203e-05, + "loss": 8.761, + "step": 120450 + }, + { + "epoch": 0.6015630852206048, + "grad_norm": 0.09570741653442383, + "learning_rate": 1.198302836116048e-05, + "loss": 8.7517, + "step": 120460 + }, + { + "epoch": 0.6016130240455442, + "grad_norm": 0.09493447840213776, + "learning_rate": 1.198152644621893e-05, + "loss": 8.7354, + "step": 120470 + }, + { + "epoch": 0.6016629628704837, + "grad_norm": 0.09143593907356262, + "learning_rate": 1.1980024531277379e-05, + "loss": 8.7383, + "step": 120480 + }, + { + "epoch": 0.6017129016954231, + "grad_norm": 0.09237073361873627, + "learning_rate": 1.1978522616335829e-05, + "loss": 8.7553, + "step": 120490 + }, + { + "epoch": 0.6017628405203626, + "grad_norm": 0.0897151529788971, + "learning_rate": 1.1977020701394277e-05, + "loss": 8.7465, + "step": 120500 + }, + { + "epoch": 0.601812779345302, + "grad_norm": 0.08980879932641983, + "learning_rate": 1.1975518786452727e-05, + "loss": 8.7424, + "step": 120510 + }, + { + "epoch": 0.6018627181702415, + "grad_norm": 0.09323468804359436, + "learning_rate": 1.1974016871511178e-05, + "loss": 8.7519, + "step": 120520 + }, + { + "epoch": 0.6019126569951809, + "grad_norm": 0.092279814183712, + "learning_rate": 1.1972514956569626e-05, + "loss": 8.7398, + "step": 120530 + }, + { + "epoch": 0.6019625958201203, + "grad_norm": 0.09498938918113708, + "learning_rate": 1.1971013041628076e-05, + "loss": 8.7409, + "step": 120540 + }, + { + "epoch": 0.6020125346450598, + "grad_norm": 0.09677129238843918, + "learning_rate": 1.1969511126686525e-05, + "loss": 8.7443, + "step": 120550 + }, + { + "epoch": 0.6020624734699993, + "grad_norm": 0.0944163128733635, + "learning_rate": 1.1968009211744975e-05, + "loss": 8.745, + "step": 120560 + }, + { + "epoch": 0.6021124122949387, + "grad_norm": 0.08608414977788925, + "learning_rate": 1.1966507296803425e-05, + "loss": 8.7553, + "step": 120570 + }, + { + "epoch": 0.6021623511198781, + "grad_norm": 0.094789057970047, + "learning_rate": 1.1965005381861874e-05, + "loss": 8.7447, + "step": 120580 + }, + { + "epoch": 0.6022122899448176, + "grad_norm": 0.09222722053527832, + "learning_rate": 1.1963503466920324e-05, + "loss": 8.7381, + "step": 120590 + }, + { + "epoch": 0.6022622287697571, + "grad_norm": 0.08701804280281067, + "learning_rate": 1.1962001551978774e-05, + "loss": 8.7559, + "step": 120600 + }, + { + "epoch": 0.6023121675946965, + "grad_norm": 0.09150071442127228, + "learning_rate": 1.1960499637037222e-05, + "loss": 8.735, + "step": 120610 + }, + { + "epoch": 0.602362106419636, + "grad_norm": 0.09063006192445755, + "learning_rate": 1.1958997722095673e-05, + "loss": 8.7467, + "step": 120620 + }, + { + "epoch": 0.6024120452445754, + "grad_norm": 0.09188646078109741, + "learning_rate": 1.1957495807154121e-05, + "loss": 8.752, + "step": 120630 + }, + { + "epoch": 0.6024619840695149, + "grad_norm": 0.09145389497280121, + "learning_rate": 1.1955993892212571e-05, + "loss": 8.767, + "step": 120640 + }, + { + "epoch": 0.6025119228944543, + "grad_norm": 0.0874108150601387, + "learning_rate": 1.1954491977271021e-05, + "loss": 8.7427, + "step": 120650 + }, + { + "epoch": 0.6025618617193937, + "grad_norm": 0.09589160233736038, + "learning_rate": 1.195299006232947e-05, + "loss": 8.7374, + "step": 120660 + }, + { + "epoch": 0.6026118005443332, + "grad_norm": 0.0927971825003624, + "learning_rate": 1.195148814738792e-05, + "loss": 8.7538, + "step": 120670 + }, + { + "epoch": 0.6026617393692726, + "grad_norm": 0.09196257591247559, + "learning_rate": 1.1949986232446369e-05, + "loss": 8.7561, + "step": 120680 + }, + { + "epoch": 0.6027116781942121, + "grad_norm": 0.0907442569732666, + "learning_rate": 1.1948484317504819e-05, + "loss": 8.7335, + "step": 120690 + }, + { + "epoch": 0.6027616170191515, + "grad_norm": 0.09081535786390305, + "learning_rate": 1.1946982402563269e-05, + "loss": 8.7354, + "step": 120700 + }, + { + "epoch": 0.602811555844091, + "grad_norm": 0.0993124321103096, + "learning_rate": 1.1945480487621717e-05, + "loss": 8.7478, + "step": 120710 + }, + { + "epoch": 0.6028614946690304, + "grad_norm": 0.09617297351360321, + "learning_rate": 1.1943978572680168e-05, + "loss": 8.7466, + "step": 120720 + }, + { + "epoch": 0.6029114334939699, + "grad_norm": 0.09750398993492126, + "learning_rate": 1.1942476657738616e-05, + "loss": 8.7461, + "step": 120730 + }, + { + "epoch": 0.6029613723189093, + "grad_norm": 0.09524727612733841, + "learning_rate": 1.1940974742797066e-05, + "loss": 8.7452, + "step": 120740 + }, + { + "epoch": 0.6030113111438488, + "grad_norm": 0.08593402057886124, + "learning_rate": 1.1939472827855516e-05, + "loss": 8.7495, + "step": 120750 + }, + { + "epoch": 0.6030612499687882, + "grad_norm": 0.0903341993689537, + "learning_rate": 1.1937970912913967e-05, + "loss": 8.7438, + "step": 120760 + }, + { + "epoch": 0.6031111887937277, + "grad_norm": 0.09396269917488098, + "learning_rate": 1.1936468997972415e-05, + "loss": 8.7225, + "step": 120770 + }, + { + "epoch": 0.6031611276186671, + "grad_norm": 0.09335647523403168, + "learning_rate": 1.1934967083030864e-05, + "loss": 8.7488, + "step": 120780 + }, + { + "epoch": 0.6032110664436066, + "grad_norm": 0.09076468646526337, + "learning_rate": 1.1933465168089314e-05, + "loss": 8.7418, + "step": 120790 + }, + { + "epoch": 0.603261005268546, + "grad_norm": 0.09674550592899323, + "learning_rate": 1.1931963253147764e-05, + "loss": 8.7379, + "step": 120800 + }, + { + "epoch": 0.6033109440934855, + "grad_norm": 0.09369368851184845, + "learning_rate": 1.1930461338206214e-05, + "loss": 8.7453, + "step": 120810 + }, + { + "epoch": 0.6033608829184249, + "grad_norm": 0.09193939715623856, + "learning_rate": 1.1928959423264663e-05, + "loss": 8.7374, + "step": 120820 + }, + { + "epoch": 0.6034108217433644, + "grad_norm": 0.08715419471263885, + "learning_rate": 1.1927457508323111e-05, + "loss": 8.7427, + "step": 120830 + }, + { + "epoch": 0.6034607605683038, + "grad_norm": 0.0879080593585968, + "learning_rate": 1.1925955593381561e-05, + "loss": 8.7488, + "step": 120840 + }, + { + "epoch": 0.6035106993932433, + "grad_norm": 0.09147755801677704, + "learning_rate": 1.1924453678440011e-05, + "loss": 8.7333, + "step": 120850 + }, + { + "epoch": 0.6035606382181827, + "grad_norm": 0.09098514169454575, + "learning_rate": 1.1922951763498462e-05, + "loss": 8.7172, + "step": 120860 + }, + { + "epoch": 0.6036105770431222, + "grad_norm": 0.08796340972185135, + "learning_rate": 1.192144984855691e-05, + "loss": 8.7379, + "step": 120870 + }, + { + "epoch": 0.6036605158680616, + "grad_norm": 0.09227326512336731, + "learning_rate": 1.1919947933615359e-05, + "loss": 8.739, + "step": 120880 + }, + { + "epoch": 0.6037104546930011, + "grad_norm": 0.09190311282873154, + "learning_rate": 1.1918446018673809e-05, + "loss": 8.7631, + "step": 120890 + }, + { + "epoch": 0.6037603935179405, + "grad_norm": 0.09431488811969757, + "learning_rate": 1.1916944103732259e-05, + "loss": 8.7499, + "step": 120900 + }, + { + "epoch": 0.60381033234288, + "grad_norm": 0.09246110171079636, + "learning_rate": 1.1915442188790709e-05, + "loss": 8.7489, + "step": 120910 + }, + { + "epoch": 0.6038602711678194, + "grad_norm": 0.093075692653656, + "learning_rate": 1.191394027384916e-05, + "loss": 8.7325, + "step": 120920 + }, + { + "epoch": 0.6039102099927589, + "grad_norm": 0.09103310108184814, + "learning_rate": 1.1912438358907606e-05, + "loss": 8.7337, + "step": 120930 + }, + { + "epoch": 0.6039601488176983, + "grad_norm": 0.1019456684589386, + "learning_rate": 1.1910936443966056e-05, + "loss": 8.7356, + "step": 120940 + }, + { + "epoch": 0.6040100876426377, + "grad_norm": 0.08999264985322952, + "learning_rate": 1.1909434529024506e-05, + "loss": 8.7455, + "step": 120950 + }, + { + "epoch": 0.6040600264675772, + "grad_norm": 0.09247510880231857, + "learning_rate": 1.1907932614082957e-05, + "loss": 8.7358, + "step": 120960 + }, + { + "epoch": 0.6041099652925167, + "grad_norm": 0.0908162072300911, + "learning_rate": 1.1906430699141407e-05, + "loss": 8.7354, + "step": 120970 + }, + { + "epoch": 0.6041599041174561, + "grad_norm": 0.09764672070741653, + "learning_rate": 1.1904928784199855e-05, + "loss": 8.7378, + "step": 120980 + }, + { + "epoch": 0.6042098429423955, + "grad_norm": 0.09343189001083374, + "learning_rate": 1.1903426869258304e-05, + "loss": 8.7326, + "step": 120990 + }, + { + "epoch": 0.604259781767335, + "grad_norm": 0.0914471447467804, + "learning_rate": 1.1901924954316754e-05, + "loss": 8.7343, + "step": 121000 + }, + { + "epoch": 0.6043097205922745, + "grad_norm": 0.09239111840724945, + "learning_rate": 1.1900423039375204e-05, + "loss": 8.7356, + "step": 121010 + }, + { + "epoch": 0.6043596594172139, + "grad_norm": 0.09467372298240662, + "learning_rate": 1.1898921124433654e-05, + "loss": 8.7418, + "step": 121020 + }, + { + "epoch": 0.6044095982421533, + "grad_norm": 0.09846384823322296, + "learning_rate": 1.1897419209492103e-05, + "loss": 8.7513, + "step": 121030 + }, + { + "epoch": 0.6044595370670928, + "grad_norm": 0.09008269757032394, + "learning_rate": 1.1895917294550551e-05, + "loss": 8.7377, + "step": 121040 + }, + { + "epoch": 0.6045094758920323, + "grad_norm": 0.0888042226433754, + "learning_rate": 1.1894415379609001e-05, + "loss": 8.7362, + "step": 121050 + }, + { + "epoch": 0.6045594147169717, + "grad_norm": 0.09224848449230194, + "learning_rate": 1.1892913464667452e-05, + "loss": 8.768, + "step": 121060 + }, + { + "epoch": 0.6046093535419111, + "grad_norm": 0.0956854522228241, + "learning_rate": 1.1891411549725902e-05, + "loss": 8.7352, + "step": 121070 + }, + { + "epoch": 0.6046592923668506, + "grad_norm": 0.09198862314224243, + "learning_rate": 1.188990963478435e-05, + "loss": 8.7352, + "step": 121080 + }, + { + "epoch": 0.6047092311917901, + "grad_norm": 0.09782185405492783, + "learning_rate": 1.1888407719842799e-05, + "loss": 8.7306, + "step": 121090 + }, + { + "epoch": 0.6047591700167295, + "grad_norm": 0.0954626128077507, + "learning_rate": 1.1886905804901249e-05, + "loss": 8.7577, + "step": 121100 + }, + { + "epoch": 0.6048091088416689, + "grad_norm": 0.0895036831498146, + "learning_rate": 1.1885403889959699e-05, + "loss": 8.7356, + "step": 121110 + }, + { + "epoch": 0.6048590476666084, + "grad_norm": 0.09198148548603058, + "learning_rate": 1.188390197501815e-05, + "loss": 8.743, + "step": 121120 + }, + { + "epoch": 0.6049089864915479, + "grad_norm": 0.0872054398059845, + "learning_rate": 1.18824000600766e-05, + "loss": 8.7409, + "step": 121130 + }, + { + "epoch": 0.6049589253164873, + "grad_norm": 0.0946134701371193, + "learning_rate": 1.1880898145135046e-05, + "loss": 8.7335, + "step": 121140 + }, + { + "epoch": 0.6050088641414267, + "grad_norm": 0.08696244657039642, + "learning_rate": 1.1879396230193496e-05, + "loss": 8.7231, + "step": 121150 + }, + { + "epoch": 0.6050588029663662, + "grad_norm": 0.09812266379594803, + "learning_rate": 1.1877894315251947e-05, + "loss": 8.7275, + "step": 121160 + }, + { + "epoch": 0.6051087417913057, + "grad_norm": 0.09273399412631989, + "learning_rate": 1.1876392400310397e-05, + "loss": 8.738, + "step": 121170 + }, + { + "epoch": 0.6051586806162451, + "grad_norm": 0.09106171131134033, + "learning_rate": 1.1874890485368847e-05, + "loss": 8.7407, + "step": 121180 + }, + { + "epoch": 0.6052086194411845, + "grad_norm": 0.08590860664844513, + "learning_rate": 1.1873388570427294e-05, + "loss": 8.743, + "step": 121190 + }, + { + "epoch": 0.605258558266124, + "grad_norm": 0.09189659357070923, + "learning_rate": 1.1871886655485744e-05, + "loss": 8.7424, + "step": 121200 + }, + { + "epoch": 0.6053084970910635, + "grad_norm": 0.10102283954620361, + "learning_rate": 1.1870384740544194e-05, + "loss": 8.7519, + "step": 121210 + }, + { + "epoch": 0.6053584359160029, + "grad_norm": 0.09137275069952011, + "learning_rate": 1.1868882825602644e-05, + "loss": 8.7475, + "step": 121220 + }, + { + "epoch": 0.6054083747409423, + "grad_norm": 0.09499847143888474, + "learning_rate": 1.1867380910661094e-05, + "loss": 8.7518, + "step": 121230 + }, + { + "epoch": 0.6054583135658818, + "grad_norm": 0.09718917310237885, + "learning_rate": 1.1865878995719543e-05, + "loss": 8.7258, + "step": 121240 + }, + { + "epoch": 0.6055082523908213, + "grad_norm": 0.08984164148569107, + "learning_rate": 1.1864377080777991e-05, + "loss": 8.751, + "step": 121250 + }, + { + "epoch": 0.6055581912157607, + "grad_norm": 0.09550242125988007, + "learning_rate": 1.1862875165836442e-05, + "loss": 8.7504, + "step": 121260 + }, + { + "epoch": 0.6056081300407001, + "grad_norm": 0.09198052436113358, + "learning_rate": 1.1861373250894892e-05, + "loss": 8.7366, + "step": 121270 + }, + { + "epoch": 0.6056580688656396, + "grad_norm": 0.09440373629331589, + "learning_rate": 1.1859871335953342e-05, + "loss": 8.7489, + "step": 121280 + }, + { + "epoch": 0.6057080076905791, + "grad_norm": 0.09069590270519257, + "learning_rate": 1.185836942101179e-05, + "loss": 8.7381, + "step": 121290 + }, + { + "epoch": 0.6057579465155185, + "grad_norm": 0.09249866008758545, + "learning_rate": 1.1856867506070239e-05, + "loss": 8.7219, + "step": 121300 + }, + { + "epoch": 0.6058078853404579, + "grad_norm": 0.09518305212259293, + "learning_rate": 1.1855365591128689e-05, + "loss": 8.743, + "step": 121310 + }, + { + "epoch": 0.6058578241653974, + "grad_norm": 0.09274056553840637, + "learning_rate": 1.185386367618714e-05, + "loss": 8.7459, + "step": 121320 + }, + { + "epoch": 0.6059077629903369, + "grad_norm": 0.09070045500993729, + "learning_rate": 1.185236176124559e-05, + "loss": 8.7359, + "step": 121330 + }, + { + "epoch": 0.6059577018152763, + "grad_norm": 0.09082917869091034, + "learning_rate": 1.1850859846304038e-05, + "loss": 8.742, + "step": 121340 + }, + { + "epoch": 0.6060076406402157, + "grad_norm": 0.09195704758167267, + "learning_rate": 1.1849357931362486e-05, + "loss": 8.7441, + "step": 121350 + }, + { + "epoch": 0.6060575794651551, + "grad_norm": 0.0921049565076828, + "learning_rate": 1.1847856016420937e-05, + "loss": 8.73, + "step": 121360 + }, + { + "epoch": 0.6061075182900947, + "grad_norm": 0.09024003148078918, + "learning_rate": 1.1846354101479387e-05, + "loss": 8.7474, + "step": 121370 + }, + { + "epoch": 0.6061574571150341, + "grad_norm": 0.09810420870780945, + "learning_rate": 1.1844852186537837e-05, + "loss": 8.7297, + "step": 121380 + }, + { + "epoch": 0.6062073959399735, + "grad_norm": 0.08957557380199432, + "learning_rate": 1.1843350271596285e-05, + "loss": 8.7334, + "step": 121390 + }, + { + "epoch": 0.606257334764913, + "grad_norm": 0.095646932721138, + "learning_rate": 1.1841848356654736e-05, + "loss": 8.7491, + "step": 121400 + }, + { + "epoch": 0.6063072735898525, + "grad_norm": 0.09269688278436661, + "learning_rate": 1.1840346441713184e-05, + "loss": 8.7294, + "step": 121410 + }, + { + "epoch": 0.6063572124147919, + "grad_norm": 0.09186460822820663, + "learning_rate": 1.1838844526771634e-05, + "loss": 8.7326, + "step": 121420 + }, + { + "epoch": 0.6064071512397313, + "grad_norm": 0.09095993638038635, + "learning_rate": 1.1837342611830084e-05, + "loss": 8.7583, + "step": 121430 + }, + { + "epoch": 0.6064570900646707, + "grad_norm": 0.0951274111866951, + "learning_rate": 1.1835840696888533e-05, + "loss": 8.7412, + "step": 121440 + }, + { + "epoch": 0.6065070288896103, + "grad_norm": 0.09069649875164032, + "learning_rate": 1.1834338781946983e-05, + "loss": 8.7235, + "step": 121450 + }, + { + "epoch": 0.6065569677145497, + "grad_norm": 0.09192630648612976, + "learning_rate": 1.1832836867005432e-05, + "loss": 8.7422, + "step": 121460 + }, + { + "epoch": 0.6066069065394891, + "grad_norm": 0.09435165673494339, + "learning_rate": 1.1831334952063882e-05, + "loss": 8.7295, + "step": 121470 + }, + { + "epoch": 0.6066568453644285, + "grad_norm": 0.09608633071184158, + "learning_rate": 1.1829833037122332e-05, + "loss": 8.7352, + "step": 121480 + }, + { + "epoch": 0.6067067841893681, + "grad_norm": 0.09027303010225296, + "learning_rate": 1.182833112218078e-05, + "loss": 8.7404, + "step": 121490 + }, + { + "epoch": 0.6067567230143075, + "grad_norm": 0.08934234827756882, + "learning_rate": 1.182682920723923e-05, + "loss": 8.7349, + "step": 121500 + }, + { + "epoch": 0.6068066618392469, + "grad_norm": 0.09478524327278137, + "learning_rate": 1.1825327292297679e-05, + "loss": 8.754, + "step": 121510 + }, + { + "epoch": 0.6068566006641863, + "grad_norm": 0.09414006769657135, + "learning_rate": 1.182382537735613e-05, + "loss": 8.7425, + "step": 121520 + }, + { + "epoch": 0.6069065394891259, + "grad_norm": 0.09682093560695648, + "learning_rate": 1.182232346241458e-05, + "loss": 8.7536, + "step": 121530 + }, + { + "epoch": 0.6069564783140653, + "grad_norm": 0.09124701470136642, + "learning_rate": 1.1820821547473028e-05, + "loss": 8.7449, + "step": 121540 + }, + { + "epoch": 0.6070064171390047, + "grad_norm": 0.08872292190790176, + "learning_rate": 1.1819319632531478e-05, + "loss": 8.7353, + "step": 121550 + }, + { + "epoch": 0.6070563559639441, + "grad_norm": 0.09001273661851883, + "learning_rate": 1.1817817717589928e-05, + "loss": 8.7428, + "step": 121560 + }, + { + "epoch": 0.6071062947888837, + "grad_norm": 0.08708401024341583, + "learning_rate": 1.1816315802648377e-05, + "loss": 8.7396, + "step": 121570 + }, + { + "epoch": 0.6071562336138231, + "grad_norm": 0.09001478552818298, + "learning_rate": 1.1814813887706827e-05, + "loss": 8.7425, + "step": 121580 + }, + { + "epoch": 0.6072061724387625, + "grad_norm": 0.0907810628414154, + "learning_rate": 1.1813311972765275e-05, + "loss": 8.7552, + "step": 121590 + }, + { + "epoch": 0.6072561112637019, + "grad_norm": 0.08939887583255768, + "learning_rate": 1.1811810057823726e-05, + "loss": 8.7563, + "step": 121600 + }, + { + "epoch": 0.6073060500886415, + "grad_norm": 0.09514924883842468, + "learning_rate": 1.1810308142882176e-05, + "loss": 8.7546, + "step": 121610 + }, + { + "epoch": 0.6073559889135809, + "grad_norm": 0.09000418335199356, + "learning_rate": 1.1808806227940624e-05, + "loss": 8.7427, + "step": 121620 + }, + { + "epoch": 0.6074059277385203, + "grad_norm": 0.09303897619247437, + "learning_rate": 1.1807304312999075e-05, + "loss": 8.7311, + "step": 121630 + }, + { + "epoch": 0.6074558665634597, + "grad_norm": 0.09306817501783371, + "learning_rate": 1.1805802398057523e-05, + "loss": 8.7395, + "step": 121640 + }, + { + "epoch": 0.6075058053883992, + "grad_norm": 0.08681345731019974, + "learning_rate": 1.1804300483115973e-05, + "loss": 8.7435, + "step": 121650 + }, + { + "epoch": 0.6075557442133387, + "grad_norm": 0.09274129569530487, + "learning_rate": 1.1802798568174423e-05, + "loss": 8.7352, + "step": 121660 + }, + { + "epoch": 0.6076056830382781, + "grad_norm": 0.090678870677948, + "learning_rate": 1.1801296653232872e-05, + "loss": 8.7323, + "step": 121670 + }, + { + "epoch": 0.6076556218632175, + "grad_norm": 0.09506350755691528, + "learning_rate": 1.1799794738291322e-05, + "loss": 8.7349, + "step": 121680 + }, + { + "epoch": 0.607705560688157, + "grad_norm": 0.09269612282514572, + "learning_rate": 1.179829282334977e-05, + "loss": 8.7393, + "step": 121690 + }, + { + "epoch": 0.6077554995130965, + "grad_norm": 0.09288372099399567, + "learning_rate": 1.179679090840822e-05, + "loss": 8.7256, + "step": 121700 + }, + { + "epoch": 0.6078054383380359, + "grad_norm": 0.09377559274435043, + "learning_rate": 1.179528899346667e-05, + "loss": 8.7359, + "step": 121710 + }, + { + "epoch": 0.6078553771629753, + "grad_norm": 0.088566854596138, + "learning_rate": 1.1793787078525121e-05, + "loss": 8.7409, + "step": 121720 + }, + { + "epoch": 0.6079053159879148, + "grad_norm": 0.09255025535821915, + "learning_rate": 1.179228516358357e-05, + "loss": 8.7415, + "step": 121730 + }, + { + "epoch": 0.6079552548128543, + "grad_norm": 0.09494113177061081, + "learning_rate": 1.1790783248642018e-05, + "loss": 8.7397, + "step": 121740 + }, + { + "epoch": 0.6080051936377937, + "grad_norm": 0.08850625157356262, + "learning_rate": 1.1789281333700468e-05, + "loss": 8.7394, + "step": 121750 + }, + { + "epoch": 0.6080551324627331, + "grad_norm": 0.09062092751264572, + "learning_rate": 1.1787779418758918e-05, + "loss": 8.7512, + "step": 121760 + }, + { + "epoch": 0.6081050712876725, + "grad_norm": 0.0890820100903511, + "learning_rate": 1.1786277503817369e-05, + "loss": 8.7326, + "step": 121770 + }, + { + "epoch": 0.6081550101126121, + "grad_norm": 0.09788678586483002, + "learning_rate": 1.1784775588875817e-05, + "loss": 8.7336, + "step": 121780 + }, + { + "epoch": 0.6082049489375515, + "grad_norm": 0.09738252311944962, + "learning_rate": 1.1783273673934265e-05, + "loss": 8.7479, + "step": 121790 + }, + { + "epoch": 0.6082548877624909, + "grad_norm": 0.09170432388782501, + "learning_rate": 1.1781771758992716e-05, + "loss": 8.7248, + "step": 121800 + }, + { + "epoch": 0.6083048265874303, + "grad_norm": 0.0888880267739296, + "learning_rate": 1.1780269844051166e-05, + "loss": 8.7402, + "step": 121810 + }, + { + "epoch": 0.6083547654123699, + "grad_norm": 0.09058916568756104, + "learning_rate": 1.1778767929109616e-05, + "loss": 8.7334, + "step": 121820 + }, + { + "epoch": 0.6084047042373093, + "grad_norm": 0.09217052161693573, + "learning_rate": 1.1777266014168065e-05, + "loss": 8.7499, + "step": 121830 + }, + { + "epoch": 0.6084546430622487, + "grad_norm": 0.09062588959932327, + "learning_rate": 1.1775764099226513e-05, + "loss": 8.7287, + "step": 121840 + }, + { + "epoch": 0.6085045818871881, + "grad_norm": 0.09828715771436691, + "learning_rate": 1.1774262184284963e-05, + "loss": 8.7276, + "step": 121850 + }, + { + "epoch": 0.6085545207121277, + "grad_norm": 0.08966988325119019, + "learning_rate": 1.1772760269343413e-05, + "loss": 8.7422, + "step": 121860 + }, + { + "epoch": 0.6086044595370671, + "grad_norm": 0.08841178566217422, + "learning_rate": 1.1771258354401864e-05, + "loss": 8.748, + "step": 121870 + }, + { + "epoch": 0.6086543983620065, + "grad_norm": 0.08782627433538437, + "learning_rate": 1.1769756439460314e-05, + "loss": 8.7351, + "step": 121880 + }, + { + "epoch": 0.6087043371869459, + "grad_norm": 0.08963312953710556, + "learning_rate": 1.176825452451876e-05, + "loss": 8.7342, + "step": 121890 + }, + { + "epoch": 0.6087542760118855, + "grad_norm": 0.08687534928321838, + "learning_rate": 1.176675260957721e-05, + "loss": 8.7416, + "step": 121900 + }, + { + "epoch": 0.6088042148368249, + "grad_norm": 0.08947011828422546, + "learning_rate": 1.1765250694635661e-05, + "loss": 8.7506, + "step": 121910 + }, + { + "epoch": 0.6088541536617643, + "grad_norm": 0.09872882813215256, + "learning_rate": 1.1763748779694111e-05, + "loss": 8.7542, + "step": 121920 + }, + { + "epoch": 0.6089040924867037, + "grad_norm": 0.09389559924602509, + "learning_rate": 1.1762246864752561e-05, + "loss": 8.7213, + "step": 121930 + }, + { + "epoch": 0.6089540313116433, + "grad_norm": 0.09409285336732864, + "learning_rate": 1.1760744949811008e-05, + "loss": 8.7131, + "step": 121940 + }, + { + "epoch": 0.6090039701365827, + "grad_norm": 0.08693234622478485, + "learning_rate": 1.1759243034869458e-05, + "loss": 8.7416, + "step": 121950 + }, + { + "epoch": 0.6090539089615221, + "grad_norm": 0.09217488020658493, + "learning_rate": 1.1757741119927908e-05, + "loss": 8.7226, + "step": 121960 + }, + { + "epoch": 0.6091038477864615, + "grad_norm": 0.09177994728088379, + "learning_rate": 1.1756239204986359e-05, + "loss": 8.7294, + "step": 121970 + }, + { + "epoch": 0.6091537866114011, + "grad_norm": 0.09113775938749313, + "learning_rate": 1.1754737290044809e-05, + "loss": 8.7382, + "step": 121980 + }, + { + "epoch": 0.6092037254363405, + "grad_norm": 0.09432590007781982, + "learning_rate": 1.1753235375103256e-05, + "loss": 8.7495, + "step": 121990 + }, + { + "epoch": 0.6092536642612799, + "grad_norm": 0.09742003679275513, + "learning_rate": 1.1751733460161706e-05, + "loss": 8.7237, + "step": 122000 + }, + { + "epoch": 0.6093036030862193, + "grad_norm": 0.09325713664293289, + "learning_rate": 1.1750231545220156e-05, + "loss": 8.7452, + "step": 122010 + }, + { + "epoch": 0.6093535419111589, + "grad_norm": 0.09275127202272415, + "learning_rate": 1.1748729630278606e-05, + "loss": 8.7461, + "step": 122020 + }, + { + "epoch": 0.6094034807360983, + "grad_norm": 0.09381189197301865, + "learning_rate": 1.1747227715337056e-05, + "loss": 8.7323, + "step": 122030 + }, + { + "epoch": 0.6094534195610377, + "grad_norm": 0.09303104132413864, + "learning_rate": 1.1745725800395505e-05, + "loss": 8.7343, + "step": 122040 + }, + { + "epoch": 0.6095033583859771, + "grad_norm": 0.09670752286911011, + "learning_rate": 1.1744223885453953e-05, + "loss": 8.7394, + "step": 122050 + }, + { + "epoch": 0.6095532972109167, + "grad_norm": 0.09118695557117462, + "learning_rate": 1.1742721970512403e-05, + "loss": 8.7528, + "step": 122060 + }, + { + "epoch": 0.6096032360358561, + "grad_norm": 0.09087878465652466, + "learning_rate": 1.1741220055570854e-05, + "loss": 8.7145, + "step": 122070 + }, + { + "epoch": 0.6096531748607955, + "grad_norm": 0.0934610515832901, + "learning_rate": 1.1739718140629304e-05, + "loss": 8.7274, + "step": 122080 + }, + { + "epoch": 0.6097031136857349, + "grad_norm": 0.0913190245628357, + "learning_rate": 1.1738216225687752e-05, + "loss": 8.7205, + "step": 122090 + }, + { + "epoch": 0.6097530525106745, + "grad_norm": 0.09773539006710052, + "learning_rate": 1.17367143107462e-05, + "loss": 8.738, + "step": 122100 + }, + { + "epoch": 0.6098029913356139, + "grad_norm": 0.09631845355033875, + "learning_rate": 1.1735212395804651e-05, + "loss": 8.7265, + "step": 122110 + }, + { + "epoch": 0.6098529301605533, + "grad_norm": 0.08930249512195587, + "learning_rate": 1.1733710480863101e-05, + "loss": 8.7458, + "step": 122120 + }, + { + "epoch": 0.6099028689854927, + "grad_norm": 0.08900859951972961, + "learning_rate": 1.1732208565921551e-05, + "loss": 8.7685, + "step": 122130 + }, + { + "epoch": 0.6099528078104323, + "grad_norm": 0.0874401181936264, + "learning_rate": 1.173070665098e-05, + "loss": 8.739, + "step": 122140 + }, + { + "epoch": 0.6100027466353717, + "grad_norm": 0.10340723395347595, + "learning_rate": 1.1729204736038448e-05, + "loss": 8.7406, + "step": 122150 + }, + { + "epoch": 0.6100526854603111, + "grad_norm": 0.09558750689029694, + "learning_rate": 1.1727702821096898e-05, + "loss": 8.7341, + "step": 122160 + }, + { + "epoch": 0.6101026242852505, + "grad_norm": 0.09469115734100342, + "learning_rate": 1.1726200906155349e-05, + "loss": 8.7295, + "step": 122170 + }, + { + "epoch": 0.6101525631101901, + "grad_norm": 0.09226387739181519, + "learning_rate": 1.1724698991213799e-05, + "loss": 8.7433, + "step": 122180 + }, + { + "epoch": 0.6102025019351295, + "grad_norm": 0.09578849375247955, + "learning_rate": 1.1723197076272247e-05, + "loss": 8.7148, + "step": 122190 + }, + { + "epoch": 0.6102524407600689, + "grad_norm": 0.08702340722084045, + "learning_rate": 1.1721695161330697e-05, + "loss": 8.7389, + "step": 122200 + }, + { + "epoch": 0.6103023795850083, + "grad_norm": 0.0902688279747963, + "learning_rate": 1.1720193246389146e-05, + "loss": 8.7441, + "step": 122210 + }, + { + "epoch": 0.6103523184099479, + "grad_norm": 0.08830910176038742, + "learning_rate": 1.1718691331447596e-05, + "loss": 8.7218, + "step": 122220 + }, + { + "epoch": 0.6104022572348873, + "grad_norm": 0.09016358107328415, + "learning_rate": 1.1717189416506046e-05, + "loss": 8.7398, + "step": 122230 + }, + { + "epoch": 0.6104521960598267, + "grad_norm": 0.09306544810533524, + "learning_rate": 1.1715687501564495e-05, + "loss": 8.7381, + "step": 122240 + }, + { + "epoch": 0.6105021348847661, + "grad_norm": 0.09133855253458023, + "learning_rate": 1.1714185586622945e-05, + "loss": 8.7466, + "step": 122250 + }, + { + "epoch": 0.6105520737097057, + "grad_norm": 0.09112872183322906, + "learning_rate": 1.1712683671681393e-05, + "loss": 8.7304, + "step": 122260 + }, + { + "epoch": 0.6106020125346451, + "grad_norm": 0.08973613381385803, + "learning_rate": 1.1711181756739844e-05, + "loss": 8.7265, + "step": 122270 + }, + { + "epoch": 0.6106519513595845, + "grad_norm": 0.08785367757081985, + "learning_rate": 1.1709679841798294e-05, + "loss": 8.7227, + "step": 122280 + }, + { + "epoch": 0.6107018901845239, + "grad_norm": 0.09560448676347733, + "learning_rate": 1.1708177926856742e-05, + "loss": 8.7347, + "step": 122290 + }, + { + "epoch": 0.6107518290094635, + "grad_norm": 0.09626337140798569, + "learning_rate": 1.1706676011915192e-05, + "loss": 8.7301, + "step": 122300 + }, + { + "epoch": 0.6108017678344029, + "grad_norm": 0.08685650676488876, + "learning_rate": 1.1705174096973641e-05, + "loss": 8.7399, + "step": 122310 + }, + { + "epoch": 0.6108517066593423, + "grad_norm": 0.09066285192966461, + "learning_rate": 1.1703672182032091e-05, + "loss": 8.7531, + "step": 122320 + }, + { + "epoch": 0.6109016454842817, + "grad_norm": 0.09555506706237793, + "learning_rate": 1.1702170267090541e-05, + "loss": 8.7348, + "step": 122330 + }, + { + "epoch": 0.6109515843092213, + "grad_norm": 0.09356008470058441, + "learning_rate": 1.170066835214899e-05, + "loss": 8.7236, + "step": 122340 + }, + { + "epoch": 0.6110015231341607, + "grad_norm": 0.0910077691078186, + "learning_rate": 1.169916643720744e-05, + "loss": 8.7322, + "step": 122350 + }, + { + "epoch": 0.6110514619591001, + "grad_norm": 0.09046470373868942, + "learning_rate": 1.169766452226589e-05, + "loss": 8.7302, + "step": 122360 + }, + { + "epoch": 0.6111014007840395, + "grad_norm": 0.09235422313213348, + "learning_rate": 1.1696162607324339e-05, + "loss": 8.7346, + "step": 122370 + }, + { + "epoch": 0.611151339608979, + "grad_norm": 0.09250746667385101, + "learning_rate": 1.1694660692382789e-05, + "loss": 8.747, + "step": 122380 + }, + { + "epoch": 0.6112012784339185, + "grad_norm": 0.09652043879032135, + "learning_rate": 1.1693158777441237e-05, + "loss": 8.7317, + "step": 122390 + }, + { + "epoch": 0.6112512172588579, + "grad_norm": 0.0955178514122963, + "learning_rate": 1.1691656862499687e-05, + "loss": 8.7403, + "step": 122400 + }, + { + "epoch": 0.6113011560837973, + "grad_norm": 0.08899587392807007, + "learning_rate": 1.1690154947558138e-05, + "loss": 8.7483, + "step": 122410 + }, + { + "epoch": 0.6113510949087368, + "grad_norm": 0.08871007710695267, + "learning_rate": 1.1688653032616586e-05, + "loss": 8.7446, + "step": 122420 + }, + { + "epoch": 0.6114010337336763, + "grad_norm": 0.09327740967273712, + "learning_rate": 1.1687151117675036e-05, + "loss": 8.7326, + "step": 122430 + }, + { + "epoch": 0.6114509725586157, + "grad_norm": 0.09347925335168839, + "learning_rate": 1.1685649202733485e-05, + "loss": 8.7262, + "step": 122440 + }, + { + "epoch": 0.6115009113835551, + "grad_norm": 0.08919844776391983, + "learning_rate": 1.1684147287791935e-05, + "loss": 8.7354, + "step": 122450 + }, + { + "epoch": 0.6115508502084946, + "grad_norm": 0.09506843984127045, + "learning_rate": 1.1682645372850385e-05, + "loss": 8.7218, + "step": 122460 + }, + { + "epoch": 0.6116007890334341, + "grad_norm": 0.09201342612504959, + "learning_rate": 1.1681143457908834e-05, + "loss": 8.7338, + "step": 122470 + }, + { + "epoch": 0.6116507278583735, + "grad_norm": 0.09908469021320343, + "learning_rate": 1.1679641542967284e-05, + "loss": 8.7404, + "step": 122480 + }, + { + "epoch": 0.6117006666833129, + "grad_norm": 0.09065460413694382, + "learning_rate": 1.1678139628025732e-05, + "loss": 8.7288, + "step": 122490 + }, + { + "epoch": 0.6117506055082524, + "grad_norm": 0.09107660502195358, + "learning_rate": 1.1676637713084182e-05, + "loss": 8.7291, + "step": 122500 + }, + { + "epoch": 0.6118005443331919, + "grad_norm": 0.0926177054643631, + "learning_rate": 1.1675135798142633e-05, + "loss": 8.734, + "step": 122510 + }, + { + "epoch": 0.6118504831581313, + "grad_norm": 0.09085045754909515, + "learning_rate": 1.1673633883201083e-05, + "loss": 8.7309, + "step": 122520 + }, + { + "epoch": 0.6119004219830707, + "grad_norm": 0.09174468368291855, + "learning_rate": 1.1672131968259531e-05, + "loss": 8.7436, + "step": 122530 + }, + { + "epoch": 0.6119503608080102, + "grad_norm": 0.09456495195627213, + "learning_rate": 1.167063005331798e-05, + "loss": 8.7272, + "step": 122540 + }, + { + "epoch": 0.6120002996329497, + "grad_norm": 0.09345948696136475, + "learning_rate": 1.166912813837643e-05, + "loss": 8.7226, + "step": 122550 + }, + { + "epoch": 0.6120502384578891, + "grad_norm": 0.0974060595035553, + "learning_rate": 1.166762622343488e-05, + "loss": 8.7425, + "step": 122560 + }, + { + "epoch": 0.6121001772828285, + "grad_norm": 0.09621378034353256, + "learning_rate": 1.166612430849333e-05, + "loss": 8.7256, + "step": 122570 + }, + { + "epoch": 0.612150116107768, + "grad_norm": 0.09098070859909058, + "learning_rate": 1.1664622393551779e-05, + "loss": 8.7195, + "step": 122580 + }, + { + "epoch": 0.6122000549327075, + "grad_norm": 0.08891347795724869, + "learning_rate": 1.1663120478610227e-05, + "loss": 8.739, + "step": 122590 + }, + { + "epoch": 0.6122499937576469, + "grad_norm": 0.09389282763004303, + "learning_rate": 1.1661618563668677e-05, + "loss": 8.717, + "step": 122600 + }, + { + "epoch": 0.6122999325825863, + "grad_norm": 0.0891786739230156, + "learning_rate": 1.1660116648727128e-05, + "loss": 8.7401, + "step": 122610 + }, + { + "epoch": 0.6123498714075257, + "grad_norm": 0.09159950911998749, + "learning_rate": 1.1658614733785578e-05, + "loss": 8.7326, + "step": 122620 + }, + { + "epoch": 0.6123998102324653, + "grad_norm": 0.0896199643611908, + "learning_rate": 1.1657112818844026e-05, + "loss": 8.7444, + "step": 122630 + }, + { + "epoch": 0.6124497490574047, + "grad_norm": 0.0880565494298935, + "learning_rate": 1.1655610903902475e-05, + "loss": 8.7348, + "step": 122640 + }, + { + "epoch": 0.6124996878823441, + "grad_norm": 0.08740682154893875, + "learning_rate": 1.1654108988960925e-05, + "loss": 8.7177, + "step": 122650 + }, + { + "epoch": 0.6125496267072835, + "grad_norm": 0.09321615844964981, + "learning_rate": 1.1652607074019375e-05, + "loss": 8.7422, + "step": 122660 + }, + { + "epoch": 0.6125995655322231, + "grad_norm": 0.09611321985721588, + "learning_rate": 1.1651105159077825e-05, + "loss": 8.7159, + "step": 122670 + }, + { + "epoch": 0.6126495043571625, + "grad_norm": 0.08695535361766815, + "learning_rate": 1.1649603244136275e-05, + "loss": 8.7261, + "step": 122680 + }, + { + "epoch": 0.6126994431821019, + "grad_norm": 0.09775250405073166, + "learning_rate": 1.1648101329194722e-05, + "loss": 8.7415, + "step": 122690 + }, + { + "epoch": 0.6127493820070413, + "grad_norm": 0.094340018928051, + "learning_rate": 1.1646599414253172e-05, + "loss": 8.7466, + "step": 122700 + }, + { + "epoch": 0.6127993208319809, + "grad_norm": 0.09047188609838486, + "learning_rate": 1.1645097499311623e-05, + "loss": 8.7268, + "step": 122710 + }, + { + "epoch": 0.6128492596569203, + "grad_norm": 0.0914597436785698, + "learning_rate": 1.1643595584370073e-05, + "loss": 8.7365, + "step": 122720 + }, + { + "epoch": 0.6128991984818597, + "grad_norm": 0.08962114155292511, + "learning_rate": 1.1642093669428523e-05, + "loss": 8.731, + "step": 122730 + }, + { + "epoch": 0.6129491373067991, + "grad_norm": 0.0948721170425415, + "learning_rate": 1.164059175448697e-05, + "loss": 8.7166, + "step": 122740 + }, + { + "epoch": 0.6129990761317387, + "grad_norm": 0.08996075391769409, + "learning_rate": 1.163908983954542e-05, + "loss": 8.7231, + "step": 122750 + }, + { + "epoch": 0.6130490149566781, + "grad_norm": 0.09304836392402649, + "learning_rate": 1.163758792460387e-05, + "loss": 8.7423, + "step": 122760 + }, + { + "epoch": 0.6130989537816175, + "grad_norm": 0.08591028302907944, + "learning_rate": 1.163608600966232e-05, + "loss": 8.723, + "step": 122770 + }, + { + "epoch": 0.6131488926065569, + "grad_norm": 0.08985080569982529, + "learning_rate": 1.163458409472077e-05, + "loss": 8.7389, + "step": 122780 + }, + { + "epoch": 0.6131988314314964, + "grad_norm": 0.09522908180952072, + "learning_rate": 1.1633082179779217e-05, + "loss": 8.7362, + "step": 122790 + }, + { + "epoch": 0.6132487702564359, + "grad_norm": 0.08915939182043076, + "learning_rate": 1.1631580264837667e-05, + "loss": 8.7425, + "step": 122800 + }, + { + "epoch": 0.6132987090813753, + "grad_norm": 0.0966191291809082, + "learning_rate": 1.1630078349896118e-05, + "loss": 8.7361, + "step": 122810 + }, + { + "epoch": 0.6133486479063147, + "grad_norm": 0.08906877040863037, + "learning_rate": 1.1628576434954568e-05, + "loss": 8.726, + "step": 122820 + }, + { + "epoch": 0.6133985867312542, + "grad_norm": 0.0922885611653328, + "learning_rate": 1.1627074520013018e-05, + "loss": 8.7094, + "step": 122830 + }, + { + "epoch": 0.6134485255561937, + "grad_norm": 0.0915333479642868, + "learning_rate": 1.1625572605071466e-05, + "loss": 8.7258, + "step": 122840 + }, + { + "epoch": 0.6134984643811331, + "grad_norm": 0.09396089613437653, + "learning_rate": 1.1624070690129915e-05, + "loss": 8.7101, + "step": 122850 + }, + { + "epoch": 0.6135484032060725, + "grad_norm": 0.09335985779762268, + "learning_rate": 1.1622568775188365e-05, + "loss": 8.735, + "step": 122860 + }, + { + "epoch": 0.613598342031012, + "grad_norm": 0.09260796010494232, + "learning_rate": 1.1621066860246815e-05, + "loss": 8.7354, + "step": 122870 + }, + { + "epoch": 0.6136482808559515, + "grad_norm": 0.09389949589967728, + "learning_rate": 1.1619564945305265e-05, + "loss": 8.7136, + "step": 122880 + }, + { + "epoch": 0.6136982196808909, + "grad_norm": 0.0865430235862732, + "learning_rate": 1.1618063030363714e-05, + "loss": 8.743, + "step": 122890 + }, + { + "epoch": 0.6137481585058303, + "grad_norm": 0.08791029453277588, + "learning_rate": 1.1616561115422162e-05, + "loss": 8.7337, + "step": 122900 + }, + { + "epoch": 0.6137980973307698, + "grad_norm": 0.0919414758682251, + "learning_rate": 1.1615059200480613e-05, + "loss": 8.7336, + "step": 122910 + }, + { + "epoch": 0.6138480361557093, + "grad_norm": 0.08799885213375092, + "learning_rate": 1.1613557285539063e-05, + "loss": 8.7203, + "step": 122920 + }, + { + "epoch": 0.6138979749806487, + "grad_norm": 0.08847959339618683, + "learning_rate": 1.1612055370597513e-05, + "loss": 8.7178, + "step": 122930 + }, + { + "epoch": 0.6139479138055881, + "grad_norm": 0.10149681568145752, + "learning_rate": 1.1610553455655961e-05, + "loss": 8.7265, + "step": 122940 + }, + { + "epoch": 0.6139978526305276, + "grad_norm": 0.09459761530160904, + "learning_rate": 1.160905154071441e-05, + "loss": 8.7224, + "step": 122950 + }, + { + "epoch": 0.6140477914554671, + "grad_norm": 0.09227167069911957, + "learning_rate": 1.160754962577286e-05, + "loss": 8.7274, + "step": 122960 + }, + { + "epoch": 0.6140977302804065, + "grad_norm": 0.08995702862739563, + "learning_rate": 1.160604771083131e-05, + "loss": 8.718, + "step": 122970 + }, + { + "epoch": 0.6141476691053459, + "grad_norm": 0.08922126144170761, + "learning_rate": 1.160454579588976e-05, + "loss": 8.7368, + "step": 122980 + }, + { + "epoch": 0.6141976079302854, + "grad_norm": 0.08837445080280304, + "learning_rate": 1.1603043880948209e-05, + "loss": 8.7421, + "step": 122990 + }, + { + "epoch": 0.6142475467552249, + "grad_norm": 0.0875106155872345, + "learning_rate": 1.1601541966006657e-05, + "loss": 8.7198, + "step": 123000 + }, + { + "epoch": 0.6142974855801643, + "grad_norm": 0.0882338210940361, + "learning_rate": 1.1600040051065108e-05, + "loss": 8.7162, + "step": 123010 + }, + { + "epoch": 0.6143474244051037, + "grad_norm": 0.08920801430940628, + "learning_rate": 1.1598538136123558e-05, + "loss": 8.717, + "step": 123020 + }, + { + "epoch": 0.6143973632300432, + "grad_norm": 0.08952096849679947, + "learning_rate": 1.1597036221182008e-05, + "loss": 8.7223, + "step": 123030 + }, + { + "epoch": 0.6144473020549827, + "grad_norm": 0.09667718410491943, + "learning_rate": 1.1595534306240456e-05, + "loss": 8.7406, + "step": 123040 + }, + { + "epoch": 0.6144972408799221, + "grad_norm": 0.08800283074378967, + "learning_rate": 1.1594032391298907e-05, + "loss": 8.741, + "step": 123050 + }, + { + "epoch": 0.6145471797048615, + "grad_norm": 0.09324777871370316, + "learning_rate": 1.1592530476357355e-05, + "loss": 8.7304, + "step": 123060 + }, + { + "epoch": 0.614597118529801, + "grad_norm": 0.09224963933229446, + "learning_rate": 1.1591028561415805e-05, + "loss": 8.7463, + "step": 123070 + }, + { + "epoch": 0.6146470573547405, + "grad_norm": 0.09018932282924652, + "learning_rate": 1.1589526646474255e-05, + "loss": 8.7387, + "step": 123080 + }, + { + "epoch": 0.6146969961796799, + "grad_norm": 0.09440066665410995, + "learning_rate": 1.1588024731532704e-05, + "loss": 8.7013, + "step": 123090 + }, + { + "epoch": 0.6147469350046193, + "grad_norm": 0.09074559062719345, + "learning_rate": 1.1586522816591154e-05, + "loss": 8.7298, + "step": 123100 + }, + { + "epoch": 0.6147968738295588, + "grad_norm": 0.09388012439012527, + "learning_rate": 1.1585020901649603e-05, + "loss": 8.7206, + "step": 123110 + }, + { + "epoch": 0.6148468126544983, + "grad_norm": 0.09513651579618454, + "learning_rate": 1.1583518986708053e-05, + "loss": 8.7438, + "step": 123120 + }, + { + "epoch": 0.6148967514794377, + "grad_norm": 0.08741681277751923, + "learning_rate": 1.1582017071766503e-05, + "loss": 8.7586, + "step": 123130 + }, + { + "epoch": 0.6149466903043771, + "grad_norm": 0.0910298153758049, + "learning_rate": 1.1580515156824951e-05, + "loss": 8.7249, + "step": 123140 + }, + { + "epoch": 0.6149966291293166, + "grad_norm": 0.0909765362739563, + "learning_rate": 1.1579013241883402e-05, + "loss": 8.7324, + "step": 123150 + }, + { + "epoch": 0.615046567954256, + "grad_norm": 0.09115078300237656, + "learning_rate": 1.157751132694185e-05, + "loss": 8.7132, + "step": 123160 + }, + { + "epoch": 0.6150965067791955, + "grad_norm": 0.09587119519710541, + "learning_rate": 1.15760094120003e-05, + "loss": 8.7349, + "step": 123170 + }, + { + "epoch": 0.6151464456041349, + "grad_norm": 0.09301666915416718, + "learning_rate": 1.157450749705875e-05, + "loss": 8.7374, + "step": 123180 + }, + { + "epoch": 0.6151963844290744, + "grad_norm": 0.0892057865858078, + "learning_rate": 1.15730055821172e-05, + "loss": 8.7265, + "step": 123190 + }, + { + "epoch": 0.6152463232540138, + "grad_norm": 0.09476073831319809, + "learning_rate": 1.157150366717565e-05, + "loss": 8.7264, + "step": 123200 + }, + { + "epoch": 0.6152962620789533, + "grad_norm": 0.08770003169775009, + "learning_rate": 1.15700017522341e-05, + "loss": 8.734, + "step": 123210 + }, + { + "epoch": 0.6153462009038927, + "grad_norm": 0.09061672538518906, + "learning_rate": 1.1568499837292548e-05, + "loss": 8.737, + "step": 123220 + }, + { + "epoch": 0.6153961397288322, + "grad_norm": 0.09397213160991669, + "learning_rate": 1.1566997922350998e-05, + "loss": 8.7247, + "step": 123230 + }, + { + "epoch": 0.6154460785537716, + "grad_norm": 0.09056683629751205, + "learning_rate": 1.1565496007409448e-05, + "loss": 8.7302, + "step": 123240 + }, + { + "epoch": 0.6154960173787111, + "grad_norm": 0.09550472348928452, + "learning_rate": 1.1563994092467897e-05, + "loss": 8.7345, + "step": 123250 + }, + { + "epoch": 0.6155459562036505, + "grad_norm": 0.09115252643823624, + "learning_rate": 1.1562492177526347e-05, + "loss": 8.7392, + "step": 123260 + }, + { + "epoch": 0.61559589502859, + "grad_norm": 0.09301244467496872, + "learning_rate": 1.1560990262584795e-05, + "loss": 8.7234, + "step": 123270 + }, + { + "epoch": 0.6156458338535294, + "grad_norm": 0.0930706262588501, + "learning_rate": 1.1559488347643246e-05, + "loss": 8.7305, + "step": 123280 + }, + { + "epoch": 0.6156957726784689, + "grad_norm": 0.09021922200918198, + "learning_rate": 1.1557986432701696e-05, + "loss": 8.7383, + "step": 123290 + }, + { + "epoch": 0.6157457115034083, + "grad_norm": 0.09439912438392639, + "learning_rate": 1.1556484517760144e-05, + "loss": 8.7176, + "step": 123300 + }, + { + "epoch": 0.6157956503283478, + "grad_norm": 0.09691648930311203, + "learning_rate": 1.1554982602818594e-05, + "loss": 8.7301, + "step": 123310 + }, + { + "epoch": 0.6158455891532872, + "grad_norm": 0.09814375638961792, + "learning_rate": 1.1553480687877043e-05, + "loss": 8.7233, + "step": 123320 + }, + { + "epoch": 0.6158955279782267, + "grad_norm": 0.09281103312969208, + "learning_rate": 1.1551978772935493e-05, + "loss": 8.741, + "step": 123330 + }, + { + "epoch": 0.6159454668031661, + "grad_norm": 0.09556780755519867, + "learning_rate": 1.1550476857993943e-05, + "loss": 8.7346, + "step": 123340 + }, + { + "epoch": 0.6159954056281056, + "grad_norm": 0.09661609679460526, + "learning_rate": 1.1548974943052392e-05, + "loss": 8.7364, + "step": 123350 + }, + { + "epoch": 0.616045344453045, + "grad_norm": 0.08993745595216751, + "learning_rate": 1.1547473028110842e-05, + "loss": 8.7326, + "step": 123360 + }, + { + "epoch": 0.6160952832779845, + "grad_norm": 0.08858168870210648, + "learning_rate": 1.1545971113169292e-05, + "loss": 8.7335, + "step": 123370 + }, + { + "epoch": 0.6161452221029239, + "grad_norm": 0.0938563272356987, + "learning_rate": 1.154446919822774e-05, + "loss": 8.731, + "step": 123380 + }, + { + "epoch": 0.6161951609278634, + "grad_norm": 0.09345029294490814, + "learning_rate": 1.154296728328619e-05, + "loss": 8.7307, + "step": 123390 + }, + { + "epoch": 0.6162450997528028, + "grad_norm": 0.09097786992788315, + "learning_rate": 1.154146536834464e-05, + "loss": 8.7149, + "step": 123400 + }, + { + "epoch": 0.6162950385777423, + "grad_norm": 0.09178051352500916, + "learning_rate": 1.153996345340309e-05, + "loss": 8.7436, + "step": 123410 + }, + { + "epoch": 0.6163449774026817, + "grad_norm": 0.08830767124891281, + "learning_rate": 1.153846153846154e-05, + "loss": 8.7355, + "step": 123420 + }, + { + "epoch": 0.6163949162276212, + "grad_norm": 0.09343507140874863, + "learning_rate": 1.1536959623519988e-05, + "loss": 8.7295, + "step": 123430 + }, + { + "epoch": 0.6164448550525606, + "grad_norm": 0.09138908237218857, + "learning_rate": 1.1535457708578438e-05, + "loss": 8.7319, + "step": 123440 + }, + { + "epoch": 0.6164947938775001, + "grad_norm": 0.09196456521749496, + "learning_rate": 1.1533955793636887e-05, + "loss": 8.7181, + "step": 123450 + }, + { + "epoch": 0.6165447327024395, + "grad_norm": 0.09027263522148132, + "learning_rate": 1.1532453878695337e-05, + "loss": 8.7359, + "step": 123460 + }, + { + "epoch": 0.616594671527379, + "grad_norm": 0.09021011739969254, + "learning_rate": 1.1530951963753787e-05, + "loss": 8.7455, + "step": 123470 + }, + { + "epoch": 0.6166446103523184, + "grad_norm": 0.08662664890289307, + "learning_rate": 1.1529450048812236e-05, + "loss": 8.7415, + "step": 123480 + }, + { + "epoch": 0.6166945491772579, + "grad_norm": 0.09319479018449783, + "learning_rate": 1.1527948133870686e-05, + "loss": 8.7173, + "step": 123490 + }, + { + "epoch": 0.6167444880021973, + "grad_norm": 0.08931265026330948, + "learning_rate": 1.1526446218929134e-05, + "loss": 8.7091, + "step": 123500 + }, + { + "epoch": 0.6167944268271368, + "grad_norm": 0.09478646516799927, + "learning_rate": 1.1524944303987584e-05, + "loss": 8.7149, + "step": 123510 + }, + { + "epoch": 0.6168443656520762, + "grad_norm": 0.08900546282529831, + "learning_rate": 1.1523442389046035e-05, + "loss": 8.7382, + "step": 123520 + }, + { + "epoch": 0.6168943044770157, + "grad_norm": 0.09752871096134186, + "learning_rate": 1.1521940474104485e-05, + "loss": 8.7297, + "step": 123530 + }, + { + "epoch": 0.6169442433019551, + "grad_norm": 0.09565001726150513, + "learning_rate": 1.1520438559162933e-05, + "loss": 8.7219, + "step": 123540 + }, + { + "epoch": 0.6169941821268946, + "grad_norm": 0.09056803584098816, + "learning_rate": 1.1518936644221382e-05, + "loss": 8.7327, + "step": 123550 + }, + { + "epoch": 0.617044120951834, + "grad_norm": 0.09336548298597336, + "learning_rate": 1.1517434729279832e-05, + "loss": 8.7223, + "step": 123560 + }, + { + "epoch": 0.6170940597767735, + "grad_norm": 0.09907118231058121, + "learning_rate": 1.1515932814338282e-05, + "loss": 8.7273, + "step": 123570 + }, + { + "epoch": 0.6171439986017129, + "grad_norm": 0.08720168471336365, + "learning_rate": 1.1514430899396732e-05, + "loss": 8.7245, + "step": 123580 + }, + { + "epoch": 0.6171939374266524, + "grad_norm": 0.09530545026063919, + "learning_rate": 1.151292898445518e-05, + "loss": 8.7177, + "step": 123590 + }, + { + "epoch": 0.6172438762515918, + "grad_norm": 0.08918207138776779, + "learning_rate": 1.151142706951363e-05, + "loss": 8.7255, + "step": 123600 + }, + { + "epoch": 0.6172938150765312, + "grad_norm": 0.09224686026573181, + "learning_rate": 1.150992515457208e-05, + "loss": 8.7282, + "step": 123610 + }, + { + "epoch": 0.6173437539014707, + "grad_norm": 0.08980294317007065, + "learning_rate": 1.150842323963053e-05, + "loss": 8.7338, + "step": 123620 + }, + { + "epoch": 0.6173936927264101, + "grad_norm": 0.08682075142860413, + "learning_rate": 1.150692132468898e-05, + "loss": 8.7079, + "step": 123630 + }, + { + "epoch": 0.6174436315513496, + "grad_norm": 0.0889846533536911, + "learning_rate": 1.1505419409747428e-05, + "loss": 8.7336, + "step": 123640 + }, + { + "epoch": 0.617493570376289, + "grad_norm": 0.09437645971775055, + "learning_rate": 1.1503917494805877e-05, + "loss": 8.73, + "step": 123650 + }, + { + "epoch": 0.6175435092012285, + "grad_norm": 0.08967822045087814, + "learning_rate": 1.1502415579864327e-05, + "loss": 8.7262, + "step": 123660 + }, + { + "epoch": 0.6175934480261679, + "grad_norm": 0.09480708837509155, + "learning_rate": 1.1500913664922777e-05, + "loss": 8.7245, + "step": 123670 + }, + { + "epoch": 0.6176433868511074, + "grad_norm": 0.095451220870018, + "learning_rate": 1.1499411749981227e-05, + "loss": 8.7531, + "step": 123680 + }, + { + "epoch": 0.6176933256760468, + "grad_norm": 0.08989094942808151, + "learning_rate": 1.1497909835039677e-05, + "loss": 8.7383, + "step": 123690 + }, + { + "epoch": 0.6177432645009863, + "grad_norm": 0.08974773436784744, + "learning_rate": 1.1496407920098124e-05, + "loss": 8.7298, + "step": 123700 + }, + { + "epoch": 0.6177932033259257, + "grad_norm": 0.08904717862606049, + "learning_rate": 1.1494906005156574e-05, + "loss": 8.7198, + "step": 123710 + }, + { + "epoch": 0.6178431421508652, + "grad_norm": 0.0950782522559166, + "learning_rate": 1.1493404090215025e-05, + "loss": 8.7192, + "step": 123720 + }, + { + "epoch": 0.6178930809758046, + "grad_norm": 0.08988340198993683, + "learning_rate": 1.1491902175273475e-05, + "loss": 8.7233, + "step": 123730 + }, + { + "epoch": 0.6179430198007441, + "grad_norm": 0.08889536559581757, + "learning_rate": 1.1490400260331925e-05, + "loss": 8.7234, + "step": 123740 + }, + { + "epoch": 0.6179929586256835, + "grad_norm": 0.08637700229883194, + "learning_rate": 1.1488898345390372e-05, + "loss": 8.7312, + "step": 123750 + }, + { + "epoch": 0.618042897450623, + "grad_norm": 0.09260913729667664, + "learning_rate": 1.1487396430448822e-05, + "loss": 8.7229, + "step": 123760 + }, + { + "epoch": 0.6180928362755624, + "grad_norm": 0.10173122584819794, + "learning_rate": 1.1485894515507272e-05, + "loss": 8.7177, + "step": 123770 + }, + { + "epoch": 0.6181427751005019, + "grad_norm": 0.09701031446456909, + "learning_rate": 1.1484392600565722e-05, + "loss": 8.7225, + "step": 123780 + }, + { + "epoch": 0.6181927139254413, + "grad_norm": 0.09118404984474182, + "learning_rate": 1.1482890685624172e-05, + "loss": 8.7159, + "step": 123790 + }, + { + "epoch": 0.6182426527503808, + "grad_norm": 0.09571003168821335, + "learning_rate": 1.148138877068262e-05, + "loss": 8.7244, + "step": 123800 + }, + { + "epoch": 0.6182925915753202, + "grad_norm": 0.09160356968641281, + "learning_rate": 1.147988685574107e-05, + "loss": 8.7271, + "step": 123810 + }, + { + "epoch": 0.6183425304002597, + "grad_norm": 0.10198367387056351, + "learning_rate": 1.147838494079952e-05, + "loss": 8.728, + "step": 123820 + }, + { + "epoch": 0.6183924692251991, + "grad_norm": 0.090346559882164, + "learning_rate": 1.147688302585797e-05, + "loss": 8.7252, + "step": 123830 + }, + { + "epoch": 0.6184424080501386, + "grad_norm": 0.08791203796863556, + "learning_rate": 1.147538111091642e-05, + "loss": 8.7254, + "step": 123840 + }, + { + "epoch": 0.618492346875078, + "grad_norm": 0.09534545242786407, + "learning_rate": 1.1473879195974868e-05, + "loss": 8.7175, + "step": 123850 + }, + { + "epoch": 0.6185422857000175, + "grad_norm": 0.09206069260835648, + "learning_rate": 1.1472377281033317e-05, + "loss": 8.732, + "step": 123860 + }, + { + "epoch": 0.6185922245249569, + "grad_norm": 0.09079962968826294, + "learning_rate": 1.1470875366091767e-05, + "loss": 8.7245, + "step": 123870 + }, + { + "epoch": 0.6186421633498964, + "grad_norm": 0.09148161858320236, + "learning_rate": 1.1469373451150217e-05, + "loss": 8.7172, + "step": 123880 + }, + { + "epoch": 0.6186921021748358, + "grad_norm": 0.0891844853758812, + "learning_rate": 1.1467871536208667e-05, + "loss": 8.721, + "step": 123890 + }, + { + "epoch": 0.6187420409997753, + "grad_norm": 0.08975985646247864, + "learning_rate": 1.1466369621267116e-05, + "loss": 8.7102, + "step": 123900 + }, + { + "epoch": 0.6187919798247147, + "grad_norm": 0.10155780613422394, + "learning_rate": 1.1464867706325564e-05, + "loss": 8.7252, + "step": 123910 + }, + { + "epoch": 0.6188419186496542, + "grad_norm": 0.09575510025024414, + "learning_rate": 1.1463365791384015e-05, + "loss": 8.716, + "step": 123920 + }, + { + "epoch": 0.6188918574745936, + "grad_norm": 0.0914383977651596, + "learning_rate": 1.1461863876442465e-05, + "loss": 8.7268, + "step": 123930 + }, + { + "epoch": 0.618941796299533, + "grad_norm": 0.09883058816194534, + "learning_rate": 1.1460361961500915e-05, + "loss": 8.7068, + "step": 123940 + }, + { + "epoch": 0.6189917351244725, + "grad_norm": 0.09328742325305939, + "learning_rate": 1.1458860046559363e-05, + "loss": 8.7305, + "step": 123950 + }, + { + "epoch": 0.619041673949412, + "grad_norm": 0.09385626018047333, + "learning_rate": 1.1457358131617812e-05, + "loss": 8.7238, + "step": 123960 + }, + { + "epoch": 0.6190916127743514, + "grad_norm": 0.0958520770072937, + "learning_rate": 1.1455856216676262e-05, + "loss": 8.7139, + "step": 123970 + }, + { + "epoch": 0.6191415515992909, + "grad_norm": 0.09502740204334259, + "learning_rate": 1.1454354301734712e-05, + "loss": 8.7134, + "step": 123980 + }, + { + "epoch": 0.6191914904242303, + "grad_norm": 0.09292402863502502, + "learning_rate": 1.1452852386793162e-05, + "loss": 8.7092, + "step": 123990 + }, + { + "epoch": 0.6192414292491698, + "grad_norm": 0.09679784625768661, + "learning_rate": 1.1451350471851611e-05, + "loss": 8.7259, + "step": 124000 + }, + { + "epoch": 0.6192913680741092, + "grad_norm": 0.09584274888038635, + "learning_rate": 1.1449848556910061e-05, + "loss": 8.715, + "step": 124010 + }, + { + "epoch": 0.6193413068990486, + "grad_norm": 0.08926044404506683, + "learning_rate": 1.144834664196851e-05, + "loss": 8.7075, + "step": 124020 + }, + { + "epoch": 0.6193912457239881, + "grad_norm": 0.09470371901988983, + "learning_rate": 1.144684472702696e-05, + "loss": 8.7184, + "step": 124030 + }, + { + "epoch": 0.6194411845489276, + "grad_norm": 0.09544707089662552, + "learning_rate": 1.144534281208541e-05, + "loss": 8.7218, + "step": 124040 + }, + { + "epoch": 0.619491123373867, + "grad_norm": 0.09077138453722, + "learning_rate": 1.1443840897143858e-05, + "loss": 8.7214, + "step": 124050 + }, + { + "epoch": 0.6195410621988064, + "grad_norm": 0.08742060512304306, + "learning_rate": 1.1442338982202309e-05, + "loss": 8.7439, + "step": 124060 + }, + { + "epoch": 0.6195910010237459, + "grad_norm": 0.09070980548858643, + "learning_rate": 1.1440837067260757e-05, + "loss": 8.7162, + "step": 124070 + }, + { + "epoch": 0.6196409398486854, + "grad_norm": 0.09399957954883575, + "learning_rate": 1.1439335152319207e-05, + "loss": 8.7223, + "step": 124080 + }, + { + "epoch": 0.6196908786736248, + "grad_norm": 0.08889596909284592, + "learning_rate": 1.1437833237377657e-05, + "loss": 8.7342, + "step": 124090 + }, + { + "epoch": 0.6197408174985642, + "grad_norm": 0.09190160781145096, + "learning_rate": 1.1436331322436106e-05, + "loss": 8.7455, + "step": 124100 + }, + { + "epoch": 0.6197907563235037, + "grad_norm": 0.09013140201568604, + "learning_rate": 1.1434829407494556e-05, + "loss": 8.7439, + "step": 124110 + }, + { + "epoch": 0.6198406951484432, + "grad_norm": 0.0921657457947731, + "learning_rate": 1.1433327492553005e-05, + "loss": 8.7196, + "step": 124120 + }, + { + "epoch": 0.6198906339733826, + "grad_norm": 0.09478151053190231, + "learning_rate": 1.1431825577611455e-05, + "loss": 8.7308, + "step": 124130 + }, + { + "epoch": 0.619940572798322, + "grad_norm": 0.09333883225917816, + "learning_rate": 1.1430323662669905e-05, + "loss": 8.7126, + "step": 124140 + }, + { + "epoch": 0.6199905116232615, + "grad_norm": 0.0867578536272049, + "learning_rate": 1.1428821747728353e-05, + "loss": 8.7471, + "step": 124150 + }, + { + "epoch": 0.620040450448201, + "grad_norm": 0.08902301639318466, + "learning_rate": 1.1427319832786804e-05, + "loss": 8.7455, + "step": 124160 + }, + { + "epoch": 0.6200903892731404, + "grad_norm": 0.09362560510635376, + "learning_rate": 1.1425817917845254e-05, + "loss": 8.7118, + "step": 124170 + }, + { + "epoch": 0.6201403280980798, + "grad_norm": 0.09488838911056519, + "learning_rate": 1.1424316002903702e-05, + "loss": 8.7172, + "step": 124180 + }, + { + "epoch": 0.6201902669230193, + "grad_norm": 0.09388437122106552, + "learning_rate": 1.1422814087962152e-05, + "loss": 8.7249, + "step": 124190 + }, + { + "epoch": 0.6202402057479588, + "grad_norm": 0.09056483954191208, + "learning_rate": 1.1421312173020601e-05, + "loss": 8.7256, + "step": 124200 + }, + { + "epoch": 0.6202901445728982, + "grad_norm": 0.09356372058391571, + "learning_rate": 1.1419810258079051e-05, + "loss": 8.7165, + "step": 124210 + }, + { + "epoch": 0.6203400833978376, + "grad_norm": 0.09414329379796982, + "learning_rate": 1.1418308343137501e-05, + "loss": 8.7177, + "step": 124220 + }, + { + "epoch": 0.6203900222227771, + "grad_norm": 0.08978509902954102, + "learning_rate": 1.141680642819595e-05, + "loss": 8.7225, + "step": 124230 + }, + { + "epoch": 0.6204399610477166, + "grad_norm": 0.09141732007265091, + "learning_rate": 1.14153045132544e-05, + "loss": 8.7325, + "step": 124240 + }, + { + "epoch": 0.620489899872656, + "grad_norm": 0.09441263973712921, + "learning_rate": 1.1413802598312848e-05, + "loss": 8.719, + "step": 124250 + }, + { + "epoch": 0.6205398386975954, + "grad_norm": 0.09462641179561615, + "learning_rate": 1.1412300683371299e-05, + "loss": 8.716, + "step": 124260 + }, + { + "epoch": 0.6205897775225349, + "grad_norm": 0.09261373430490494, + "learning_rate": 1.1410798768429749e-05, + "loss": 8.722, + "step": 124270 + }, + { + "epoch": 0.6206397163474744, + "grad_norm": 0.09427915513515472, + "learning_rate": 1.1409296853488197e-05, + "loss": 8.7251, + "step": 124280 + }, + { + "epoch": 0.6206896551724138, + "grad_norm": 0.09489326179027557, + "learning_rate": 1.1407794938546647e-05, + "loss": 8.7197, + "step": 124290 + }, + { + "epoch": 0.6207395939973532, + "grad_norm": 0.09282520413398743, + "learning_rate": 1.1406293023605096e-05, + "loss": 8.7217, + "step": 124300 + }, + { + "epoch": 0.6207895328222927, + "grad_norm": 0.09794396162033081, + "learning_rate": 1.1404791108663546e-05, + "loss": 8.7228, + "step": 124310 + }, + { + "epoch": 0.6208394716472322, + "grad_norm": 0.09146995097398758, + "learning_rate": 1.1403289193721996e-05, + "loss": 8.7181, + "step": 124320 + }, + { + "epoch": 0.6208894104721716, + "grad_norm": 0.0925363302230835, + "learning_rate": 1.1401787278780446e-05, + "loss": 8.7161, + "step": 124330 + }, + { + "epoch": 0.620939349297111, + "grad_norm": 0.09043951332569122, + "learning_rate": 1.1400285363838895e-05, + "loss": 8.7191, + "step": 124340 + }, + { + "epoch": 0.6209892881220505, + "grad_norm": 0.09174945205450058, + "learning_rate": 1.1398783448897343e-05, + "loss": 8.7329, + "step": 124350 + }, + { + "epoch": 0.62103922694699, + "grad_norm": 0.09292636811733246, + "learning_rate": 1.1397281533955794e-05, + "loss": 8.7225, + "step": 124360 + }, + { + "epoch": 0.6210891657719294, + "grad_norm": 0.09614364802837372, + "learning_rate": 1.1395779619014244e-05, + "loss": 8.7178, + "step": 124370 + }, + { + "epoch": 0.6211391045968688, + "grad_norm": 0.09423890709877014, + "learning_rate": 1.1394277704072694e-05, + "loss": 8.7239, + "step": 124380 + }, + { + "epoch": 0.6211890434218083, + "grad_norm": 0.08773567527532578, + "learning_rate": 1.1392775789131142e-05, + "loss": 8.6973, + "step": 124390 + }, + { + "epoch": 0.6212389822467478, + "grad_norm": 0.09796379506587982, + "learning_rate": 1.1391273874189591e-05, + "loss": 8.7243, + "step": 124400 + }, + { + "epoch": 0.6212889210716872, + "grad_norm": 0.08884675800800323, + "learning_rate": 1.1389771959248041e-05, + "loss": 8.7269, + "step": 124410 + }, + { + "epoch": 0.6213388598966266, + "grad_norm": 0.09190382063388824, + "learning_rate": 1.1388270044306491e-05, + "loss": 8.7166, + "step": 124420 + }, + { + "epoch": 0.621388798721566, + "grad_norm": 0.09155641496181488, + "learning_rate": 1.1386768129364941e-05, + "loss": 8.712, + "step": 124430 + }, + { + "epoch": 0.6214387375465056, + "grad_norm": 0.09598276764154434, + "learning_rate": 1.138526621442339e-05, + "loss": 8.7123, + "step": 124440 + }, + { + "epoch": 0.621488676371445, + "grad_norm": 0.0909108817577362, + "learning_rate": 1.1383764299481838e-05, + "loss": 8.7224, + "step": 124450 + }, + { + "epoch": 0.6215386151963844, + "grad_norm": 0.0917460024356842, + "learning_rate": 1.1382262384540289e-05, + "loss": 8.72, + "step": 124460 + }, + { + "epoch": 0.6215885540213238, + "grad_norm": 0.08814991265535355, + "learning_rate": 1.1380760469598739e-05, + "loss": 8.7155, + "step": 124470 + }, + { + "epoch": 0.6216384928462634, + "grad_norm": 0.09747850894927979, + "learning_rate": 1.1379258554657189e-05, + "loss": 8.7379, + "step": 124480 + }, + { + "epoch": 0.6216884316712028, + "grad_norm": 0.09266173839569092, + "learning_rate": 1.137775663971564e-05, + "loss": 8.7156, + "step": 124490 + }, + { + "epoch": 0.6217383704961422, + "grad_norm": 0.09457944333553314, + "learning_rate": 1.1376254724774086e-05, + "loss": 8.7216, + "step": 124500 + }, + { + "epoch": 0.6217883093210816, + "grad_norm": 0.08899705111980438, + "learning_rate": 1.1374752809832536e-05, + "loss": 8.7244, + "step": 124510 + }, + { + "epoch": 0.6218382481460212, + "grad_norm": 0.0950658842921257, + "learning_rate": 1.1373250894890986e-05, + "loss": 8.7242, + "step": 124520 + }, + { + "epoch": 0.6218881869709606, + "grad_norm": 0.0944470465183258, + "learning_rate": 1.1371748979949436e-05, + "loss": 8.724, + "step": 124530 + }, + { + "epoch": 0.6219381257959, + "grad_norm": 0.08662784099578857, + "learning_rate": 1.1370247065007887e-05, + "loss": 8.7335, + "step": 124540 + }, + { + "epoch": 0.6219880646208394, + "grad_norm": 0.09047723561525345, + "learning_rate": 1.1368745150066333e-05, + "loss": 8.7357, + "step": 124550 + }, + { + "epoch": 0.622038003445779, + "grad_norm": 0.09099981188774109, + "learning_rate": 1.1367243235124784e-05, + "loss": 8.7242, + "step": 124560 + }, + { + "epoch": 0.6220879422707184, + "grad_norm": 0.0873718112707138, + "learning_rate": 1.1365741320183234e-05, + "loss": 8.7197, + "step": 124570 + }, + { + "epoch": 0.6221378810956578, + "grad_norm": 0.0861561968922615, + "learning_rate": 1.1364239405241684e-05, + "loss": 8.7394, + "step": 124580 + }, + { + "epoch": 0.6221878199205972, + "grad_norm": 0.10405902564525604, + "learning_rate": 1.1362737490300134e-05, + "loss": 8.7154, + "step": 124590 + }, + { + "epoch": 0.6222377587455367, + "grad_norm": 0.09721992909908295, + "learning_rate": 1.1361235575358581e-05, + "loss": 8.7003, + "step": 124600 + }, + { + "epoch": 0.6222876975704762, + "grad_norm": 0.09676691889762878, + "learning_rate": 1.1359733660417031e-05, + "loss": 8.7209, + "step": 124610 + }, + { + "epoch": 0.6223376363954156, + "grad_norm": 0.09333143383264542, + "learning_rate": 1.1358231745475481e-05, + "loss": 8.7237, + "step": 124620 + }, + { + "epoch": 0.622387575220355, + "grad_norm": 0.08884768187999725, + "learning_rate": 1.1356729830533931e-05, + "loss": 8.7182, + "step": 124630 + }, + { + "epoch": 0.6224375140452945, + "grad_norm": 0.09150359779596329, + "learning_rate": 1.1355227915592382e-05, + "loss": 8.7136, + "step": 124640 + }, + { + "epoch": 0.622487452870234, + "grad_norm": 0.09673064947128296, + "learning_rate": 1.135372600065083e-05, + "loss": 8.7233, + "step": 124650 + }, + { + "epoch": 0.6225373916951734, + "grad_norm": 0.09233259409666061, + "learning_rate": 1.1352224085709279e-05, + "loss": 8.7057, + "step": 124660 + }, + { + "epoch": 0.6225873305201128, + "grad_norm": 0.09211067855358124, + "learning_rate": 1.1350722170767729e-05, + "loss": 8.7187, + "step": 124670 + }, + { + "epoch": 0.6226372693450523, + "grad_norm": 0.09305453300476074, + "learning_rate": 1.1349220255826179e-05, + "loss": 8.7037, + "step": 124680 + }, + { + "epoch": 0.6226872081699918, + "grad_norm": 0.08884977549314499, + "learning_rate": 1.134771834088463e-05, + "loss": 8.7114, + "step": 124690 + }, + { + "epoch": 0.6227371469949312, + "grad_norm": 0.09412440657615662, + "learning_rate": 1.1346216425943078e-05, + "loss": 8.7272, + "step": 124700 + }, + { + "epoch": 0.6227870858198706, + "grad_norm": 0.09514528512954712, + "learning_rate": 1.1344714511001526e-05, + "loss": 8.7156, + "step": 124710 + }, + { + "epoch": 0.6228370246448101, + "grad_norm": 0.0961642786860466, + "learning_rate": 1.1343212596059976e-05, + "loss": 8.7162, + "step": 124720 + }, + { + "epoch": 0.6228869634697496, + "grad_norm": 0.09910131245851517, + "learning_rate": 1.1341710681118427e-05, + "loss": 8.7031, + "step": 124730 + }, + { + "epoch": 0.622936902294689, + "grad_norm": 0.09175023436546326, + "learning_rate": 1.1340208766176877e-05, + "loss": 8.7131, + "step": 124740 + }, + { + "epoch": 0.6229868411196284, + "grad_norm": 0.09244109690189362, + "learning_rate": 1.1338706851235325e-05, + "loss": 8.7122, + "step": 124750 + }, + { + "epoch": 0.6230367799445679, + "grad_norm": 0.09264490753412247, + "learning_rate": 1.1337204936293774e-05, + "loss": 8.7155, + "step": 124760 + }, + { + "epoch": 0.6230867187695074, + "grad_norm": 0.09691507369279861, + "learning_rate": 1.1335703021352224e-05, + "loss": 8.7146, + "step": 124770 + }, + { + "epoch": 0.6231366575944468, + "grad_norm": 0.08869668841362, + "learning_rate": 1.1334201106410674e-05, + "loss": 8.7173, + "step": 124780 + }, + { + "epoch": 0.6231865964193862, + "grad_norm": 0.09124024212360382, + "learning_rate": 1.1332699191469124e-05, + "loss": 8.7296, + "step": 124790 + }, + { + "epoch": 0.6232365352443257, + "grad_norm": 0.09584067761898041, + "learning_rate": 1.1331197276527573e-05, + "loss": 8.7047, + "step": 124800 + }, + { + "epoch": 0.6232864740692652, + "grad_norm": 0.08927422761917114, + "learning_rate": 1.1329695361586023e-05, + "loss": 8.7281, + "step": 124810 + }, + { + "epoch": 0.6233364128942046, + "grad_norm": 0.09715837985277176, + "learning_rate": 1.1328193446644471e-05, + "loss": 8.7242, + "step": 124820 + }, + { + "epoch": 0.623386351719144, + "grad_norm": 0.09197895228862762, + "learning_rate": 1.1326691531702922e-05, + "loss": 8.7181, + "step": 124830 + }, + { + "epoch": 0.6234362905440834, + "grad_norm": 0.0883011445403099, + "learning_rate": 1.1325189616761372e-05, + "loss": 8.7173, + "step": 124840 + }, + { + "epoch": 0.623486229369023, + "grad_norm": 0.08731172233819962, + "learning_rate": 1.132368770181982e-05, + "loss": 8.7087, + "step": 124850 + }, + { + "epoch": 0.6235361681939624, + "grad_norm": 0.09282612055540085, + "learning_rate": 1.132218578687827e-05, + "loss": 8.7295, + "step": 124860 + }, + { + "epoch": 0.6235861070189018, + "grad_norm": 0.0898682028055191, + "learning_rate": 1.1320683871936719e-05, + "loss": 8.7312, + "step": 124870 + }, + { + "epoch": 0.6236360458438412, + "grad_norm": 0.09973151236772537, + "learning_rate": 1.1319181956995169e-05, + "loss": 8.7089, + "step": 124880 + }, + { + "epoch": 0.6236859846687808, + "grad_norm": 0.10166613757610321, + "learning_rate": 1.131768004205362e-05, + "loss": 8.7065, + "step": 124890 + }, + { + "epoch": 0.6237359234937202, + "grad_norm": 0.10042425245046616, + "learning_rate": 1.1316178127112068e-05, + "loss": 8.716, + "step": 124900 + }, + { + "epoch": 0.6237858623186596, + "grad_norm": 0.09376679360866547, + "learning_rate": 1.1314676212170518e-05, + "loss": 8.7169, + "step": 124910 + }, + { + "epoch": 0.623835801143599, + "grad_norm": 0.09402406215667725, + "learning_rate": 1.1313174297228966e-05, + "loss": 8.7226, + "step": 124920 + }, + { + "epoch": 0.6238857399685386, + "grad_norm": 0.0880286768078804, + "learning_rate": 1.1311672382287417e-05, + "loss": 8.7149, + "step": 124930 + }, + { + "epoch": 0.623935678793478, + "grad_norm": 0.09521077573299408, + "learning_rate": 1.1310170467345867e-05, + "loss": 8.7109, + "step": 124940 + }, + { + "epoch": 0.6239856176184174, + "grad_norm": 0.09367305040359497, + "learning_rate": 1.1308668552404315e-05, + "loss": 8.7349, + "step": 124950 + }, + { + "epoch": 0.6240355564433568, + "grad_norm": 0.08822840452194214, + "learning_rate": 1.1307166637462765e-05, + "loss": 8.7135, + "step": 124960 + }, + { + "epoch": 0.6240854952682964, + "grad_norm": 0.09069608896970749, + "learning_rate": 1.1305664722521216e-05, + "loss": 8.7098, + "step": 124970 + }, + { + "epoch": 0.6241354340932358, + "grad_norm": 0.09736108034849167, + "learning_rate": 1.1304162807579664e-05, + "loss": 8.7174, + "step": 124980 + }, + { + "epoch": 0.6241853729181752, + "grad_norm": 0.09728176891803741, + "learning_rate": 1.1302660892638114e-05, + "loss": 8.7019, + "step": 124990 + }, + { + "epoch": 0.6242353117431146, + "grad_norm": 0.0918160006403923, + "learning_rate": 1.1301158977696563e-05, + "loss": 8.7286, + "step": 125000 + }, + { + "epoch": 0.6242852505680542, + "grad_norm": 0.09253043681383133, + "learning_rate": 1.1299657062755013e-05, + "loss": 8.7112, + "step": 125010 + }, + { + "epoch": 0.6243351893929936, + "grad_norm": 0.09095212817192078, + "learning_rate": 1.1298155147813463e-05, + "loss": 8.7239, + "step": 125020 + }, + { + "epoch": 0.624385128217933, + "grad_norm": 0.09760838001966476, + "learning_rate": 1.1296653232871912e-05, + "loss": 8.7218, + "step": 125030 + }, + { + "epoch": 0.6244350670428724, + "grad_norm": 0.09743019938468933, + "learning_rate": 1.1295151317930362e-05, + "loss": 8.7253, + "step": 125040 + }, + { + "epoch": 0.624485005867812, + "grad_norm": 0.09515538066625595, + "learning_rate": 1.129364940298881e-05, + "loss": 8.7092, + "step": 125050 + }, + { + "epoch": 0.6245349446927514, + "grad_norm": 0.08986768871545792, + "learning_rate": 1.129214748804726e-05, + "loss": 8.7231, + "step": 125060 + }, + { + "epoch": 0.6245848835176908, + "grad_norm": 0.09468900412321091, + "learning_rate": 1.129064557310571e-05, + "loss": 8.7225, + "step": 125070 + }, + { + "epoch": 0.6246348223426302, + "grad_norm": 0.0945427417755127, + "learning_rate": 1.1289143658164159e-05, + "loss": 8.7031, + "step": 125080 + }, + { + "epoch": 0.6246847611675698, + "grad_norm": 0.0921517014503479, + "learning_rate": 1.128764174322261e-05, + "loss": 8.712, + "step": 125090 + }, + { + "epoch": 0.6247346999925092, + "grad_norm": 0.09056264162063599, + "learning_rate": 1.1286139828281058e-05, + "loss": 8.7088, + "step": 125100 + }, + { + "epoch": 0.6247846388174486, + "grad_norm": 0.0919748991727829, + "learning_rate": 1.1284637913339508e-05, + "loss": 8.7197, + "step": 125110 + }, + { + "epoch": 0.624834577642388, + "grad_norm": 0.0946049764752388, + "learning_rate": 1.1283135998397958e-05, + "loss": 8.7279, + "step": 125120 + }, + { + "epoch": 0.6248845164673276, + "grad_norm": 0.09442543238401413, + "learning_rate": 1.1281634083456408e-05, + "loss": 8.7228, + "step": 125130 + }, + { + "epoch": 0.624934455292267, + "grad_norm": 0.09224551171064377, + "learning_rate": 1.1280132168514857e-05, + "loss": 8.7196, + "step": 125140 + }, + { + "epoch": 0.6249843941172064, + "grad_norm": 0.0918484553694725, + "learning_rate": 1.1278630253573305e-05, + "loss": 8.7279, + "step": 125150 + }, + { + "epoch": 0.6250343329421458, + "grad_norm": 0.08815157413482666, + "learning_rate": 1.1277128338631755e-05, + "loss": 8.7135, + "step": 125160 + }, + { + "epoch": 0.6250842717670854, + "grad_norm": 0.09147334843873978, + "learning_rate": 1.1275626423690206e-05, + "loss": 8.7166, + "step": 125170 + }, + { + "epoch": 0.6251342105920248, + "grad_norm": 0.09084770083427429, + "learning_rate": 1.1274124508748656e-05, + "loss": 8.7209, + "step": 125180 + }, + { + "epoch": 0.6251841494169642, + "grad_norm": 0.0943610891699791, + "learning_rate": 1.1272622593807104e-05, + "loss": 8.7096, + "step": 125190 + }, + { + "epoch": 0.6252340882419036, + "grad_norm": 0.09305550903081894, + "learning_rate": 1.1271120678865553e-05, + "loss": 8.7275, + "step": 125200 + }, + { + "epoch": 0.6252840270668432, + "grad_norm": 0.09299591183662415, + "learning_rate": 1.1269618763924003e-05, + "loss": 8.7027, + "step": 125210 + }, + { + "epoch": 0.6253339658917826, + "grad_norm": 0.0949455201625824, + "learning_rate": 1.1268116848982453e-05, + "loss": 8.7092, + "step": 125220 + }, + { + "epoch": 0.625383904716722, + "grad_norm": 0.09270406514406204, + "learning_rate": 1.1266614934040903e-05, + "loss": 8.7045, + "step": 125230 + }, + { + "epoch": 0.6254338435416614, + "grad_norm": 0.09729525446891785, + "learning_rate": 1.1265113019099352e-05, + "loss": 8.7111, + "step": 125240 + }, + { + "epoch": 0.625483782366601, + "grad_norm": 0.09073136001825333, + "learning_rate": 1.12636111041578e-05, + "loss": 8.7025, + "step": 125250 + }, + { + "epoch": 0.6255337211915404, + "grad_norm": 0.08994272351264954, + "learning_rate": 1.126210918921625e-05, + "loss": 8.7221, + "step": 125260 + }, + { + "epoch": 0.6255836600164798, + "grad_norm": 0.09095024317502975, + "learning_rate": 1.12606072742747e-05, + "loss": 8.7107, + "step": 125270 + }, + { + "epoch": 0.6256335988414192, + "grad_norm": 0.09382519870996475, + "learning_rate": 1.125910535933315e-05, + "loss": 8.6977, + "step": 125280 + }, + { + "epoch": 0.6256835376663588, + "grad_norm": 0.09506542235612869, + "learning_rate": 1.1257603444391601e-05, + "loss": 8.694, + "step": 125290 + }, + { + "epoch": 0.6257334764912982, + "grad_norm": 0.08988244086503983, + "learning_rate": 1.125610152945005e-05, + "loss": 8.7117, + "step": 125300 + }, + { + "epoch": 0.6257834153162376, + "grad_norm": 0.0954940915107727, + "learning_rate": 1.1254599614508498e-05, + "loss": 8.7261, + "step": 125310 + }, + { + "epoch": 0.625833354141177, + "grad_norm": 0.0926600992679596, + "learning_rate": 1.1253097699566948e-05, + "loss": 8.7071, + "step": 125320 + }, + { + "epoch": 0.6258832929661166, + "grad_norm": 0.09239501506090164, + "learning_rate": 1.1251595784625398e-05, + "loss": 8.7221, + "step": 125330 + }, + { + "epoch": 0.625933231791056, + "grad_norm": 0.09140519052743912, + "learning_rate": 1.1250093869683848e-05, + "loss": 8.7182, + "step": 125340 + }, + { + "epoch": 0.6259831706159954, + "grad_norm": 0.09351127594709396, + "learning_rate": 1.1248591954742297e-05, + "loss": 8.7191, + "step": 125350 + }, + { + "epoch": 0.6260331094409348, + "grad_norm": 0.09365622699260712, + "learning_rate": 1.1247090039800745e-05, + "loss": 8.7149, + "step": 125360 + }, + { + "epoch": 0.6260830482658744, + "grad_norm": 0.09343401342630386, + "learning_rate": 1.1245588124859196e-05, + "loss": 8.7062, + "step": 125370 + }, + { + "epoch": 0.6261329870908138, + "grad_norm": 0.09367363899946213, + "learning_rate": 1.1244086209917646e-05, + "loss": 8.7105, + "step": 125380 + }, + { + "epoch": 0.6261829259157532, + "grad_norm": 0.09021138399839401, + "learning_rate": 1.1242584294976096e-05, + "loss": 8.714, + "step": 125390 + }, + { + "epoch": 0.6262328647406926, + "grad_norm": 0.10367359220981598, + "learning_rate": 1.1241082380034544e-05, + "loss": 8.7316, + "step": 125400 + }, + { + "epoch": 0.6262828035656322, + "grad_norm": 0.09110212326049805, + "learning_rate": 1.1239580465092993e-05, + "loss": 8.723, + "step": 125410 + }, + { + "epoch": 0.6263327423905716, + "grad_norm": 0.09497211873531342, + "learning_rate": 1.1238078550151443e-05, + "loss": 8.719, + "step": 125420 + }, + { + "epoch": 0.626382681215511, + "grad_norm": 0.08793065696954727, + "learning_rate": 1.1236576635209893e-05, + "loss": 8.7118, + "step": 125430 + }, + { + "epoch": 0.6264326200404504, + "grad_norm": 0.09238415211439133, + "learning_rate": 1.1235074720268343e-05, + "loss": 8.7091, + "step": 125440 + }, + { + "epoch": 0.62648255886539, + "grad_norm": 0.08847446739673615, + "learning_rate": 1.1233572805326794e-05, + "loss": 8.7071, + "step": 125450 + }, + { + "epoch": 0.6265324976903294, + "grad_norm": 0.09379098564386368, + "learning_rate": 1.123207089038524e-05, + "loss": 8.7228, + "step": 125460 + }, + { + "epoch": 0.6265824365152688, + "grad_norm": 0.09281666576862335, + "learning_rate": 1.123056897544369e-05, + "loss": 8.7114, + "step": 125470 + }, + { + "epoch": 0.6266323753402082, + "grad_norm": 0.0954156294465065, + "learning_rate": 1.122906706050214e-05, + "loss": 8.7198, + "step": 125480 + }, + { + "epoch": 0.6266823141651477, + "grad_norm": 0.091185063123703, + "learning_rate": 1.1227565145560591e-05, + "loss": 8.7046, + "step": 125490 + }, + { + "epoch": 0.6267322529900872, + "grad_norm": 0.09116750210523605, + "learning_rate": 1.1226063230619041e-05, + "loss": 8.7127, + "step": 125500 + }, + { + "epoch": 0.6267821918150266, + "grad_norm": 0.09332124143838882, + "learning_rate": 1.1224561315677488e-05, + "loss": 8.7211, + "step": 125510 + }, + { + "epoch": 0.626832130639966, + "grad_norm": 0.08798982203006744, + "learning_rate": 1.1223059400735938e-05, + "loss": 8.6972, + "step": 125520 + }, + { + "epoch": 0.6268820694649055, + "grad_norm": 0.08885789662599564, + "learning_rate": 1.1221557485794388e-05, + "loss": 8.7219, + "step": 125530 + }, + { + "epoch": 0.626932008289845, + "grad_norm": 0.10069695860147476, + "learning_rate": 1.1220055570852838e-05, + "loss": 8.699, + "step": 125540 + }, + { + "epoch": 0.6269819471147844, + "grad_norm": 0.0930192843079567, + "learning_rate": 1.1218553655911289e-05, + "loss": 8.7112, + "step": 125550 + }, + { + "epoch": 0.6270318859397238, + "grad_norm": 0.09061025083065033, + "learning_rate": 1.1217051740969735e-05, + "loss": 8.7135, + "step": 125560 + }, + { + "epoch": 0.6270818247646633, + "grad_norm": 0.08885504305362701, + "learning_rate": 1.1215549826028186e-05, + "loss": 8.7175, + "step": 125570 + }, + { + "epoch": 0.6271317635896028, + "grad_norm": 0.09031138569116592, + "learning_rate": 1.1214047911086636e-05, + "loss": 8.7082, + "step": 125580 + }, + { + "epoch": 0.6271817024145422, + "grad_norm": 0.09189961850643158, + "learning_rate": 1.1212545996145086e-05, + "loss": 8.7175, + "step": 125590 + }, + { + "epoch": 0.6272316412394816, + "grad_norm": 0.09408345818519592, + "learning_rate": 1.1211044081203536e-05, + "loss": 8.7174, + "step": 125600 + }, + { + "epoch": 0.627281580064421, + "grad_norm": 0.09308337420225143, + "learning_rate": 1.1209542166261985e-05, + "loss": 8.7101, + "step": 125610 + }, + { + "epoch": 0.6273315188893606, + "grad_norm": 0.09275424480438232, + "learning_rate": 1.1208040251320433e-05, + "loss": 8.7109, + "step": 125620 + }, + { + "epoch": 0.6273814577143, + "grad_norm": 0.10062972456216812, + "learning_rate": 1.1206538336378883e-05, + "loss": 8.6987, + "step": 125630 + }, + { + "epoch": 0.6274313965392394, + "grad_norm": 0.09465427696704865, + "learning_rate": 1.1205036421437333e-05, + "loss": 8.7145, + "step": 125640 + }, + { + "epoch": 0.6274813353641788, + "grad_norm": 0.09311853349208832, + "learning_rate": 1.1203534506495784e-05, + "loss": 8.7253, + "step": 125650 + }, + { + "epoch": 0.6275312741891184, + "grad_norm": 0.088384248316288, + "learning_rate": 1.1202032591554232e-05, + "loss": 8.7049, + "step": 125660 + }, + { + "epoch": 0.6275812130140578, + "grad_norm": 0.09492412954568863, + "learning_rate": 1.120053067661268e-05, + "loss": 8.7136, + "step": 125670 + }, + { + "epoch": 0.6276311518389972, + "grad_norm": 0.10065683722496033, + "learning_rate": 1.119902876167113e-05, + "loss": 8.7108, + "step": 125680 + }, + { + "epoch": 0.6276810906639366, + "grad_norm": 0.0918295755982399, + "learning_rate": 1.1197526846729581e-05, + "loss": 8.7085, + "step": 125690 + }, + { + "epoch": 0.6277310294888762, + "grad_norm": 0.09792115539312363, + "learning_rate": 1.1196024931788031e-05, + "loss": 8.6919, + "step": 125700 + }, + { + "epoch": 0.6277809683138156, + "grad_norm": 0.08935079723596573, + "learning_rate": 1.119452301684648e-05, + "loss": 8.7047, + "step": 125710 + }, + { + "epoch": 0.627830907138755, + "grad_norm": 0.09444247931241989, + "learning_rate": 1.1193021101904928e-05, + "loss": 8.7183, + "step": 125720 + }, + { + "epoch": 0.6278808459636944, + "grad_norm": 0.09201852232217789, + "learning_rate": 1.1191519186963378e-05, + "loss": 8.7243, + "step": 125730 + }, + { + "epoch": 0.627930784788634, + "grad_norm": 0.09014883637428284, + "learning_rate": 1.1190017272021828e-05, + "loss": 8.7091, + "step": 125740 + }, + { + "epoch": 0.6279807236135734, + "grad_norm": 0.091966412961483, + "learning_rate": 1.1188515357080279e-05, + "loss": 8.7245, + "step": 125750 + }, + { + "epoch": 0.6280306624385128, + "grad_norm": 0.08749305456876755, + "learning_rate": 1.1187013442138727e-05, + "loss": 8.7155, + "step": 125760 + }, + { + "epoch": 0.6280806012634522, + "grad_norm": 0.09407436102628708, + "learning_rate": 1.1185511527197177e-05, + "loss": 8.7034, + "step": 125770 + }, + { + "epoch": 0.6281305400883918, + "grad_norm": 0.09107963740825653, + "learning_rate": 1.1184009612255626e-05, + "loss": 8.7061, + "step": 125780 + }, + { + "epoch": 0.6281804789133312, + "grad_norm": 0.09338110685348511, + "learning_rate": 1.1182507697314076e-05, + "loss": 8.7152, + "step": 125790 + }, + { + "epoch": 0.6282304177382706, + "grad_norm": 0.09084051847457886, + "learning_rate": 1.1181005782372526e-05, + "loss": 8.7097, + "step": 125800 + }, + { + "epoch": 0.62828035656321, + "grad_norm": 0.09835775941610336, + "learning_rate": 1.1179503867430975e-05, + "loss": 8.7238, + "step": 125810 + }, + { + "epoch": 0.6283302953881496, + "grad_norm": 0.09301155060529709, + "learning_rate": 1.1178001952489425e-05, + "loss": 8.7027, + "step": 125820 + }, + { + "epoch": 0.628380234213089, + "grad_norm": 0.08785326033830643, + "learning_rate": 1.1176500037547873e-05, + "loss": 8.7189, + "step": 125830 + }, + { + "epoch": 0.6284301730380284, + "grad_norm": 0.08803771436214447, + "learning_rate": 1.1174998122606323e-05, + "loss": 8.7123, + "step": 125840 + }, + { + "epoch": 0.6284801118629678, + "grad_norm": 0.0908784344792366, + "learning_rate": 1.1173496207664774e-05, + "loss": 8.7182, + "step": 125850 + }, + { + "epoch": 0.6285300506879073, + "grad_norm": 0.0862756222486496, + "learning_rate": 1.1171994292723222e-05, + "loss": 8.7111, + "step": 125860 + }, + { + "epoch": 0.6285799895128468, + "grad_norm": 0.09046090394258499, + "learning_rate": 1.1170492377781672e-05, + "loss": 8.7195, + "step": 125870 + }, + { + "epoch": 0.6286299283377862, + "grad_norm": 0.08847752958536148, + "learning_rate": 1.116899046284012e-05, + "loss": 8.7092, + "step": 125880 + }, + { + "epoch": 0.6286798671627256, + "grad_norm": 0.09415357559919357, + "learning_rate": 1.1167488547898571e-05, + "loss": 8.6979, + "step": 125890 + }, + { + "epoch": 0.6287298059876651, + "grad_norm": 0.09321362525224686, + "learning_rate": 1.1165986632957021e-05, + "loss": 8.7019, + "step": 125900 + }, + { + "epoch": 0.6287797448126046, + "grad_norm": 0.09205802530050278, + "learning_rate": 1.116448471801547e-05, + "loss": 8.7374, + "step": 125910 + }, + { + "epoch": 0.628829683637544, + "grad_norm": 0.0888669490814209, + "learning_rate": 1.116298280307392e-05, + "loss": 8.7149, + "step": 125920 + }, + { + "epoch": 0.6288796224624834, + "grad_norm": 0.08783555775880814, + "learning_rate": 1.1161480888132368e-05, + "loss": 8.7195, + "step": 125930 + }, + { + "epoch": 0.628929561287423, + "grad_norm": 0.09451974183320999, + "learning_rate": 1.1159978973190818e-05, + "loss": 8.7245, + "step": 125940 + }, + { + "epoch": 0.6289795001123624, + "grad_norm": 0.09017028659582138, + "learning_rate": 1.1158477058249269e-05, + "loss": 8.7217, + "step": 125950 + }, + { + "epoch": 0.6290294389373018, + "grad_norm": 0.09248178452253342, + "learning_rate": 1.1156975143307717e-05, + "loss": 8.7001, + "step": 125960 + }, + { + "epoch": 0.6290793777622412, + "grad_norm": 0.09288008511066437, + "learning_rate": 1.1155473228366167e-05, + "loss": 8.6956, + "step": 125970 + }, + { + "epoch": 0.6291293165871807, + "grad_norm": 0.08859378844499588, + "learning_rate": 1.1153971313424617e-05, + "loss": 8.7073, + "step": 125980 + }, + { + "epoch": 0.6291792554121202, + "grad_norm": 0.09456798434257507, + "learning_rate": 1.1152469398483066e-05, + "loss": 8.7203, + "step": 125990 + }, + { + "epoch": 0.6292291942370596, + "grad_norm": 0.09040806442499161, + "learning_rate": 1.1150967483541516e-05, + "loss": 8.711, + "step": 126000 + }, + { + "epoch": 0.629279133061999, + "grad_norm": 0.09092914313077927, + "learning_rate": 1.1149465568599965e-05, + "loss": 8.7157, + "step": 126010 + }, + { + "epoch": 0.6293290718869385, + "grad_norm": 0.08959808945655823, + "learning_rate": 1.1147963653658415e-05, + "loss": 8.7037, + "step": 126020 + }, + { + "epoch": 0.629379010711878, + "grad_norm": 0.09273937344551086, + "learning_rate": 1.1146461738716865e-05, + "loss": 8.7036, + "step": 126030 + }, + { + "epoch": 0.6294289495368174, + "grad_norm": 0.09658925980329514, + "learning_rate": 1.1144959823775313e-05, + "loss": 8.7071, + "step": 126040 + }, + { + "epoch": 0.6294788883617568, + "grad_norm": 0.09372569620609283, + "learning_rate": 1.1143457908833764e-05, + "loss": 8.7113, + "step": 126050 + }, + { + "epoch": 0.6295288271866963, + "grad_norm": 0.08820081502199173, + "learning_rate": 1.1141955993892212e-05, + "loss": 8.7279, + "step": 126060 + }, + { + "epoch": 0.6295787660116358, + "grad_norm": 0.09399138391017914, + "learning_rate": 1.1140454078950662e-05, + "loss": 8.7203, + "step": 126070 + }, + { + "epoch": 0.6296287048365752, + "grad_norm": 0.09462126344442368, + "learning_rate": 1.1138952164009112e-05, + "loss": 8.7135, + "step": 126080 + }, + { + "epoch": 0.6296786436615146, + "grad_norm": 0.09695866703987122, + "learning_rate": 1.1137450249067561e-05, + "loss": 8.6986, + "step": 126090 + }, + { + "epoch": 0.6297285824864541, + "grad_norm": 0.09450877457857132, + "learning_rate": 1.1135948334126011e-05, + "loss": 8.7052, + "step": 126100 + }, + { + "epoch": 0.6297785213113936, + "grad_norm": 0.09167155623435974, + "learning_rate": 1.113444641918446e-05, + "loss": 8.7027, + "step": 126110 + }, + { + "epoch": 0.629828460136333, + "grad_norm": 0.09358896315097809, + "learning_rate": 1.113294450424291e-05, + "loss": 8.7079, + "step": 126120 + }, + { + "epoch": 0.6298783989612724, + "grad_norm": 0.10010679066181183, + "learning_rate": 1.113144258930136e-05, + "loss": 8.701, + "step": 126130 + }, + { + "epoch": 0.6299283377862119, + "grad_norm": 0.09216713160276413, + "learning_rate": 1.112994067435981e-05, + "loss": 8.7026, + "step": 126140 + }, + { + "epoch": 0.6299782766111514, + "grad_norm": 0.08950921893119812, + "learning_rate": 1.1128438759418259e-05, + "loss": 8.6985, + "step": 126150 + }, + { + "epoch": 0.6300282154360908, + "grad_norm": 0.09360364824533463, + "learning_rate": 1.1126936844476707e-05, + "loss": 8.728, + "step": 126160 + }, + { + "epoch": 0.6300781542610302, + "grad_norm": 0.09034190326929092, + "learning_rate": 1.1125434929535157e-05, + "loss": 8.7074, + "step": 126170 + }, + { + "epoch": 0.6301280930859697, + "grad_norm": 0.09222965687513351, + "learning_rate": 1.1123933014593607e-05, + "loss": 8.7075, + "step": 126180 + }, + { + "epoch": 0.6301780319109092, + "grad_norm": 0.09020216763019562, + "learning_rate": 1.1122431099652058e-05, + "loss": 8.6951, + "step": 126190 + }, + { + "epoch": 0.6302279707358486, + "grad_norm": 0.09144456684589386, + "learning_rate": 1.1120929184710506e-05, + "loss": 8.7062, + "step": 126200 + }, + { + "epoch": 0.630277909560788, + "grad_norm": 0.09432315826416016, + "learning_rate": 1.1119427269768955e-05, + "loss": 8.712, + "step": 126210 + }, + { + "epoch": 0.6303278483857275, + "grad_norm": 0.09091579914093018, + "learning_rate": 1.1117925354827405e-05, + "loss": 8.7356, + "step": 126220 + }, + { + "epoch": 0.630377787210667, + "grad_norm": 0.08974793553352356, + "learning_rate": 1.1116423439885855e-05, + "loss": 8.7139, + "step": 126230 + }, + { + "epoch": 0.6304277260356064, + "grad_norm": 0.09444157779216766, + "learning_rate": 1.1114921524944305e-05, + "loss": 8.7199, + "step": 126240 + }, + { + "epoch": 0.6304776648605458, + "grad_norm": 0.09196534752845764, + "learning_rate": 1.1113419610002754e-05, + "loss": 8.7038, + "step": 126250 + }, + { + "epoch": 0.6305276036854853, + "grad_norm": 0.09097559005022049, + "learning_rate": 1.1111917695061202e-05, + "loss": 8.7136, + "step": 126260 + }, + { + "epoch": 0.6305775425104247, + "grad_norm": 0.08905323594808578, + "learning_rate": 1.1110415780119652e-05, + "loss": 8.7215, + "step": 126270 + }, + { + "epoch": 0.6306274813353642, + "grad_norm": 0.09112866222858429, + "learning_rate": 1.1108913865178103e-05, + "loss": 8.7085, + "step": 126280 + }, + { + "epoch": 0.6306774201603036, + "grad_norm": 0.08942396938800812, + "learning_rate": 1.1107411950236553e-05, + "loss": 8.7192, + "step": 126290 + }, + { + "epoch": 0.6307273589852431, + "grad_norm": 0.09311738610267639, + "learning_rate": 1.1105910035295003e-05, + "loss": 8.6981, + "step": 126300 + }, + { + "epoch": 0.6307772978101825, + "grad_norm": 0.09299565851688385, + "learning_rate": 1.110440812035345e-05, + "loss": 8.71, + "step": 126310 + }, + { + "epoch": 0.630827236635122, + "grad_norm": 0.09112560003995895, + "learning_rate": 1.11029062054119e-05, + "loss": 8.6983, + "step": 126320 + }, + { + "epoch": 0.6308771754600614, + "grad_norm": 0.09203418344259262, + "learning_rate": 1.110140429047035e-05, + "loss": 8.7096, + "step": 126330 + }, + { + "epoch": 0.6309271142850009, + "grad_norm": 0.09365582466125488, + "learning_rate": 1.10999023755288e-05, + "loss": 8.6942, + "step": 126340 + }, + { + "epoch": 0.6309770531099403, + "grad_norm": 0.09594764560461044, + "learning_rate": 1.109840046058725e-05, + "loss": 8.7237, + "step": 126350 + }, + { + "epoch": 0.6310269919348798, + "grad_norm": 0.08934248983860016, + "learning_rate": 1.1096898545645697e-05, + "loss": 8.702, + "step": 126360 + }, + { + "epoch": 0.6310769307598192, + "grad_norm": 0.09041931480169296, + "learning_rate": 1.1095396630704147e-05, + "loss": 8.7068, + "step": 126370 + }, + { + "epoch": 0.6311268695847587, + "grad_norm": 0.09372342377901077, + "learning_rate": 1.1093894715762598e-05, + "loss": 8.6993, + "step": 126380 + }, + { + "epoch": 0.6311768084096981, + "grad_norm": 0.09366505593061447, + "learning_rate": 1.1092392800821048e-05, + "loss": 8.7181, + "step": 126390 + }, + { + "epoch": 0.6312267472346376, + "grad_norm": 0.09063727408647537, + "learning_rate": 1.1090890885879498e-05, + "loss": 8.7059, + "step": 126400 + }, + { + "epoch": 0.631276686059577, + "grad_norm": 0.0880524069070816, + "learning_rate": 1.1089388970937945e-05, + "loss": 8.7184, + "step": 126410 + }, + { + "epoch": 0.6313266248845165, + "grad_norm": 0.09939765185117722, + "learning_rate": 1.1087887055996395e-05, + "loss": 8.7115, + "step": 126420 + }, + { + "epoch": 0.6313765637094559, + "grad_norm": 0.09165541082620621, + "learning_rate": 1.1086385141054845e-05, + "loss": 8.7044, + "step": 126430 + }, + { + "epoch": 0.6314265025343954, + "grad_norm": 0.08993414044380188, + "learning_rate": 1.1084883226113295e-05, + "loss": 8.7114, + "step": 126440 + }, + { + "epoch": 0.6314764413593348, + "grad_norm": 0.09997765719890594, + "learning_rate": 1.1083381311171745e-05, + "loss": 8.7137, + "step": 126450 + }, + { + "epoch": 0.6315263801842743, + "grad_norm": 0.09372782707214355, + "learning_rate": 1.1081879396230194e-05, + "loss": 8.6919, + "step": 126460 + }, + { + "epoch": 0.6315763190092137, + "grad_norm": 0.09590387344360352, + "learning_rate": 1.1080377481288642e-05, + "loss": 8.7069, + "step": 126470 + }, + { + "epoch": 0.6316262578341532, + "grad_norm": 0.09233605861663818, + "learning_rate": 1.1078875566347093e-05, + "loss": 8.694, + "step": 126480 + }, + { + "epoch": 0.6316761966590926, + "grad_norm": 0.09469950944185257, + "learning_rate": 1.1077373651405543e-05, + "loss": 8.7218, + "step": 126490 + }, + { + "epoch": 0.6317261354840321, + "grad_norm": 0.08625823259353638, + "learning_rate": 1.1075871736463993e-05, + "loss": 8.7083, + "step": 126500 + }, + { + "epoch": 0.6317760743089715, + "grad_norm": 0.09083887934684753, + "learning_rate": 1.1074369821522441e-05, + "loss": 8.6994, + "step": 126510 + }, + { + "epoch": 0.631826013133911, + "grad_norm": 0.097719706594944, + "learning_rate": 1.107286790658089e-05, + "loss": 8.7016, + "step": 126520 + }, + { + "epoch": 0.6318759519588504, + "grad_norm": 0.09654133021831512, + "learning_rate": 1.107136599163934e-05, + "loss": 8.7169, + "step": 126530 + }, + { + "epoch": 0.6319258907837899, + "grad_norm": 0.09275779873132706, + "learning_rate": 1.106986407669779e-05, + "loss": 8.7157, + "step": 126540 + }, + { + "epoch": 0.6319758296087293, + "grad_norm": 0.08812788128852844, + "learning_rate": 1.106836216175624e-05, + "loss": 8.6997, + "step": 126550 + }, + { + "epoch": 0.6320257684336688, + "grad_norm": 0.09277535229921341, + "learning_rate": 1.1066860246814689e-05, + "loss": 8.7109, + "step": 126560 + }, + { + "epoch": 0.6320757072586082, + "grad_norm": 0.09549902379512787, + "learning_rate": 1.1065358331873137e-05, + "loss": 8.701, + "step": 126570 + }, + { + "epoch": 0.6321256460835476, + "grad_norm": 0.08873405307531357, + "learning_rate": 1.1063856416931588e-05, + "loss": 8.7066, + "step": 126580 + }, + { + "epoch": 0.6321755849084871, + "grad_norm": 0.09165298938751221, + "learning_rate": 1.1062354501990038e-05, + "loss": 8.7016, + "step": 126590 + }, + { + "epoch": 0.6322255237334266, + "grad_norm": 0.09139636904001236, + "learning_rate": 1.1060852587048488e-05, + "loss": 8.7104, + "step": 126600 + }, + { + "epoch": 0.632275462558366, + "grad_norm": 0.09368384629487991, + "learning_rate": 1.1059350672106936e-05, + "loss": 8.7085, + "step": 126610 + }, + { + "epoch": 0.6323254013833054, + "grad_norm": 0.0939926728606224, + "learning_rate": 1.1057848757165387e-05, + "loss": 8.7038, + "step": 126620 + }, + { + "epoch": 0.6323753402082449, + "grad_norm": 0.0963914766907692, + "learning_rate": 1.1056346842223835e-05, + "loss": 8.7162, + "step": 126630 + }, + { + "epoch": 0.6324252790331844, + "grad_norm": 0.08765320479869843, + "learning_rate": 1.1054844927282285e-05, + "loss": 8.7177, + "step": 126640 + }, + { + "epoch": 0.6324752178581238, + "grad_norm": 0.09075529873371124, + "learning_rate": 1.1053343012340735e-05, + "loss": 8.7002, + "step": 126650 + }, + { + "epoch": 0.6325251566830632, + "grad_norm": 0.09381550550460815, + "learning_rate": 1.1051841097399184e-05, + "loss": 8.7145, + "step": 126660 + }, + { + "epoch": 0.6325750955080027, + "grad_norm": 0.09270758181810379, + "learning_rate": 1.1050339182457634e-05, + "loss": 8.7087, + "step": 126670 + }, + { + "epoch": 0.6326250343329421, + "grad_norm": 0.08957044035196304, + "learning_rate": 1.1048837267516083e-05, + "loss": 8.7222, + "step": 126680 + }, + { + "epoch": 0.6326749731578816, + "grad_norm": 0.09274608641862869, + "learning_rate": 1.1047335352574533e-05, + "loss": 8.7215, + "step": 126690 + }, + { + "epoch": 0.632724911982821, + "grad_norm": 0.08785900473594666, + "learning_rate": 1.1045833437632983e-05, + "loss": 8.7077, + "step": 126700 + }, + { + "epoch": 0.6327748508077605, + "grad_norm": 0.0963745191693306, + "learning_rate": 1.1044331522691431e-05, + "loss": 8.7214, + "step": 126710 + }, + { + "epoch": 0.6328247896327, + "grad_norm": 0.09362554550170898, + "learning_rate": 1.1042829607749882e-05, + "loss": 8.6988, + "step": 126720 + }, + { + "epoch": 0.6328747284576394, + "grad_norm": 0.09830887615680695, + "learning_rate": 1.104132769280833e-05, + "loss": 8.7249, + "step": 126730 + }, + { + "epoch": 0.6329246672825788, + "grad_norm": 0.09366270899772644, + "learning_rate": 1.103982577786678e-05, + "loss": 8.714, + "step": 126740 + }, + { + "epoch": 0.6329746061075183, + "grad_norm": 0.09500569850206375, + "learning_rate": 1.103832386292523e-05, + "loss": 8.7131, + "step": 126750 + }, + { + "epoch": 0.6330245449324577, + "grad_norm": 0.09202902019023895, + "learning_rate": 1.1036821947983679e-05, + "loss": 8.71, + "step": 126760 + }, + { + "epoch": 0.6330744837573972, + "grad_norm": 0.09248954057693481, + "learning_rate": 1.1035320033042129e-05, + "loss": 8.6971, + "step": 126770 + }, + { + "epoch": 0.6331244225823366, + "grad_norm": 0.09548656642436981, + "learning_rate": 1.103381811810058e-05, + "loss": 8.7115, + "step": 126780 + }, + { + "epoch": 0.6331743614072761, + "grad_norm": 0.09098765254020691, + "learning_rate": 1.1032316203159028e-05, + "loss": 8.6862, + "step": 126790 + }, + { + "epoch": 0.6332243002322155, + "grad_norm": 0.08989952504634857, + "learning_rate": 1.1030814288217478e-05, + "loss": 8.7133, + "step": 126800 + }, + { + "epoch": 0.633274239057155, + "grad_norm": 0.09226398915052414, + "learning_rate": 1.1029312373275926e-05, + "loss": 8.7104, + "step": 126810 + }, + { + "epoch": 0.6333241778820944, + "grad_norm": 0.09326343238353729, + "learning_rate": 1.1027810458334377e-05, + "loss": 8.7034, + "step": 126820 + }, + { + "epoch": 0.6333741167070339, + "grad_norm": 0.09733281284570694, + "learning_rate": 1.1026308543392827e-05, + "loss": 8.6897, + "step": 126830 + }, + { + "epoch": 0.6334240555319733, + "grad_norm": 0.0914359837770462, + "learning_rate": 1.1024806628451275e-05, + "loss": 8.7046, + "step": 126840 + }, + { + "epoch": 0.6334739943569128, + "grad_norm": 0.08934281766414642, + "learning_rate": 1.1023304713509725e-05, + "loss": 8.7141, + "step": 126850 + }, + { + "epoch": 0.6335239331818522, + "grad_norm": 0.09313992410898209, + "learning_rate": 1.1021802798568174e-05, + "loss": 8.7073, + "step": 126860 + }, + { + "epoch": 0.6335738720067917, + "grad_norm": 0.09446176141500473, + "learning_rate": 1.1020300883626624e-05, + "loss": 8.6979, + "step": 126870 + }, + { + "epoch": 0.6336238108317311, + "grad_norm": 0.09279874712228775, + "learning_rate": 1.1018798968685074e-05, + "loss": 8.6945, + "step": 126880 + }, + { + "epoch": 0.6336737496566706, + "grad_norm": 0.09727776795625687, + "learning_rate": 1.1017297053743523e-05, + "loss": 8.6937, + "step": 126890 + }, + { + "epoch": 0.63372368848161, + "grad_norm": 0.09010564535856247, + "learning_rate": 1.1015795138801973e-05, + "loss": 8.7025, + "step": 126900 + }, + { + "epoch": 0.6337736273065495, + "grad_norm": 0.08602187037467957, + "learning_rate": 1.1014293223860421e-05, + "loss": 8.7096, + "step": 126910 + }, + { + "epoch": 0.6338235661314889, + "grad_norm": 0.09525911509990692, + "learning_rate": 1.1012791308918872e-05, + "loss": 8.7116, + "step": 126920 + }, + { + "epoch": 0.6338735049564284, + "grad_norm": 0.08880488574504852, + "learning_rate": 1.1011289393977322e-05, + "loss": 8.7006, + "step": 126930 + }, + { + "epoch": 0.6339234437813678, + "grad_norm": 0.09625755250453949, + "learning_rate": 1.1009787479035772e-05, + "loss": 8.6934, + "step": 126940 + }, + { + "epoch": 0.6339733826063073, + "grad_norm": 0.0941665768623352, + "learning_rate": 1.100828556409422e-05, + "loss": 8.7106, + "step": 126950 + }, + { + "epoch": 0.6340233214312467, + "grad_norm": 0.08782870322465897, + "learning_rate": 1.1006783649152669e-05, + "loss": 8.7069, + "step": 126960 + }, + { + "epoch": 0.6340732602561862, + "grad_norm": 0.08948568999767303, + "learning_rate": 1.1005281734211119e-05, + "loss": 8.7111, + "step": 126970 + }, + { + "epoch": 0.6341231990811256, + "grad_norm": 0.09504511207342148, + "learning_rate": 1.100377981926957e-05, + "loss": 8.7132, + "step": 126980 + }, + { + "epoch": 0.6341731379060651, + "grad_norm": 0.09073743224143982, + "learning_rate": 1.100227790432802e-05, + "loss": 8.6928, + "step": 126990 + }, + { + "epoch": 0.6342230767310045, + "grad_norm": 0.09450805932283401, + "learning_rate": 1.1000775989386468e-05, + "loss": 8.6902, + "step": 127000 + }, + { + "epoch": 0.634273015555944, + "grad_norm": 0.08821328729391098, + "learning_rate": 1.0999274074444916e-05, + "loss": 8.7069, + "step": 127010 + }, + { + "epoch": 0.6343229543808834, + "grad_norm": 0.09215594828128815, + "learning_rate": 1.0997772159503367e-05, + "loss": 8.6957, + "step": 127020 + }, + { + "epoch": 0.6343728932058229, + "grad_norm": 0.09893883019685745, + "learning_rate": 1.0996270244561817e-05, + "loss": 8.6884, + "step": 127030 + }, + { + "epoch": 0.6344228320307623, + "grad_norm": 0.0937436893582344, + "learning_rate": 1.0994768329620267e-05, + "loss": 8.7161, + "step": 127040 + }, + { + "epoch": 0.6344727708557018, + "grad_norm": 0.08972315490245819, + "learning_rate": 1.0993266414678715e-05, + "loss": 8.6941, + "step": 127050 + }, + { + "epoch": 0.6345227096806412, + "grad_norm": 0.09105207026004791, + "learning_rate": 1.0991764499737164e-05, + "loss": 8.7012, + "step": 127060 + }, + { + "epoch": 0.6345726485055807, + "grad_norm": 0.09180324524641037, + "learning_rate": 1.0990262584795614e-05, + "loss": 8.6961, + "step": 127070 + }, + { + "epoch": 0.6346225873305201, + "grad_norm": 0.09050474315881729, + "learning_rate": 1.0988760669854064e-05, + "loss": 8.7129, + "step": 127080 + }, + { + "epoch": 0.6346725261554595, + "grad_norm": 0.08975604176521301, + "learning_rate": 1.0987258754912514e-05, + "loss": 8.7069, + "step": 127090 + }, + { + "epoch": 0.634722464980399, + "grad_norm": 0.09653442353010178, + "learning_rate": 1.0985756839970965e-05, + "loss": 8.6998, + "step": 127100 + }, + { + "epoch": 0.6347724038053385, + "grad_norm": 0.09031674265861511, + "learning_rate": 1.0984254925029411e-05, + "loss": 8.7128, + "step": 127110 + }, + { + "epoch": 0.6348223426302779, + "grad_norm": 0.09196629375219345, + "learning_rate": 1.0982753010087862e-05, + "loss": 8.7115, + "step": 127120 + }, + { + "epoch": 0.6348722814552173, + "grad_norm": 0.09125792980194092, + "learning_rate": 1.0981251095146312e-05, + "loss": 8.6958, + "step": 127130 + }, + { + "epoch": 0.6349222202801568, + "grad_norm": 0.09218353778123856, + "learning_rate": 1.0979749180204762e-05, + "loss": 8.7032, + "step": 127140 + }, + { + "epoch": 0.6349721591050963, + "grad_norm": 0.09349720180034637, + "learning_rate": 1.0978247265263212e-05, + "loss": 8.698, + "step": 127150 + }, + { + "epoch": 0.6350220979300357, + "grad_norm": 0.0936102643609047, + "learning_rate": 1.0976745350321659e-05, + "loss": 8.6991, + "step": 127160 + }, + { + "epoch": 0.6350720367549751, + "grad_norm": 0.0921725407242775, + "learning_rate": 1.0975243435380109e-05, + "loss": 8.7104, + "step": 127170 + }, + { + "epoch": 0.6351219755799146, + "grad_norm": 0.09616363793611526, + "learning_rate": 1.097374152043856e-05, + "loss": 8.7191, + "step": 127180 + }, + { + "epoch": 0.6351719144048541, + "grad_norm": 0.09801583737134933, + "learning_rate": 1.097223960549701e-05, + "loss": 8.702, + "step": 127190 + }, + { + "epoch": 0.6352218532297935, + "grad_norm": 0.09138066321611404, + "learning_rate": 1.097073769055546e-05, + "loss": 8.7066, + "step": 127200 + }, + { + "epoch": 0.6352717920547329, + "grad_norm": 0.09149125218391418, + "learning_rate": 1.0969235775613906e-05, + "loss": 8.7125, + "step": 127210 + }, + { + "epoch": 0.6353217308796724, + "grad_norm": 0.0975177139043808, + "learning_rate": 1.0967733860672357e-05, + "loss": 8.7044, + "step": 127220 + }, + { + "epoch": 0.6353716697046119, + "grad_norm": 0.08766981214284897, + "learning_rate": 1.0966231945730807e-05, + "loss": 8.7172, + "step": 127230 + }, + { + "epoch": 0.6354216085295513, + "grad_norm": 0.08835364878177643, + "learning_rate": 1.0964730030789257e-05, + "loss": 8.7012, + "step": 127240 + }, + { + "epoch": 0.6354715473544907, + "grad_norm": 0.09096872806549072, + "learning_rate": 1.0963228115847707e-05, + "loss": 8.702, + "step": 127250 + }, + { + "epoch": 0.6355214861794302, + "grad_norm": 0.09311988204717636, + "learning_rate": 1.0961726200906156e-05, + "loss": 8.6887, + "step": 127260 + }, + { + "epoch": 0.6355714250043697, + "grad_norm": 0.0934610664844513, + "learning_rate": 1.0960224285964604e-05, + "loss": 8.7063, + "step": 127270 + }, + { + "epoch": 0.6356213638293091, + "grad_norm": 0.098729208111763, + "learning_rate": 1.0958722371023054e-05, + "loss": 8.7185, + "step": 127280 + }, + { + "epoch": 0.6356713026542485, + "grad_norm": 0.08784567564725876, + "learning_rate": 1.0957220456081504e-05, + "loss": 8.6937, + "step": 127290 + }, + { + "epoch": 0.635721241479188, + "grad_norm": 0.08716242760419846, + "learning_rate": 1.0955718541139955e-05, + "loss": 8.7219, + "step": 127300 + }, + { + "epoch": 0.6357711803041275, + "grad_norm": 0.093071848154068, + "learning_rate": 1.0954216626198403e-05, + "loss": 8.7022, + "step": 127310 + }, + { + "epoch": 0.6358211191290669, + "grad_norm": 0.09252569824457169, + "learning_rate": 1.0952714711256852e-05, + "loss": 8.7088, + "step": 127320 + }, + { + "epoch": 0.6358710579540063, + "grad_norm": 0.09083006531000137, + "learning_rate": 1.0951212796315302e-05, + "loss": 8.7163, + "step": 127330 + }, + { + "epoch": 0.6359209967789458, + "grad_norm": 0.08883119374513626, + "learning_rate": 1.0949710881373752e-05, + "loss": 8.6963, + "step": 127340 + }, + { + "epoch": 0.6359709356038853, + "grad_norm": 0.09394201636314392, + "learning_rate": 1.0948208966432202e-05, + "loss": 8.7119, + "step": 127350 + }, + { + "epoch": 0.6360208744288247, + "grad_norm": 0.10139136761426926, + "learning_rate": 1.094670705149065e-05, + "loss": 8.704, + "step": 127360 + }, + { + "epoch": 0.6360708132537641, + "grad_norm": 0.09166630357503891, + "learning_rate": 1.0945205136549099e-05, + "loss": 8.6997, + "step": 127370 + }, + { + "epoch": 0.6361207520787036, + "grad_norm": 0.10158146917819977, + "learning_rate": 1.094370322160755e-05, + "loss": 8.7121, + "step": 127380 + }, + { + "epoch": 0.6361706909036431, + "grad_norm": 0.08964638411998749, + "learning_rate": 1.0942201306666e-05, + "loss": 8.6963, + "step": 127390 + }, + { + "epoch": 0.6362206297285825, + "grad_norm": 0.09380380064249039, + "learning_rate": 1.094069939172445e-05, + "loss": 8.7023, + "step": 127400 + }, + { + "epoch": 0.6362705685535219, + "grad_norm": 0.09249448776245117, + "learning_rate": 1.0939197476782898e-05, + "loss": 8.7212, + "step": 127410 + }, + { + "epoch": 0.6363205073784614, + "grad_norm": 0.09347539395093918, + "learning_rate": 1.0937695561841348e-05, + "loss": 8.6968, + "step": 127420 + }, + { + "epoch": 0.6363704462034009, + "grad_norm": 0.09082402288913727, + "learning_rate": 1.0936193646899797e-05, + "loss": 8.7038, + "step": 127430 + }, + { + "epoch": 0.6364203850283403, + "grad_norm": 0.09499524533748627, + "learning_rate": 1.0934691731958247e-05, + "loss": 8.7015, + "step": 127440 + }, + { + "epoch": 0.6364703238532797, + "grad_norm": 0.09662103652954102, + "learning_rate": 1.0933189817016697e-05, + "loss": 8.6867, + "step": 127450 + }, + { + "epoch": 0.6365202626782192, + "grad_norm": 0.10028364509344101, + "learning_rate": 1.0931687902075146e-05, + "loss": 8.697, + "step": 127460 + }, + { + "epoch": 0.6365702015031587, + "grad_norm": 0.09399396926164627, + "learning_rate": 1.0930185987133596e-05, + "loss": 8.7017, + "step": 127470 + }, + { + "epoch": 0.6366201403280981, + "grad_norm": 0.0959121435880661, + "learning_rate": 1.0928684072192044e-05, + "loss": 8.6941, + "step": 127480 + }, + { + "epoch": 0.6366700791530375, + "grad_norm": 0.09251075237989426, + "learning_rate": 1.0927182157250494e-05, + "loss": 8.6763, + "step": 127490 + }, + { + "epoch": 0.636720017977977, + "grad_norm": 0.09388341009616852, + "learning_rate": 1.0925680242308945e-05, + "loss": 8.698, + "step": 127500 + }, + { + "epoch": 0.6367699568029165, + "grad_norm": 0.09439099580049515, + "learning_rate": 1.0924178327367393e-05, + "loss": 8.721, + "step": 127510 + }, + { + "epoch": 0.6368198956278559, + "grad_norm": 0.0929148718714714, + "learning_rate": 1.0922676412425843e-05, + "loss": 8.7023, + "step": 127520 + }, + { + "epoch": 0.6368698344527953, + "grad_norm": 0.08788886666297913, + "learning_rate": 1.0921174497484292e-05, + "loss": 8.7087, + "step": 127530 + }, + { + "epoch": 0.6369197732777347, + "grad_norm": 0.09236535429954529, + "learning_rate": 1.0919672582542742e-05, + "loss": 8.7026, + "step": 127540 + }, + { + "epoch": 0.6369697121026743, + "grad_norm": 0.08757699280977249, + "learning_rate": 1.0918170667601192e-05, + "loss": 8.6932, + "step": 127550 + }, + { + "epoch": 0.6370196509276137, + "grad_norm": 0.09204018861055374, + "learning_rate": 1.0916668752659642e-05, + "loss": 8.7039, + "step": 127560 + }, + { + "epoch": 0.6370695897525531, + "grad_norm": 0.0933895856142044, + "learning_rate": 1.091516683771809e-05, + "loss": 8.6862, + "step": 127570 + }, + { + "epoch": 0.6371195285774925, + "grad_norm": 0.09441374242305756, + "learning_rate": 1.0913664922776541e-05, + "loss": 8.712, + "step": 127580 + }, + { + "epoch": 0.637169467402432, + "grad_norm": 0.08799993246793747, + "learning_rate": 1.091216300783499e-05, + "loss": 8.69, + "step": 127590 + }, + { + "epoch": 0.6372194062273715, + "grad_norm": 0.08958584070205688, + "learning_rate": 1.091066109289344e-05, + "loss": 8.6959, + "step": 127600 + }, + { + "epoch": 0.6372693450523109, + "grad_norm": 0.09037229418754578, + "learning_rate": 1.090915917795189e-05, + "loss": 8.7096, + "step": 127610 + }, + { + "epoch": 0.6373192838772503, + "grad_norm": 0.08842466026544571, + "learning_rate": 1.0907657263010338e-05, + "loss": 8.7115, + "step": 127620 + }, + { + "epoch": 0.6373692227021898, + "grad_norm": 0.09405070543289185, + "learning_rate": 1.0906155348068788e-05, + "loss": 8.7059, + "step": 127630 + }, + { + "epoch": 0.6374191615271293, + "grad_norm": 0.09778458625078201, + "learning_rate": 1.0904653433127237e-05, + "loss": 8.7081, + "step": 127640 + }, + { + "epoch": 0.6374691003520687, + "grad_norm": 0.09865093231201172, + "learning_rate": 1.0903151518185687e-05, + "loss": 8.7095, + "step": 127650 + }, + { + "epoch": 0.6375190391770081, + "grad_norm": 0.09222712367773056, + "learning_rate": 1.0901649603244137e-05, + "loss": 8.6945, + "step": 127660 + }, + { + "epoch": 0.6375689780019476, + "grad_norm": 0.09677907079458237, + "learning_rate": 1.0900147688302586e-05, + "loss": 8.6983, + "step": 127670 + }, + { + "epoch": 0.6376189168268871, + "grad_norm": 0.08906341344118118, + "learning_rate": 1.0898645773361036e-05, + "loss": 8.7033, + "step": 127680 + }, + { + "epoch": 0.6376688556518265, + "grad_norm": 0.09072484076023102, + "learning_rate": 1.0897143858419484e-05, + "loss": 8.7092, + "step": 127690 + }, + { + "epoch": 0.6377187944767659, + "grad_norm": 0.09344867616891861, + "learning_rate": 1.0895641943477935e-05, + "loss": 8.6877, + "step": 127700 + }, + { + "epoch": 0.6377687333017054, + "grad_norm": 0.093172587454319, + "learning_rate": 1.0894140028536385e-05, + "loss": 8.679, + "step": 127710 + }, + { + "epoch": 0.6378186721266449, + "grad_norm": 0.08979550749063492, + "learning_rate": 1.0892638113594833e-05, + "loss": 8.7013, + "step": 127720 + }, + { + "epoch": 0.6378686109515843, + "grad_norm": 0.09762118011713028, + "learning_rate": 1.0891136198653283e-05, + "loss": 8.7104, + "step": 127730 + }, + { + "epoch": 0.6379185497765237, + "grad_norm": 0.09290577471256256, + "learning_rate": 1.0889634283711734e-05, + "loss": 8.7004, + "step": 127740 + }, + { + "epoch": 0.6379684886014632, + "grad_norm": 0.0906912311911583, + "learning_rate": 1.0888132368770182e-05, + "loss": 8.7001, + "step": 127750 + }, + { + "epoch": 0.6380184274264027, + "grad_norm": 0.08934336155653, + "learning_rate": 1.0886630453828632e-05, + "loss": 8.6983, + "step": 127760 + }, + { + "epoch": 0.6380683662513421, + "grad_norm": 0.09342038631439209, + "learning_rate": 1.088512853888708e-05, + "loss": 8.6941, + "step": 127770 + }, + { + "epoch": 0.6381183050762815, + "grad_norm": 0.0915367603302002, + "learning_rate": 1.0883626623945531e-05, + "loss": 8.696, + "step": 127780 + }, + { + "epoch": 0.638168243901221, + "grad_norm": 0.08702074736356735, + "learning_rate": 1.0882124709003981e-05, + "loss": 8.6942, + "step": 127790 + }, + { + "epoch": 0.6382181827261605, + "grad_norm": 0.09180362522602081, + "learning_rate": 1.088062279406243e-05, + "loss": 8.7023, + "step": 127800 + }, + { + "epoch": 0.6382681215510999, + "grad_norm": 0.0904599204659462, + "learning_rate": 1.087912087912088e-05, + "loss": 8.7151, + "step": 127810 + }, + { + "epoch": 0.6383180603760393, + "grad_norm": 0.09203129261732101, + "learning_rate": 1.0877618964179328e-05, + "loss": 8.7078, + "step": 127820 + }, + { + "epoch": 0.6383679992009788, + "grad_norm": 0.08823869377374649, + "learning_rate": 1.0876117049237779e-05, + "loss": 8.7192, + "step": 127830 + }, + { + "epoch": 0.6384179380259183, + "grad_norm": 0.09404473751783371, + "learning_rate": 1.0874615134296229e-05, + "loss": 8.7175, + "step": 127840 + }, + { + "epoch": 0.6384678768508577, + "grad_norm": 0.09303843975067139, + "learning_rate": 1.0873113219354677e-05, + "loss": 8.7135, + "step": 127850 + }, + { + "epoch": 0.6385178156757971, + "grad_norm": 0.09125247597694397, + "learning_rate": 1.0871611304413127e-05, + "loss": 8.7052, + "step": 127860 + }, + { + "epoch": 0.6385677545007366, + "grad_norm": 0.0958453044295311, + "learning_rate": 1.0870109389471576e-05, + "loss": 8.6845, + "step": 127870 + }, + { + "epoch": 0.6386176933256761, + "grad_norm": 0.09031480550765991, + "learning_rate": 1.0868607474530026e-05, + "loss": 8.7015, + "step": 127880 + }, + { + "epoch": 0.6386676321506155, + "grad_norm": 0.10492537170648575, + "learning_rate": 1.0867105559588476e-05, + "loss": 8.7136, + "step": 127890 + }, + { + "epoch": 0.6387175709755549, + "grad_norm": 0.09470899403095245, + "learning_rate": 1.0865603644646926e-05, + "loss": 8.6868, + "step": 127900 + }, + { + "epoch": 0.6387675098004943, + "grad_norm": 0.09199722856283188, + "learning_rate": 1.0864101729705375e-05, + "loss": 8.6939, + "step": 127910 + }, + { + "epoch": 0.6388174486254339, + "grad_norm": 0.08992622792720795, + "learning_rate": 1.0862599814763823e-05, + "loss": 8.6928, + "step": 127920 + }, + { + "epoch": 0.6388673874503733, + "grad_norm": 0.09477774053812027, + "learning_rate": 1.0861097899822274e-05, + "loss": 8.6969, + "step": 127930 + }, + { + "epoch": 0.6389173262753127, + "grad_norm": 0.09002884477376938, + "learning_rate": 1.0859595984880724e-05, + "loss": 8.6955, + "step": 127940 + }, + { + "epoch": 0.6389672651002521, + "grad_norm": 0.09048038721084595, + "learning_rate": 1.0858094069939174e-05, + "loss": 8.7043, + "step": 127950 + }, + { + "epoch": 0.6390172039251917, + "grad_norm": 0.08667916804552078, + "learning_rate": 1.0856592154997622e-05, + "loss": 8.6913, + "step": 127960 + }, + { + "epoch": 0.6390671427501311, + "grad_norm": 0.09208723902702332, + "learning_rate": 1.085509024005607e-05, + "loss": 8.7021, + "step": 127970 + }, + { + "epoch": 0.6391170815750705, + "grad_norm": 0.09197891503572464, + "learning_rate": 1.0853588325114521e-05, + "loss": 8.7092, + "step": 127980 + }, + { + "epoch": 0.6391670204000099, + "grad_norm": 0.09264016896486282, + "learning_rate": 1.0852086410172971e-05, + "loss": 8.6802, + "step": 127990 + }, + { + "epoch": 0.6392169592249495, + "grad_norm": 0.0906987339258194, + "learning_rate": 1.0850584495231421e-05, + "loss": 8.6835, + "step": 128000 + }, + { + "epoch": 0.6392668980498889, + "grad_norm": 0.09663955122232437, + "learning_rate": 1.084908258028987e-05, + "loss": 8.6855, + "step": 128010 + }, + { + "epoch": 0.6393168368748283, + "grad_norm": 0.08810370415449142, + "learning_rate": 1.0847580665348318e-05, + "loss": 8.7045, + "step": 128020 + }, + { + "epoch": 0.6393667756997677, + "grad_norm": 0.09239162504673004, + "learning_rate": 1.0846078750406769e-05, + "loss": 8.7063, + "step": 128030 + }, + { + "epoch": 0.6394167145247073, + "grad_norm": 0.09910788387060165, + "learning_rate": 1.0844576835465219e-05, + "loss": 8.6895, + "step": 128040 + }, + { + "epoch": 0.6394666533496467, + "grad_norm": 0.09570180624723434, + "learning_rate": 1.0843074920523669e-05, + "loss": 8.7115, + "step": 128050 + }, + { + "epoch": 0.6395165921745861, + "grad_norm": 0.09080067276954651, + "learning_rate": 1.0841573005582119e-05, + "loss": 8.6992, + "step": 128060 + }, + { + "epoch": 0.6395665309995255, + "grad_norm": 0.09045545756816864, + "learning_rate": 1.0840071090640566e-05, + "loss": 8.7097, + "step": 128070 + }, + { + "epoch": 0.6396164698244651, + "grad_norm": 0.09077303856611252, + "learning_rate": 1.0838569175699016e-05, + "loss": 8.6892, + "step": 128080 + }, + { + "epoch": 0.6396664086494045, + "grad_norm": 0.09273795038461685, + "learning_rate": 1.0837067260757466e-05, + "loss": 8.6948, + "step": 128090 + }, + { + "epoch": 0.6397163474743439, + "grad_norm": 0.09408220648765564, + "learning_rate": 1.0835565345815916e-05, + "loss": 8.6957, + "step": 128100 + }, + { + "epoch": 0.6397662862992833, + "grad_norm": 0.09319932758808136, + "learning_rate": 1.0834063430874367e-05, + "loss": 8.6996, + "step": 128110 + }, + { + "epoch": 0.6398162251242229, + "grad_norm": 0.09193559736013412, + "learning_rate": 1.0832561515932813e-05, + "loss": 8.7085, + "step": 128120 + }, + { + "epoch": 0.6398661639491623, + "grad_norm": 0.09312306344509125, + "learning_rate": 1.0831059600991264e-05, + "loss": 8.6832, + "step": 128130 + }, + { + "epoch": 0.6399161027741017, + "grad_norm": 0.09437835961580276, + "learning_rate": 1.0829557686049714e-05, + "loss": 8.7107, + "step": 128140 + }, + { + "epoch": 0.6399660415990411, + "grad_norm": 0.09023882448673248, + "learning_rate": 1.0828055771108164e-05, + "loss": 8.6888, + "step": 128150 + }, + { + "epoch": 0.6400159804239807, + "grad_norm": 0.10062866657972336, + "learning_rate": 1.0826553856166614e-05, + "loss": 8.6884, + "step": 128160 + }, + { + "epoch": 0.6400659192489201, + "grad_norm": 0.09322546422481537, + "learning_rate": 1.0825051941225061e-05, + "loss": 8.7025, + "step": 128170 + }, + { + "epoch": 0.6401158580738595, + "grad_norm": 0.09134891629219055, + "learning_rate": 1.0823550026283511e-05, + "loss": 8.7113, + "step": 128180 + }, + { + "epoch": 0.6401657968987989, + "grad_norm": 0.0888613611459732, + "learning_rate": 1.0822048111341961e-05, + "loss": 8.6908, + "step": 128190 + }, + { + "epoch": 0.6402157357237385, + "grad_norm": 0.0894928053021431, + "learning_rate": 1.0820546196400411e-05, + "loss": 8.7003, + "step": 128200 + }, + { + "epoch": 0.6402656745486779, + "grad_norm": 0.09011103212833405, + "learning_rate": 1.0819044281458862e-05, + "loss": 8.6731, + "step": 128210 + }, + { + "epoch": 0.6403156133736173, + "grad_norm": 0.08623740077018738, + "learning_rate": 1.081754236651731e-05, + "loss": 8.699, + "step": 128220 + }, + { + "epoch": 0.6403655521985567, + "grad_norm": 0.09192583709955215, + "learning_rate": 1.0816040451575759e-05, + "loss": 8.7016, + "step": 128230 + }, + { + "epoch": 0.6404154910234963, + "grad_norm": 0.0926465094089508, + "learning_rate": 1.0814538536634209e-05, + "loss": 8.69, + "step": 128240 + }, + { + "epoch": 0.6404654298484357, + "grad_norm": 0.09219156205654144, + "learning_rate": 1.0813036621692659e-05, + "loss": 8.6993, + "step": 128250 + }, + { + "epoch": 0.6405153686733751, + "grad_norm": 0.09209728986024857, + "learning_rate": 1.0811534706751109e-05, + "loss": 8.6976, + "step": 128260 + }, + { + "epoch": 0.6405653074983145, + "grad_norm": 0.09860700368881226, + "learning_rate": 1.0810032791809558e-05, + "loss": 8.6936, + "step": 128270 + }, + { + "epoch": 0.6406152463232541, + "grad_norm": 0.09730758517980576, + "learning_rate": 1.0808530876868006e-05, + "loss": 8.7031, + "step": 128280 + }, + { + "epoch": 0.6406651851481935, + "grad_norm": 0.09535746276378632, + "learning_rate": 1.0807028961926456e-05, + "loss": 8.6945, + "step": 128290 + }, + { + "epoch": 0.6407151239731329, + "grad_norm": 0.08811869472265244, + "learning_rate": 1.0805527046984906e-05, + "loss": 8.7013, + "step": 128300 + }, + { + "epoch": 0.6407650627980723, + "grad_norm": 0.08866571635007858, + "learning_rate": 1.0804025132043357e-05, + "loss": 8.7039, + "step": 128310 + }, + { + "epoch": 0.6408150016230119, + "grad_norm": 0.0883893147110939, + "learning_rate": 1.0802523217101805e-05, + "loss": 8.7151, + "step": 128320 + }, + { + "epoch": 0.6408649404479513, + "grad_norm": 0.09385412186384201, + "learning_rate": 1.0801021302160254e-05, + "loss": 8.6897, + "step": 128330 + }, + { + "epoch": 0.6409148792728907, + "grad_norm": 0.09245074540376663, + "learning_rate": 1.0799519387218704e-05, + "loss": 8.7014, + "step": 128340 + }, + { + "epoch": 0.6409648180978301, + "grad_norm": 0.09483639895915985, + "learning_rate": 1.0798017472277154e-05, + "loss": 8.6969, + "step": 128350 + }, + { + "epoch": 0.6410147569227697, + "grad_norm": 0.09574125707149506, + "learning_rate": 1.0796515557335604e-05, + "loss": 8.6871, + "step": 128360 + }, + { + "epoch": 0.6410646957477091, + "grad_norm": 0.09570538997650146, + "learning_rate": 1.0795013642394053e-05, + "loss": 8.6784, + "step": 128370 + }, + { + "epoch": 0.6411146345726485, + "grad_norm": 0.08854909241199493, + "learning_rate": 1.0793511727452503e-05, + "loss": 8.7082, + "step": 128380 + }, + { + "epoch": 0.6411645733975879, + "grad_norm": 0.09440670907497406, + "learning_rate": 1.0792009812510951e-05, + "loss": 8.7027, + "step": 128390 + }, + { + "epoch": 0.6412145122225275, + "grad_norm": 0.08996603637933731, + "learning_rate": 1.0790507897569401e-05, + "loss": 8.7015, + "step": 128400 + }, + { + "epoch": 0.6412644510474669, + "grad_norm": 0.0914885401725769, + "learning_rate": 1.0789005982627852e-05, + "loss": 8.698, + "step": 128410 + }, + { + "epoch": 0.6413143898724063, + "grad_norm": 0.08596347272396088, + "learning_rate": 1.07875040676863e-05, + "loss": 8.7038, + "step": 128420 + }, + { + "epoch": 0.6413643286973457, + "grad_norm": 0.09728407114744186, + "learning_rate": 1.078600215274475e-05, + "loss": 8.6863, + "step": 128430 + }, + { + "epoch": 0.6414142675222853, + "grad_norm": 0.08998657763004303, + "learning_rate": 1.0784500237803199e-05, + "loss": 8.6925, + "step": 128440 + }, + { + "epoch": 0.6414642063472247, + "grad_norm": 0.09361448884010315, + "learning_rate": 1.0782998322861649e-05, + "loss": 8.7054, + "step": 128450 + }, + { + "epoch": 0.6415141451721641, + "grad_norm": 0.09714844077825546, + "learning_rate": 1.0781496407920099e-05, + "loss": 8.6882, + "step": 128460 + }, + { + "epoch": 0.6415640839971035, + "grad_norm": 0.09411020576953888, + "learning_rate": 1.0779994492978548e-05, + "loss": 8.6916, + "step": 128470 + }, + { + "epoch": 0.641614022822043, + "grad_norm": 0.09303989261388779, + "learning_rate": 1.0778492578036998e-05, + "loss": 8.6849, + "step": 128480 + }, + { + "epoch": 0.6416639616469825, + "grad_norm": 0.0919375792145729, + "learning_rate": 1.0776990663095446e-05, + "loss": 8.6868, + "step": 128490 + }, + { + "epoch": 0.6417139004719219, + "grad_norm": 0.08758861571550369, + "learning_rate": 1.0775488748153896e-05, + "loss": 8.715, + "step": 128500 + }, + { + "epoch": 0.6417638392968613, + "grad_norm": 0.09430398046970367, + "learning_rate": 1.0773986833212347e-05, + "loss": 8.6882, + "step": 128510 + }, + { + "epoch": 0.6418137781218008, + "grad_norm": 0.09385448694229126, + "learning_rate": 1.0772484918270795e-05, + "loss": 8.699, + "step": 128520 + }, + { + "epoch": 0.6418637169467403, + "grad_norm": 0.08902585506439209, + "learning_rate": 1.0770983003329245e-05, + "loss": 8.714, + "step": 128530 + }, + { + "epoch": 0.6419136557716797, + "grad_norm": 0.08999849855899811, + "learning_rate": 1.0769481088387695e-05, + "loss": 8.6958, + "step": 128540 + }, + { + "epoch": 0.6419635945966191, + "grad_norm": 0.09851477295160294, + "learning_rate": 1.0767979173446144e-05, + "loss": 8.7072, + "step": 128550 + }, + { + "epoch": 0.6420135334215585, + "grad_norm": 0.0901130884885788, + "learning_rate": 1.0766477258504594e-05, + "loss": 8.7057, + "step": 128560 + }, + { + "epoch": 0.6420634722464981, + "grad_norm": 0.0934019386768341, + "learning_rate": 1.0764975343563043e-05, + "loss": 8.6784, + "step": 128570 + }, + { + "epoch": 0.6421134110714375, + "grad_norm": 0.0917850062251091, + "learning_rate": 1.0763473428621493e-05, + "loss": 8.696, + "step": 128580 + }, + { + "epoch": 0.6421633498963769, + "grad_norm": 0.09072545170783997, + "learning_rate": 1.0761971513679943e-05, + "loss": 8.7021, + "step": 128590 + }, + { + "epoch": 0.6422132887213163, + "grad_norm": 0.08813299983739853, + "learning_rate": 1.0760469598738391e-05, + "loss": 8.6961, + "step": 128600 + }, + { + "epoch": 0.6422632275462559, + "grad_norm": 0.08745348453521729, + "learning_rate": 1.0758967683796842e-05, + "loss": 8.678, + "step": 128610 + }, + { + "epoch": 0.6423131663711953, + "grad_norm": 0.09347406774759293, + "learning_rate": 1.075746576885529e-05, + "loss": 8.7121, + "step": 128620 + }, + { + "epoch": 0.6423631051961347, + "grad_norm": 0.09024196863174438, + "learning_rate": 1.075596385391374e-05, + "loss": 8.6969, + "step": 128630 + }, + { + "epoch": 0.6424130440210741, + "grad_norm": 0.0957035943865776, + "learning_rate": 1.075446193897219e-05, + "loss": 8.6935, + "step": 128640 + }, + { + "epoch": 0.6424629828460137, + "grad_norm": 0.09494053572416306, + "learning_rate": 1.0752960024030639e-05, + "loss": 8.6902, + "step": 128650 + }, + { + "epoch": 0.6425129216709531, + "grad_norm": 0.09406720846891403, + "learning_rate": 1.0751458109089089e-05, + "loss": 8.6785, + "step": 128660 + }, + { + "epoch": 0.6425628604958925, + "grad_norm": 0.10135558992624283, + "learning_rate": 1.0749956194147538e-05, + "loss": 8.6853, + "step": 128670 + }, + { + "epoch": 0.6426127993208319, + "grad_norm": 0.09328481554985046, + "learning_rate": 1.0748454279205988e-05, + "loss": 8.6928, + "step": 128680 + }, + { + "epoch": 0.6426627381457715, + "grad_norm": 0.09335169196128845, + "learning_rate": 1.0746952364264438e-05, + "loss": 8.7002, + "step": 128690 + }, + { + "epoch": 0.6427126769707109, + "grad_norm": 0.09129825979471207, + "learning_rate": 1.0745450449322888e-05, + "loss": 8.7094, + "step": 128700 + }, + { + "epoch": 0.6427626157956503, + "grad_norm": 0.09808053821325302, + "learning_rate": 1.0743948534381337e-05, + "loss": 8.693, + "step": 128710 + }, + { + "epoch": 0.6428125546205897, + "grad_norm": 0.0911967009305954, + "learning_rate": 1.0742446619439785e-05, + "loss": 8.7, + "step": 128720 + }, + { + "epoch": 0.6428624934455293, + "grad_norm": 0.09128003567457199, + "learning_rate": 1.0740944704498235e-05, + "loss": 8.6906, + "step": 128730 + }, + { + "epoch": 0.6429124322704687, + "grad_norm": 0.09504960477352142, + "learning_rate": 1.0739442789556685e-05, + "loss": 8.7072, + "step": 128740 + }, + { + "epoch": 0.6429623710954081, + "grad_norm": 0.09051109105348587, + "learning_rate": 1.0737940874615136e-05, + "loss": 8.692, + "step": 128750 + }, + { + "epoch": 0.6430123099203475, + "grad_norm": 0.09383983165025711, + "learning_rate": 1.0736438959673584e-05, + "loss": 8.6927, + "step": 128760 + }, + { + "epoch": 0.6430622487452871, + "grad_norm": 0.09124093502759933, + "learning_rate": 1.0734937044732033e-05, + "loss": 8.6917, + "step": 128770 + }, + { + "epoch": 0.6431121875702265, + "grad_norm": 0.09348580986261368, + "learning_rate": 1.0733435129790483e-05, + "loss": 8.6897, + "step": 128780 + }, + { + "epoch": 0.6431621263951659, + "grad_norm": 0.08800901472568512, + "learning_rate": 1.0731933214848933e-05, + "loss": 8.692, + "step": 128790 + }, + { + "epoch": 0.6432120652201053, + "grad_norm": 0.08735651522874832, + "learning_rate": 1.0730431299907383e-05, + "loss": 8.697, + "step": 128800 + }, + { + "epoch": 0.6432620040450449, + "grad_norm": 0.09434789419174194, + "learning_rate": 1.0728929384965832e-05, + "loss": 8.6856, + "step": 128810 + }, + { + "epoch": 0.6433119428699843, + "grad_norm": 0.09529183804988861, + "learning_rate": 1.072742747002428e-05, + "loss": 8.7082, + "step": 128820 + }, + { + "epoch": 0.6433618816949237, + "grad_norm": 0.08833926171064377, + "learning_rate": 1.072592555508273e-05, + "loss": 8.6816, + "step": 128830 + }, + { + "epoch": 0.6434118205198631, + "grad_norm": 0.09038601070642471, + "learning_rate": 1.072442364014118e-05, + "loss": 8.6931, + "step": 128840 + }, + { + "epoch": 0.6434617593448027, + "grad_norm": 0.09003665298223495, + "learning_rate": 1.072292172519963e-05, + "loss": 8.685, + "step": 128850 + }, + { + "epoch": 0.6435116981697421, + "grad_norm": 0.08987587690353394, + "learning_rate": 1.0721419810258079e-05, + "loss": 8.707, + "step": 128860 + }, + { + "epoch": 0.6435616369946815, + "grad_norm": 0.09192398190498352, + "learning_rate": 1.0719917895316528e-05, + "loss": 8.6899, + "step": 128870 + }, + { + "epoch": 0.6436115758196209, + "grad_norm": 0.09186169505119324, + "learning_rate": 1.0718415980374978e-05, + "loss": 8.682, + "step": 128880 + }, + { + "epoch": 0.6436615146445605, + "grad_norm": 0.0946180447936058, + "learning_rate": 1.0716914065433428e-05, + "loss": 8.6664, + "step": 128890 + }, + { + "epoch": 0.6437114534694999, + "grad_norm": 0.08595947921276093, + "learning_rate": 1.0715412150491878e-05, + "loss": 8.6969, + "step": 128900 + }, + { + "epoch": 0.6437613922944393, + "grad_norm": 0.0891309604048729, + "learning_rate": 1.0713910235550328e-05, + "loss": 8.7002, + "step": 128910 + }, + { + "epoch": 0.6438113311193787, + "grad_norm": 0.08861633390188217, + "learning_rate": 1.0712408320608775e-05, + "loss": 8.7062, + "step": 128920 + }, + { + "epoch": 0.6438612699443182, + "grad_norm": 0.0909549817442894, + "learning_rate": 1.0710906405667225e-05, + "loss": 8.7044, + "step": 128930 + }, + { + "epoch": 0.6439112087692577, + "grad_norm": 0.09526665508747101, + "learning_rate": 1.0709404490725675e-05, + "loss": 8.6959, + "step": 128940 + }, + { + "epoch": 0.6439611475941971, + "grad_norm": 0.0844038724899292, + "learning_rate": 1.0707902575784126e-05, + "loss": 8.6776, + "step": 128950 + }, + { + "epoch": 0.6440110864191365, + "grad_norm": 0.09909462928771973, + "learning_rate": 1.0706400660842576e-05, + "loss": 8.6958, + "step": 128960 + }, + { + "epoch": 0.644061025244076, + "grad_norm": 0.09768630564212799, + "learning_rate": 1.0704898745901023e-05, + "loss": 8.6804, + "step": 128970 + }, + { + "epoch": 0.6441109640690155, + "grad_norm": 0.09060562402009964, + "learning_rate": 1.0703396830959473e-05, + "loss": 8.6874, + "step": 128980 + }, + { + "epoch": 0.6441609028939549, + "grad_norm": 0.09415677189826965, + "learning_rate": 1.0701894916017923e-05, + "loss": 8.6893, + "step": 128990 + }, + { + "epoch": 0.6442108417188943, + "grad_norm": 0.08882970362901688, + "learning_rate": 1.0700393001076373e-05, + "loss": 8.685, + "step": 129000 + }, + { + "epoch": 0.6442607805438338, + "grad_norm": 0.09341847151517868, + "learning_rate": 1.0698891086134823e-05, + "loss": 8.6986, + "step": 129010 + }, + { + "epoch": 0.6443107193687733, + "grad_norm": 0.08802028000354767, + "learning_rate": 1.069738917119327e-05, + "loss": 8.6974, + "step": 129020 + }, + { + "epoch": 0.6443606581937127, + "grad_norm": 0.09880810230970383, + "learning_rate": 1.069588725625172e-05, + "loss": 8.7002, + "step": 129030 + }, + { + "epoch": 0.6444105970186521, + "grad_norm": 0.09482931345701218, + "learning_rate": 1.069438534131017e-05, + "loss": 8.7117, + "step": 129040 + }, + { + "epoch": 0.6444605358435916, + "grad_norm": 0.09154761582612991, + "learning_rate": 1.069288342636862e-05, + "loss": 8.7054, + "step": 129050 + }, + { + "epoch": 0.6445104746685311, + "grad_norm": 0.08899923413991928, + "learning_rate": 1.069138151142707e-05, + "loss": 8.7215, + "step": 129060 + }, + { + "epoch": 0.6445604134934705, + "grad_norm": 0.09512390941381454, + "learning_rate": 1.068987959648552e-05, + "loss": 8.6868, + "step": 129070 + }, + { + "epoch": 0.6446103523184099, + "grad_norm": 0.09039557725191116, + "learning_rate": 1.0688377681543968e-05, + "loss": 8.6942, + "step": 129080 + }, + { + "epoch": 0.6446602911433494, + "grad_norm": 0.09328074753284454, + "learning_rate": 1.0686875766602418e-05, + "loss": 8.6928, + "step": 129090 + }, + { + "epoch": 0.6447102299682889, + "grad_norm": 0.0936678946018219, + "learning_rate": 1.0685373851660868e-05, + "loss": 8.6925, + "step": 129100 + }, + { + "epoch": 0.6447601687932283, + "grad_norm": 0.09343737363815308, + "learning_rate": 1.0683871936719318e-05, + "loss": 8.698, + "step": 129110 + }, + { + "epoch": 0.6448101076181677, + "grad_norm": 0.08791700750589371, + "learning_rate": 1.0682370021777767e-05, + "loss": 8.6843, + "step": 129120 + }, + { + "epoch": 0.6448600464431072, + "grad_norm": 0.09414328634738922, + "learning_rate": 1.0680868106836215e-05, + "loss": 8.7037, + "step": 129130 + }, + { + "epoch": 0.6449099852680467, + "grad_norm": 0.0889165922999382, + "learning_rate": 1.0679366191894665e-05, + "loss": 8.701, + "step": 129140 + }, + { + "epoch": 0.6449599240929861, + "grad_norm": 0.09565069526433945, + "learning_rate": 1.0677864276953116e-05, + "loss": 8.6856, + "step": 129150 + }, + { + "epoch": 0.6450098629179255, + "grad_norm": 0.09142804890871048, + "learning_rate": 1.0676362362011566e-05, + "loss": 8.694, + "step": 129160 + }, + { + "epoch": 0.645059801742865, + "grad_norm": 0.09595470130443573, + "learning_rate": 1.0674860447070014e-05, + "loss": 8.7063, + "step": 129170 + }, + { + "epoch": 0.6451097405678045, + "grad_norm": 0.09262866526842117, + "learning_rate": 1.0673358532128463e-05, + "loss": 8.687, + "step": 129180 + }, + { + "epoch": 0.6451596793927439, + "grad_norm": 0.09345003217458725, + "learning_rate": 1.0671856617186913e-05, + "loss": 8.6825, + "step": 129190 + }, + { + "epoch": 0.6452096182176833, + "grad_norm": 0.09604539722204208, + "learning_rate": 1.0670354702245363e-05, + "loss": 8.685, + "step": 129200 + }, + { + "epoch": 0.6452595570426228, + "grad_norm": 0.09135550260543823, + "learning_rate": 1.0668852787303813e-05, + "loss": 8.6817, + "step": 129210 + }, + { + "epoch": 0.6453094958675623, + "grad_norm": 0.09230076521635056, + "learning_rate": 1.0667350872362262e-05, + "loss": 8.7034, + "step": 129220 + }, + { + "epoch": 0.6453594346925017, + "grad_norm": 0.08917170763015747, + "learning_rate": 1.0665848957420712e-05, + "loss": 8.6952, + "step": 129230 + }, + { + "epoch": 0.6454093735174411, + "grad_norm": 0.09029002487659454, + "learning_rate": 1.066434704247916e-05, + "loss": 8.7049, + "step": 129240 + }, + { + "epoch": 0.6454593123423806, + "grad_norm": 0.09072355180978775, + "learning_rate": 1.066284512753761e-05, + "loss": 8.6823, + "step": 129250 + }, + { + "epoch": 0.64550925116732, + "grad_norm": 0.09148232638835907, + "learning_rate": 1.066134321259606e-05, + "loss": 8.6949, + "step": 129260 + }, + { + "epoch": 0.6455591899922595, + "grad_norm": 0.09294047951698303, + "learning_rate": 1.065984129765451e-05, + "loss": 8.6964, + "step": 129270 + }, + { + "epoch": 0.6456091288171989, + "grad_norm": 0.09537001699209213, + "learning_rate": 1.065833938271296e-05, + "loss": 8.6854, + "step": 129280 + }, + { + "epoch": 0.6456590676421384, + "grad_norm": 0.08766493201255798, + "learning_rate": 1.0656837467771408e-05, + "loss": 8.6707, + "step": 129290 + }, + { + "epoch": 0.6457090064670779, + "grad_norm": 0.09294747561216354, + "learning_rate": 1.0655335552829858e-05, + "loss": 8.691, + "step": 129300 + }, + { + "epoch": 0.6457589452920173, + "grad_norm": 0.09923656284809113, + "learning_rate": 1.0653833637888308e-05, + "loss": 8.6914, + "step": 129310 + }, + { + "epoch": 0.6458088841169567, + "grad_norm": 0.09559011459350586, + "learning_rate": 1.0652331722946757e-05, + "loss": 8.6932, + "step": 129320 + }, + { + "epoch": 0.6458588229418962, + "grad_norm": 0.09223086386919022, + "learning_rate": 1.0650829808005207e-05, + "loss": 8.6739, + "step": 129330 + }, + { + "epoch": 0.6459087617668356, + "grad_norm": 0.09738924354314804, + "learning_rate": 1.0649327893063655e-05, + "loss": 8.6669, + "step": 129340 + }, + { + "epoch": 0.6459587005917751, + "grad_norm": 0.09521890431642532, + "learning_rate": 1.0647825978122106e-05, + "loss": 8.7012, + "step": 129350 + }, + { + "epoch": 0.6460086394167145, + "grad_norm": 0.08749169111251831, + "learning_rate": 1.0646324063180556e-05, + "loss": 8.7065, + "step": 129360 + }, + { + "epoch": 0.646058578241654, + "grad_norm": 0.09419877082109451, + "learning_rate": 1.0644822148239004e-05, + "loss": 8.6892, + "step": 129370 + }, + { + "epoch": 0.6461085170665934, + "grad_norm": 0.1010696068406105, + "learning_rate": 1.0643320233297455e-05, + "loss": 8.6767, + "step": 129380 + }, + { + "epoch": 0.6461584558915329, + "grad_norm": 0.09025359898805618, + "learning_rate": 1.0641818318355905e-05, + "loss": 8.6869, + "step": 129390 + }, + { + "epoch": 0.6462083947164723, + "grad_norm": 0.10446906089782715, + "learning_rate": 1.0640316403414353e-05, + "loss": 8.6966, + "step": 129400 + }, + { + "epoch": 0.6462583335414118, + "grad_norm": 0.09092262387275696, + "learning_rate": 1.0638814488472803e-05, + "loss": 8.6835, + "step": 129410 + }, + { + "epoch": 0.6463082723663512, + "grad_norm": 0.08932240307331085, + "learning_rate": 1.0637312573531252e-05, + "loss": 8.6936, + "step": 129420 + }, + { + "epoch": 0.6463582111912907, + "grad_norm": 0.09258059412240982, + "learning_rate": 1.0635810658589702e-05, + "loss": 8.6914, + "step": 129430 + }, + { + "epoch": 0.6464081500162301, + "grad_norm": 0.09351666271686554, + "learning_rate": 1.0634308743648152e-05, + "loss": 8.7064, + "step": 129440 + }, + { + "epoch": 0.6464580888411696, + "grad_norm": 0.08932211250066757, + "learning_rate": 1.06328068287066e-05, + "loss": 8.7065, + "step": 129450 + }, + { + "epoch": 0.646508027666109, + "grad_norm": 0.0951404944062233, + "learning_rate": 1.0631304913765051e-05, + "loss": 8.6996, + "step": 129460 + }, + { + "epoch": 0.6465579664910485, + "grad_norm": 0.09667675197124481, + "learning_rate": 1.06298029988235e-05, + "loss": 8.6869, + "step": 129470 + }, + { + "epoch": 0.6466079053159879, + "grad_norm": 0.08983483165502548, + "learning_rate": 1.062830108388195e-05, + "loss": 8.6919, + "step": 129480 + }, + { + "epoch": 0.6466578441409274, + "grad_norm": 0.09399787336587906, + "learning_rate": 1.06267991689404e-05, + "loss": 8.6946, + "step": 129490 + }, + { + "epoch": 0.6467077829658668, + "grad_norm": 0.09137116372585297, + "learning_rate": 1.0625297253998848e-05, + "loss": 8.7024, + "step": 129500 + }, + { + "epoch": 0.6467577217908063, + "grad_norm": 0.09180816262960434, + "learning_rate": 1.0623795339057298e-05, + "loss": 8.6889, + "step": 129510 + }, + { + "epoch": 0.6468076606157457, + "grad_norm": 0.08744052797555923, + "learning_rate": 1.0622293424115747e-05, + "loss": 8.6927, + "step": 129520 + }, + { + "epoch": 0.6468575994406852, + "grad_norm": 0.09338006377220154, + "learning_rate": 1.0620791509174197e-05, + "loss": 8.6732, + "step": 129530 + }, + { + "epoch": 0.6469075382656246, + "grad_norm": 0.0890432596206665, + "learning_rate": 1.0619289594232647e-05, + "loss": 8.682, + "step": 129540 + }, + { + "epoch": 0.6469574770905641, + "grad_norm": 0.08677448332309723, + "learning_rate": 1.0617787679291097e-05, + "loss": 8.6879, + "step": 129550 + }, + { + "epoch": 0.6470074159155035, + "grad_norm": 0.08942339569330215, + "learning_rate": 1.0616285764349546e-05, + "loss": 8.6885, + "step": 129560 + }, + { + "epoch": 0.6470573547404429, + "grad_norm": 0.09570731967687607, + "learning_rate": 1.0614783849407994e-05, + "loss": 8.7066, + "step": 129570 + }, + { + "epoch": 0.6471072935653824, + "grad_norm": 0.09744849801063538, + "learning_rate": 1.0613281934466445e-05, + "loss": 8.6889, + "step": 129580 + }, + { + "epoch": 0.6471572323903219, + "grad_norm": 0.09440560638904572, + "learning_rate": 1.0611780019524895e-05, + "loss": 8.6876, + "step": 129590 + }, + { + "epoch": 0.6472071712152613, + "grad_norm": 0.09413042664527893, + "learning_rate": 1.0610278104583345e-05, + "loss": 8.7024, + "step": 129600 + }, + { + "epoch": 0.6472571100402007, + "grad_norm": 0.09831690788269043, + "learning_rate": 1.0608776189641793e-05, + "loss": 8.702, + "step": 129610 + }, + { + "epoch": 0.6473070488651402, + "grad_norm": 0.0940280631184578, + "learning_rate": 1.0607274274700242e-05, + "loss": 8.6957, + "step": 129620 + }, + { + "epoch": 0.6473569876900797, + "grad_norm": 0.09738873690366745, + "learning_rate": 1.0605772359758692e-05, + "loss": 8.6943, + "step": 129630 + }, + { + "epoch": 0.6474069265150191, + "grad_norm": 0.09439673274755478, + "learning_rate": 1.0604270444817142e-05, + "loss": 8.6919, + "step": 129640 + }, + { + "epoch": 0.6474568653399585, + "grad_norm": 0.08990797400474548, + "learning_rate": 1.0602768529875592e-05, + "loss": 8.6936, + "step": 129650 + }, + { + "epoch": 0.647506804164898, + "grad_norm": 0.09267980605363846, + "learning_rate": 1.0601266614934041e-05, + "loss": 8.6866, + "step": 129660 + }, + { + "epoch": 0.6475567429898375, + "grad_norm": 0.09749335050582886, + "learning_rate": 1.0599764699992491e-05, + "loss": 8.6843, + "step": 129670 + }, + { + "epoch": 0.6476066818147769, + "grad_norm": 0.09197567403316498, + "learning_rate": 1.059826278505094e-05, + "loss": 8.6853, + "step": 129680 + }, + { + "epoch": 0.6476566206397163, + "grad_norm": 0.08990548551082611, + "learning_rate": 1.059676087010939e-05, + "loss": 8.6833, + "step": 129690 + }, + { + "epoch": 0.6477065594646558, + "grad_norm": 0.09163473546504974, + "learning_rate": 1.059525895516784e-05, + "loss": 8.6803, + "step": 129700 + }, + { + "epoch": 0.6477564982895953, + "grad_norm": 0.08452335745096207, + "learning_rate": 1.059375704022629e-05, + "loss": 8.6832, + "step": 129710 + }, + { + "epoch": 0.6478064371145347, + "grad_norm": 0.09139879792928696, + "learning_rate": 1.0592255125284739e-05, + "loss": 8.6855, + "step": 129720 + }, + { + "epoch": 0.6478563759394741, + "grad_norm": 0.09565936774015427, + "learning_rate": 1.0590753210343187e-05, + "loss": 8.6953, + "step": 129730 + }, + { + "epoch": 0.6479063147644136, + "grad_norm": 0.09335167706012726, + "learning_rate": 1.0589251295401637e-05, + "loss": 8.6954, + "step": 129740 + }, + { + "epoch": 0.647956253589353, + "grad_norm": 0.09695900231599808, + "learning_rate": 1.0587749380460087e-05, + "loss": 8.6963, + "step": 129750 + }, + { + "epoch": 0.6480061924142925, + "grad_norm": 0.09421364217996597, + "learning_rate": 1.0586247465518538e-05, + "loss": 8.7094, + "step": 129760 + }, + { + "epoch": 0.6480561312392319, + "grad_norm": 0.09024903178215027, + "learning_rate": 1.0584745550576986e-05, + "loss": 8.6867, + "step": 129770 + }, + { + "epoch": 0.6481060700641714, + "grad_norm": 0.09890251606702805, + "learning_rate": 1.0583243635635435e-05, + "loss": 8.6979, + "step": 129780 + }, + { + "epoch": 0.6481560088891108, + "grad_norm": 0.09448616206645966, + "learning_rate": 1.0581741720693885e-05, + "loss": 8.6953, + "step": 129790 + }, + { + "epoch": 0.6482059477140503, + "grad_norm": 0.08602569997310638, + "learning_rate": 1.0580239805752335e-05, + "loss": 8.6916, + "step": 129800 + }, + { + "epoch": 0.6482558865389897, + "grad_norm": 0.08742332458496094, + "learning_rate": 1.0578737890810785e-05, + "loss": 8.6816, + "step": 129810 + }, + { + "epoch": 0.6483058253639292, + "grad_norm": 0.09227047860622406, + "learning_rate": 1.0577235975869234e-05, + "loss": 8.7025, + "step": 129820 + }, + { + "epoch": 0.6483557641888686, + "grad_norm": 0.09513872116804123, + "learning_rate": 1.0575734060927682e-05, + "loss": 8.6642, + "step": 129830 + }, + { + "epoch": 0.6484057030138081, + "grad_norm": 0.08885648846626282, + "learning_rate": 1.0574232145986132e-05, + "loss": 8.6826, + "step": 129840 + }, + { + "epoch": 0.6484556418387475, + "grad_norm": 0.09655725210905075, + "learning_rate": 1.0572730231044582e-05, + "loss": 8.6796, + "step": 129850 + }, + { + "epoch": 0.648505580663687, + "grad_norm": 0.0928000807762146, + "learning_rate": 1.0571228316103033e-05, + "loss": 8.6778, + "step": 129860 + }, + { + "epoch": 0.6485555194886264, + "grad_norm": 0.09117357432842255, + "learning_rate": 1.0569726401161483e-05, + "loss": 8.6699, + "step": 129870 + }, + { + "epoch": 0.6486054583135659, + "grad_norm": 0.09002866595983505, + "learning_rate": 1.056822448621993e-05, + "loss": 8.686, + "step": 129880 + }, + { + "epoch": 0.6486553971385053, + "grad_norm": 0.09259200841188431, + "learning_rate": 1.056672257127838e-05, + "loss": 8.6844, + "step": 129890 + }, + { + "epoch": 0.6487053359634448, + "grad_norm": 0.0892748162150383, + "learning_rate": 1.056522065633683e-05, + "loss": 8.6899, + "step": 129900 + }, + { + "epoch": 0.6487552747883842, + "grad_norm": 0.0917670950293541, + "learning_rate": 1.056371874139528e-05, + "loss": 8.6836, + "step": 129910 + }, + { + "epoch": 0.6488052136133237, + "grad_norm": 0.09188613295555115, + "learning_rate": 1.056221682645373e-05, + "loss": 8.7191, + "step": 129920 + }, + { + "epoch": 0.6488551524382631, + "grad_norm": 0.09064072370529175, + "learning_rate": 1.0560714911512177e-05, + "loss": 8.6913, + "step": 129930 + }, + { + "epoch": 0.6489050912632026, + "grad_norm": 0.09041624516248703, + "learning_rate": 1.0559212996570627e-05, + "loss": 8.6797, + "step": 129940 + }, + { + "epoch": 0.648955030088142, + "grad_norm": 0.09263037890195847, + "learning_rate": 1.0557711081629077e-05, + "loss": 8.6839, + "step": 129950 + }, + { + "epoch": 0.6490049689130815, + "grad_norm": 0.0885956883430481, + "learning_rate": 1.0556209166687528e-05, + "loss": 8.6841, + "step": 129960 + }, + { + "epoch": 0.6490549077380209, + "grad_norm": 0.08921640366315842, + "learning_rate": 1.0554707251745978e-05, + "loss": 8.6874, + "step": 129970 + }, + { + "epoch": 0.6491048465629604, + "grad_norm": 0.09531452506780624, + "learning_rate": 1.0553205336804425e-05, + "loss": 8.6952, + "step": 129980 + }, + { + "epoch": 0.6491547853878998, + "grad_norm": 0.09411237388849258, + "learning_rate": 1.0551703421862875e-05, + "loss": 8.6968, + "step": 129990 + }, + { + "epoch": 0.6492047242128393, + "grad_norm": 0.09411599487066269, + "learning_rate": 1.0550201506921325e-05, + "loss": 8.6859, + "step": 130000 + }, + { + "epoch": 0.6492546630377787, + "grad_norm": 0.09481288492679596, + "learning_rate": 1.0548699591979775e-05, + "loss": 8.695, + "step": 130010 + }, + { + "epoch": 0.6493046018627182, + "grad_norm": 0.09503282606601715, + "learning_rate": 1.0547197677038225e-05, + "loss": 8.6801, + "step": 130020 + }, + { + "epoch": 0.6493545406876576, + "grad_norm": 0.09125657379627228, + "learning_rate": 1.0545695762096674e-05, + "loss": 8.6882, + "step": 130030 + }, + { + "epoch": 0.649404479512597, + "grad_norm": 0.09231728315353394, + "learning_rate": 1.0544193847155122e-05, + "loss": 8.6794, + "step": 130040 + }, + { + "epoch": 0.6494544183375365, + "grad_norm": 0.09627514332532883, + "learning_rate": 1.0542691932213572e-05, + "loss": 8.6888, + "step": 130050 + }, + { + "epoch": 0.649504357162476, + "grad_norm": 0.0894688069820404, + "learning_rate": 1.0541190017272023e-05, + "loss": 8.6832, + "step": 130060 + }, + { + "epoch": 0.6495542959874154, + "grad_norm": 0.09295932203531265, + "learning_rate": 1.0539688102330473e-05, + "loss": 8.6835, + "step": 130070 + }, + { + "epoch": 0.6496042348123549, + "grad_norm": 0.09354085475206375, + "learning_rate": 1.0538186187388921e-05, + "loss": 8.6896, + "step": 130080 + }, + { + "epoch": 0.6496541736372943, + "grad_norm": 0.09849437326192856, + "learning_rate": 1.053668427244737e-05, + "loss": 8.67, + "step": 130090 + }, + { + "epoch": 0.6497041124622338, + "grad_norm": 0.09284999966621399, + "learning_rate": 1.053518235750582e-05, + "loss": 8.6605, + "step": 130100 + }, + { + "epoch": 0.6497540512871732, + "grad_norm": 0.0957464724779129, + "learning_rate": 1.053368044256427e-05, + "loss": 8.6809, + "step": 130110 + }, + { + "epoch": 0.6498039901121127, + "grad_norm": 0.09025140851736069, + "learning_rate": 1.053217852762272e-05, + "loss": 8.6906, + "step": 130120 + }, + { + "epoch": 0.6498539289370521, + "grad_norm": 0.0904148668050766, + "learning_rate": 1.0530676612681169e-05, + "loss": 8.6876, + "step": 130130 + }, + { + "epoch": 0.6499038677619916, + "grad_norm": 0.08794094622135162, + "learning_rate": 1.0529174697739617e-05, + "loss": 8.6838, + "step": 130140 + }, + { + "epoch": 0.649953806586931, + "grad_norm": 0.09247595816850662, + "learning_rate": 1.0527672782798067e-05, + "loss": 8.68, + "step": 130150 + }, + { + "epoch": 0.6500037454118704, + "grad_norm": 0.09172753244638443, + "learning_rate": 1.0526170867856518e-05, + "loss": 8.6877, + "step": 130160 + }, + { + "epoch": 0.6500536842368099, + "grad_norm": 0.09523764252662659, + "learning_rate": 1.0524668952914968e-05, + "loss": 8.6913, + "step": 130170 + }, + { + "epoch": 0.6501036230617494, + "grad_norm": 0.08924904465675354, + "learning_rate": 1.0523167037973416e-05, + "loss": 8.681, + "step": 130180 + }, + { + "epoch": 0.6501535618866888, + "grad_norm": 0.08906625956296921, + "learning_rate": 1.0521665123031866e-05, + "loss": 8.675, + "step": 130190 + }, + { + "epoch": 0.6502035007116282, + "grad_norm": 0.09328590333461761, + "learning_rate": 1.0520163208090315e-05, + "loss": 8.6944, + "step": 130200 + }, + { + "epoch": 0.6502534395365677, + "grad_norm": 0.09101434797048569, + "learning_rate": 1.0518661293148765e-05, + "loss": 8.6864, + "step": 130210 + }, + { + "epoch": 0.6503033783615072, + "grad_norm": 0.09618061780929565, + "learning_rate": 1.0517159378207215e-05, + "loss": 8.6875, + "step": 130220 + }, + { + "epoch": 0.6503533171864466, + "grad_norm": 0.0945608913898468, + "learning_rate": 1.0515657463265664e-05, + "loss": 8.689, + "step": 130230 + }, + { + "epoch": 0.650403256011386, + "grad_norm": 0.0871725007891655, + "learning_rate": 1.0514155548324114e-05, + "loss": 8.6919, + "step": 130240 + }, + { + "epoch": 0.6504531948363255, + "grad_norm": 0.09153078496456146, + "learning_rate": 1.0512653633382562e-05, + "loss": 8.6813, + "step": 130250 + }, + { + "epoch": 0.650503133661265, + "grad_norm": 0.09095324575901031, + "learning_rate": 1.0511151718441013e-05, + "loss": 8.6822, + "step": 130260 + }, + { + "epoch": 0.6505530724862044, + "grad_norm": 0.09055155515670776, + "learning_rate": 1.0509649803499463e-05, + "loss": 8.676, + "step": 130270 + }, + { + "epoch": 0.6506030113111438, + "grad_norm": 0.08829725533723831, + "learning_rate": 1.0508147888557911e-05, + "loss": 8.6923, + "step": 130280 + }, + { + "epoch": 0.6506529501360833, + "grad_norm": 0.09116893261671066, + "learning_rate": 1.0506645973616361e-05, + "loss": 8.6698, + "step": 130290 + }, + { + "epoch": 0.6507028889610228, + "grad_norm": 0.08940432965755463, + "learning_rate": 1.050514405867481e-05, + "loss": 8.6763, + "step": 130300 + }, + { + "epoch": 0.6507528277859622, + "grad_norm": 0.09223427623510361, + "learning_rate": 1.050364214373326e-05, + "loss": 8.6929, + "step": 130310 + }, + { + "epoch": 0.6508027666109016, + "grad_norm": 0.09368874877691269, + "learning_rate": 1.050214022879171e-05, + "loss": 8.6783, + "step": 130320 + }, + { + "epoch": 0.6508527054358411, + "grad_norm": 0.09133809059858322, + "learning_rate": 1.0500638313850159e-05, + "loss": 8.6861, + "step": 130330 + }, + { + "epoch": 0.6509026442607806, + "grad_norm": 0.09096644818782806, + "learning_rate": 1.0499136398908609e-05, + "loss": 8.6741, + "step": 130340 + }, + { + "epoch": 0.65095258308572, + "grad_norm": 0.09941566735506058, + "learning_rate": 1.0497634483967059e-05, + "loss": 8.6934, + "step": 130350 + }, + { + "epoch": 0.6510025219106594, + "grad_norm": 0.09109698235988617, + "learning_rate": 1.0496132569025508e-05, + "loss": 8.6798, + "step": 130360 + }, + { + "epoch": 0.6510524607355989, + "grad_norm": 0.09425774961709976, + "learning_rate": 1.0494630654083958e-05, + "loss": 8.6717, + "step": 130370 + }, + { + "epoch": 0.6511023995605384, + "grad_norm": 0.09574547410011292, + "learning_rate": 1.0493128739142406e-05, + "loss": 8.6847, + "step": 130380 + }, + { + "epoch": 0.6511523383854778, + "grad_norm": 0.08903608471155167, + "learning_rate": 1.0491626824200856e-05, + "loss": 8.6854, + "step": 130390 + }, + { + "epoch": 0.6512022772104172, + "grad_norm": 0.10021170973777771, + "learning_rate": 1.0490124909259307e-05, + "loss": 8.6747, + "step": 130400 + }, + { + "epoch": 0.6512522160353567, + "grad_norm": 0.09193012118339539, + "learning_rate": 1.0488622994317755e-05, + "loss": 8.6764, + "step": 130410 + }, + { + "epoch": 0.6513021548602962, + "grad_norm": 0.09240026026964188, + "learning_rate": 1.0487121079376205e-05, + "loss": 8.7007, + "step": 130420 + }, + { + "epoch": 0.6513520936852356, + "grad_norm": 0.0931842103600502, + "learning_rate": 1.0485619164434654e-05, + "loss": 8.6803, + "step": 130430 + }, + { + "epoch": 0.651402032510175, + "grad_norm": 0.0900469720363617, + "learning_rate": 1.0484117249493104e-05, + "loss": 8.6859, + "step": 130440 + }, + { + "epoch": 0.6514519713351145, + "grad_norm": 0.08829685300588608, + "learning_rate": 1.0482615334551554e-05, + "loss": 8.679, + "step": 130450 + }, + { + "epoch": 0.651501910160054, + "grad_norm": 0.08664678037166595, + "learning_rate": 1.0481113419610003e-05, + "loss": 8.6902, + "step": 130460 + }, + { + "epoch": 0.6515518489849934, + "grad_norm": 0.08698765188455582, + "learning_rate": 1.0479611504668453e-05, + "loss": 8.686, + "step": 130470 + }, + { + "epoch": 0.6516017878099328, + "grad_norm": 0.09792865067720413, + "learning_rate": 1.0478109589726901e-05, + "loss": 8.6769, + "step": 130480 + }, + { + "epoch": 0.6516517266348723, + "grad_norm": 0.08901020884513855, + "learning_rate": 1.0476607674785351e-05, + "loss": 8.6788, + "step": 130490 + }, + { + "epoch": 0.6517016654598118, + "grad_norm": 0.0966918021440506, + "learning_rate": 1.0475105759843802e-05, + "loss": 8.6681, + "step": 130500 + }, + { + "epoch": 0.6517516042847512, + "grad_norm": 0.0899975523352623, + "learning_rate": 1.0473603844902252e-05, + "loss": 8.7017, + "step": 130510 + }, + { + "epoch": 0.6518015431096906, + "grad_norm": 0.09501145780086517, + "learning_rate": 1.04721019299607e-05, + "loss": 8.698, + "step": 130520 + }, + { + "epoch": 0.65185148193463, + "grad_norm": 0.09583033621311188, + "learning_rate": 1.0470600015019149e-05, + "loss": 8.6757, + "step": 130530 + }, + { + "epoch": 0.6519014207595695, + "grad_norm": 0.09208723902702332, + "learning_rate": 1.0469098100077599e-05, + "loss": 8.6855, + "step": 130540 + }, + { + "epoch": 0.651951359584509, + "grad_norm": 0.09319637715816498, + "learning_rate": 1.0467596185136049e-05, + "loss": 8.6914, + "step": 130550 + }, + { + "epoch": 0.6520012984094484, + "grad_norm": 0.09361417591571808, + "learning_rate": 1.04660942701945e-05, + "loss": 8.683, + "step": 130560 + }, + { + "epoch": 0.6520512372343878, + "grad_norm": 0.0895400270819664, + "learning_rate": 1.0464592355252948e-05, + "loss": 8.6841, + "step": 130570 + }, + { + "epoch": 0.6521011760593273, + "grad_norm": 0.09408006817102432, + "learning_rate": 1.0463090440311396e-05, + "loss": 8.6757, + "step": 130580 + }, + { + "epoch": 0.6521511148842668, + "grad_norm": 0.09309608489274979, + "learning_rate": 1.0461588525369846e-05, + "loss": 8.6885, + "step": 130590 + }, + { + "epoch": 0.6522010537092062, + "grad_norm": 0.1002884954214096, + "learning_rate": 1.0460086610428297e-05, + "loss": 8.6925, + "step": 130600 + }, + { + "epoch": 0.6522509925341456, + "grad_norm": 0.09665568172931671, + "learning_rate": 1.0458584695486747e-05, + "loss": 8.6746, + "step": 130610 + }, + { + "epoch": 0.6523009313590851, + "grad_norm": 0.09286937117576599, + "learning_rate": 1.0457082780545195e-05, + "loss": 8.6669, + "step": 130620 + }, + { + "epoch": 0.6523508701840246, + "grad_norm": 0.094349205493927, + "learning_rate": 1.0455580865603644e-05, + "loss": 8.674, + "step": 130630 + }, + { + "epoch": 0.652400809008964, + "grad_norm": 0.0887681245803833, + "learning_rate": 1.0454078950662094e-05, + "loss": 8.6897, + "step": 130640 + }, + { + "epoch": 0.6524507478339034, + "grad_norm": 0.09428498893976212, + "learning_rate": 1.0452577035720544e-05, + "loss": 8.6805, + "step": 130650 + }, + { + "epoch": 0.6525006866588429, + "grad_norm": 0.09223032742738724, + "learning_rate": 1.0451075120778994e-05, + "loss": 8.685, + "step": 130660 + }, + { + "epoch": 0.6525506254837824, + "grad_norm": 0.09503738582134247, + "learning_rate": 1.0449573205837445e-05, + "loss": 8.6903, + "step": 130670 + }, + { + "epoch": 0.6526005643087218, + "grad_norm": 0.08950851857662201, + "learning_rate": 1.0448071290895891e-05, + "loss": 8.6922, + "step": 130680 + }, + { + "epoch": 0.6526505031336612, + "grad_norm": 0.08932535350322723, + "learning_rate": 1.0446569375954341e-05, + "loss": 8.6843, + "step": 130690 + }, + { + "epoch": 0.6527004419586007, + "grad_norm": 0.093544140458107, + "learning_rate": 1.0445067461012792e-05, + "loss": 8.6965, + "step": 130700 + }, + { + "epoch": 0.6527503807835402, + "grad_norm": 0.09570880979299545, + "learning_rate": 1.0443565546071242e-05, + "loss": 8.6699, + "step": 130710 + }, + { + "epoch": 0.6528003196084796, + "grad_norm": 0.095237135887146, + "learning_rate": 1.0442063631129692e-05, + "loss": 8.6778, + "step": 130720 + }, + { + "epoch": 0.652850258433419, + "grad_norm": 0.10413196682929993, + "learning_rate": 1.0440561716188139e-05, + "loss": 8.6738, + "step": 130730 + }, + { + "epoch": 0.6529001972583585, + "grad_norm": 0.08957455307245255, + "learning_rate": 1.0439059801246589e-05, + "loss": 8.6821, + "step": 130740 + }, + { + "epoch": 0.652950136083298, + "grad_norm": 0.1010318323969841, + "learning_rate": 1.043755788630504e-05, + "loss": 8.6882, + "step": 130750 + }, + { + "epoch": 0.6530000749082374, + "grad_norm": 0.09466128796339035, + "learning_rate": 1.043605597136349e-05, + "loss": 8.6749, + "step": 130760 + }, + { + "epoch": 0.6530500137331768, + "grad_norm": 0.0850997343659401, + "learning_rate": 1.043455405642194e-05, + "loss": 8.6898, + "step": 130770 + }, + { + "epoch": 0.6530999525581163, + "grad_norm": 0.09206338971853256, + "learning_rate": 1.0433052141480386e-05, + "loss": 8.6861, + "step": 130780 + }, + { + "epoch": 0.6531498913830558, + "grad_norm": 0.09347966313362122, + "learning_rate": 1.0431550226538836e-05, + "loss": 8.7026, + "step": 130790 + }, + { + "epoch": 0.6531998302079952, + "grad_norm": 0.08403073996305466, + "learning_rate": 1.0430048311597287e-05, + "loss": 8.6844, + "step": 130800 + }, + { + "epoch": 0.6532497690329346, + "grad_norm": 0.08621867746114731, + "learning_rate": 1.0428546396655737e-05, + "loss": 8.6709, + "step": 130810 + }, + { + "epoch": 0.6532997078578741, + "grad_norm": 0.08873015642166138, + "learning_rate": 1.0427044481714187e-05, + "loss": 8.6875, + "step": 130820 + }, + { + "epoch": 0.6533496466828136, + "grad_norm": 0.08667482435703278, + "learning_rate": 1.0425542566772636e-05, + "loss": 8.679, + "step": 130830 + }, + { + "epoch": 0.653399585507753, + "grad_norm": 0.09268020838499069, + "learning_rate": 1.0424040651831084e-05, + "loss": 8.6803, + "step": 130840 + }, + { + "epoch": 0.6534495243326924, + "grad_norm": 0.1045554131269455, + "learning_rate": 1.0422538736889534e-05, + "loss": 8.6686, + "step": 130850 + }, + { + "epoch": 0.6534994631576319, + "grad_norm": 0.08979924023151398, + "learning_rate": 1.0421036821947984e-05, + "loss": 8.6894, + "step": 130860 + }, + { + "epoch": 0.6535494019825714, + "grad_norm": 0.09369828552007675, + "learning_rate": 1.0419534907006435e-05, + "loss": 8.6807, + "step": 130870 + }, + { + "epoch": 0.6535993408075108, + "grad_norm": 0.09341225028038025, + "learning_rate": 1.0418032992064883e-05, + "loss": 8.6915, + "step": 130880 + }, + { + "epoch": 0.6536492796324502, + "grad_norm": 0.09005949646234512, + "learning_rate": 1.0416531077123331e-05, + "loss": 8.6985, + "step": 130890 + }, + { + "epoch": 0.6536992184573897, + "grad_norm": 0.08998000621795654, + "learning_rate": 1.0415029162181782e-05, + "loss": 8.6858, + "step": 130900 + }, + { + "epoch": 0.6537491572823292, + "grad_norm": 0.09265704452991486, + "learning_rate": 1.0413527247240232e-05, + "loss": 8.667, + "step": 130910 + }, + { + "epoch": 0.6537990961072686, + "grad_norm": 0.09195978194475174, + "learning_rate": 1.0412025332298682e-05, + "loss": 8.6813, + "step": 130920 + }, + { + "epoch": 0.653849034932208, + "grad_norm": 0.09529465436935425, + "learning_rate": 1.041052341735713e-05, + "loss": 8.6916, + "step": 130930 + }, + { + "epoch": 0.6538989737571475, + "grad_norm": 0.0947846919298172, + "learning_rate": 1.0409021502415579e-05, + "loss": 8.6761, + "step": 130940 + }, + { + "epoch": 0.653948912582087, + "grad_norm": 0.08519770950078964, + "learning_rate": 1.040751958747403e-05, + "loss": 8.6598, + "step": 130950 + }, + { + "epoch": 0.6539988514070264, + "grad_norm": 0.0898909941315651, + "learning_rate": 1.040601767253248e-05, + "loss": 8.6744, + "step": 130960 + }, + { + "epoch": 0.6540487902319658, + "grad_norm": 0.09745825827121735, + "learning_rate": 1.040451575759093e-05, + "loss": 8.6766, + "step": 130970 + }, + { + "epoch": 0.6540987290569052, + "grad_norm": 0.09335152804851532, + "learning_rate": 1.0403013842649378e-05, + "loss": 8.6872, + "step": 130980 + }, + { + "epoch": 0.6541486678818448, + "grad_norm": 0.09373294562101364, + "learning_rate": 1.0401511927707828e-05, + "loss": 8.6794, + "step": 130990 + }, + { + "epoch": 0.6541986067067842, + "grad_norm": 0.09135058522224426, + "learning_rate": 1.0400010012766277e-05, + "loss": 8.6976, + "step": 131000 + }, + { + "epoch": 0.6542485455317236, + "grad_norm": 0.09245934337377548, + "learning_rate": 1.0398508097824727e-05, + "loss": 8.6859, + "step": 131010 + }, + { + "epoch": 0.654298484356663, + "grad_norm": 0.08991315215826035, + "learning_rate": 1.0397006182883177e-05, + "loss": 8.6771, + "step": 131020 + }, + { + "epoch": 0.6543484231816026, + "grad_norm": 0.09018691629171371, + "learning_rate": 1.0395504267941626e-05, + "loss": 8.6944, + "step": 131030 + }, + { + "epoch": 0.654398362006542, + "grad_norm": 0.09176444262266159, + "learning_rate": 1.0394002353000076e-05, + "loss": 8.6827, + "step": 131040 + }, + { + "epoch": 0.6544483008314814, + "grad_norm": 0.09148535877466202, + "learning_rate": 1.0392500438058524e-05, + "loss": 8.68, + "step": 131050 + }, + { + "epoch": 0.6544982396564208, + "grad_norm": 0.09757846593856812, + "learning_rate": 1.0390998523116974e-05, + "loss": 8.6791, + "step": 131060 + }, + { + "epoch": 0.6545481784813604, + "grad_norm": 0.0926646739244461, + "learning_rate": 1.0389496608175425e-05, + "loss": 8.6906, + "step": 131070 + }, + { + "epoch": 0.6545981173062998, + "grad_norm": 0.09516071528196335, + "learning_rate": 1.0387994693233873e-05, + "loss": 8.674, + "step": 131080 + }, + { + "epoch": 0.6546480561312392, + "grad_norm": 0.09518009424209595, + "learning_rate": 1.0386492778292323e-05, + "loss": 8.68, + "step": 131090 + }, + { + "epoch": 0.6546979949561786, + "grad_norm": 0.09322939813137054, + "learning_rate": 1.0384990863350772e-05, + "loss": 8.677, + "step": 131100 + }, + { + "epoch": 0.6547479337811182, + "grad_norm": 0.0915323942899704, + "learning_rate": 1.0383488948409222e-05, + "loss": 8.6732, + "step": 131110 + }, + { + "epoch": 0.6547978726060576, + "grad_norm": 0.0918693020939827, + "learning_rate": 1.0381987033467672e-05, + "loss": 8.6658, + "step": 131120 + }, + { + "epoch": 0.654847811430997, + "grad_norm": 0.09240451455116272, + "learning_rate": 1.038048511852612e-05, + "loss": 8.6933, + "step": 131130 + }, + { + "epoch": 0.6548977502559364, + "grad_norm": 0.09553481638431549, + "learning_rate": 1.037898320358457e-05, + "loss": 8.6893, + "step": 131140 + }, + { + "epoch": 0.654947689080876, + "grad_norm": 0.09170468896627426, + "learning_rate": 1.0377481288643021e-05, + "loss": 8.6906, + "step": 131150 + }, + { + "epoch": 0.6549976279058154, + "grad_norm": 0.09421038627624512, + "learning_rate": 1.037597937370147e-05, + "loss": 8.6669, + "step": 131160 + }, + { + "epoch": 0.6550475667307548, + "grad_norm": 0.08922944962978363, + "learning_rate": 1.037447745875992e-05, + "loss": 8.6888, + "step": 131170 + }, + { + "epoch": 0.6550975055556942, + "grad_norm": 0.09422342479228973, + "learning_rate": 1.0372975543818368e-05, + "loss": 8.6767, + "step": 131180 + }, + { + "epoch": 0.6551474443806338, + "grad_norm": 0.09131404012441635, + "learning_rate": 1.0371473628876818e-05, + "loss": 8.6735, + "step": 131190 + }, + { + "epoch": 0.6551973832055732, + "grad_norm": 0.08546370267868042, + "learning_rate": 1.0369971713935268e-05, + "loss": 8.6832, + "step": 131200 + }, + { + "epoch": 0.6552473220305126, + "grad_norm": 0.09401803463697433, + "learning_rate": 1.0368469798993717e-05, + "loss": 8.68, + "step": 131210 + }, + { + "epoch": 0.655297260855452, + "grad_norm": 0.09075015783309937, + "learning_rate": 1.0366967884052167e-05, + "loss": 8.6723, + "step": 131220 + }, + { + "epoch": 0.6553471996803916, + "grad_norm": 0.09611096978187561, + "learning_rate": 1.0365465969110616e-05, + "loss": 8.6929, + "step": 131230 + }, + { + "epoch": 0.655397138505331, + "grad_norm": 0.09546373039484024, + "learning_rate": 1.0363964054169066e-05, + "loss": 8.6781, + "step": 131240 + }, + { + "epoch": 0.6554470773302704, + "grad_norm": 0.09710092097520828, + "learning_rate": 1.0362462139227516e-05, + "loss": 8.6739, + "step": 131250 + }, + { + "epoch": 0.6554970161552098, + "grad_norm": 0.09392837435007095, + "learning_rate": 1.0360960224285964e-05, + "loss": 8.6824, + "step": 131260 + }, + { + "epoch": 0.6555469549801494, + "grad_norm": 0.09128575772047043, + "learning_rate": 1.0359458309344415e-05, + "loss": 8.6771, + "step": 131270 + }, + { + "epoch": 0.6555968938050888, + "grad_norm": 0.09115424007177353, + "learning_rate": 1.0357956394402863e-05, + "loss": 8.6769, + "step": 131280 + }, + { + "epoch": 0.6556468326300282, + "grad_norm": 0.09360767900943756, + "learning_rate": 1.0356454479461313e-05, + "loss": 8.6999, + "step": 131290 + }, + { + "epoch": 0.6556967714549676, + "grad_norm": 0.09261327236890793, + "learning_rate": 1.0354952564519763e-05, + "loss": 8.6671, + "step": 131300 + }, + { + "epoch": 0.6557467102799072, + "grad_norm": 0.09055551886558533, + "learning_rate": 1.0353450649578214e-05, + "loss": 8.6813, + "step": 131310 + }, + { + "epoch": 0.6557966491048466, + "grad_norm": 0.0952824130654335, + "learning_rate": 1.0351948734636662e-05, + "loss": 8.682, + "step": 131320 + }, + { + "epoch": 0.655846587929786, + "grad_norm": 0.09080826491117477, + "learning_rate": 1.035044681969511e-05, + "loss": 8.6825, + "step": 131330 + }, + { + "epoch": 0.6558965267547254, + "grad_norm": 0.0927935540676117, + "learning_rate": 1.034894490475356e-05, + "loss": 8.6942, + "step": 131340 + }, + { + "epoch": 0.655946465579665, + "grad_norm": 0.09244295954704285, + "learning_rate": 1.0347442989812011e-05, + "loss": 8.6901, + "step": 131350 + }, + { + "epoch": 0.6559964044046044, + "grad_norm": 0.09503054618835449, + "learning_rate": 1.0345941074870461e-05, + "loss": 8.6633, + "step": 131360 + }, + { + "epoch": 0.6560463432295438, + "grad_norm": 0.09178107976913452, + "learning_rate": 1.034443915992891e-05, + "loss": 8.6971, + "step": 131370 + }, + { + "epoch": 0.6560962820544832, + "grad_norm": 0.09662067145109177, + "learning_rate": 1.0342937244987358e-05, + "loss": 8.675, + "step": 131380 + }, + { + "epoch": 0.6561462208794228, + "grad_norm": 0.09024110436439514, + "learning_rate": 1.0341435330045808e-05, + "loss": 8.6675, + "step": 131390 + }, + { + "epoch": 0.6561961597043622, + "grad_norm": 0.09327442944049835, + "learning_rate": 1.0339933415104258e-05, + "loss": 8.6674, + "step": 131400 + }, + { + "epoch": 0.6562460985293016, + "grad_norm": 0.09160305559635162, + "learning_rate": 1.0338431500162709e-05, + "loss": 8.6841, + "step": 131410 + }, + { + "epoch": 0.656296037354241, + "grad_norm": 0.09517280757427216, + "learning_rate": 1.0336929585221157e-05, + "loss": 8.6751, + "step": 131420 + }, + { + "epoch": 0.6563459761791806, + "grad_norm": 0.09651865065097809, + "learning_rate": 1.0335427670279606e-05, + "loss": 8.6741, + "step": 131430 + }, + { + "epoch": 0.65639591500412, + "grad_norm": 0.09696118533611298, + "learning_rate": 1.0333925755338056e-05, + "loss": 8.6646, + "step": 131440 + }, + { + "epoch": 0.6564458538290594, + "grad_norm": 0.09164685755968094, + "learning_rate": 1.0332423840396506e-05, + "loss": 8.6919, + "step": 131450 + }, + { + "epoch": 0.6564957926539988, + "grad_norm": 0.09211573749780655, + "learning_rate": 1.0330921925454956e-05, + "loss": 8.6674, + "step": 131460 + }, + { + "epoch": 0.6565457314789384, + "grad_norm": 0.09793127328157425, + "learning_rate": 1.0329420010513406e-05, + "loss": 8.6897, + "step": 131470 + }, + { + "epoch": 0.6565956703038778, + "grad_norm": 0.09273996949195862, + "learning_rate": 1.0327918095571853e-05, + "loss": 8.6733, + "step": 131480 + }, + { + "epoch": 0.6566456091288172, + "grad_norm": 0.08516659587621689, + "learning_rate": 1.0326416180630303e-05, + "loss": 8.6978, + "step": 131490 + }, + { + "epoch": 0.6566955479537566, + "grad_norm": 0.09348917752504349, + "learning_rate": 1.0324914265688753e-05, + "loss": 8.6678, + "step": 131500 + }, + { + "epoch": 0.6567454867786962, + "grad_norm": 0.09061364084482193, + "learning_rate": 1.0323412350747204e-05, + "loss": 8.688, + "step": 131510 + }, + { + "epoch": 0.6567954256036356, + "grad_norm": 0.09217441827058792, + "learning_rate": 1.0321910435805654e-05, + "loss": 8.6757, + "step": 131520 + }, + { + "epoch": 0.656845364428575, + "grad_norm": 0.09263032674789429, + "learning_rate": 1.03204085208641e-05, + "loss": 8.682, + "step": 131530 + }, + { + "epoch": 0.6568953032535144, + "grad_norm": 0.09283848851919174, + "learning_rate": 1.031890660592255e-05, + "loss": 8.6898, + "step": 131540 + }, + { + "epoch": 0.6569452420784538, + "grad_norm": 0.09214113652706146, + "learning_rate": 1.0317404690981001e-05, + "loss": 8.6754, + "step": 131550 + }, + { + "epoch": 0.6569951809033934, + "grad_norm": 0.09393681585788727, + "learning_rate": 1.0315902776039451e-05, + "loss": 8.6623, + "step": 131560 + }, + { + "epoch": 0.6570451197283328, + "grad_norm": 0.08560718595981598, + "learning_rate": 1.0314400861097901e-05, + "loss": 8.6695, + "step": 131570 + }, + { + "epoch": 0.6570950585532722, + "grad_norm": 0.09654083102941513, + "learning_rate": 1.0312898946156348e-05, + "loss": 8.6811, + "step": 131580 + }, + { + "epoch": 0.6571449973782116, + "grad_norm": 0.09015607833862305, + "learning_rate": 1.0311397031214798e-05, + "loss": 8.6836, + "step": 131590 + }, + { + "epoch": 0.6571949362031512, + "grad_norm": 0.08412002772092819, + "learning_rate": 1.0309895116273248e-05, + "loss": 8.6766, + "step": 131600 + }, + { + "epoch": 0.6572448750280906, + "grad_norm": 0.09406512975692749, + "learning_rate": 1.0308393201331699e-05, + "loss": 8.6751, + "step": 131610 + }, + { + "epoch": 0.65729481385303, + "grad_norm": 0.09097196906805038, + "learning_rate": 1.0306891286390149e-05, + "loss": 8.6757, + "step": 131620 + }, + { + "epoch": 0.6573447526779694, + "grad_norm": 0.09862224012613297, + "learning_rate": 1.0305389371448597e-05, + "loss": 8.6832, + "step": 131630 + }, + { + "epoch": 0.657394691502909, + "grad_norm": 0.09050843119621277, + "learning_rate": 1.0303887456507046e-05, + "loss": 8.6709, + "step": 131640 + }, + { + "epoch": 0.6574446303278484, + "grad_norm": 0.09337736666202545, + "learning_rate": 1.0302385541565496e-05, + "loss": 8.6862, + "step": 131650 + }, + { + "epoch": 0.6574945691527878, + "grad_norm": 0.08687882125377655, + "learning_rate": 1.0300883626623946e-05, + "loss": 8.684, + "step": 131660 + }, + { + "epoch": 0.6575445079777272, + "grad_norm": 0.09270884841680527, + "learning_rate": 1.0299381711682396e-05, + "loss": 8.6805, + "step": 131670 + }, + { + "epoch": 0.6575944468026668, + "grad_norm": 0.08802757412195206, + "learning_rate": 1.0297879796740845e-05, + "loss": 8.6734, + "step": 131680 + }, + { + "epoch": 0.6576443856276062, + "grad_norm": 0.09502147138118744, + "learning_rate": 1.0296377881799293e-05, + "loss": 8.6866, + "step": 131690 + }, + { + "epoch": 0.6576943244525456, + "grad_norm": 0.08749578148126602, + "learning_rate": 1.0294875966857743e-05, + "loss": 8.664, + "step": 131700 + }, + { + "epoch": 0.657744263277485, + "grad_norm": 0.09266682714223862, + "learning_rate": 1.0293374051916194e-05, + "loss": 8.6682, + "step": 131710 + }, + { + "epoch": 0.6577942021024246, + "grad_norm": 0.08939957618713379, + "learning_rate": 1.0291872136974644e-05, + "loss": 8.6629, + "step": 131720 + }, + { + "epoch": 0.657844140927364, + "grad_norm": 0.09014017879962921, + "learning_rate": 1.0290370222033092e-05, + "loss": 8.6807, + "step": 131730 + }, + { + "epoch": 0.6578940797523034, + "grad_norm": 0.0930858924984932, + "learning_rate": 1.028886830709154e-05, + "loss": 8.673, + "step": 131740 + }, + { + "epoch": 0.6579440185772428, + "grad_norm": 0.09012752026319504, + "learning_rate": 1.0287366392149991e-05, + "loss": 8.6753, + "step": 131750 + }, + { + "epoch": 0.6579939574021824, + "grad_norm": 0.09860425442457199, + "learning_rate": 1.0285864477208441e-05, + "loss": 8.6818, + "step": 131760 + }, + { + "epoch": 0.6580438962271218, + "grad_norm": 0.08776333183050156, + "learning_rate": 1.0284362562266891e-05, + "loss": 8.6717, + "step": 131770 + }, + { + "epoch": 0.6580938350520612, + "grad_norm": 0.09285827726125717, + "learning_rate": 1.028286064732534e-05, + "loss": 8.6907, + "step": 131780 + }, + { + "epoch": 0.6581437738770006, + "grad_norm": 0.09714893996715546, + "learning_rate": 1.0281358732383788e-05, + "loss": 8.6782, + "step": 131790 + }, + { + "epoch": 0.6581937127019402, + "grad_norm": 0.09962324798107147, + "learning_rate": 1.0279856817442238e-05, + "loss": 8.6517, + "step": 131800 + }, + { + "epoch": 0.6582436515268796, + "grad_norm": 0.0935245156288147, + "learning_rate": 1.0278354902500689e-05, + "loss": 8.6855, + "step": 131810 + }, + { + "epoch": 0.658293590351819, + "grad_norm": 0.09169341623783112, + "learning_rate": 1.0276852987559139e-05, + "loss": 8.6795, + "step": 131820 + }, + { + "epoch": 0.6583435291767584, + "grad_norm": 0.09153547137975693, + "learning_rate": 1.0275351072617587e-05, + "loss": 8.6665, + "step": 131830 + }, + { + "epoch": 0.658393468001698, + "grad_norm": 0.0904143676161766, + "learning_rate": 1.0273849157676037e-05, + "loss": 8.6895, + "step": 131840 + }, + { + "epoch": 0.6584434068266374, + "grad_norm": 0.09252644330263138, + "learning_rate": 1.0272347242734486e-05, + "loss": 8.681, + "step": 131850 + }, + { + "epoch": 0.6584933456515768, + "grad_norm": 0.08688996732234955, + "learning_rate": 1.0270845327792936e-05, + "loss": 8.6822, + "step": 131860 + }, + { + "epoch": 0.6585432844765162, + "grad_norm": 0.09266286343336105, + "learning_rate": 1.0269343412851386e-05, + "loss": 8.6785, + "step": 131870 + }, + { + "epoch": 0.6585932233014558, + "grad_norm": 0.09570097178220749, + "learning_rate": 1.0267841497909835e-05, + "loss": 8.6812, + "step": 131880 + }, + { + "epoch": 0.6586431621263952, + "grad_norm": 0.08836355060338974, + "learning_rate": 1.0266339582968285e-05, + "loss": 8.6714, + "step": 131890 + }, + { + "epoch": 0.6586931009513346, + "grad_norm": 0.09223395586013794, + "learning_rate": 1.0264837668026733e-05, + "loss": 8.6717, + "step": 131900 + }, + { + "epoch": 0.658743039776274, + "grad_norm": 0.09718294441699982, + "learning_rate": 1.0263335753085184e-05, + "loss": 8.6782, + "step": 131910 + }, + { + "epoch": 0.6587929786012136, + "grad_norm": 0.09103352576494217, + "learning_rate": 1.0261833838143634e-05, + "loss": 8.6824, + "step": 131920 + }, + { + "epoch": 0.658842917426153, + "grad_norm": 0.09198730438947678, + "learning_rate": 1.0260331923202084e-05, + "loss": 8.6817, + "step": 131930 + }, + { + "epoch": 0.6588928562510924, + "grad_norm": 0.09134362637996674, + "learning_rate": 1.0258830008260532e-05, + "loss": 8.6747, + "step": 131940 + }, + { + "epoch": 0.6589427950760318, + "grad_norm": 0.09106438606977463, + "learning_rate": 1.0257328093318981e-05, + "loss": 8.6674, + "step": 131950 + }, + { + "epoch": 0.6589927339009714, + "grad_norm": 0.08696383237838745, + "learning_rate": 1.0255826178377431e-05, + "loss": 8.6701, + "step": 131960 + }, + { + "epoch": 0.6590426727259108, + "grad_norm": 0.0889415293931961, + "learning_rate": 1.0254324263435881e-05, + "loss": 8.6741, + "step": 131970 + }, + { + "epoch": 0.6590926115508502, + "grad_norm": 0.09402123838663101, + "learning_rate": 1.0252822348494331e-05, + "loss": 8.6776, + "step": 131980 + }, + { + "epoch": 0.6591425503757896, + "grad_norm": 0.0896662175655365, + "learning_rate": 1.025132043355278e-05, + "loss": 8.6766, + "step": 131990 + }, + { + "epoch": 0.6591924892007291, + "grad_norm": 0.09504416584968567, + "learning_rate": 1.024981851861123e-05, + "loss": 8.6715, + "step": 132000 + }, + { + "epoch": 0.6592424280256686, + "grad_norm": 0.0929967388510704, + "learning_rate": 1.0248316603669679e-05, + "loss": 8.6769, + "step": 132010 + }, + { + "epoch": 0.659292366850608, + "grad_norm": 0.09154746681451797, + "learning_rate": 1.0246814688728129e-05, + "loss": 8.6683, + "step": 132020 + }, + { + "epoch": 0.6593423056755474, + "grad_norm": 0.09038006514310837, + "learning_rate": 1.0245312773786579e-05, + "loss": 8.6774, + "step": 132030 + }, + { + "epoch": 0.659392244500487, + "grad_norm": 0.08670198172330856, + "learning_rate": 1.0243810858845027e-05, + "loss": 8.6843, + "step": 132040 + }, + { + "epoch": 0.6594421833254264, + "grad_norm": 0.08812010288238525, + "learning_rate": 1.0242308943903478e-05, + "loss": 8.6761, + "step": 132050 + }, + { + "epoch": 0.6594921221503658, + "grad_norm": 0.08771144598722458, + "learning_rate": 1.0240807028961926e-05, + "loss": 8.6834, + "step": 132060 + }, + { + "epoch": 0.6595420609753052, + "grad_norm": 0.08678391575813293, + "learning_rate": 1.0239305114020376e-05, + "loss": 8.6703, + "step": 132070 + }, + { + "epoch": 0.6595919998002447, + "grad_norm": 0.09273753315210342, + "learning_rate": 1.0237803199078826e-05, + "loss": 8.6742, + "step": 132080 + }, + { + "epoch": 0.6596419386251842, + "grad_norm": 0.09024372696876526, + "learning_rate": 1.0236301284137275e-05, + "loss": 8.6711, + "step": 132090 + }, + { + "epoch": 0.6596918774501236, + "grad_norm": 0.09069012850522995, + "learning_rate": 1.0234799369195725e-05, + "loss": 8.6687, + "step": 132100 + }, + { + "epoch": 0.659741816275063, + "grad_norm": 0.0902455523610115, + "learning_rate": 1.0233297454254174e-05, + "loss": 8.667, + "step": 132110 + }, + { + "epoch": 0.6597917551000025, + "grad_norm": 0.08929167687892914, + "learning_rate": 1.0231795539312624e-05, + "loss": 8.6656, + "step": 132120 + }, + { + "epoch": 0.659841693924942, + "grad_norm": 0.08881199359893799, + "learning_rate": 1.0230293624371074e-05, + "loss": 8.6793, + "step": 132130 + }, + { + "epoch": 0.6598916327498814, + "grad_norm": 0.09602124989032745, + "learning_rate": 1.0228791709429522e-05, + "loss": 8.6794, + "step": 132140 + }, + { + "epoch": 0.6599415715748208, + "grad_norm": 0.09030324965715408, + "learning_rate": 1.0227289794487973e-05, + "loss": 8.6628, + "step": 132150 + }, + { + "epoch": 0.6599915103997603, + "grad_norm": 0.09483480453491211, + "learning_rate": 1.0225787879546423e-05, + "loss": 8.668, + "step": 132160 + }, + { + "epoch": 0.6600414492246998, + "grad_norm": 0.09794318675994873, + "learning_rate": 1.0224285964604871e-05, + "loss": 8.6631, + "step": 132170 + }, + { + "epoch": 0.6600913880496392, + "grad_norm": 0.09133700281381607, + "learning_rate": 1.0222784049663321e-05, + "loss": 8.6633, + "step": 132180 + }, + { + "epoch": 0.6601413268745786, + "grad_norm": 0.09112256020307541, + "learning_rate": 1.022128213472177e-05, + "loss": 8.6794, + "step": 132190 + }, + { + "epoch": 0.6601912656995181, + "grad_norm": 0.09767073392868042, + "learning_rate": 1.021978021978022e-05, + "loss": 8.6609, + "step": 132200 + }, + { + "epoch": 0.6602412045244576, + "grad_norm": 0.09043072909116745, + "learning_rate": 1.021827830483867e-05, + "loss": 8.6865, + "step": 132210 + }, + { + "epoch": 0.660291143349397, + "grad_norm": 0.08856266736984253, + "learning_rate": 1.0216776389897119e-05, + "loss": 8.6811, + "step": 132220 + }, + { + "epoch": 0.6603410821743364, + "grad_norm": 0.09094087034463882, + "learning_rate": 1.0215274474955569e-05, + "loss": 8.6836, + "step": 132230 + }, + { + "epoch": 0.6603910209992759, + "grad_norm": 0.09180308133363724, + "learning_rate": 1.0213772560014017e-05, + "loss": 8.677, + "step": 132240 + }, + { + "epoch": 0.6604409598242154, + "grad_norm": 0.09490694850683212, + "learning_rate": 1.0212270645072468e-05, + "loss": 8.6798, + "step": 132250 + }, + { + "epoch": 0.6604908986491548, + "grad_norm": 0.09207696467638016, + "learning_rate": 1.0210768730130918e-05, + "loss": 8.6676, + "step": 132260 + }, + { + "epoch": 0.6605408374740942, + "grad_norm": 0.08791854977607727, + "learning_rate": 1.0209266815189366e-05, + "loss": 8.6882, + "step": 132270 + }, + { + "epoch": 0.6605907762990337, + "grad_norm": 0.09454667568206787, + "learning_rate": 1.0207764900247816e-05, + "loss": 8.6748, + "step": 132280 + }, + { + "epoch": 0.6606407151239732, + "grad_norm": 0.08923207223415375, + "learning_rate": 1.0206262985306265e-05, + "loss": 8.6915, + "step": 132290 + }, + { + "epoch": 0.6606906539489126, + "grad_norm": 0.08957415819168091, + "learning_rate": 1.0204761070364715e-05, + "loss": 8.6757, + "step": 132300 + }, + { + "epoch": 0.660740592773852, + "grad_norm": 0.09061232954263687, + "learning_rate": 1.0203259155423165e-05, + "loss": 8.675, + "step": 132310 + }, + { + "epoch": 0.6607905315987915, + "grad_norm": 0.0913691371679306, + "learning_rate": 1.0201757240481616e-05, + "loss": 8.6773, + "step": 132320 + }, + { + "epoch": 0.660840470423731, + "grad_norm": 0.08861444145441055, + "learning_rate": 1.0200255325540064e-05, + "loss": 8.6856, + "step": 132330 + }, + { + "epoch": 0.6608904092486704, + "grad_norm": 0.08731677383184433, + "learning_rate": 1.0198753410598512e-05, + "loss": 8.6783, + "step": 132340 + }, + { + "epoch": 0.6609403480736098, + "grad_norm": 0.09219668805599213, + "learning_rate": 1.0197251495656963e-05, + "loss": 8.6718, + "step": 132350 + }, + { + "epoch": 0.6609902868985493, + "grad_norm": 0.08887942135334015, + "learning_rate": 1.0195749580715413e-05, + "loss": 8.6755, + "step": 132360 + }, + { + "epoch": 0.6610402257234888, + "grad_norm": 0.09542329609394073, + "learning_rate": 1.0194247665773863e-05, + "loss": 8.6684, + "step": 132370 + }, + { + "epoch": 0.6610901645484282, + "grad_norm": 0.0939764529466629, + "learning_rate": 1.0192745750832312e-05, + "loss": 8.6675, + "step": 132380 + }, + { + "epoch": 0.6611401033733676, + "grad_norm": 0.09207078814506531, + "learning_rate": 1.019124383589076e-05, + "loss": 8.6756, + "step": 132390 + }, + { + "epoch": 0.6611900421983071, + "grad_norm": 0.09722395241260529, + "learning_rate": 1.018974192094921e-05, + "loss": 8.6686, + "step": 132400 + }, + { + "epoch": 0.6612399810232465, + "grad_norm": 0.10081882029771805, + "learning_rate": 1.018824000600766e-05, + "loss": 8.6651, + "step": 132410 + }, + { + "epoch": 0.661289919848186, + "grad_norm": 0.09268742799758911, + "learning_rate": 1.018673809106611e-05, + "loss": 8.6535, + "step": 132420 + }, + { + "epoch": 0.6613398586731254, + "grad_norm": 0.08973268419504166, + "learning_rate": 1.0185236176124559e-05, + "loss": 8.6679, + "step": 132430 + }, + { + "epoch": 0.6613897974980649, + "grad_norm": 0.08811484277248383, + "learning_rate": 1.0183734261183007e-05, + "loss": 8.6566, + "step": 132440 + }, + { + "epoch": 0.6614397363230043, + "grad_norm": 0.09202300012111664, + "learning_rate": 1.0182232346241458e-05, + "loss": 8.6779, + "step": 132450 + }, + { + "epoch": 0.6614896751479438, + "grad_norm": 0.08758775144815445, + "learning_rate": 1.0180730431299908e-05, + "loss": 8.6919, + "step": 132460 + }, + { + "epoch": 0.6615396139728832, + "grad_norm": 0.08985673636198044, + "learning_rate": 1.0179228516358358e-05, + "loss": 8.678, + "step": 132470 + }, + { + "epoch": 0.6615895527978227, + "grad_norm": 0.09474221616983414, + "learning_rate": 1.0177726601416808e-05, + "loss": 8.6547, + "step": 132480 + }, + { + "epoch": 0.6616394916227621, + "grad_norm": 0.09064290672540665, + "learning_rate": 1.0176224686475255e-05, + "loss": 8.6735, + "step": 132490 + }, + { + "epoch": 0.6616894304477016, + "grad_norm": 0.09543025493621826, + "learning_rate": 1.0174722771533705e-05, + "loss": 8.6677, + "step": 132500 + }, + { + "epoch": 0.661739369272641, + "grad_norm": 0.08833234757184982, + "learning_rate": 1.0173220856592155e-05, + "loss": 8.6784, + "step": 132510 + }, + { + "epoch": 0.6617893080975804, + "grad_norm": 0.09526517242193222, + "learning_rate": 1.0171718941650606e-05, + "loss": 8.6595, + "step": 132520 + }, + { + "epoch": 0.6618392469225199, + "grad_norm": 0.09490874409675598, + "learning_rate": 1.0170217026709056e-05, + "loss": 8.6708, + "step": 132530 + }, + { + "epoch": 0.6618891857474594, + "grad_norm": 0.0921601727604866, + "learning_rate": 1.0168715111767502e-05, + "loss": 8.6814, + "step": 132540 + }, + { + "epoch": 0.6619391245723988, + "grad_norm": 0.09030061960220337, + "learning_rate": 1.0167213196825953e-05, + "loss": 8.6737, + "step": 132550 + }, + { + "epoch": 0.6619890633973382, + "grad_norm": 0.09003777801990509, + "learning_rate": 1.0165711281884403e-05, + "loss": 8.6747, + "step": 132560 + }, + { + "epoch": 0.6620390022222777, + "grad_norm": 0.09126746654510498, + "learning_rate": 1.0164209366942853e-05, + "loss": 8.6858, + "step": 132570 + }, + { + "epoch": 0.6620889410472172, + "grad_norm": 0.08809841424226761, + "learning_rate": 1.0162707452001303e-05, + "loss": 8.6722, + "step": 132580 + }, + { + "epoch": 0.6621388798721566, + "grad_norm": 0.09087785333395004, + "learning_rate": 1.016120553705975e-05, + "loss": 8.6795, + "step": 132590 + }, + { + "epoch": 0.662188818697096, + "grad_norm": 0.09890290349721909, + "learning_rate": 1.01597036221182e-05, + "loss": 8.6719, + "step": 132600 + }, + { + "epoch": 0.6622387575220355, + "grad_norm": 0.08723946660757065, + "learning_rate": 1.015820170717665e-05, + "loss": 8.6765, + "step": 132610 + }, + { + "epoch": 0.662288696346975, + "grad_norm": 0.09441614151000977, + "learning_rate": 1.01566997922351e-05, + "loss": 8.6709, + "step": 132620 + }, + { + "epoch": 0.6623386351719144, + "grad_norm": 0.09669451415538788, + "learning_rate": 1.015519787729355e-05, + "loss": 8.6856, + "step": 132630 + }, + { + "epoch": 0.6623885739968538, + "grad_norm": 0.0917108878493309, + "learning_rate": 1.0153695962352e-05, + "loss": 8.6664, + "step": 132640 + }, + { + "epoch": 0.6624385128217933, + "grad_norm": 0.0893852636218071, + "learning_rate": 1.0152194047410448e-05, + "loss": 8.6694, + "step": 132650 + }, + { + "epoch": 0.6624884516467328, + "grad_norm": 0.09049538522958755, + "learning_rate": 1.0150692132468898e-05, + "loss": 8.6595, + "step": 132660 + }, + { + "epoch": 0.6625383904716722, + "grad_norm": 0.0896064043045044, + "learning_rate": 1.0149190217527348e-05, + "loss": 8.6785, + "step": 132670 + }, + { + "epoch": 0.6625883292966116, + "grad_norm": 0.09562838077545166, + "learning_rate": 1.0147688302585798e-05, + "loss": 8.6778, + "step": 132680 + }, + { + "epoch": 0.6626382681215511, + "grad_norm": 0.08565637469291687, + "learning_rate": 1.0146186387644247e-05, + "loss": 8.6761, + "step": 132690 + }, + { + "epoch": 0.6626882069464906, + "grad_norm": 0.08810480684041977, + "learning_rate": 1.0144684472702695e-05, + "loss": 8.6872, + "step": 132700 + }, + { + "epoch": 0.66273814577143, + "grad_norm": 0.09386193752288818, + "learning_rate": 1.0143182557761145e-05, + "loss": 8.6697, + "step": 132710 + }, + { + "epoch": 0.6627880845963694, + "grad_norm": 0.08664627373218536, + "learning_rate": 1.0141680642819596e-05, + "loss": 8.6757, + "step": 132720 + }, + { + "epoch": 0.6628380234213089, + "grad_norm": 0.09146647155284882, + "learning_rate": 1.0140178727878046e-05, + "loss": 8.6781, + "step": 132730 + }, + { + "epoch": 0.6628879622462484, + "grad_norm": 0.09313146770000458, + "learning_rate": 1.0138676812936494e-05, + "loss": 8.6754, + "step": 132740 + }, + { + "epoch": 0.6629379010711878, + "grad_norm": 0.08888036757707596, + "learning_rate": 1.0137174897994943e-05, + "loss": 8.6916, + "step": 132750 + }, + { + "epoch": 0.6629878398961272, + "grad_norm": 0.09621097892522812, + "learning_rate": 1.0135672983053393e-05, + "loss": 8.6865, + "step": 132760 + }, + { + "epoch": 0.6630377787210667, + "grad_norm": 0.09131909161806107, + "learning_rate": 1.0134171068111843e-05, + "loss": 8.6703, + "step": 132770 + }, + { + "epoch": 0.6630877175460062, + "grad_norm": 0.09367981553077698, + "learning_rate": 1.0132669153170293e-05, + "loss": 8.6669, + "step": 132780 + }, + { + "epoch": 0.6631376563709456, + "grad_norm": 0.09454252570867538, + "learning_rate": 1.0131167238228742e-05, + "loss": 8.6777, + "step": 132790 + }, + { + "epoch": 0.663187595195885, + "grad_norm": 0.09207130968570709, + "learning_rate": 1.0129665323287192e-05, + "loss": 8.6612, + "step": 132800 + }, + { + "epoch": 0.6632375340208245, + "grad_norm": 0.08936728537082672, + "learning_rate": 1.012816340834564e-05, + "loss": 8.6755, + "step": 132810 + }, + { + "epoch": 0.663287472845764, + "grad_norm": 0.09480737149715424, + "learning_rate": 1.012666149340409e-05, + "loss": 8.6621, + "step": 132820 + }, + { + "epoch": 0.6633374116707034, + "grad_norm": 0.08680365234613419, + "learning_rate": 1.012515957846254e-05, + "loss": 8.6732, + "step": 132830 + }, + { + "epoch": 0.6633873504956428, + "grad_norm": 0.09133225679397583, + "learning_rate": 1.012365766352099e-05, + "loss": 8.6831, + "step": 132840 + }, + { + "epoch": 0.6634372893205823, + "grad_norm": 0.08942453563213348, + "learning_rate": 1.012215574857944e-05, + "loss": 8.6746, + "step": 132850 + }, + { + "epoch": 0.6634872281455217, + "grad_norm": 0.09342201054096222, + "learning_rate": 1.0120653833637888e-05, + "loss": 8.6761, + "step": 132860 + }, + { + "epoch": 0.6635371669704612, + "grad_norm": 0.09316600114107132, + "learning_rate": 1.0119151918696338e-05, + "loss": 8.6868, + "step": 132870 + }, + { + "epoch": 0.6635871057954006, + "grad_norm": 0.08866464346647263, + "learning_rate": 1.0117650003754788e-05, + "loss": 8.6818, + "step": 132880 + }, + { + "epoch": 0.6636370446203401, + "grad_norm": 0.09229955822229385, + "learning_rate": 1.0116148088813237e-05, + "loss": 8.6467, + "step": 132890 + }, + { + "epoch": 0.6636869834452795, + "grad_norm": 0.09364652633666992, + "learning_rate": 1.0114646173871687e-05, + "loss": 8.6595, + "step": 132900 + }, + { + "epoch": 0.663736922270219, + "grad_norm": 0.08931564539670944, + "learning_rate": 1.0113144258930135e-05, + "loss": 8.6613, + "step": 132910 + }, + { + "epoch": 0.6637868610951584, + "grad_norm": 0.09411066770553589, + "learning_rate": 1.0111642343988586e-05, + "loss": 8.6725, + "step": 132920 + }, + { + "epoch": 0.6638367999200979, + "grad_norm": 0.08930312842130661, + "learning_rate": 1.0110140429047036e-05, + "loss": 8.6819, + "step": 132930 + }, + { + "epoch": 0.6638867387450373, + "grad_norm": 0.10185947269201279, + "learning_rate": 1.0108638514105484e-05, + "loss": 8.6582, + "step": 132940 + }, + { + "epoch": 0.6639366775699768, + "grad_norm": 0.09503977745771408, + "learning_rate": 1.0107136599163934e-05, + "loss": 8.6604, + "step": 132950 + }, + { + "epoch": 0.6639866163949162, + "grad_norm": 0.08833954483270645, + "learning_rate": 1.0105634684222385e-05, + "loss": 8.6738, + "step": 132960 + }, + { + "epoch": 0.6640365552198557, + "grad_norm": 0.09546960145235062, + "learning_rate": 1.0104132769280833e-05, + "loss": 8.6512, + "step": 132970 + }, + { + "epoch": 0.6640864940447951, + "grad_norm": 0.089764803647995, + "learning_rate": 1.0102630854339283e-05, + "loss": 8.6673, + "step": 132980 + }, + { + "epoch": 0.6641364328697346, + "grad_norm": 0.09377101808786392, + "learning_rate": 1.0101128939397732e-05, + "loss": 8.6686, + "step": 132990 + }, + { + "epoch": 0.664186371694674, + "grad_norm": 0.0922766849398613, + "learning_rate": 1.0099627024456182e-05, + "loss": 8.6927, + "step": 133000 + }, + { + "epoch": 0.6642363105196135, + "grad_norm": 0.09250890463590622, + "learning_rate": 1.0098125109514632e-05, + "loss": 8.6572, + "step": 133010 + }, + { + "epoch": 0.6642862493445529, + "grad_norm": 0.09117847681045532, + "learning_rate": 1.009662319457308e-05, + "loss": 8.67, + "step": 133020 + }, + { + "epoch": 0.6643361881694924, + "grad_norm": 0.09532193839550018, + "learning_rate": 1.009512127963153e-05, + "loss": 8.6688, + "step": 133030 + }, + { + "epoch": 0.6643861269944318, + "grad_norm": 0.093431755900383, + "learning_rate": 1.009361936468998e-05, + "loss": 8.6796, + "step": 133040 + }, + { + "epoch": 0.6644360658193713, + "grad_norm": 0.09252150356769562, + "learning_rate": 1.009211744974843e-05, + "loss": 8.6743, + "step": 133050 + }, + { + "epoch": 0.6644860046443107, + "grad_norm": 0.10001608729362488, + "learning_rate": 1.009061553480688e-05, + "loss": 8.6727, + "step": 133060 + }, + { + "epoch": 0.6645359434692502, + "grad_norm": 0.09084002673625946, + "learning_rate": 1.0089113619865328e-05, + "loss": 8.6693, + "step": 133070 + }, + { + "epoch": 0.6645858822941896, + "grad_norm": 0.09234880656003952, + "learning_rate": 1.0087611704923778e-05, + "loss": 8.6681, + "step": 133080 + }, + { + "epoch": 0.6646358211191291, + "grad_norm": 0.09040534496307373, + "learning_rate": 1.0086109789982227e-05, + "loss": 8.659, + "step": 133090 + }, + { + "epoch": 0.6646857599440685, + "grad_norm": 0.0903211459517479, + "learning_rate": 1.0084607875040677e-05, + "loss": 8.6726, + "step": 133100 + }, + { + "epoch": 0.664735698769008, + "grad_norm": 0.08797595649957657, + "learning_rate": 1.0083105960099127e-05, + "loss": 8.676, + "step": 133110 + }, + { + "epoch": 0.6647856375939474, + "grad_norm": 0.0916309654712677, + "learning_rate": 1.0081604045157577e-05, + "loss": 8.676, + "step": 133120 + }, + { + "epoch": 0.6648355764188869, + "grad_norm": 0.08980787545442581, + "learning_rate": 1.0080102130216026e-05, + "loss": 8.6746, + "step": 133130 + }, + { + "epoch": 0.6648855152438263, + "grad_norm": 0.0977618619799614, + "learning_rate": 1.0078600215274474e-05, + "loss": 8.6623, + "step": 133140 + }, + { + "epoch": 0.6649354540687658, + "grad_norm": 0.08751973509788513, + "learning_rate": 1.0077098300332924e-05, + "loss": 8.6612, + "step": 133150 + }, + { + "epoch": 0.6649853928937052, + "grad_norm": 0.09779629111289978, + "learning_rate": 1.0075596385391375e-05, + "loss": 8.6694, + "step": 133160 + }, + { + "epoch": 0.6650353317186447, + "grad_norm": 0.0898473933339119, + "learning_rate": 1.0074094470449825e-05, + "loss": 8.6637, + "step": 133170 + }, + { + "epoch": 0.6650852705435841, + "grad_norm": 0.08798861503601074, + "learning_rate": 1.0072592555508273e-05, + "loss": 8.6467, + "step": 133180 + }, + { + "epoch": 0.6651352093685236, + "grad_norm": 0.09523648023605347, + "learning_rate": 1.0071090640566722e-05, + "loss": 8.6672, + "step": 133190 + }, + { + "epoch": 0.665185148193463, + "grad_norm": 0.08886650949716568, + "learning_rate": 1.0069588725625172e-05, + "loss": 8.6718, + "step": 133200 + }, + { + "epoch": 0.6652350870184025, + "grad_norm": 0.09928663820028305, + "learning_rate": 1.0068086810683622e-05, + "loss": 8.6745, + "step": 133210 + }, + { + "epoch": 0.6652850258433419, + "grad_norm": 0.09062928706407547, + "learning_rate": 1.0066584895742072e-05, + "loss": 8.6682, + "step": 133220 + }, + { + "epoch": 0.6653349646682813, + "grad_norm": 0.09397498518228531, + "learning_rate": 1.006508298080052e-05, + "loss": 8.6596, + "step": 133230 + }, + { + "epoch": 0.6653849034932208, + "grad_norm": 0.08882281184196472, + "learning_rate": 1.006358106585897e-05, + "loss": 8.6688, + "step": 133240 + }, + { + "epoch": 0.6654348423181603, + "grad_norm": 0.09210222959518433, + "learning_rate": 1.006207915091742e-05, + "loss": 8.663, + "step": 133250 + }, + { + "epoch": 0.6654847811430997, + "grad_norm": 0.08811397850513458, + "learning_rate": 1.006057723597587e-05, + "loss": 8.6646, + "step": 133260 + }, + { + "epoch": 0.6655347199680391, + "grad_norm": 0.08988697081804276, + "learning_rate": 1.005907532103432e-05, + "loss": 8.673, + "step": 133270 + }, + { + "epoch": 0.6655846587929786, + "grad_norm": 0.09060483425855637, + "learning_rate": 1.005757340609277e-05, + "loss": 8.691, + "step": 133280 + }, + { + "epoch": 0.6656345976179181, + "grad_norm": 0.09499598294496536, + "learning_rate": 1.0056071491151217e-05, + "loss": 8.6645, + "step": 133290 + }, + { + "epoch": 0.6656845364428575, + "grad_norm": 0.09347851574420929, + "learning_rate": 1.0054569576209667e-05, + "loss": 8.6652, + "step": 133300 + }, + { + "epoch": 0.6657344752677969, + "grad_norm": 0.09178951382637024, + "learning_rate": 1.0053067661268117e-05, + "loss": 8.6673, + "step": 133310 + }, + { + "epoch": 0.6657844140927364, + "grad_norm": 0.08929334580898285, + "learning_rate": 1.0051565746326567e-05, + "loss": 8.6643, + "step": 133320 + }, + { + "epoch": 0.6658343529176759, + "grad_norm": 0.08914398401975632, + "learning_rate": 1.0050063831385017e-05, + "loss": 8.6549, + "step": 133330 + }, + { + "epoch": 0.6658842917426153, + "grad_norm": 0.09610702097415924, + "learning_rate": 1.0048561916443464e-05, + "loss": 8.6626, + "step": 133340 + }, + { + "epoch": 0.6659342305675547, + "grad_norm": 0.09578754752874374, + "learning_rate": 1.0047060001501914e-05, + "loss": 8.6582, + "step": 133350 + }, + { + "epoch": 0.6659841693924942, + "grad_norm": 0.09582225978374481, + "learning_rate": 1.0045558086560365e-05, + "loss": 8.6685, + "step": 133360 + }, + { + "epoch": 0.6660341082174337, + "grad_norm": 0.08942214399576187, + "learning_rate": 1.0044056171618815e-05, + "loss": 8.6746, + "step": 133370 + }, + { + "epoch": 0.6660840470423731, + "grad_norm": 0.08953560143709183, + "learning_rate": 1.0042554256677265e-05, + "loss": 8.6745, + "step": 133380 + }, + { + "epoch": 0.6661339858673125, + "grad_norm": 0.0939660593867302, + "learning_rate": 1.0041052341735712e-05, + "loss": 8.6744, + "step": 133390 + }, + { + "epoch": 0.666183924692252, + "grad_norm": 0.09279930591583252, + "learning_rate": 1.0039550426794162e-05, + "loss": 8.6558, + "step": 133400 + }, + { + "epoch": 0.6662338635171915, + "grad_norm": 0.08562085032463074, + "learning_rate": 1.0038048511852612e-05, + "loss": 8.6662, + "step": 133410 + }, + { + "epoch": 0.6662838023421309, + "grad_norm": 0.09029558300971985, + "learning_rate": 1.0036546596911062e-05, + "loss": 8.6574, + "step": 133420 + }, + { + "epoch": 0.6663337411670703, + "grad_norm": 0.08761605620384216, + "learning_rate": 1.0035044681969512e-05, + "loss": 8.6631, + "step": 133430 + }, + { + "epoch": 0.6663836799920098, + "grad_norm": 0.09495789557695389, + "learning_rate": 1.0033542767027961e-05, + "loss": 8.66, + "step": 133440 + }, + { + "epoch": 0.6664336188169493, + "grad_norm": 0.09556636214256287, + "learning_rate": 1.003204085208641e-05, + "loss": 8.682, + "step": 133450 + }, + { + "epoch": 0.6664835576418887, + "grad_norm": 0.09460949152708054, + "learning_rate": 1.003053893714486e-05, + "loss": 8.6886, + "step": 133460 + }, + { + "epoch": 0.6665334964668281, + "grad_norm": 0.0882893055677414, + "learning_rate": 1.002903702220331e-05, + "loss": 8.668, + "step": 133470 + }, + { + "epoch": 0.6665834352917676, + "grad_norm": 0.09346653521060944, + "learning_rate": 1.002753510726176e-05, + "loss": 8.6562, + "step": 133480 + }, + { + "epoch": 0.6666333741167071, + "grad_norm": 0.09014210850000381, + "learning_rate": 1.0026033192320208e-05, + "loss": 8.6711, + "step": 133490 + }, + { + "epoch": 0.6666833129416465, + "grad_norm": 0.09201137721538544, + "learning_rate": 1.0024531277378657e-05, + "loss": 8.6789, + "step": 133500 + }, + { + "epoch": 0.6667332517665859, + "grad_norm": 0.09237250685691833, + "learning_rate": 1.0023029362437107e-05, + "loss": 8.6558, + "step": 133510 + }, + { + "epoch": 0.6667831905915254, + "grad_norm": 0.09040234982967377, + "learning_rate": 1.0021527447495557e-05, + "loss": 8.6683, + "step": 133520 + }, + { + "epoch": 0.6668331294164648, + "grad_norm": 0.10480639338493347, + "learning_rate": 1.0020025532554007e-05, + "loss": 8.6698, + "step": 133530 + }, + { + "epoch": 0.6668830682414043, + "grad_norm": 0.09072550386190414, + "learning_rate": 1.0018523617612456e-05, + "loss": 8.668, + "step": 133540 + }, + { + "epoch": 0.6669330070663437, + "grad_norm": 0.09083649516105652, + "learning_rate": 1.0017021702670904e-05, + "loss": 8.6629, + "step": 133550 + }, + { + "epoch": 0.6669829458912832, + "grad_norm": 0.08946739882230759, + "learning_rate": 1.0015519787729355e-05, + "loss": 8.6661, + "step": 133560 + }, + { + "epoch": 0.6670328847162226, + "grad_norm": 0.09026233851909637, + "learning_rate": 1.0014017872787805e-05, + "loss": 8.6714, + "step": 133570 + }, + { + "epoch": 0.6670828235411621, + "grad_norm": 0.09073863178491592, + "learning_rate": 1.0012515957846255e-05, + "loss": 8.6753, + "step": 133580 + }, + { + "epoch": 0.6671327623661015, + "grad_norm": 0.08761871606111526, + "learning_rate": 1.0011014042904703e-05, + "loss": 8.6689, + "step": 133590 + }, + { + "epoch": 0.667182701191041, + "grad_norm": 0.09583648294210434, + "learning_rate": 1.0009512127963154e-05, + "loss": 8.6691, + "step": 133600 + }, + { + "epoch": 0.6672326400159804, + "grad_norm": 0.09313435852527618, + "learning_rate": 1.0008010213021602e-05, + "loss": 8.6623, + "step": 133610 + }, + { + "epoch": 0.6672825788409199, + "grad_norm": 0.08645382523536682, + "learning_rate": 1.0006508298080052e-05, + "loss": 8.6729, + "step": 133620 + }, + { + "epoch": 0.6673325176658593, + "grad_norm": 0.09160168468952179, + "learning_rate": 1.0005006383138502e-05, + "loss": 8.6709, + "step": 133630 + }, + { + "epoch": 0.6673824564907987, + "grad_norm": 0.0923621729016304, + "learning_rate": 1.0003504468196951e-05, + "loss": 8.6641, + "step": 133640 + }, + { + "epoch": 0.6674323953157382, + "grad_norm": 0.09282460808753967, + "learning_rate": 1.0002002553255401e-05, + "loss": 8.6736, + "step": 133650 + }, + { + "epoch": 0.6674823341406777, + "grad_norm": 0.09043533354997635, + "learning_rate": 1.000050063831385e-05, + "loss": 8.6669, + "step": 133660 + }, + { + "epoch": 0.6675322729656171, + "grad_norm": 0.09773357957601547, + "learning_rate": 9.9989987233723e-06, + "loss": 8.6597, + "step": 133670 + }, + { + "epoch": 0.6675822117905565, + "grad_norm": 0.09863118827342987, + "learning_rate": 9.99749680843075e-06, + "loss": 8.683, + "step": 133680 + }, + { + "epoch": 0.667632150615496, + "grad_norm": 0.10197389125823975, + "learning_rate": 9.995994893489198e-06, + "loss": 8.6554, + "step": 133690 + }, + { + "epoch": 0.6676820894404355, + "grad_norm": 0.09325721859931946, + "learning_rate": 9.994492978547649e-06, + "loss": 8.6614, + "step": 133700 + }, + { + "epoch": 0.6677320282653749, + "grad_norm": 0.09466391056776047, + "learning_rate": 9.992991063606097e-06, + "loss": 8.6618, + "step": 133710 + }, + { + "epoch": 0.6677819670903143, + "grad_norm": 0.09134448319673538, + "learning_rate": 9.991489148664547e-06, + "loss": 8.6812, + "step": 133720 + }, + { + "epoch": 0.6678319059152538, + "grad_norm": 0.09619180858135223, + "learning_rate": 9.989987233722997e-06, + "loss": 8.6641, + "step": 133730 + }, + { + "epoch": 0.6678818447401933, + "grad_norm": 0.096577949821949, + "learning_rate": 9.988485318781446e-06, + "loss": 8.6647, + "step": 133740 + }, + { + "epoch": 0.6679317835651327, + "grad_norm": 0.08971861004829407, + "learning_rate": 9.986983403839896e-06, + "loss": 8.6594, + "step": 133750 + }, + { + "epoch": 0.6679817223900721, + "grad_norm": 0.08666064590215683, + "learning_rate": 9.985481488898346e-06, + "loss": 8.6657, + "step": 133760 + }, + { + "epoch": 0.6680316612150116, + "grad_norm": 0.09941823035478592, + "learning_rate": 9.983979573956795e-06, + "loss": 8.6705, + "step": 133770 + }, + { + "epoch": 0.6680816000399511, + "grad_norm": 0.09281333535909653, + "learning_rate": 9.982477659015245e-06, + "loss": 8.6561, + "step": 133780 + }, + { + "epoch": 0.6681315388648905, + "grad_norm": 0.09609360247850418, + "learning_rate": 9.980975744073693e-06, + "loss": 8.6606, + "step": 133790 + }, + { + "epoch": 0.6681814776898299, + "grad_norm": 0.09355857223272324, + "learning_rate": 9.979473829132144e-06, + "loss": 8.6826, + "step": 133800 + }, + { + "epoch": 0.6682314165147694, + "grad_norm": 0.09512100368738174, + "learning_rate": 9.977971914190594e-06, + "loss": 8.662, + "step": 133810 + }, + { + "epoch": 0.6682813553397089, + "grad_norm": 0.09662336111068726, + "learning_rate": 9.976469999249042e-06, + "loss": 8.6586, + "step": 133820 + }, + { + "epoch": 0.6683312941646483, + "grad_norm": 0.09392707049846649, + "learning_rate": 9.974968084307492e-06, + "loss": 8.6483, + "step": 133830 + }, + { + "epoch": 0.6683812329895877, + "grad_norm": 0.09166334569454193, + "learning_rate": 9.973466169365941e-06, + "loss": 8.6611, + "step": 133840 + }, + { + "epoch": 0.6684311718145272, + "grad_norm": 0.09149006009101868, + "learning_rate": 9.971964254424391e-06, + "loss": 8.653, + "step": 133850 + }, + { + "epoch": 0.6684811106394667, + "grad_norm": 0.09438381344079971, + "learning_rate": 9.970462339482841e-06, + "loss": 8.6613, + "step": 133860 + }, + { + "epoch": 0.6685310494644061, + "grad_norm": 0.08881976455450058, + "learning_rate": 9.96896042454129e-06, + "loss": 8.656, + "step": 133870 + }, + { + "epoch": 0.6685809882893455, + "grad_norm": 0.09529709070920944, + "learning_rate": 9.96745850959974e-06, + "loss": 8.6681, + "step": 133880 + }, + { + "epoch": 0.668630927114285, + "grad_norm": 0.09204521775245667, + "learning_rate": 9.965956594658188e-06, + "loss": 8.6722, + "step": 133890 + }, + { + "epoch": 0.6686808659392245, + "grad_norm": 0.09275458008050919, + "learning_rate": 9.964454679716639e-06, + "loss": 8.6581, + "step": 133900 + }, + { + "epoch": 0.6687308047641639, + "grad_norm": 0.09179842472076416, + "learning_rate": 9.962952764775089e-06, + "loss": 8.6784, + "step": 133910 + }, + { + "epoch": 0.6687807435891033, + "grad_norm": 0.09390482306480408, + "learning_rate": 9.961450849833539e-06, + "loss": 8.6541, + "step": 133920 + }, + { + "epoch": 0.6688306824140428, + "grad_norm": 0.09328165650367737, + "learning_rate": 9.959948934891988e-06, + "loss": 8.6696, + "step": 133930 + }, + { + "epoch": 0.6688806212389823, + "grad_norm": 0.09216855466365814, + "learning_rate": 9.958447019950436e-06, + "loss": 8.6686, + "step": 133940 + }, + { + "epoch": 0.6689305600639217, + "grad_norm": 0.09541112184524536, + "learning_rate": 9.956945105008886e-06, + "loss": 8.6747, + "step": 133950 + }, + { + "epoch": 0.6689804988888611, + "grad_norm": 0.0967298373579979, + "learning_rate": 9.955443190067336e-06, + "loss": 8.6533, + "step": 133960 + }, + { + "epoch": 0.6690304377138006, + "grad_norm": 0.09267246723175049, + "learning_rate": 9.953941275125787e-06, + "loss": 8.6666, + "step": 133970 + }, + { + "epoch": 0.6690803765387401, + "grad_norm": 0.09133971482515335, + "learning_rate": 9.952439360184235e-06, + "loss": 8.6595, + "step": 133980 + }, + { + "epoch": 0.6691303153636795, + "grad_norm": 0.0926765725016594, + "learning_rate": 9.950937445242685e-06, + "loss": 8.6628, + "step": 133990 + }, + { + "epoch": 0.6691802541886189, + "grad_norm": 0.09417456388473511, + "learning_rate": 9.949435530301134e-06, + "loss": 8.6531, + "step": 134000 + }, + { + "epoch": 0.6692301930135583, + "grad_norm": 0.09054078161716461, + "learning_rate": 9.947933615359584e-06, + "loss": 8.6442, + "step": 134010 + }, + { + "epoch": 0.6692801318384979, + "grad_norm": 0.09623897820711136, + "learning_rate": 9.946431700418034e-06, + "loss": 8.653, + "step": 134020 + }, + { + "epoch": 0.6693300706634373, + "grad_norm": 0.08746648579835892, + "learning_rate": 9.944929785476483e-06, + "loss": 8.6438, + "step": 134030 + }, + { + "epoch": 0.6693800094883767, + "grad_norm": 0.09206461906433105, + "learning_rate": 9.943427870534933e-06, + "loss": 8.6584, + "step": 134040 + }, + { + "epoch": 0.6694299483133161, + "grad_norm": 0.08908689767122269, + "learning_rate": 9.941925955593381e-06, + "loss": 8.6869, + "step": 134050 + }, + { + "epoch": 0.6694798871382557, + "grad_norm": 0.09292318671941757, + "learning_rate": 9.940424040651831e-06, + "loss": 8.6826, + "step": 134060 + }, + { + "epoch": 0.6695298259631951, + "grad_norm": 0.09243188053369522, + "learning_rate": 9.938922125710282e-06, + "loss": 8.6561, + "step": 134070 + }, + { + "epoch": 0.6695797647881345, + "grad_norm": 0.09017886966466904, + "learning_rate": 9.937420210768732e-06, + "loss": 8.6697, + "step": 134080 + }, + { + "epoch": 0.669629703613074, + "grad_norm": 0.09152711927890778, + "learning_rate": 9.93591829582718e-06, + "loss": 8.6725, + "step": 134090 + }, + { + "epoch": 0.6696796424380135, + "grad_norm": 0.08486898243427277, + "learning_rate": 9.934416380885629e-06, + "loss": 8.6743, + "step": 134100 + }, + { + "epoch": 0.6697295812629529, + "grad_norm": 0.09803474694490433, + "learning_rate": 9.932914465944079e-06, + "loss": 8.6401, + "step": 134110 + }, + { + "epoch": 0.6697795200878923, + "grad_norm": 0.08933472633361816, + "learning_rate": 9.931412551002529e-06, + "loss": 8.6808, + "step": 134120 + }, + { + "epoch": 0.6698294589128317, + "grad_norm": 0.09698875993490219, + "learning_rate": 9.92991063606098e-06, + "loss": 8.6689, + "step": 134130 + }, + { + "epoch": 0.6698793977377713, + "grad_norm": 0.08952021598815918, + "learning_rate": 9.928408721119428e-06, + "loss": 8.6691, + "step": 134140 + }, + { + "epoch": 0.6699293365627107, + "grad_norm": 0.09210740774869919, + "learning_rate": 9.926906806177876e-06, + "loss": 8.6496, + "step": 134150 + }, + { + "epoch": 0.6699792753876501, + "grad_norm": 0.09553104639053345, + "learning_rate": 9.925404891236326e-06, + "loss": 8.6525, + "step": 134160 + }, + { + "epoch": 0.6700292142125895, + "grad_norm": 0.09282413125038147, + "learning_rate": 9.923902976294777e-06, + "loss": 8.6403, + "step": 134170 + }, + { + "epoch": 0.6700791530375291, + "grad_norm": 0.08609344810247421, + "learning_rate": 9.922401061353227e-06, + "loss": 8.6725, + "step": 134180 + }, + { + "epoch": 0.6701290918624685, + "grad_norm": 0.09846524894237518, + "learning_rate": 9.920899146411675e-06, + "loss": 8.6636, + "step": 134190 + }, + { + "epoch": 0.6701790306874079, + "grad_norm": 0.09294971078634262, + "learning_rate": 9.919397231470124e-06, + "loss": 8.6753, + "step": 134200 + }, + { + "epoch": 0.6702289695123473, + "grad_norm": 0.08816470950841904, + "learning_rate": 9.917895316528574e-06, + "loss": 8.6784, + "step": 134210 + }, + { + "epoch": 0.6702789083372869, + "grad_norm": 0.08824525773525238, + "learning_rate": 9.916393401587024e-06, + "loss": 8.6515, + "step": 134220 + }, + { + "epoch": 0.6703288471622263, + "grad_norm": 0.09112218022346497, + "learning_rate": 9.914891486645474e-06, + "loss": 8.6579, + "step": 134230 + }, + { + "epoch": 0.6703787859871657, + "grad_norm": 0.09652066230773926, + "learning_rate": 9.913389571703924e-06, + "loss": 8.6431, + "step": 134240 + }, + { + "epoch": 0.6704287248121051, + "grad_norm": 0.09298358857631683, + "learning_rate": 9.911887656762371e-06, + "loss": 8.6533, + "step": 134250 + }, + { + "epoch": 0.6704786636370447, + "grad_norm": 0.09158873558044434, + "learning_rate": 9.910385741820821e-06, + "loss": 8.6822, + "step": 134260 + }, + { + "epoch": 0.6705286024619841, + "grad_norm": 0.09415313601493835, + "learning_rate": 9.908883826879272e-06, + "loss": 8.6575, + "step": 134270 + }, + { + "epoch": 0.6705785412869235, + "grad_norm": 0.09670121222734451, + "learning_rate": 9.907381911937722e-06, + "loss": 8.6453, + "step": 134280 + }, + { + "epoch": 0.6706284801118629, + "grad_norm": 0.08960152417421341, + "learning_rate": 9.905879996996172e-06, + "loss": 8.6497, + "step": 134290 + }, + { + "epoch": 0.6706784189368025, + "grad_norm": 0.09629824757575989, + "learning_rate": 9.904378082054619e-06, + "loss": 8.6611, + "step": 134300 + }, + { + "epoch": 0.6707283577617419, + "grad_norm": 0.09402245283126831, + "learning_rate": 9.902876167113069e-06, + "loss": 8.6787, + "step": 134310 + }, + { + "epoch": 0.6707782965866813, + "grad_norm": 0.09469646960496902, + "learning_rate": 9.901374252171519e-06, + "loss": 8.6602, + "step": 134320 + }, + { + "epoch": 0.6708282354116207, + "grad_norm": 0.09018561244010925, + "learning_rate": 9.89987233722997e-06, + "loss": 8.6598, + "step": 134330 + }, + { + "epoch": 0.6708781742365603, + "grad_norm": 0.0924052819609642, + "learning_rate": 9.89837042228842e-06, + "loss": 8.6522, + "step": 134340 + }, + { + "epoch": 0.6709281130614997, + "grad_norm": 0.09291255474090576, + "learning_rate": 9.896868507346866e-06, + "loss": 8.6561, + "step": 134350 + }, + { + "epoch": 0.6709780518864391, + "grad_norm": 0.09238367527723312, + "learning_rate": 9.895366592405316e-06, + "loss": 8.6519, + "step": 134360 + }, + { + "epoch": 0.6710279907113785, + "grad_norm": 0.10505057871341705, + "learning_rate": 9.893864677463767e-06, + "loss": 8.6528, + "step": 134370 + }, + { + "epoch": 0.6710779295363181, + "grad_norm": 0.09222082048654556, + "learning_rate": 9.892362762522217e-06, + "loss": 8.6566, + "step": 134380 + }, + { + "epoch": 0.6711278683612575, + "grad_norm": 0.09739550203084946, + "learning_rate": 9.890860847580667e-06, + "loss": 8.6633, + "step": 134390 + }, + { + "epoch": 0.6711778071861969, + "grad_norm": 0.09582085907459259, + "learning_rate": 9.889358932639115e-06, + "loss": 8.6723, + "step": 134400 + }, + { + "epoch": 0.6712277460111363, + "grad_norm": 0.09395405650138855, + "learning_rate": 9.887857017697564e-06, + "loss": 8.6626, + "step": 134410 + }, + { + "epoch": 0.6712776848360759, + "grad_norm": 0.09649001806974411, + "learning_rate": 9.886355102756014e-06, + "loss": 8.6701, + "step": 134420 + }, + { + "epoch": 0.6713276236610153, + "grad_norm": 0.09423769265413284, + "learning_rate": 9.884853187814464e-06, + "loss": 8.6763, + "step": 134430 + }, + { + "epoch": 0.6713775624859547, + "grad_norm": 0.09237778931856155, + "learning_rate": 9.883351272872914e-06, + "loss": 8.6705, + "step": 134440 + }, + { + "epoch": 0.6714275013108941, + "grad_norm": 0.08992139250040054, + "learning_rate": 9.881849357931363e-06, + "loss": 8.6781, + "step": 134450 + }, + { + "epoch": 0.6714774401358337, + "grad_norm": 0.09282557666301727, + "learning_rate": 9.880347442989811e-06, + "loss": 8.6552, + "step": 134460 + }, + { + "epoch": 0.6715273789607731, + "grad_norm": 0.0941321849822998, + "learning_rate": 9.878845528048262e-06, + "loss": 8.6635, + "step": 134470 + }, + { + "epoch": 0.6715773177857125, + "grad_norm": 0.09401953965425491, + "learning_rate": 9.877343613106712e-06, + "loss": 8.6542, + "step": 134480 + }, + { + "epoch": 0.6716272566106519, + "grad_norm": 0.0891314148902893, + "learning_rate": 9.875841698165162e-06, + "loss": 8.6772, + "step": 134490 + }, + { + "epoch": 0.6716771954355913, + "grad_norm": 0.09066881239414215, + "learning_rate": 9.87433978322361e-06, + "loss": 8.6553, + "step": 134500 + }, + { + "epoch": 0.6717271342605309, + "grad_norm": 0.09230073541402817, + "learning_rate": 9.872837868282059e-06, + "loss": 8.6493, + "step": 134510 + }, + { + "epoch": 0.6717770730854703, + "grad_norm": 0.09482811391353607, + "learning_rate": 9.871335953340509e-06, + "loss": 8.656, + "step": 134520 + }, + { + "epoch": 0.6718270119104097, + "grad_norm": 0.0987572893500328, + "learning_rate": 9.86983403839896e-06, + "loss": 8.6604, + "step": 134530 + }, + { + "epoch": 0.6718769507353491, + "grad_norm": 0.10041922330856323, + "learning_rate": 9.86833212345741e-06, + "loss": 8.6664, + "step": 134540 + }, + { + "epoch": 0.6719268895602887, + "grad_norm": 0.09294810891151428, + "learning_rate": 9.866830208515858e-06, + "loss": 8.6552, + "step": 134550 + }, + { + "epoch": 0.6719768283852281, + "grad_norm": 0.0915827602148056, + "learning_rate": 9.865328293574308e-06, + "loss": 8.668, + "step": 134560 + }, + { + "epoch": 0.6720267672101675, + "grad_norm": 0.09003446251153946, + "learning_rate": 9.863826378632757e-06, + "loss": 8.6777, + "step": 134570 + }, + { + "epoch": 0.6720767060351069, + "grad_norm": 0.09368298947811127, + "learning_rate": 9.862324463691207e-06, + "loss": 8.6627, + "step": 134580 + }, + { + "epoch": 0.6721266448600465, + "grad_norm": 0.09174887835979462, + "learning_rate": 9.860822548749657e-06, + "loss": 8.6605, + "step": 134590 + }, + { + "epoch": 0.6721765836849859, + "grad_norm": 0.09435159713029861, + "learning_rate": 9.859320633808105e-06, + "loss": 8.6576, + "step": 134600 + }, + { + "epoch": 0.6722265225099253, + "grad_norm": 0.09179011732339859, + "learning_rate": 9.857818718866556e-06, + "loss": 8.6462, + "step": 134610 + }, + { + "epoch": 0.6722764613348647, + "grad_norm": 0.09385015815496445, + "learning_rate": 9.856316803925004e-06, + "loss": 8.6683, + "step": 134620 + }, + { + "epoch": 0.6723264001598043, + "grad_norm": 0.09193267673254013, + "learning_rate": 9.854814888983454e-06, + "loss": 8.6526, + "step": 134630 + }, + { + "epoch": 0.6723763389847437, + "grad_norm": 0.09351233392953873, + "learning_rate": 9.853312974041904e-06, + "loss": 8.6577, + "step": 134640 + }, + { + "epoch": 0.6724262778096831, + "grad_norm": 0.09000106900930405, + "learning_rate": 9.851811059100353e-06, + "loss": 8.6783, + "step": 134650 + }, + { + "epoch": 0.6724762166346225, + "grad_norm": 0.09067153185606003, + "learning_rate": 9.850309144158803e-06, + "loss": 8.6576, + "step": 134660 + }, + { + "epoch": 0.6725261554595621, + "grad_norm": 0.0920468345284462, + "learning_rate": 9.848807229217252e-06, + "loss": 8.6682, + "step": 134670 + }, + { + "epoch": 0.6725760942845015, + "grad_norm": 0.09809605777263641, + "learning_rate": 9.847305314275702e-06, + "loss": 8.6612, + "step": 134680 + }, + { + "epoch": 0.6726260331094409, + "grad_norm": 0.1000465601682663, + "learning_rate": 9.845803399334152e-06, + "loss": 8.6516, + "step": 134690 + }, + { + "epoch": 0.6726759719343803, + "grad_norm": 0.08977311849594116, + "learning_rate": 9.8443014843926e-06, + "loss": 8.6702, + "step": 134700 + }, + { + "epoch": 0.6727259107593199, + "grad_norm": 0.09617611020803452, + "learning_rate": 9.84279956945105e-06, + "loss": 8.655, + "step": 134710 + }, + { + "epoch": 0.6727758495842593, + "grad_norm": 0.08458354324102402, + "learning_rate": 9.841297654509499e-06, + "loss": 8.6569, + "step": 134720 + }, + { + "epoch": 0.6728257884091987, + "grad_norm": 0.09260948747396469, + "learning_rate": 9.83979573956795e-06, + "loss": 8.6509, + "step": 134730 + }, + { + "epoch": 0.6728757272341381, + "grad_norm": 0.09007035940885544, + "learning_rate": 9.8382938246264e-06, + "loss": 8.6548, + "step": 134740 + }, + { + "epoch": 0.6729256660590777, + "grad_norm": 0.09251650422811508, + "learning_rate": 9.836791909684848e-06, + "loss": 8.6643, + "step": 134750 + }, + { + "epoch": 0.6729756048840171, + "grad_norm": 0.09078550338745117, + "learning_rate": 9.835289994743298e-06, + "loss": 8.6623, + "step": 134760 + }, + { + "epoch": 0.6730255437089565, + "grad_norm": 0.09649539738893509, + "learning_rate": 9.833788079801748e-06, + "loss": 8.6652, + "step": 134770 + }, + { + "epoch": 0.6730754825338959, + "grad_norm": 0.09006690233945847, + "learning_rate": 9.832286164860197e-06, + "loss": 8.6559, + "step": 134780 + }, + { + "epoch": 0.6731254213588355, + "grad_norm": 0.09151316434144974, + "learning_rate": 9.830784249918647e-06, + "loss": 8.656, + "step": 134790 + }, + { + "epoch": 0.6731753601837749, + "grad_norm": 0.08936766535043716, + "learning_rate": 9.829282334977095e-06, + "loss": 8.6594, + "step": 134800 + }, + { + "epoch": 0.6732252990087143, + "grad_norm": 0.09173012524843216, + "learning_rate": 9.827780420035546e-06, + "loss": 8.6488, + "step": 134810 + }, + { + "epoch": 0.6732752378336537, + "grad_norm": 0.09206842631101608, + "learning_rate": 9.826278505093996e-06, + "loss": 8.6666, + "step": 134820 + }, + { + "epoch": 0.6733251766585933, + "grad_norm": 0.08679156005382538, + "learning_rate": 9.824776590152444e-06, + "loss": 8.6556, + "step": 134830 + }, + { + "epoch": 0.6733751154835327, + "grad_norm": 0.08848954737186432, + "learning_rate": 9.823274675210894e-06, + "loss": 8.6569, + "step": 134840 + }, + { + "epoch": 0.6734250543084721, + "grad_norm": 0.09259720891714096, + "learning_rate": 9.821772760269343e-06, + "loss": 8.665, + "step": 134850 + }, + { + "epoch": 0.6734749931334115, + "grad_norm": 0.0927811861038208, + "learning_rate": 9.820270845327793e-06, + "loss": 8.6641, + "step": 134860 + }, + { + "epoch": 0.6735249319583511, + "grad_norm": 0.09524352103471756, + "learning_rate": 9.818768930386243e-06, + "loss": 8.6755, + "step": 134870 + }, + { + "epoch": 0.6735748707832905, + "grad_norm": 0.09407173842191696, + "learning_rate": 9.817267015444692e-06, + "loss": 8.6591, + "step": 134880 + }, + { + "epoch": 0.6736248096082299, + "grad_norm": 0.0950080081820488, + "learning_rate": 9.815765100503142e-06, + "loss": 8.6511, + "step": 134890 + }, + { + "epoch": 0.6736747484331693, + "grad_norm": 0.09280173480510712, + "learning_rate": 9.81426318556159e-06, + "loss": 8.6471, + "step": 134900 + }, + { + "epoch": 0.6737246872581089, + "grad_norm": 0.0905299037694931, + "learning_rate": 9.81276127062004e-06, + "loss": 8.6516, + "step": 134910 + }, + { + "epoch": 0.6737746260830483, + "grad_norm": 0.09357500821352005, + "learning_rate": 9.81125935567849e-06, + "loss": 8.668, + "step": 134920 + }, + { + "epoch": 0.6738245649079877, + "grad_norm": 0.09710787981748581, + "learning_rate": 9.809757440736941e-06, + "loss": 8.6576, + "step": 134930 + }, + { + "epoch": 0.6738745037329271, + "grad_norm": 0.08756748586893082, + "learning_rate": 9.80825552579539e-06, + "loss": 8.6721, + "step": 134940 + }, + { + "epoch": 0.6739244425578667, + "grad_norm": 0.08993040025234222, + "learning_rate": 9.806753610853838e-06, + "loss": 8.6743, + "step": 134950 + }, + { + "epoch": 0.6739743813828061, + "grad_norm": 0.0880391076207161, + "learning_rate": 9.805251695912288e-06, + "loss": 8.6597, + "step": 134960 + }, + { + "epoch": 0.6740243202077455, + "grad_norm": 0.08911752700805664, + "learning_rate": 9.803749780970738e-06, + "loss": 8.6669, + "step": 134970 + }, + { + "epoch": 0.6740742590326849, + "grad_norm": 0.09046872705221176, + "learning_rate": 9.802247866029188e-06, + "loss": 8.6558, + "step": 134980 + }, + { + "epoch": 0.6741241978576245, + "grad_norm": 0.09054312109947205, + "learning_rate": 9.800745951087637e-06, + "loss": 8.6595, + "step": 134990 + }, + { + "epoch": 0.6741741366825639, + "grad_norm": 0.09408404678106308, + "learning_rate": 9.799244036146085e-06, + "loss": 8.6439, + "step": 135000 + }, + { + "epoch": 0.6742240755075033, + "grad_norm": 0.08936314284801483, + "learning_rate": 9.797742121204536e-06, + "loss": 8.6506, + "step": 135010 + }, + { + "epoch": 0.6742740143324427, + "grad_norm": 0.09435812383890152, + "learning_rate": 9.796240206262986e-06, + "loss": 8.6472, + "step": 135020 + }, + { + "epoch": 0.6743239531573822, + "grad_norm": 0.09421943128108978, + "learning_rate": 9.794738291321436e-06, + "loss": 8.6749, + "step": 135030 + }, + { + "epoch": 0.6743738919823217, + "grad_norm": 0.08769576996564865, + "learning_rate": 9.793236376379884e-06, + "loss": 8.6708, + "step": 135040 + }, + { + "epoch": 0.6744238308072611, + "grad_norm": 0.09175736457109451, + "learning_rate": 9.791734461438333e-06, + "loss": 8.6579, + "step": 135050 + }, + { + "epoch": 0.6744737696322005, + "grad_norm": 0.08920390158891678, + "learning_rate": 9.790232546496783e-06, + "loss": 8.6663, + "step": 135060 + }, + { + "epoch": 0.67452370845714, + "grad_norm": 0.09278859943151474, + "learning_rate": 9.788730631555233e-06, + "loss": 8.6719, + "step": 135070 + }, + { + "epoch": 0.6745736472820795, + "grad_norm": 0.090042345225811, + "learning_rate": 9.787228716613683e-06, + "loss": 8.6571, + "step": 135080 + }, + { + "epoch": 0.6746235861070189, + "grad_norm": 0.09226316958665848, + "learning_rate": 9.785726801672134e-06, + "loss": 8.6539, + "step": 135090 + }, + { + "epoch": 0.6746735249319583, + "grad_norm": 0.09357789903879166, + "learning_rate": 9.78422488673058e-06, + "loss": 8.6673, + "step": 135100 + }, + { + "epoch": 0.6747234637568978, + "grad_norm": 0.09317333251237869, + "learning_rate": 9.78272297178903e-06, + "loss": 8.6531, + "step": 135110 + }, + { + "epoch": 0.6747734025818373, + "grad_norm": 0.09152083098888397, + "learning_rate": 9.78122105684748e-06, + "loss": 8.6667, + "step": 135120 + }, + { + "epoch": 0.6748233414067767, + "grad_norm": 0.0933714434504509, + "learning_rate": 9.779719141905931e-06, + "loss": 8.6673, + "step": 135130 + }, + { + "epoch": 0.6748732802317161, + "grad_norm": 0.09301735460758209, + "learning_rate": 9.778217226964381e-06, + "loss": 8.6561, + "step": 135140 + }, + { + "epoch": 0.6749232190566556, + "grad_norm": 0.0876002088189125, + "learning_rate": 9.776715312022828e-06, + "loss": 8.6553, + "step": 135150 + }, + { + "epoch": 0.6749731578815951, + "grad_norm": 0.09405035525560379, + "learning_rate": 9.775213397081278e-06, + "loss": 8.6746, + "step": 135160 + }, + { + "epoch": 0.6750230967065345, + "grad_norm": 0.09140407294034958, + "learning_rate": 9.773711482139728e-06, + "loss": 8.6653, + "step": 135170 + }, + { + "epoch": 0.6750730355314739, + "grad_norm": 0.08978252112865448, + "learning_rate": 9.772209567198178e-06, + "loss": 8.6645, + "step": 135180 + }, + { + "epoch": 0.6751229743564134, + "grad_norm": 0.0867505818605423, + "learning_rate": 9.770707652256629e-06, + "loss": 8.6649, + "step": 135190 + }, + { + "epoch": 0.6751729131813529, + "grad_norm": 0.08967334032058716, + "learning_rate": 9.769205737315075e-06, + "loss": 8.6698, + "step": 135200 + }, + { + "epoch": 0.6752228520062923, + "grad_norm": 0.09012636542320251, + "learning_rate": 9.767703822373526e-06, + "loss": 8.6562, + "step": 135210 + }, + { + "epoch": 0.6752727908312317, + "grad_norm": 0.0897894874215126, + "learning_rate": 9.766201907431976e-06, + "loss": 8.6699, + "step": 135220 + }, + { + "epoch": 0.6753227296561712, + "grad_norm": 0.09098558127880096, + "learning_rate": 9.764699992490426e-06, + "loss": 8.6482, + "step": 135230 + }, + { + "epoch": 0.6753726684811107, + "grad_norm": 0.09297525882720947, + "learning_rate": 9.763198077548876e-06, + "loss": 8.6506, + "step": 135240 + }, + { + "epoch": 0.6754226073060501, + "grad_norm": 0.09668441861867905, + "learning_rate": 9.761696162607325e-06, + "loss": 8.6461, + "step": 135250 + }, + { + "epoch": 0.6754725461309895, + "grad_norm": 0.09092225879430771, + "learning_rate": 9.760194247665773e-06, + "loss": 8.6631, + "step": 135260 + }, + { + "epoch": 0.675522484955929, + "grad_norm": 0.09417429566383362, + "learning_rate": 9.758692332724223e-06, + "loss": 8.6589, + "step": 135270 + }, + { + "epoch": 0.6755724237808685, + "grad_norm": 0.08669702708721161, + "learning_rate": 9.757190417782673e-06, + "loss": 8.6646, + "step": 135280 + }, + { + "epoch": 0.6756223626058079, + "grad_norm": 0.08657342940568924, + "learning_rate": 9.755688502841124e-06, + "loss": 8.6499, + "step": 135290 + }, + { + "epoch": 0.6756723014307473, + "grad_norm": 0.09297830611467361, + "learning_rate": 9.754186587899572e-06, + "loss": 8.6483, + "step": 135300 + }, + { + "epoch": 0.6757222402556868, + "grad_norm": 0.09091796725988388, + "learning_rate": 9.75268467295802e-06, + "loss": 8.6687, + "step": 135310 + }, + { + "epoch": 0.6757721790806263, + "grad_norm": 0.09649965912103653, + "learning_rate": 9.75118275801647e-06, + "loss": 8.6546, + "step": 135320 + }, + { + "epoch": 0.6758221179055657, + "grad_norm": 0.09378094226121902, + "learning_rate": 9.749680843074921e-06, + "loss": 8.6654, + "step": 135330 + }, + { + "epoch": 0.6758720567305051, + "grad_norm": 0.09173280000686646, + "learning_rate": 9.748178928133371e-06, + "loss": 8.6516, + "step": 135340 + }, + { + "epoch": 0.6759219955554446, + "grad_norm": 0.096292644739151, + "learning_rate": 9.74667701319182e-06, + "loss": 8.6403, + "step": 135350 + }, + { + "epoch": 0.675971934380384, + "grad_norm": 0.09011993557214737, + "learning_rate": 9.745175098250268e-06, + "loss": 8.6538, + "step": 135360 + }, + { + "epoch": 0.6760218732053235, + "grad_norm": 0.09252528101205826, + "learning_rate": 9.743673183308718e-06, + "loss": 8.6566, + "step": 135370 + }, + { + "epoch": 0.6760718120302629, + "grad_norm": 0.09614871442317963, + "learning_rate": 9.742171268367168e-06, + "loss": 8.6462, + "step": 135380 + }, + { + "epoch": 0.6761217508552024, + "grad_norm": 0.09508166462182999, + "learning_rate": 9.740669353425619e-06, + "loss": 8.6597, + "step": 135390 + }, + { + "epoch": 0.6761716896801419, + "grad_norm": 0.09378086030483246, + "learning_rate": 9.739167438484067e-06, + "loss": 8.6674, + "step": 135400 + }, + { + "epoch": 0.6762216285050813, + "grad_norm": 0.08668700605630875, + "learning_rate": 9.737665523542517e-06, + "loss": 8.6488, + "step": 135410 + }, + { + "epoch": 0.6762715673300207, + "grad_norm": 0.09363897889852524, + "learning_rate": 9.736163608600966e-06, + "loss": 8.6639, + "step": 135420 + }, + { + "epoch": 0.6763215061549602, + "grad_norm": 0.0928199365735054, + "learning_rate": 9.734661693659416e-06, + "loss": 8.6574, + "step": 135430 + }, + { + "epoch": 0.6763714449798996, + "grad_norm": 0.09100401401519775, + "learning_rate": 9.733159778717866e-06, + "loss": 8.6541, + "step": 135440 + }, + { + "epoch": 0.6764213838048391, + "grad_norm": 0.08921308815479279, + "learning_rate": 9.731657863776315e-06, + "loss": 8.6332, + "step": 135450 + }, + { + "epoch": 0.6764713226297785, + "grad_norm": 0.0926818996667862, + "learning_rate": 9.730155948834765e-06, + "loss": 8.6395, + "step": 135460 + }, + { + "epoch": 0.676521261454718, + "grad_norm": 0.08852003514766693, + "learning_rate": 9.728654033893213e-06, + "loss": 8.6492, + "step": 135470 + }, + { + "epoch": 0.6765712002796574, + "grad_norm": 0.08820042759180069, + "learning_rate": 9.727152118951664e-06, + "loss": 8.6549, + "step": 135480 + }, + { + "epoch": 0.6766211391045969, + "grad_norm": 0.09674821048974991, + "learning_rate": 9.725650204010114e-06, + "loss": 8.6498, + "step": 135490 + }, + { + "epoch": 0.6766710779295363, + "grad_norm": 0.0904177725315094, + "learning_rate": 9.724148289068562e-06, + "loss": 8.6776, + "step": 135500 + }, + { + "epoch": 0.6767210167544757, + "grad_norm": 0.09356199204921722, + "learning_rate": 9.722646374127012e-06, + "loss": 8.6619, + "step": 135510 + }, + { + "epoch": 0.6767709555794152, + "grad_norm": 0.09460005164146423, + "learning_rate": 9.72114445918546e-06, + "loss": 8.6478, + "step": 135520 + }, + { + "epoch": 0.6768208944043547, + "grad_norm": 0.09244546294212341, + "learning_rate": 9.719642544243911e-06, + "loss": 8.6778, + "step": 135530 + }, + { + "epoch": 0.6768708332292941, + "grad_norm": 0.09258837252855301, + "learning_rate": 9.718140629302361e-06, + "loss": 8.6654, + "step": 135540 + }, + { + "epoch": 0.6769207720542335, + "grad_norm": 0.08935635536909103, + "learning_rate": 9.71663871436081e-06, + "loss": 8.6699, + "step": 135550 + }, + { + "epoch": 0.676970710879173, + "grad_norm": 0.10672944039106369, + "learning_rate": 9.71513679941926e-06, + "loss": 8.6493, + "step": 135560 + }, + { + "epoch": 0.6770206497041125, + "grad_norm": 0.09256184101104736, + "learning_rate": 9.71363488447771e-06, + "loss": 8.6716, + "step": 135570 + }, + { + "epoch": 0.6770705885290519, + "grad_norm": 0.09515663981437683, + "learning_rate": 9.712132969536159e-06, + "loss": 8.6385, + "step": 135580 + }, + { + "epoch": 0.6771205273539913, + "grad_norm": 0.09241361916065216, + "learning_rate": 9.710631054594609e-06, + "loss": 8.6506, + "step": 135590 + }, + { + "epoch": 0.6771704661789308, + "grad_norm": 0.08679981529712677, + "learning_rate": 9.709129139653057e-06, + "loss": 8.6791, + "step": 135600 + }, + { + "epoch": 0.6772204050038703, + "grad_norm": 0.08532244712114334, + "learning_rate": 9.707627224711507e-06, + "loss": 8.671, + "step": 135610 + }, + { + "epoch": 0.6772703438288097, + "grad_norm": 0.08982454240322113, + "learning_rate": 9.706125309769958e-06, + "loss": 8.6516, + "step": 135620 + }, + { + "epoch": 0.6773202826537491, + "grad_norm": 0.10029303282499313, + "learning_rate": 9.704623394828406e-06, + "loss": 8.6435, + "step": 135630 + }, + { + "epoch": 0.6773702214786886, + "grad_norm": 0.09623725712299347, + "learning_rate": 9.703121479886856e-06, + "loss": 8.6662, + "step": 135640 + }, + { + "epoch": 0.6774201603036281, + "grad_norm": 0.08999323099851608, + "learning_rate": 9.701619564945305e-06, + "loss": 8.6676, + "step": 135650 + }, + { + "epoch": 0.6774700991285675, + "grad_norm": 0.09232403337955475, + "learning_rate": 9.700117650003755e-06, + "loss": 8.6596, + "step": 135660 + }, + { + "epoch": 0.6775200379535069, + "grad_norm": 0.08921338617801666, + "learning_rate": 9.698615735062205e-06, + "loss": 8.6421, + "step": 135670 + }, + { + "epoch": 0.6775699767784464, + "grad_norm": 0.09304777532815933, + "learning_rate": 9.697113820120654e-06, + "loss": 8.6406, + "step": 135680 + }, + { + "epoch": 0.6776199156033859, + "grad_norm": 0.08728094398975372, + "learning_rate": 9.695611905179104e-06, + "loss": 8.6578, + "step": 135690 + }, + { + "epoch": 0.6776698544283253, + "grad_norm": 0.09113137423992157, + "learning_rate": 9.694109990237552e-06, + "loss": 8.6582, + "step": 135700 + }, + { + "epoch": 0.6777197932532647, + "grad_norm": 0.09369809180498123, + "learning_rate": 9.692608075296002e-06, + "loss": 8.6454, + "step": 135710 + }, + { + "epoch": 0.6777697320782042, + "grad_norm": 0.0949154943227768, + "learning_rate": 9.691106160354453e-06, + "loss": 8.646, + "step": 135720 + }, + { + "epoch": 0.6778196709031437, + "grad_norm": 0.09159260988235474, + "learning_rate": 9.689604245412903e-06, + "loss": 8.6647, + "step": 135730 + }, + { + "epoch": 0.6778696097280831, + "grad_norm": 0.08925732225179672, + "learning_rate": 9.688102330471351e-06, + "loss": 8.6396, + "step": 135740 + }, + { + "epoch": 0.6779195485530225, + "grad_norm": 0.10024773329496384, + "learning_rate": 9.6866004155298e-06, + "loss": 8.6457, + "step": 135750 + }, + { + "epoch": 0.677969487377962, + "grad_norm": 0.08866682648658752, + "learning_rate": 9.68509850058825e-06, + "loss": 8.662, + "step": 135760 + }, + { + "epoch": 0.6780194262029015, + "grad_norm": 0.09140399843454361, + "learning_rate": 9.6835965856467e-06, + "loss": 8.6452, + "step": 135770 + }, + { + "epoch": 0.6780693650278409, + "grad_norm": 0.09286113828420639, + "learning_rate": 9.68209467070515e-06, + "loss": 8.6551, + "step": 135780 + }, + { + "epoch": 0.6781193038527803, + "grad_norm": 0.09333377331495285, + "learning_rate": 9.680592755763599e-06, + "loss": 8.6632, + "step": 135790 + }, + { + "epoch": 0.6781692426777198, + "grad_norm": 0.09056348353624344, + "learning_rate": 9.679090840822047e-06, + "loss": 8.6587, + "step": 135800 + }, + { + "epoch": 0.6782191815026593, + "grad_norm": 0.09084860980510712, + "learning_rate": 9.677588925880497e-06, + "loss": 8.664, + "step": 135810 + }, + { + "epoch": 0.6782691203275987, + "grad_norm": 0.09165407717227936, + "learning_rate": 9.676087010938948e-06, + "loss": 8.6647, + "step": 135820 + }, + { + "epoch": 0.6783190591525381, + "grad_norm": 0.08495034277439117, + "learning_rate": 9.674585095997398e-06, + "loss": 8.6464, + "step": 135830 + }, + { + "epoch": 0.6783689979774776, + "grad_norm": 0.09273646771907806, + "learning_rate": 9.673083181055846e-06, + "loss": 8.6369, + "step": 135840 + }, + { + "epoch": 0.678418936802417, + "grad_norm": 0.09448003023862839, + "learning_rate": 9.671581266114295e-06, + "loss": 8.6468, + "step": 135850 + }, + { + "epoch": 0.6784688756273565, + "grad_norm": 0.08844837546348572, + "learning_rate": 9.670079351172745e-06, + "loss": 8.6591, + "step": 135860 + }, + { + "epoch": 0.6785188144522959, + "grad_norm": 0.09456723183393478, + "learning_rate": 9.668577436231195e-06, + "loss": 8.6537, + "step": 135870 + }, + { + "epoch": 0.6785687532772354, + "grad_norm": 0.0897020697593689, + "learning_rate": 9.667075521289645e-06, + "loss": 8.6545, + "step": 135880 + }, + { + "epoch": 0.6786186921021748, + "grad_norm": 0.09378873556852341, + "learning_rate": 9.665573606348095e-06, + "loss": 8.6497, + "step": 135890 + }, + { + "epoch": 0.6786686309271143, + "grad_norm": 0.09122031182050705, + "learning_rate": 9.664071691406542e-06, + "loss": 8.6599, + "step": 135900 + }, + { + "epoch": 0.6787185697520537, + "grad_norm": 0.0962730273604393, + "learning_rate": 9.662569776464992e-06, + "loss": 8.6579, + "step": 135910 + }, + { + "epoch": 0.6787685085769932, + "grad_norm": 0.0921352207660675, + "learning_rate": 9.661067861523443e-06, + "loss": 8.6433, + "step": 135920 + }, + { + "epoch": 0.6788184474019326, + "grad_norm": 0.09395617246627808, + "learning_rate": 9.659565946581893e-06, + "loss": 8.6507, + "step": 135930 + }, + { + "epoch": 0.6788683862268721, + "grad_norm": 0.08777940273284912, + "learning_rate": 9.658064031640343e-06, + "loss": 8.6626, + "step": 135940 + }, + { + "epoch": 0.6789183250518115, + "grad_norm": 0.08930377662181854, + "learning_rate": 9.65656211669879e-06, + "loss": 8.6573, + "step": 135950 + }, + { + "epoch": 0.678968263876751, + "grad_norm": 0.09742255508899689, + "learning_rate": 9.65506020175724e-06, + "loss": 8.6381, + "step": 135960 + }, + { + "epoch": 0.6790182027016904, + "grad_norm": 0.09233038872480392, + "learning_rate": 9.65355828681569e-06, + "loss": 8.6535, + "step": 135970 + }, + { + "epoch": 0.6790681415266299, + "grad_norm": 0.08856892585754395, + "learning_rate": 9.65205637187414e-06, + "loss": 8.6476, + "step": 135980 + }, + { + "epoch": 0.6791180803515693, + "grad_norm": 0.09412689507007599, + "learning_rate": 9.65055445693259e-06, + "loss": 8.6462, + "step": 135990 + }, + { + "epoch": 0.6791680191765088, + "grad_norm": 0.09102386981248856, + "learning_rate": 9.649052541991037e-06, + "loss": 8.6614, + "step": 136000 + }, + { + "epoch": 0.6792179580014482, + "grad_norm": 0.0882454663515091, + "learning_rate": 9.647550627049487e-06, + "loss": 8.6684, + "step": 136010 + }, + { + "epoch": 0.6792678968263877, + "grad_norm": 0.09218038618564606, + "learning_rate": 9.646048712107938e-06, + "loss": 8.6619, + "step": 136020 + }, + { + "epoch": 0.6793178356513271, + "grad_norm": 0.09152485430240631, + "learning_rate": 9.644546797166388e-06, + "loss": 8.6552, + "step": 136030 + }, + { + "epoch": 0.6793677744762666, + "grad_norm": 0.08703062683343887, + "learning_rate": 9.643044882224838e-06, + "loss": 8.6545, + "step": 136040 + }, + { + "epoch": 0.679417713301206, + "grad_norm": 0.09743312001228333, + "learning_rate": 9.641542967283286e-06, + "loss": 8.6342, + "step": 136050 + }, + { + "epoch": 0.6794676521261455, + "grad_norm": 0.09214362502098083, + "learning_rate": 9.640041052341735e-06, + "loss": 8.6579, + "step": 136060 + }, + { + "epoch": 0.6795175909510849, + "grad_norm": 0.0931442603468895, + "learning_rate": 9.638539137400185e-06, + "loss": 8.6461, + "step": 136070 + }, + { + "epoch": 0.6795675297760244, + "grad_norm": 0.09083132445812225, + "learning_rate": 9.637037222458635e-06, + "loss": 8.6503, + "step": 136080 + }, + { + "epoch": 0.6796174686009638, + "grad_norm": 0.0905727818608284, + "learning_rate": 9.635535307517085e-06, + "loss": 8.6579, + "step": 136090 + }, + { + "epoch": 0.6796674074259033, + "grad_norm": 0.09426701813936234, + "learning_rate": 9.634033392575534e-06, + "loss": 8.6577, + "step": 136100 + }, + { + "epoch": 0.6797173462508427, + "grad_norm": 0.0878579169511795, + "learning_rate": 9.632531477633982e-06, + "loss": 8.6501, + "step": 136110 + }, + { + "epoch": 0.6797672850757822, + "grad_norm": 0.0943881943821907, + "learning_rate": 9.631029562692433e-06, + "loss": 8.6596, + "step": 136120 + }, + { + "epoch": 0.6798172239007216, + "grad_norm": 0.0909527838230133, + "learning_rate": 9.629527647750883e-06, + "loss": 8.6589, + "step": 136130 + }, + { + "epoch": 0.6798671627256611, + "grad_norm": 0.09404680877923965, + "learning_rate": 9.628025732809333e-06, + "loss": 8.6575, + "step": 136140 + }, + { + "epoch": 0.6799171015506005, + "grad_norm": 0.09321132302284241, + "learning_rate": 9.626523817867781e-06, + "loss": 8.6519, + "step": 136150 + }, + { + "epoch": 0.67996704037554, + "grad_norm": 0.09817219525575638, + "learning_rate": 9.62502190292623e-06, + "loss": 8.6566, + "step": 136160 + }, + { + "epoch": 0.6800169792004794, + "grad_norm": 0.09167230874300003, + "learning_rate": 9.62351998798468e-06, + "loss": 8.6576, + "step": 136170 + }, + { + "epoch": 0.6800669180254189, + "grad_norm": 0.09567620605230331, + "learning_rate": 9.62201807304313e-06, + "loss": 8.6495, + "step": 136180 + }, + { + "epoch": 0.6801168568503583, + "grad_norm": 0.0942583829164505, + "learning_rate": 9.62051615810158e-06, + "loss": 8.664, + "step": 136190 + }, + { + "epoch": 0.6801667956752978, + "grad_norm": 0.08843056857585907, + "learning_rate": 9.619014243160029e-06, + "loss": 8.6652, + "step": 136200 + }, + { + "epoch": 0.6802167345002372, + "grad_norm": 0.08930647373199463, + "learning_rate": 9.617512328218479e-06, + "loss": 8.6719, + "step": 136210 + }, + { + "epoch": 0.6802666733251767, + "grad_norm": 0.09439358115196228, + "learning_rate": 9.616010413276928e-06, + "loss": 8.6578, + "step": 136220 + }, + { + "epoch": 0.6803166121501161, + "grad_norm": 0.08645504713058472, + "learning_rate": 9.614508498335378e-06, + "loss": 8.661, + "step": 136230 + }, + { + "epoch": 0.6803665509750556, + "grad_norm": 0.0910206213593483, + "learning_rate": 9.613006583393828e-06, + "loss": 8.6595, + "step": 136240 + }, + { + "epoch": 0.680416489799995, + "grad_norm": 0.09109845012426376, + "learning_rate": 9.611504668452278e-06, + "loss": 8.6505, + "step": 136250 + }, + { + "epoch": 0.6804664286249344, + "grad_norm": 0.09436528384685516, + "learning_rate": 9.610002753510727e-06, + "loss": 8.6472, + "step": 136260 + }, + { + "epoch": 0.6805163674498739, + "grad_norm": 0.09152334183454514, + "learning_rate": 9.608500838569175e-06, + "loss": 8.6531, + "step": 136270 + }, + { + "epoch": 0.6805663062748134, + "grad_norm": 0.09130854159593582, + "learning_rate": 9.606998923627625e-06, + "loss": 8.6697, + "step": 136280 + }, + { + "epoch": 0.6806162450997528, + "grad_norm": 0.08787249028682709, + "learning_rate": 9.605497008686075e-06, + "loss": 8.6474, + "step": 136290 + }, + { + "epoch": 0.6806661839246922, + "grad_norm": 0.09564139693975449, + "learning_rate": 9.603995093744526e-06, + "loss": 8.6475, + "step": 136300 + }, + { + "epoch": 0.6807161227496317, + "grad_norm": 0.08936180919408798, + "learning_rate": 9.602493178802974e-06, + "loss": 8.6639, + "step": 136310 + }, + { + "epoch": 0.6807660615745712, + "grad_norm": 0.09855446964502335, + "learning_rate": 9.600991263861423e-06, + "loss": 8.673, + "step": 136320 + }, + { + "epoch": 0.6808160003995106, + "grad_norm": 0.08871414512395859, + "learning_rate": 9.599489348919873e-06, + "loss": 8.6471, + "step": 136330 + }, + { + "epoch": 0.68086593922445, + "grad_norm": 0.09087369590997696, + "learning_rate": 9.597987433978323e-06, + "loss": 8.6577, + "step": 136340 + }, + { + "epoch": 0.6809158780493895, + "grad_norm": 0.09292785823345184, + "learning_rate": 9.596485519036773e-06, + "loss": 8.6358, + "step": 136350 + }, + { + "epoch": 0.680965816874329, + "grad_norm": 0.09038669615983963, + "learning_rate": 9.594983604095222e-06, + "loss": 8.6422, + "step": 136360 + }, + { + "epoch": 0.6810157556992684, + "grad_norm": 0.09672816097736359, + "learning_rate": 9.593481689153672e-06, + "loss": 8.6498, + "step": 136370 + }, + { + "epoch": 0.6810656945242078, + "grad_norm": 0.09116402268409729, + "learning_rate": 9.59197977421212e-06, + "loss": 8.6377, + "step": 136380 + }, + { + "epoch": 0.6811156333491473, + "grad_norm": 0.09753582626581192, + "learning_rate": 9.59047785927057e-06, + "loss": 8.6643, + "step": 136390 + }, + { + "epoch": 0.6811655721740868, + "grad_norm": 0.0920112207531929, + "learning_rate": 9.58897594432902e-06, + "loss": 8.6488, + "step": 136400 + }, + { + "epoch": 0.6812155109990262, + "grad_norm": 0.09360447525978088, + "learning_rate": 9.587474029387469e-06, + "loss": 8.6664, + "step": 136410 + }, + { + "epoch": 0.6812654498239656, + "grad_norm": 0.08685506880283356, + "learning_rate": 9.58597211444592e-06, + "loss": 8.6444, + "step": 136420 + }, + { + "epoch": 0.6813153886489051, + "grad_norm": 0.08979588001966476, + "learning_rate": 9.584470199504368e-06, + "loss": 8.6541, + "step": 136430 + }, + { + "epoch": 0.6813653274738446, + "grad_norm": 0.09099925309419632, + "learning_rate": 9.582968284562818e-06, + "loss": 8.6319, + "step": 136440 + }, + { + "epoch": 0.681415266298784, + "grad_norm": 0.09031230956315994, + "learning_rate": 9.581466369621268e-06, + "loss": 8.6513, + "step": 136450 + }, + { + "epoch": 0.6814652051237234, + "grad_norm": 0.08769738674163818, + "learning_rate": 9.579964454679717e-06, + "loss": 8.6562, + "step": 136460 + }, + { + "epoch": 0.6815151439486629, + "grad_norm": 0.08992310613393784, + "learning_rate": 9.578462539738167e-06, + "loss": 8.6546, + "step": 136470 + }, + { + "epoch": 0.6815650827736023, + "grad_norm": 0.09138327836990356, + "learning_rate": 9.576960624796615e-06, + "loss": 8.6545, + "step": 136480 + }, + { + "epoch": 0.6816150215985418, + "grad_norm": 0.08947155624628067, + "learning_rate": 9.575458709855065e-06, + "loss": 8.6435, + "step": 136490 + }, + { + "epoch": 0.6816649604234812, + "grad_norm": 0.09748796373605728, + "learning_rate": 9.573956794913516e-06, + "loss": 8.6491, + "step": 136500 + }, + { + "epoch": 0.6817148992484207, + "grad_norm": 0.0930284857749939, + "learning_rate": 9.572454879971964e-06, + "loss": 8.6502, + "step": 136510 + }, + { + "epoch": 0.6817648380733601, + "grad_norm": 0.08821598440408707, + "learning_rate": 9.570952965030414e-06, + "loss": 8.6452, + "step": 136520 + }, + { + "epoch": 0.6818147768982996, + "grad_norm": 0.09385675936937332, + "learning_rate": 9.569451050088864e-06, + "loss": 8.6533, + "step": 136530 + }, + { + "epoch": 0.681864715723239, + "grad_norm": 0.09572584182024002, + "learning_rate": 9.567949135147313e-06, + "loss": 8.6635, + "step": 136540 + }, + { + "epoch": 0.6819146545481785, + "grad_norm": 0.09196435660123825, + "learning_rate": 9.566447220205763e-06, + "loss": 8.6576, + "step": 136550 + }, + { + "epoch": 0.6819645933731179, + "grad_norm": 0.09118195623159409, + "learning_rate": 9.564945305264212e-06, + "loss": 8.6366, + "step": 136560 + }, + { + "epoch": 0.6820145321980574, + "grad_norm": 0.09549646824598312, + "learning_rate": 9.563443390322662e-06, + "loss": 8.6665, + "step": 136570 + }, + { + "epoch": 0.6820644710229968, + "grad_norm": 0.09187420457601547, + "learning_rate": 9.561941475381112e-06, + "loss": 8.6431, + "step": 136580 + }, + { + "epoch": 0.6821144098479363, + "grad_norm": 0.09515494108200073, + "learning_rate": 9.56043956043956e-06, + "loss": 8.6565, + "step": 136590 + }, + { + "epoch": 0.6821643486728757, + "grad_norm": 0.09291938692331314, + "learning_rate": 9.55893764549801e-06, + "loss": 8.6536, + "step": 136600 + }, + { + "epoch": 0.6822142874978152, + "grad_norm": 0.09135958552360535, + "learning_rate": 9.557435730556459e-06, + "loss": 8.6686, + "step": 136610 + }, + { + "epoch": 0.6822642263227546, + "grad_norm": 0.09079675376415253, + "learning_rate": 9.55593381561491e-06, + "loss": 8.6562, + "step": 136620 + }, + { + "epoch": 0.682314165147694, + "grad_norm": 0.08791366219520569, + "learning_rate": 9.55443190067336e-06, + "loss": 8.6422, + "step": 136630 + }, + { + "epoch": 0.6823641039726335, + "grad_norm": 0.09008821099996567, + "learning_rate": 9.552929985731808e-06, + "loss": 8.6484, + "step": 136640 + }, + { + "epoch": 0.682414042797573, + "grad_norm": 0.09008509665727615, + "learning_rate": 9.551428070790258e-06, + "loss": 8.6609, + "step": 136650 + }, + { + "epoch": 0.6824639816225124, + "grad_norm": 0.0959354117512703, + "learning_rate": 9.549926155848707e-06, + "loss": 8.6284, + "step": 136660 + }, + { + "epoch": 0.6825139204474518, + "grad_norm": 0.08918027579784393, + "learning_rate": 9.548424240907157e-06, + "loss": 8.6587, + "step": 136670 + }, + { + "epoch": 0.6825638592723913, + "grad_norm": 0.08706361055374146, + "learning_rate": 9.546922325965607e-06, + "loss": 8.6813, + "step": 136680 + }, + { + "epoch": 0.6826137980973308, + "grad_norm": 0.09403030574321747, + "learning_rate": 9.545420411024057e-06, + "loss": 8.6521, + "step": 136690 + }, + { + "epoch": 0.6826637369222702, + "grad_norm": 0.08769270032644272, + "learning_rate": 9.543918496082506e-06, + "loss": 8.6526, + "step": 136700 + }, + { + "epoch": 0.6827136757472096, + "grad_norm": 0.08837845176458359, + "learning_rate": 9.542416581140954e-06, + "loss": 8.647, + "step": 136710 + }, + { + "epoch": 0.6827636145721491, + "grad_norm": 0.08815280348062515, + "learning_rate": 9.540914666199404e-06, + "loss": 8.6532, + "step": 136720 + }, + { + "epoch": 0.6828135533970886, + "grad_norm": 0.09271814674139023, + "learning_rate": 9.539412751257854e-06, + "loss": 8.6624, + "step": 136730 + }, + { + "epoch": 0.682863492222028, + "grad_norm": 0.09003327786922455, + "learning_rate": 9.537910836316305e-06, + "loss": 8.6456, + "step": 136740 + }, + { + "epoch": 0.6829134310469674, + "grad_norm": 0.09336933493614197, + "learning_rate": 9.536408921374753e-06, + "loss": 8.6364, + "step": 136750 + }, + { + "epoch": 0.6829633698719069, + "grad_norm": 0.08961906284093857, + "learning_rate": 9.534907006433202e-06, + "loss": 8.653, + "step": 136760 + }, + { + "epoch": 0.6830133086968464, + "grad_norm": 0.08953448385000229, + "learning_rate": 9.533405091491652e-06, + "loss": 8.6516, + "step": 136770 + }, + { + "epoch": 0.6830632475217858, + "grad_norm": 0.09705983847379684, + "learning_rate": 9.531903176550102e-06, + "loss": 8.6433, + "step": 136780 + }, + { + "epoch": 0.6831131863467252, + "grad_norm": 0.0961066335439682, + "learning_rate": 9.530401261608552e-06, + "loss": 8.6596, + "step": 136790 + }, + { + "epoch": 0.6831631251716647, + "grad_norm": 0.09489046037197113, + "learning_rate": 9.528899346667e-06, + "loss": 8.6587, + "step": 136800 + }, + { + "epoch": 0.6832130639966042, + "grad_norm": 0.0909392461180687, + "learning_rate": 9.527397431725449e-06, + "loss": 8.6567, + "step": 136810 + }, + { + "epoch": 0.6832630028215436, + "grad_norm": 0.09276154637336731, + "learning_rate": 9.5258955167839e-06, + "loss": 8.6676, + "step": 136820 + }, + { + "epoch": 0.683312941646483, + "grad_norm": 0.08885812014341354, + "learning_rate": 9.52439360184235e-06, + "loss": 8.6517, + "step": 136830 + }, + { + "epoch": 0.6833628804714225, + "grad_norm": 0.0934206172823906, + "learning_rate": 9.5228916869008e-06, + "loss": 8.6566, + "step": 136840 + }, + { + "epoch": 0.683412819296362, + "grad_norm": 0.08924253284931183, + "learning_rate": 9.52138977195925e-06, + "loss": 8.6622, + "step": 136850 + }, + { + "epoch": 0.6834627581213014, + "grad_norm": 0.0887620598077774, + "learning_rate": 9.519887857017697e-06, + "loss": 8.6523, + "step": 136860 + }, + { + "epoch": 0.6835126969462408, + "grad_norm": 0.09116550534963608, + "learning_rate": 9.518385942076147e-06, + "loss": 8.6463, + "step": 136870 + }, + { + "epoch": 0.6835626357711803, + "grad_norm": 0.09384380280971527, + "learning_rate": 9.516884027134597e-06, + "loss": 8.6518, + "step": 136880 + }, + { + "epoch": 0.6836125745961198, + "grad_norm": 0.08999653905630112, + "learning_rate": 9.515382112193047e-06, + "loss": 8.6534, + "step": 136890 + }, + { + "epoch": 0.6836625134210592, + "grad_norm": 0.09472544491291046, + "learning_rate": 9.513880197251497e-06, + "loss": 8.6418, + "step": 136900 + }, + { + "epoch": 0.6837124522459986, + "grad_norm": 0.08867818862199783, + "learning_rate": 9.512378282309944e-06, + "loss": 8.6511, + "step": 136910 + }, + { + "epoch": 0.6837623910709381, + "grad_norm": 0.09118511527776718, + "learning_rate": 9.510876367368394e-06, + "loss": 8.6535, + "step": 136920 + }, + { + "epoch": 0.6838123298958776, + "grad_norm": 0.09027484059333801, + "learning_rate": 9.509374452426844e-06, + "loss": 8.6652, + "step": 136930 + }, + { + "epoch": 0.683862268720817, + "grad_norm": 0.09857519716024399, + "learning_rate": 9.507872537485295e-06, + "loss": 8.6434, + "step": 136940 + }, + { + "epoch": 0.6839122075457564, + "grad_norm": 0.09504196047782898, + "learning_rate": 9.506370622543745e-06, + "loss": 8.6584, + "step": 136950 + }, + { + "epoch": 0.6839621463706959, + "grad_norm": 0.09877480566501617, + "learning_rate": 9.504868707602192e-06, + "loss": 8.6523, + "step": 136960 + }, + { + "epoch": 0.6840120851956354, + "grad_norm": 0.09247778356075287, + "learning_rate": 9.503366792660642e-06, + "loss": 8.6504, + "step": 136970 + }, + { + "epoch": 0.6840620240205748, + "grad_norm": 0.09239480644464493, + "learning_rate": 9.501864877719092e-06, + "loss": 8.6505, + "step": 136980 + }, + { + "epoch": 0.6841119628455142, + "grad_norm": 0.09122372418642044, + "learning_rate": 9.500362962777542e-06, + "loss": 8.6395, + "step": 136990 + }, + { + "epoch": 0.6841619016704537, + "grad_norm": 0.09300727397203445, + "learning_rate": 9.498861047835992e-06, + "loss": 8.6488, + "step": 137000 + }, + { + "epoch": 0.6842118404953932, + "grad_norm": 0.0911245048046112, + "learning_rate": 9.49735913289444e-06, + "loss": 8.6665, + "step": 137010 + }, + { + "epoch": 0.6842617793203326, + "grad_norm": 0.0955081507563591, + "learning_rate": 9.49585721795289e-06, + "loss": 8.6382, + "step": 137020 + }, + { + "epoch": 0.684311718145272, + "grad_norm": 0.09284482151269913, + "learning_rate": 9.49435530301134e-06, + "loss": 8.6343, + "step": 137030 + }, + { + "epoch": 0.6843616569702115, + "grad_norm": 0.09098115563392639, + "learning_rate": 9.49285338806979e-06, + "loss": 8.6491, + "step": 137040 + }, + { + "epoch": 0.684411595795151, + "grad_norm": 0.09565536677837372, + "learning_rate": 9.49135147312824e-06, + "loss": 8.6573, + "step": 137050 + }, + { + "epoch": 0.6844615346200904, + "grad_norm": 0.0895000547170639, + "learning_rate": 9.489849558186688e-06, + "loss": 8.6537, + "step": 137060 + }, + { + "epoch": 0.6845114734450298, + "grad_norm": 0.09270433336496353, + "learning_rate": 9.488347643245137e-06, + "loss": 8.6309, + "step": 137070 + }, + { + "epoch": 0.6845614122699692, + "grad_norm": 0.09027011692523956, + "learning_rate": 9.486845728303587e-06, + "loss": 8.6541, + "step": 137080 + }, + { + "epoch": 0.6846113510949088, + "grad_norm": 0.08946502953767776, + "learning_rate": 9.485343813362037e-06, + "loss": 8.6389, + "step": 137090 + }, + { + "epoch": 0.6846612899198482, + "grad_norm": 0.09274662286043167, + "learning_rate": 9.483841898420487e-06, + "loss": 8.6507, + "step": 137100 + }, + { + "epoch": 0.6847112287447876, + "grad_norm": 0.09150737524032593, + "learning_rate": 9.482339983478936e-06, + "loss": 8.6462, + "step": 137110 + }, + { + "epoch": 0.684761167569727, + "grad_norm": 0.09016991406679153, + "learning_rate": 9.480838068537384e-06, + "loss": 8.662, + "step": 137120 + }, + { + "epoch": 0.6848111063946666, + "grad_norm": 0.09275011718273163, + "learning_rate": 9.479336153595835e-06, + "loss": 8.6538, + "step": 137130 + }, + { + "epoch": 0.684861045219606, + "grad_norm": 0.09211241453886032, + "learning_rate": 9.477834238654285e-06, + "loss": 8.6423, + "step": 137140 + }, + { + "epoch": 0.6849109840445454, + "grad_norm": 0.09323053061962128, + "learning_rate": 9.476332323712735e-06, + "loss": 8.6311, + "step": 137150 + }, + { + "epoch": 0.6849609228694848, + "grad_norm": 0.0909585952758789, + "learning_rate": 9.474830408771183e-06, + "loss": 8.6413, + "step": 137160 + }, + { + "epoch": 0.6850108616944244, + "grad_norm": 0.09803111851215363, + "learning_rate": 9.473328493829634e-06, + "loss": 8.6599, + "step": 137170 + }, + { + "epoch": 0.6850608005193638, + "grad_norm": 0.09382762759923935, + "learning_rate": 9.471826578888082e-06, + "loss": 8.6395, + "step": 137180 + }, + { + "epoch": 0.6851107393443032, + "grad_norm": 0.09193086624145508, + "learning_rate": 9.470324663946532e-06, + "loss": 8.6454, + "step": 137190 + }, + { + "epoch": 0.6851606781692426, + "grad_norm": 0.0866667628288269, + "learning_rate": 9.468822749004982e-06, + "loss": 8.6586, + "step": 137200 + }, + { + "epoch": 0.6852106169941822, + "grad_norm": 0.09309227019548416, + "learning_rate": 9.467320834063431e-06, + "loss": 8.6427, + "step": 137210 + }, + { + "epoch": 0.6852605558191216, + "grad_norm": 0.091177798807621, + "learning_rate": 9.465818919121881e-06, + "loss": 8.6524, + "step": 137220 + }, + { + "epoch": 0.685310494644061, + "grad_norm": 0.09202263504266739, + "learning_rate": 9.46431700418033e-06, + "loss": 8.6487, + "step": 137230 + }, + { + "epoch": 0.6853604334690004, + "grad_norm": 0.0925859585404396, + "learning_rate": 9.46281508923878e-06, + "loss": 8.6511, + "step": 137240 + }, + { + "epoch": 0.68541037229394, + "grad_norm": 0.09529491513967514, + "learning_rate": 9.46131317429723e-06, + "loss": 8.6417, + "step": 137250 + }, + { + "epoch": 0.6854603111188794, + "grad_norm": 0.09451477974653244, + "learning_rate": 9.459811259355678e-06, + "loss": 8.6519, + "step": 137260 + }, + { + "epoch": 0.6855102499438188, + "grad_norm": 0.08990958333015442, + "learning_rate": 9.458309344414129e-06, + "loss": 8.647, + "step": 137270 + }, + { + "epoch": 0.6855601887687582, + "grad_norm": 0.09311163425445557, + "learning_rate": 9.456807429472577e-06, + "loss": 8.642, + "step": 137280 + }, + { + "epoch": 0.6856101275936978, + "grad_norm": 0.09261354058980942, + "learning_rate": 9.455305514531027e-06, + "loss": 8.6528, + "step": 137290 + }, + { + "epoch": 0.6856600664186372, + "grad_norm": 0.09306151419878006, + "learning_rate": 9.453803599589477e-06, + "loss": 8.6436, + "step": 137300 + }, + { + "epoch": 0.6857100052435766, + "grad_norm": 0.09042923152446747, + "learning_rate": 9.452301684647926e-06, + "loss": 8.6518, + "step": 137310 + }, + { + "epoch": 0.685759944068516, + "grad_norm": 0.09723861515522003, + "learning_rate": 9.450799769706376e-06, + "loss": 8.6463, + "step": 137320 + }, + { + "epoch": 0.6858098828934556, + "grad_norm": 0.08681852370500565, + "learning_rate": 9.449297854764826e-06, + "loss": 8.6446, + "step": 137330 + }, + { + "epoch": 0.685859821718395, + "grad_norm": 0.09019467979669571, + "learning_rate": 9.447795939823275e-06, + "loss": 8.6482, + "step": 137340 + }, + { + "epoch": 0.6859097605433344, + "grad_norm": 0.08701999485492706, + "learning_rate": 9.446294024881725e-06, + "loss": 8.6666, + "step": 137350 + }, + { + "epoch": 0.6859596993682738, + "grad_norm": 0.09151817858219147, + "learning_rate": 9.444792109940173e-06, + "loss": 8.6528, + "step": 137360 + }, + { + "epoch": 0.6860096381932134, + "grad_norm": 0.0913306251168251, + "learning_rate": 9.443290194998624e-06, + "loss": 8.6475, + "step": 137370 + }, + { + "epoch": 0.6860595770181528, + "grad_norm": 0.09225058555603027, + "learning_rate": 9.441788280057074e-06, + "loss": 8.6478, + "step": 137380 + }, + { + "epoch": 0.6861095158430922, + "grad_norm": 0.09292393922805786, + "learning_rate": 9.440286365115522e-06, + "loss": 8.6571, + "step": 137390 + }, + { + "epoch": 0.6861594546680316, + "grad_norm": 0.09331595152616501, + "learning_rate": 9.438784450173972e-06, + "loss": 8.6414, + "step": 137400 + }, + { + "epoch": 0.6862093934929712, + "grad_norm": 0.09516488760709763, + "learning_rate": 9.437282535232421e-06, + "loss": 8.6387, + "step": 137410 + }, + { + "epoch": 0.6862593323179106, + "grad_norm": 0.09467773884534836, + "learning_rate": 9.435780620290871e-06, + "loss": 8.6409, + "step": 137420 + }, + { + "epoch": 0.68630927114285, + "grad_norm": 0.09190910309553146, + "learning_rate": 9.434278705349321e-06, + "loss": 8.6683, + "step": 137430 + }, + { + "epoch": 0.6863592099677894, + "grad_norm": 0.09423499554395676, + "learning_rate": 9.43277679040777e-06, + "loss": 8.6736, + "step": 137440 + }, + { + "epoch": 0.686409148792729, + "grad_norm": 0.0923381969332695, + "learning_rate": 9.43127487546622e-06, + "loss": 8.6486, + "step": 137450 + }, + { + "epoch": 0.6864590876176684, + "grad_norm": 0.08919500559568405, + "learning_rate": 9.429772960524668e-06, + "loss": 8.6351, + "step": 137460 + }, + { + "epoch": 0.6865090264426078, + "grad_norm": 0.0964340791106224, + "learning_rate": 9.428271045583119e-06, + "loss": 8.6507, + "step": 137470 + }, + { + "epoch": 0.6865589652675472, + "grad_norm": 0.09165029972791672, + "learning_rate": 9.426769130641569e-06, + "loss": 8.6495, + "step": 137480 + }, + { + "epoch": 0.6866089040924866, + "grad_norm": 0.09624398499727249, + "learning_rate": 9.425267215700019e-06, + "loss": 8.6515, + "step": 137490 + }, + { + "epoch": 0.6866588429174262, + "grad_norm": 0.09847095608711243, + "learning_rate": 9.423765300758467e-06, + "loss": 8.6554, + "step": 137500 + }, + { + "epoch": 0.6867087817423656, + "grad_norm": 0.0960099995136261, + "learning_rate": 9.422263385816916e-06, + "loss": 8.6467, + "step": 137510 + }, + { + "epoch": 0.686758720567305, + "grad_norm": 0.09143520146608353, + "learning_rate": 9.420761470875366e-06, + "loss": 8.6458, + "step": 137520 + }, + { + "epoch": 0.6868086593922444, + "grad_norm": 0.08999507874250412, + "learning_rate": 9.419259555933816e-06, + "loss": 8.6486, + "step": 137530 + }, + { + "epoch": 0.686858598217184, + "grad_norm": 0.0936044380068779, + "learning_rate": 9.417757640992266e-06, + "loss": 8.6722, + "step": 137540 + }, + { + "epoch": 0.6869085370421234, + "grad_norm": 0.08734313398599625, + "learning_rate": 9.416255726050715e-06, + "loss": 8.6346, + "step": 137550 + }, + { + "epoch": 0.6869584758670628, + "grad_norm": 0.09482523053884506, + "learning_rate": 9.414753811109163e-06, + "loss": 8.6509, + "step": 137560 + }, + { + "epoch": 0.6870084146920022, + "grad_norm": 0.08811195939779282, + "learning_rate": 9.413251896167614e-06, + "loss": 8.6456, + "step": 137570 + }, + { + "epoch": 0.6870583535169418, + "grad_norm": 0.09007152169942856, + "learning_rate": 9.411749981226064e-06, + "loss": 8.6496, + "step": 137580 + }, + { + "epoch": 0.6871082923418812, + "grad_norm": 0.09563975781202316, + "learning_rate": 9.410248066284514e-06, + "loss": 8.6543, + "step": 137590 + }, + { + "epoch": 0.6871582311668206, + "grad_norm": 0.09100675582885742, + "learning_rate": 9.408746151342962e-06, + "loss": 8.6472, + "step": 137600 + }, + { + "epoch": 0.68720816999176, + "grad_norm": 0.09444539994001389, + "learning_rate": 9.407244236401411e-06, + "loss": 8.652, + "step": 137610 + }, + { + "epoch": 0.6872581088166996, + "grad_norm": 0.09386273473501205, + "learning_rate": 9.405742321459861e-06, + "loss": 8.6341, + "step": 137620 + }, + { + "epoch": 0.687308047641639, + "grad_norm": 0.09169203042984009, + "learning_rate": 9.404240406518311e-06, + "loss": 8.6365, + "step": 137630 + }, + { + "epoch": 0.6873579864665784, + "grad_norm": 0.0909794345498085, + "learning_rate": 9.402738491576761e-06, + "loss": 8.6598, + "step": 137640 + }, + { + "epoch": 0.6874079252915178, + "grad_norm": 0.08933387696743011, + "learning_rate": 9.40123657663521e-06, + "loss": 8.6547, + "step": 137650 + }, + { + "epoch": 0.6874578641164574, + "grad_norm": 0.097355417907238, + "learning_rate": 9.399734661693658e-06, + "loss": 8.6451, + "step": 137660 + }, + { + "epoch": 0.6875078029413968, + "grad_norm": 0.08841703087091446, + "learning_rate": 9.398232746752109e-06, + "loss": 8.6566, + "step": 137670 + }, + { + "epoch": 0.6875577417663362, + "grad_norm": 0.09026035666465759, + "learning_rate": 9.396730831810559e-06, + "loss": 8.6522, + "step": 137680 + }, + { + "epoch": 0.6876076805912756, + "grad_norm": 0.09169178456068039, + "learning_rate": 9.395228916869009e-06, + "loss": 8.6437, + "step": 137690 + }, + { + "epoch": 0.6876576194162152, + "grad_norm": 0.08948150277137756, + "learning_rate": 9.393727001927459e-06, + "loss": 8.6354, + "step": 137700 + }, + { + "epoch": 0.6877075582411546, + "grad_norm": 0.09596080332994461, + "learning_rate": 9.392225086985906e-06, + "loss": 8.637, + "step": 137710 + }, + { + "epoch": 0.687757497066094, + "grad_norm": 0.09012558311223984, + "learning_rate": 9.390723172044356e-06, + "loss": 8.6362, + "step": 137720 + }, + { + "epoch": 0.6878074358910334, + "grad_norm": 0.09151880443096161, + "learning_rate": 9.389221257102806e-06, + "loss": 8.643, + "step": 137730 + }, + { + "epoch": 0.687857374715973, + "grad_norm": 0.09232999384403229, + "learning_rate": 9.387719342161256e-06, + "loss": 8.648, + "step": 137740 + }, + { + "epoch": 0.6879073135409124, + "grad_norm": 0.08922406286001205, + "learning_rate": 9.386217427219707e-06, + "loss": 8.6526, + "step": 137750 + }, + { + "epoch": 0.6879572523658518, + "grad_norm": 0.09624180942773819, + "learning_rate": 9.384715512278153e-06, + "loss": 8.6546, + "step": 137760 + }, + { + "epoch": 0.6880071911907912, + "grad_norm": 0.09329060465097427, + "learning_rate": 9.383213597336604e-06, + "loss": 8.652, + "step": 137770 + }, + { + "epoch": 0.6880571300157308, + "grad_norm": 0.08916980773210526, + "learning_rate": 9.381711682395054e-06, + "loss": 8.6219, + "step": 137780 + }, + { + "epoch": 0.6881070688406702, + "grad_norm": 0.08881982415914536, + "learning_rate": 9.380209767453504e-06, + "loss": 8.6483, + "step": 137790 + }, + { + "epoch": 0.6881570076656096, + "grad_norm": 0.09591236710548401, + "learning_rate": 9.378707852511954e-06, + "loss": 8.6547, + "step": 137800 + }, + { + "epoch": 0.688206946490549, + "grad_norm": 0.0902673676609993, + "learning_rate": 9.377205937570401e-06, + "loss": 8.6516, + "step": 137810 + }, + { + "epoch": 0.6882568853154886, + "grad_norm": 0.09228136390447617, + "learning_rate": 9.375704022628851e-06, + "loss": 8.6433, + "step": 137820 + }, + { + "epoch": 0.688306824140428, + "grad_norm": 0.09464501589536667, + "learning_rate": 9.374202107687301e-06, + "loss": 8.6375, + "step": 137830 + }, + { + "epoch": 0.6883567629653674, + "grad_norm": 0.09438979625701904, + "learning_rate": 9.372700192745751e-06, + "loss": 8.6417, + "step": 137840 + }, + { + "epoch": 0.6884067017903068, + "grad_norm": 0.08938003331422806, + "learning_rate": 9.371198277804202e-06, + "loss": 8.6508, + "step": 137850 + }, + { + "epoch": 0.6884566406152464, + "grad_norm": 0.09114663302898407, + "learning_rate": 9.36969636286265e-06, + "loss": 8.6371, + "step": 137860 + }, + { + "epoch": 0.6885065794401858, + "grad_norm": 0.09571482241153717, + "learning_rate": 9.368194447921099e-06, + "loss": 8.6337, + "step": 137870 + }, + { + "epoch": 0.6885565182651252, + "grad_norm": 0.08686354756355286, + "learning_rate": 9.366692532979549e-06, + "loss": 8.6501, + "step": 137880 + }, + { + "epoch": 0.6886064570900646, + "grad_norm": 0.09277797490358353, + "learning_rate": 9.365190618037999e-06, + "loss": 8.6464, + "step": 137890 + }, + { + "epoch": 0.6886563959150042, + "grad_norm": 0.09338825196027756, + "learning_rate": 9.363688703096449e-06, + "loss": 8.6573, + "step": 137900 + }, + { + "epoch": 0.6887063347399436, + "grad_norm": 0.09079349040985107, + "learning_rate": 9.362186788154898e-06, + "loss": 8.6472, + "step": 137910 + }, + { + "epoch": 0.688756273564883, + "grad_norm": 0.09190376847982407, + "learning_rate": 9.360684873213346e-06, + "loss": 8.6445, + "step": 137920 + }, + { + "epoch": 0.6888062123898224, + "grad_norm": 0.09575271606445312, + "learning_rate": 9.359182958271796e-06, + "loss": 8.6442, + "step": 137930 + }, + { + "epoch": 0.688856151214762, + "grad_norm": 0.09022709727287292, + "learning_rate": 9.357681043330246e-06, + "loss": 8.6403, + "step": 137940 + }, + { + "epoch": 0.6889060900397014, + "grad_norm": 0.09948229044675827, + "learning_rate": 9.356179128388697e-06, + "loss": 8.636, + "step": 137950 + }, + { + "epoch": 0.6889560288646408, + "grad_norm": 0.09341901540756226, + "learning_rate": 9.354677213447145e-06, + "loss": 8.6431, + "step": 137960 + }, + { + "epoch": 0.6890059676895802, + "grad_norm": 0.09408657997846603, + "learning_rate": 9.353175298505594e-06, + "loss": 8.6304, + "step": 137970 + }, + { + "epoch": 0.6890559065145198, + "grad_norm": 0.09293026477098465, + "learning_rate": 9.351673383564044e-06, + "loss": 8.6281, + "step": 137980 + }, + { + "epoch": 0.6891058453394592, + "grad_norm": 0.09260407835245132, + "learning_rate": 9.350171468622494e-06, + "loss": 8.6426, + "step": 137990 + }, + { + "epoch": 0.6891557841643986, + "grad_norm": 0.09389134496450424, + "learning_rate": 9.348669553680944e-06, + "loss": 8.6538, + "step": 138000 + }, + { + "epoch": 0.689205722989338, + "grad_norm": 0.09285546839237213, + "learning_rate": 9.347167638739393e-06, + "loss": 8.6568, + "step": 138010 + }, + { + "epoch": 0.6892556618142776, + "grad_norm": 0.0875903069972992, + "learning_rate": 9.345665723797843e-06, + "loss": 8.6533, + "step": 138020 + }, + { + "epoch": 0.689305600639217, + "grad_norm": 0.08949259668588638, + "learning_rate": 9.344163808856291e-06, + "loss": 8.6302, + "step": 138030 + }, + { + "epoch": 0.6893555394641564, + "grad_norm": 0.09010973572731018, + "learning_rate": 9.342661893914741e-06, + "loss": 8.6442, + "step": 138040 + }, + { + "epoch": 0.6894054782890958, + "grad_norm": 0.09066742658615112, + "learning_rate": 9.341159978973192e-06, + "loss": 8.6377, + "step": 138050 + }, + { + "epoch": 0.6894554171140354, + "grad_norm": 0.09108269959688187, + "learning_rate": 9.33965806403164e-06, + "loss": 8.6662, + "step": 138060 + }, + { + "epoch": 0.6895053559389748, + "grad_norm": 0.08662552386522293, + "learning_rate": 9.33815614909009e-06, + "loss": 8.6386, + "step": 138070 + }, + { + "epoch": 0.6895552947639142, + "grad_norm": 0.08738676458597183, + "learning_rate": 9.336654234148539e-06, + "loss": 8.6379, + "step": 138080 + }, + { + "epoch": 0.6896052335888536, + "grad_norm": 0.0980435162782669, + "learning_rate": 9.335152319206989e-06, + "loss": 8.6349, + "step": 138090 + }, + { + "epoch": 0.6896551724137931, + "grad_norm": 0.09144052863121033, + "learning_rate": 9.333650404265439e-06, + "loss": 8.638, + "step": 138100 + }, + { + "epoch": 0.6897051112387326, + "grad_norm": 0.09114320576190948, + "learning_rate": 9.332148489323888e-06, + "loss": 8.6406, + "step": 138110 + }, + { + "epoch": 0.689755050063672, + "grad_norm": 0.09149006754159927, + "learning_rate": 9.330646574382338e-06, + "loss": 8.6525, + "step": 138120 + }, + { + "epoch": 0.6898049888886114, + "grad_norm": 0.0991503894329071, + "learning_rate": 9.329144659440786e-06, + "loss": 8.6523, + "step": 138130 + }, + { + "epoch": 0.689854927713551, + "grad_norm": 0.09777787327766418, + "learning_rate": 9.327642744499236e-06, + "loss": 8.6357, + "step": 138140 + }, + { + "epoch": 0.6899048665384904, + "grad_norm": 0.09248573333024979, + "learning_rate": 9.326140829557687e-06, + "loss": 8.6435, + "step": 138150 + }, + { + "epoch": 0.6899548053634298, + "grad_norm": 0.09282256662845612, + "learning_rate": 9.324638914616135e-06, + "loss": 8.6544, + "step": 138160 + }, + { + "epoch": 0.6900047441883692, + "grad_norm": 0.08951915800571442, + "learning_rate": 9.323136999674585e-06, + "loss": 8.6397, + "step": 138170 + }, + { + "epoch": 0.6900546830133087, + "grad_norm": 0.09212198853492737, + "learning_rate": 9.321635084733035e-06, + "loss": 8.6415, + "step": 138180 + }, + { + "epoch": 0.6901046218382482, + "grad_norm": 0.09542210400104523, + "learning_rate": 9.320133169791484e-06, + "loss": 8.6362, + "step": 138190 + }, + { + "epoch": 0.6901545606631876, + "grad_norm": 0.08942250162363052, + "learning_rate": 9.318631254849934e-06, + "loss": 8.6416, + "step": 138200 + }, + { + "epoch": 0.690204499488127, + "grad_norm": 0.08706214278936386, + "learning_rate": 9.317129339908383e-06, + "loss": 8.6381, + "step": 138210 + }, + { + "epoch": 0.6902544383130665, + "grad_norm": 0.09015882015228271, + "learning_rate": 9.315627424966833e-06, + "loss": 8.6384, + "step": 138220 + }, + { + "epoch": 0.690304377138006, + "grad_norm": 0.09370545297861099, + "learning_rate": 9.314125510025283e-06, + "loss": 8.6408, + "step": 138230 + }, + { + "epoch": 0.6903543159629454, + "grad_norm": 0.08900182694196701, + "learning_rate": 9.312623595083731e-06, + "loss": 8.6495, + "step": 138240 + }, + { + "epoch": 0.6904042547878848, + "grad_norm": 0.08960168808698654, + "learning_rate": 9.311121680142182e-06, + "loss": 8.6513, + "step": 138250 + }, + { + "epoch": 0.6904541936128243, + "grad_norm": 0.09003665298223495, + "learning_rate": 9.30961976520063e-06, + "loss": 8.6419, + "step": 138260 + }, + { + "epoch": 0.6905041324377638, + "grad_norm": 0.09128106385469437, + "learning_rate": 9.30811785025908e-06, + "loss": 8.6406, + "step": 138270 + }, + { + "epoch": 0.6905540712627032, + "grad_norm": 0.09446307271718979, + "learning_rate": 9.30661593531753e-06, + "loss": 8.6382, + "step": 138280 + }, + { + "epoch": 0.6906040100876426, + "grad_norm": 0.08831710368394852, + "learning_rate": 9.305114020375979e-06, + "loss": 8.6417, + "step": 138290 + }, + { + "epoch": 0.6906539489125821, + "grad_norm": 0.08965827524662018, + "learning_rate": 9.303612105434429e-06, + "loss": 8.633, + "step": 138300 + }, + { + "epoch": 0.6907038877375216, + "grad_norm": 0.09387178719043732, + "learning_rate": 9.302110190492878e-06, + "loss": 8.6404, + "step": 138310 + }, + { + "epoch": 0.690753826562461, + "grad_norm": 0.0937371626496315, + "learning_rate": 9.300608275551328e-06, + "loss": 8.6382, + "step": 138320 + }, + { + "epoch": 0.6908037653874004, + "grad_norm": 0.08978761732578278, + "learning_rate": 9.299106360609778e-06, + "loss": 8.6647, + "step": 138330 + }, + { + "epoch": 0.6908537042123399, + "grad_norm": 0.09310397505760193, + "learning_rate": 9.297604445668228e-06, + "loss": 8.6311, + "step": 138340 + }, + { + "epoch": 0.6909036430372794, + "grad_norm": 0.0897870808839798, + "learning_rate": 9.296102530726677e-06, + "loss": 8.6466, + "step": 138350 + }, + { + "epoch": 0.6909535818622188, + "grad_norm": 0.09369547665119171, + "learning_rate": 9.294600615785127e-06, + "loss": 8.6451, + "step": 138360 + }, + { + "epoch": 0.6910035206871582, + "grad_norm": 0.09182377904653549, + "learning_rate": 9.293098700843575e-06, + "loss": 8.636, + "step": 138370 + }, + { + "epoch": 0.6910534595120977, + "grad_norm": 0.09317004680633545, + "learning_rate": 9.291596785902025e-06, + "loss": 8.6357, + "step": 138380 + }, + { + "epoch": 0.6911033983370372, + "grad_norm": 0.09063698351383209, + "learning_rate": 9.290094870960476e-06, + "loss": 8.6374, + "step": 138390 + }, + { + "epoch": 0.6911533371619766, + "grad_norm": 0.09114688634872437, + "learning_rate": 9.288592956018924e-06, + "loss": 8.6364, + "step": 138400 + }, + { + "epoch": 0.691203275986916, + "grad_norm": 0.08719582855701447, + "learning_rate": 9.287091041077374e-06, + "loss": 8.6513, + "step": 138410 + }, + { + "epoch": 0.6912532148118555, + "grad_norm": 0.09610260277986526, + "learning_rate": 9.285589126135823e-06, + "loss": 8.6279, + "step": 138420 + }, + { + "epoch": 0.691303153636795, + "grad_norm": 0.08931412547826767, + "learning_rate": 9.284087211194273e-06, + "loss": 8.6179, + "step": 138430 + }, + { + "epoch": 0.6913530924617344, + "grad_norm": 0.09096454083919525, + "learning_rate": 9.282585296252723e-06, + "loss": 8.6538, + "step": 138440 + }, + { + "epoch": 0.6914030312866738, + "grad_norm": 0.09301277250051498, + "learning_rate": 9.281083381311172e-06, + "loss": 8.6439, + "step": 138450 + }, + { + "epoch": 0.6914529701116132, + "grad_norm": 0.09470412135124207, + "learning_rate": 9.279581466369622e-06, + "loss": 8.6572, + "step": 138460 + }, + { + "epoch": 0.6915029089365528, + "grad_norm": 0.08895006030797958, + "learning_rate": 9.27807955142807e-06, + "loss": 8.6397, + "step": 138470 + }, + { + "epoch": 0.6915528477614922, + "grad_norm": 0.09335384517908096, + "learning_rate": 9.27657763648652e-06, + "loss": 8.6424, + "step": 138480 + }, + { + "epoch": 0.6916027865864316, + "grad_norm": 0.08956567943096161, + "learning_rate": 9.27507572154497e-06, + "loss": 8.6657, + "step": 138490 + }, + { + "epoch": 0.691652725411371, + "grad_norm": 0.08856837451457977, + "learning_rate": 9.273573806603421e-06, + "loss": 8.6263, + "step": 138500 + }, + { + "epoch": 0.6917026642363105, + "grad_norm": 0.09013232588768005, + "learning_rate": 9.27207189166187e-06, + "loss": 8.6193, + "step": 138510 + }, + { + "epoch": 0.69175260306125, + "grad_norm": 0.09470102936029434, + "learning_rate": 9.270569976720318e-06, + "loss": 8.6281, + "step": 138520 + }, + { + "epoch": 0.6918025418861894, + "grad_norm": 0.08966551721096039, + "learning_rate": 9.269068061778768e-06, + "loss": 8.6274, + "step": 138530 + }, + { + "epoch": 0.6918524807111288, + "grad_norm": 0.09647005796432495, + "learning_rate": 9.267566146837218e-06, + "loss": 8.652, + "step": 138540 + }, + { + "epoch": 0.6919024195360683, + "grad_norm": 0.08961433917284012, + "learning_rate": 9.266064231895668e-06, + "loss": 8.634, + "step": 138550 + }, + { + "epoch": 0.6919523583610078, + "grad_norm": 0.09530511498451233, + "learning_rate": 9.264562316954117e-06, + "loss": 8.6453, + "step": 138560 + }, + { + "epoch": 0.6920022971859472, + "grad_norm": 0.09226834028959274, + "learning_rate": 9.263060402012565e-06, + "loss": 8.6172, + "step": 138570 + }, + { + "epoch": 0.6920522360108866, + "grad_norm": 0.09644404798746109, + "learning_rate": 9.261558487071016e-06, + "loss": 8.6318, + "step": 138580 + }, + { + "epoch": 0.6921021748358261, + "grad_norm": 0.08702649176120758, + "learning_rate": 9.260056572129466e-06, + "loss": 8.6606, + "step": 138590 + }, + { + "epoch": 0.6921521136607656, + "grad_norm": 0.08770040422677994, + "learning_rate": 9.258554657187916e-06, + "loss": 8.6342, + "step": 138600 + }, + { + "epoch": 0.692202052485705, + "grad_norm": 0.09810072183609009, + "learning_rate": 9.257052742246364e-06, + "loss": 8.6444, + "step": 138610 + }, + { + "epoch": 0.6922519913106444, + "grad_norm": 0.09160198271274567, + "learning_rate": 9.255550827304813e-06, + "loss": 8.6532, + "step": 138620 + }, + { + "epoch": 0.6923019301355839, + "grad_norm": 0.09566731005907059, + "learning_rate": 9.254048912363263e-06, + "loss": 8.6305, + "step": 138630 + }, + { + "epoch": 0.6923518689605234, + "grad_norm": 0.0926600992679596, + "learning_rate": 9.252546997421713e-06, + "loss": 8.6613, + "step": 138640 + }, + { + "epoch": 0.6924018077854628, + "grad_norm": 0.0963670089840889, + "learning_rate": 9.251045082480163e-06, + "loss": 8.6392, + "step": 138650 + }, + { + "epoch": 0.6924517466104022, + "grad_norm": 0.09352785348892212, + "learning_rate": 9.249543167538614e-06, + "loss": 8.6547, + "step": 138660 + }, + { + "epoch": 0.6925016854353417, + "grad_norm": 0.08976289629936218, + "learning_rate": 9.24804125259706e-06, + "loss": 8.6306, + "step": 138670 + }, + { + "epoch": 0.6925516242602812, + "grad_norm": 0.0948214903473854, + "learning_rate": 9.24653933765551e-06, + "loss": 8.6406, + "step": 138680 + }, + { + "epoch": 0.6926015630852206, + "grad_norm": 0.08709558844566345, + "learning_rate": 9.24503742271396e-06, + "loss": 8.6603, + "step": 138690 + }, + { + "epoch": 0.69265150191016, + "grad_norm": 0.09074770659208298, + "learning_rate": 9.243535507772411e-06, + "loss": 8.6567, + "step": 138700 + }, + { + "epoch": 0.6927014407350995, + "grad_norm": 0.08999824523925781, + "learning_rate": 9.242033592830861e-06, + "loss": 8.659, + "step": 138710 + }, + { + "epoch": 0.692751379560039, + "grad_norm": 0.09150887280702591, + "learning_rate": 9.240531677889308e-06, + "loss": 8.6724, + "step": 138720 + }, + { + "epoch": 0.6928013183849784, + "grad_norm": 0.09337803721427917, + "learning_rate": 9.239029762947758e-06, + "loss": 8.64, + "step": 138730 + }, + { + "epoch": 0.6928512572099178, + "grad_norm": 0.09013208001852036, + "learning_rate": 9.237527848006208e-06, + "loss": 8.6463, + "step": 138740 + }, + { + "epoch": 0.6929011960348573, + "grad_norm": 0.08989313244819641, + "learning_rate": 9.236025933064658e-06, + "loss": 8.6294, + "step": 138750 + }, + { + "epoch": 0.6929511348597968, + "grad_norm": 0.09004274755716324, + "learning_rate": 9.234524018123109e-06, + "loss": 8.6373, + "step": 138760 + }, + { + "epoch": 0.6930010736847362, + "grad_norm": 0.09396898746490479, + "learning_rate": 9.233022103181555e-06, + "loss": 8.6434, + "step": 138770 + }, + { + "epoch": 0.6930510125096756, + "grad_norm": 0.08722883462905884, + "learning_rate": 9.231520188240006e-06, + "loss": 8.6448, + "step": 138780 + }, + { + "epoch": 0.6931009513346151, + "grad_norm": 0.0960550382733345, + "learning_rate": 9.230018273298456e-06, + "loss": 8.6277, + "step": 138790 + }, + { + "epoch": 0.6931508901595546, + "grad_norm": 0.09497455507516861, + "learning_rate": 9.228516358356906e-06, + "loss": 8.6294, + "step": 138800 + }, + { + "epoch": 0.693200828984494, + "grad_norm": 0.086859792470932, + "learning_rate": 9.227014443415356e-06, + "loss": 8.632, + "step": 138810 + }, + { + "epoch": 0.6932507678094334, + "grad_norm": 0.08842300623655319, + "learning_rate": 9.225512528473805e-06, + "loss": 8.6624, + "step": 138820 + }, + { + "epoch": 0.6933007066343729, + "grad_norm": 0.09223365783691406, + "learning_rate": 9.224010613532253e-06, + "loss": 8.6479, + "step": 138830 + }, + { + "epoch": 0.6933506454593124, + "grad_norm": 0.09159765392541885, + "learning_rate": 9.222508698590703e-06, + "loss": 8.6521, + "step": 138840 + }, + { + "epoch": 0.6934005842842518, + "grad_norm": 0.0935441255569458, + "learning_rate": 9.221006783649153e-06, + "loss": 8.6378, + "step": 138850 + }, + { + "epoch": 0.6934505231091912, + "grad_norm": 0.09290704131126404, + "learning_rate": 9.219504868707604e-06, + "loss": 8.6148, + "step": 138860 + }, + { + "epoch": 0.6935004619341307, + "grad_norm": 0.08997565507888794, + "learning_rate": 9.218002953766052e-06, + "loss": 8.6324, + "step": 138870 + }, + { + "epoch": 0.6935504007590702, + "grad_norm": 0.09210515767335892, + "learning_rate": 9.2165010388245e-06, + "loss": 8.6539, + "step": 138880 + }, + { + "epoch": 0.6936003395840096, + "grad_norm": 0.09159453958272934, + "learning_rate": 9.21499912388295e-06, + "loss": 8.6455, + "step": 138890 + }, + { + "epoch": 0.693650278408949, + "grad_norm": 0.09023798257112503, + "learning_rate": 9.213497208941401e-06, + "loss": 8.6536, + "step": 138900 + }, + { + "epoch": 0.6937002172338885, + "grad_norm": 0.08913420885801315, + "learning_rate": 9.211995293999851e-06, + "loss": 8.6457, + "step": 138910 + }, + { + "epoch": 0.693750156058828, + "grad_norm": 0.09656532853841782, + "learning_rate": 9.2104933790583e-06, + "loss": 8.6271, + "step": 138920 + }, + { + "epoch": 0.6938000948837674, + "grad_norm": 0.08836175501346588, + "learning_rate": 9.208991464116748e-06, + "loss": 8.6521, + "step": 138930 + }, + { + "epoch": 0.6938500337087068, + "grad_norm": 0.09175171703100204, + "learning_rate": 9.207489549175198e-06, + "loss": 8.6369, + "step": 138940 + }, + { + "epoch": 0.6938999725336463, + "grad_norm": 0.09058476984500885, + "learning_rate": 9.205987634233648e-06, + "loss": 8.637, + "step": 138950 + }, + { + "epoch": 0.6939499113585857, + "grad_norm": 0.08583276718854904, + "learning_rate": 9.204485719292099e-06, + "loss": 8.6521, + "step": 138960 + }, + { + "epoch": 0.6939998501835252, + "grad_norm": 0.09340415894985199, + "learning_rate": 9.202983804350547e-06, + "loss": 8.645, + "step": 138970 + }, + { + "epoch": 0.6940497890084646, + "grad_norm": 0.09227906912565231, + "learning_rate": 9.201481889408997e-06, + "loss": 8.6404, + "step": 138980 + }, + { + "epoch": 0.6940997278334041, + "grad_norm": 0.09200102090835571, + "learning_rate": 9.199979974467446e-06, + "loss": 8.6315, + "step": 138990 + }, + { + "epoch": 0.6941496666583435, + "grad_norm": 0.09255097806453705, + "learning_rate": 9.198478059525896e-06, + "loss": 8.6211, + "step": 139000 + }, + { + "epoch": 0.694199605483283, + "grad_norm": 0.09157124161720276, + "learning_rate": 9.196976144584346e-06, + "loss": 8.648, + "step": 139010 + }, + { + "epoch": 0.6942495443082224, + "grad_norm": 0.09558318555355072, + "learning_rate": 9.195474229642795e-06, + "loss": 8.6472, + "step": 139020 + }, + { + "epoch": 0.6942994831331619, + "grad_norm": 0.09198082983493805, + "learning_rate": 9.193972314701245e-06, + "loss": 8.6296, + "step": 139030 + }, + { + "epoch": 0.6943494219581013, + "grad_norm": 0.09510613977909088, + "learning_rate": 9.192470399759693e-06, + "loss": 8.6449, + "step": 139040 + }, + { + "epoch": 0.6943993607830408, + "grad_norm": 0.0944787859916687, + "learning_rate": 9.190968484818143e-06, + "loss": 8.6387, + "step": 139050 + }, + { + "epoch": 0.6944492996079802, + "grad_norm": 0.09569986164569855, + "learning_rate": 9.189466569876594e-06, + "loss": 8.6267, + "step": 139060 + }, + { + "epoch": 0.6944992384329197, + "grad_norm": 0.09105094522237778, + "learning_rate": 9.187964654935042e-06, + "loss": 8.6455, + "step": 139070 + }, + { + "epoch": 0.6945491772578591, + "grad_norm": 0.09231606870889664, + "learning_rate": 9.186462739993492e-06, + "loss": 8.6481, + "step": 139080 + }, + { + "epoch": 0.6945991160827986, + "grad_norm": 0.09214016050100327, + "learning_rate": 9.18496082505194e-06, + "loss": 8.637, + "step": 139090 + }, + { + "epoch": 0.694649054907738, + "grad_norm": 0.09304431825876236, + "learning_rate": 9.183458910110391e-06, + "loss": 8.6331, + "step": 139100 + }, + { + "epoch": 0.6946989937326775, + "grad_norm": 0.09022108465433121, + "learning_rate": 9.181956995168841e-06, + "loss": 8.6322, + "step": 139110 + }, + { + "epoch": 0.6947489325576169, + "grad_norm": 0.09341921657323837, + "learning_rate": 9.18045508022729e-06, + "loss": 8.6466, + "step": 139120 + }, + { + "epoch": 0.6947988713825564, + "grad_norm": 0.09652432799339294, + "learning_rate": 9.17895316528574e-06, + "loss": 8.6152, + "step": 139130 + }, + { + "epoch": 0.6948488102074958, + "grad_norm": 0.08961055427789688, + "learning_rate": 9.17745125034419e-06, + "loss": 8.6514, + "step": 139140 + }, + { + "epoch": 0.6948987490324353, + "grad_norm": 0.09760846197605133, + "learning_rate": 9.175949335402638e-06, + "loss": 8.635, + "step": 139150 + }, + { + "epoch": 0.6949486878573747, + "grad_norm": 0.0923495665192604, + "learning_rate": 9.174447420461089e-06, + "loss": 8.6445, + "step": 139160 + }, + { + "epoch": 0.6949986266823142, + "grad_norm": 0.09096313267946243, + "learning_rate": 9.172945505519537e-06, + "loss": 8.6452, + "step": 139170 + }, + { + "epoch": 0.6950485655072536, + "grad_norm": 0.09231965243816376, + "learning_rate": 9.171443590577987e-06, + "loss": 8.6316, + "step": 139180 + }, + { + "epoch": 0.6950985043321931, + "grad_norm": 0.09298811107873917, + "learning_rate": 9.169941675636437e-06, + "loss": 8.6394, + "step": 139190 + }, + { + "epoch": 0.6951484431571325, + "grad_norm": 0.08991467207670212, + "learning_rate": 9.168439760694886e-06, + "loss": 8.6401, + "step": 139200 + }, + { + "epoch": 0.695198381982072, + "grad_norm": 0.09446527063846588, + "learning_rate": 9.166937845753336e-06, + "loss": 8.6471, + "step": 139210 + }, + { + "epoch": 0.6952483208070114, + "grad_norm": 0.09214930981397629, + "learning_rate": 9.165435930811785e-06, + "loss": 8.6469, + "step": 139220 + }, + { + "epoch": 0.6952982596319509, + "grad_norm": 0.09560856968164444, + "learning_rate": 9.163934015870235e-06, + "loss": 8.6143, + "step": 139230 + }, + { + "epoch": 0.6953481984568903, + "grad_norm": 0.09127038717269897, + "learning_rate": 9.162432100928685e-06, + "loss": 8.6509, + "step": 139240 + }, + { + "epoch": 0.6953981372818298, + "grad_norm": 0.09658581763505936, + "learning_rate": 9.160930185987133e-06, + "loss": 8.6312, + "step": 139250 + }, + { + "epoch": 0.6954480761067692, + "grad_norm": 0.08580411970615387, + "learning_rate": 9.159428271045584e-06, + "loss": 8.6339, + "step": 139260 + }, + { + "epoch": 0.6954980149317087, + "grad_norm": 0.0958205908536911, + "learning_rate": 9.157926356104032e-06, + "loss": 8.6357, + "step": 139270 + }, + { + "epoch": 0.6955479537566481, + "grad_norm": 0.09207163751125336, + "learning_rate": 9.156424441162482e-06, + "loss": 8.6253, + "step": 139280 + }, + { + "epoch": 0.6955978925815876, + "grad_norm": 0.09957921504974365, + "learning_rate": 9.154922526220932e-06, + "loss": 8.6448, + "step": 139290 + }, + { + "epoch": 0.695647831406527, + "grad_norm": 0.08785407990217209, + "learning_rate": 9.153420611279383e-06, + "loss": 8.6244, + "step": 139300 + }, + { + "epoch": 0.6956977702314665, + "grad_norm": 0.08667879551649094, + "learning_rate": 9.151918696337831e-06, + "loss": 8.6377, + "step": 139310 + }, + { + "epoch": 0.6957477090564059, + "grad_norm": 0.09369443356990814, + "learning_rate": 9.15041678139628e-06, + "loss": 8.6415, + "step": 139320 + }, + { + "epoch": 0.6957976478813453, + "grad_norm": 0.08814938366413116, + "learning_rate": 9.14891486645473e-06, + "loss": 8.629, + "step": 139330 + }, + { + "epoch": 0.6958475867062848, + "grad_norm": 0.0961841344833374, + "learning_rate": 9.14741295151318e-06, + "loss": 8.6497, + "step": 139340 + }, + { + "epoch": 0.6958975255312243, + "grad_norm": 0.09453871101140976, + "learning_rate": 9.14591103657163e-06, + "loss": 8.6451, + "step": 139350 + }, + { + "epoch": 0.6959474643561637, + "grad_norm": 0.09213295578956604, + "learning_rate": 9.144409121630079e-06, + "loss": 8.6234, + "step": 139360 + }, + { + "epoch": 0.6959974031811031, + "grad_norm": 0.08923090249300003, + "learning_rate": 9.142907206688527e-06, + "loss": 8.617, + "step": 139370 + }, + { + "epoch": 0.6960473420060426, + "grad_norm": 0.09008591622114182, + "learning_rate": 9.141405291746977e-06, + "loss": 8.6443, + "step": 139380 + }, + { + "epoch": 0.6960972808309821, + "grad_norm": 0.0908149927854538, + "learning_rate": 9.139903376805427e-06, + "loss": 8.6455, + "step": 139390 + }, + { + "epoch": 0.6961472196559215, + "grad_norm": 0.09176421910524368, + "learning_rate": 9.138401461863878e-06, + "loss": 8.6389, + "step": 139400 + }, + { + "epoch": 0.696197158480861, + "grad_norm": 0.09254884719848633, + "learning_rate": 9.136899546922326e-06, + "loss": 8.6449, + "step": 139410 + }, + { + "epoch": 0.6962470973058004, + "grad_norm": 0.08738537132740021, + "learning_rate": 9.135397631980775e-06, + "loss": 8.6249, + "step": 139420 + }, + { + "epoch": 0.6962970361307398, + "grad_norm": 0.09267020225524902, + "learning_rate": 9.133895717039225e-06, + "loss": 8.6208, + "step": 139430 + }, + { + "epoch": 0.6963469749556793, + "grad_norm": 0.09037657082080841, + "learning_rate": 9.132393802097675e-06, + "loss": 8.6347, + "step": 139440 + }, + { + "epoch": 0.6963969137806187, + "grad_norm": 0.08870069682598114, + "learning_rate": 9.130891887156125e-06, + "loss": 8.6323, + "step": 139450 + }, + { + "epoch": 0.6964468526055582, + "grad_norm": 0.08991122990846634, + "learning_rate": 9.129389972214575e-06, + "loss": 8.6758, + "step": 139460 + }, + { + "epoch": 0.6964967914304976, + "grad_norm": 0.08251538872718811, + "learning_rate": 9.127888057273022e-06, + "loss": 8.6402, + "step": 139470 + }, + { + "epoch": 0.6965467302554371, + "grad_norm": 0.08768171072006226, + "learning_rate": 9.126386142331472e-06, + "loss": 8.6293, + "step": 139480 + }, + { + "epoch": 0.6965966690803765, + "grad_norm": 0.09158322215080261, + "learning_rate": 9.124884227389922e-06, + "loss": 8.6397, + "step": 139490 + }, + { + "epoch": 0.696646607905316, + "grad_norm": 0.08981853723526001, + "learning_rate": 9.123382312448373e-06, + "loss": 8.638, + "step": 139500 + }, + { + "epoch": 0.6966965467302554, + "grad_norm": 0.08998404443264008, + "learning_rate": 9.121880397506823e-06, + "loss": 8.6505, + "step": 139510 + }, + { + "epoch": 0.6967464855551949, + "grad_norm": 0.09334992617368698, + "learning_rate": 9.12037848256527e-06, + "loss": 8.6324, + "step": 139520 + }, + { + "epoch": 0.6967964243801343, + "grad_norm": 0.09155179560184479, + "learning_rate": 9.11887656762372e-06, + "loss": 8.6379, + "step": 139530 + }, + { + "epoch": 0.6968463632050738, + "grad_norm": 0.09096579253673553, + "learning_rate": 9.11737465268217e-06, + "loss": 8.6277, + "step": 139540 + }, + { + "epoch": 0.6968963020300132, + "grad_norm": 0.0905652865767479, + "learning_rate": 9.11587273774062e-06, + "loss": 8.6074, + "step": 139550 + }, + { + "epoch": 0.6969462408549527, + "grad_norm": 0.09262055158615112, + "learning_rate": 9.11437082279907e-06, + "loss": 8.6542, + "step": 139560 + }, + { + "epoch": 0.6969961796798921, + "grad_norm": 0.09247998148202896, + "learning_rate": 9.112868907857517e-06, + "loss": 8.6331, + "step": 139570 + }, + { + "epoch": 0.6970461185048316, + "grad_norm": 0.09279992431402206, + "learning_rate": 9.111366992915967e-06, + "loss": 8.6329, + "step": 139580 + }, + { + "epoch": 0.697096057329771, + "grad_norm": 0.09361322969198227, + "learning_rate": 9.109865077974417e-06, + "loss": 8.6413, + "step": 139590 + }, + { + "epoch": 0.6971459961547105, + "grad_norm": 0.09208791702985764, + "learning_rate": 9.108363163032868e-06, + "loss": 8.6303, + "step": 139600 + }, + { + "epoch": 0.6971959349796499, + "grad_norm": 0.09434138238430023, + "learning_rate": 9.106861248091318e-06, + "loss": 8.626, + "step": 139610 + }, + { + "epoch": 0.6972458738045894, + "grad_norm": 0.09279754012823105, + "learning_rate": 9.105359333149766e-06, + "loss": 8.627, + "step": 139620 + }, + { + "epoch": 0.6972958126295288, + "grad_norm": 0.08741941303014755, + "learning_rate": 9.103857418208215e-06, + "loss": 8.6508, + "step": 139630 + }, + { + "epoch": 0.6973457514544683, + "grad_norm": 0.09483351558446884, + "learning_rate": 9.102355503266665e-06, + "loss": 8.6326, + "step": 139640 + }, + { + "epoch": 0.6973956902794077, + "grad_norm": 0.09228503704071045, + "learning_rate": 9.100853588325115e-06, + "loss": 8.6424, + "step": 139650 + }, + { + "epoch": 0.6974456291043472, + "grad_norm": 0.08870236575603485, + "learning_rate": 9.099351673383565e-06, + "loss": 8.6325, + "step": 139660 + }, + { + "epoch": 0.6974955679292866, + "grad_norm": 0.09134595096111298, + "learning_rate": 9.097849758442014e-06, + "loss": 8.6322, + "step": 139670 + }, + { + "epoch": 0.6975455067542261, + "grad_norm": 0.09109502285718918, + "learning_rate": 9.096347843500462e-06, + "loss": 8.636, + "step": 139680 + }, + { + "epoch": 0.6975954455791655, + "grad_norm": 0.09011939913034439, + "learning_rate": 9.094845928558912e-06, + "loss": 8.6288, + "step": 139690 + }, + { + "epoch": 0.697645384404105, + "grad_norm": 0.09101170301437378, + "learning_rate": 9.093344013617363e-06, + "loss": 8.6427, + "step": 139700 + }, + { + "epoch": 0.6976953232290444, + "grad_norm": 0.09532671421766281, + "learning_rate": 9.091842098675813e-06, + "loss": 8.6448, + "step": 139710 + }, + { + "epoch": 0.6977452620539839, + "grad_norm": 0.08789071440696716, + "learning_rate": 9.090340183734261e-06, + "loss": 8.6285, + "step": 139720 + }, + { + "epoch": 0.6977952008789233, + "grad_norm": 0.08861180394887924, + "learning_rate": 9.08883826879271e-06, + "loss": 8.6353, + "step": 139730 + }, + { + "epoch": 0.6978451397038627, + "grad_norm": 0.0869184210896492, + "learning_rate": 9.08733635385116e-06, + "loss": 8.6337, + "step": 139740 + }, + { + "epoch": 0.6978950785288022, + "grad_norm": 0.09149453043937683, + "learning_rate": 9.08583443890961e-06, + "loss": 8.631, + "step": 139750 + }, + { + "epoch": 0.6979450173537417, + "grad_norm": 0.09057032316923141, + "learning_rate": 9.08433252396806e-06, + "loss": 8.6373, + "step": 139760 + }, + { + "epoch": 0.6979949561786811, + "grad_norm": 0.08867466449737549, + "learning_rate": 9.082830609026509e-06, + "loss": 8.6174, + "step": 139770 + }, + { + "epoch": 0.6980448950036205, + "grad_norm": 0.09577593952417374, + "learning_rate": 9.081328694084959e-06, + "loss": 8.6414, + "step": 139780 + }, + { + "epoch": 0.69809483382856, + "grad_norm": 0.08875248581171036, + "learning_rate": 9.079826779143407e-06, + "loss": 8.6312, + "step": 139790 + }, + { + "epoch": 0.6981447726534995, + "grad_norm": 0.09105167537927628, + "learning_rate": 9.078324864201858e-06, + "loss": 8.6298, + "step": 139800 + }, + { + "epoch": 0.6981947114784389, + "grad_norm": 0.09408155083656311, + "learning_rate": 9.076822949260308e-06, + "loss": 8.6485, + "step": 139810 + }, + { + "epoch": 0.6982446503033783, + "grad_norm": 0.09340722113847733, + "learning_rate": 9.075321034318756e-06, + "loss": 8.6215, + "step": 139820 + }, + { + "epoch": 0.6982945891283178, + "grad_norm": 0.10109424591064453, + "learning_rate": 9.073819119377206e-06, + "loss": 8.631, + "step": 139830 + }, + { + "epoch": 0.6983445279532573, + "grad_norm": 0.0897102952003479, + "learning_rate": 9.072317204435655e-06, + "loss": 8.6321, + "step": 139840 + }, + { + "epoch": 0.6983944667781967, + "grad_norm": 0.09369130432605743, + "learning_rate": 9.070815289494105e-06, + "loss": 8.633, + "step": 139850 + }, + { + "epoch": 0.6984444056031361, + "grad_norm": 0.09355512261390686, + "learning_rate": 9.069313374552555e-06, + "loss": 8.6267, + "step": 139860 + }, + { + "epoch": 0.6984943444280756, + "grad_norm": 0.09666591137647629, + "learning_rate": 9.067811459611004e-06, + "loss": 8.6556, + "step": 139870 + }, + { + "epoch": 0.6985442832530151, + "grad_norm": 0.09669771045446396, + "learning_rate": 9.066309544669454e-06, + "loss": 8.6225, + "step": 139880 + }, + { + "epoch": 0.6985942220779545, + "grad_norm": 0.0945110023021698, + "learning_rate": 9.064807629727902e-06, + "loss": 8.6422, + "step": 139890 + }, + { + "epoch": 0.6986441609028939, + "grad_norm": 0.09108574688434601, + "learning_rate": 9.063305714786353e-06, + "loss": 8.6459, + "step": 139900 + }, + { + "epoch": 0.6986940997278334, + "grad_norm": 0.09179191291332245, + "learning_rate": 9.061803799844803e-06, + "loss": 8.6282, + "step": 139910 + }, + { + "epoch": 0.6987440385527729, + "grad_norm": 0.08860944956541061, + "learning_rate": 9.060301884903251e-06, + "loss": 8.6106, + "step": 139920 + }, + { + "epoch": 0.6987939773777123, + "grad_norm": 0.09390242397785187, + "learning_rate": 9.058799969961701e-06, + "loss": 8.622, + "step": 139930 + }, + { + "epoch": 0.6988439162026517, + "grad_norm": 0.08761897683143616, + "learning_rate": 9.057298055020152e-06, + "loss": 8.6207, + "step": 139940 + }, + { + "epoch": 0.6988938550275912, + "grad_norm": 0.0982297733426094, + "learning_rate": 9.0557961400786e-06, + "loss": 8.6384, + "step": 139950 + }, + { + "epoch": 0.6989437938525307, + "grad_norm": 0.09506106376647949, + "learning_rate": 9.05429422513705e-06, + "loss": 8.6514, + "step": 139960 + }, + { + "epoch": 0.6989937326774701, + "grad_norm": 0.09104983508586884, + "learning_rate": 9.052792310195499e-06, + "loss": 8.629, + "step": 139970 + }, + { + "epoch": 0.6990436715024095, + "grad_norm": 0.08770845830440521, + "learning_rate": 9.051290395253949e-06, + "loss": 8.6221, + "step": 139980 + }, + { + "epoch": 0.699093610327349, + "grad_norm": 0.09654659777879715, + "learning_rate": 9.0497884803124e-06, + "loss": 8.6329, + "step": 139990 + }, + { + "epoch": 0.6991435491522885, + "grad_norm": 0.0931318923830986, + "learning_rate": 9.048286565370848e-06, + "loss": 8.6416, + "step": 140000 + }, + { + "epoch": 0.6991934879772279, + "grad_norm": 0.09113478660583496, + "learning_rate": 9.046784650429298e-06, + "loss": 8.6452, + "step": 140010 + }, + { + "epoch": 0.6992434268021673, + "grad_norm": 0.09566029906272888, + "learning_rate": 9.045282735487746e-06, + "loss": 8.6487, + "step": 140020 + }, + { + "epoch": 0.6992933656271068, + "grad_norm": 0.08825311809778214, + "learning_rate": 9.043780820546197e-06, + "loss": 8.6336, + "step": 140030 + }, + { + "epoch": 0.6993433044520463, + "grad_norm": 0.09055044502019882, + "learning_rate": 9.042278905604647e-06, + "loss": 8.6425, + "step": 140040 + }, + { + "epoch": 0.6993932432769857, + "grad_norm": 0.08617035299539566, + "learning_rate": 9.040776990663095e-06, + "loss": 8.6324, + "step": 140050 + }, + { + "epoch": 0.6994431821019251, + "grad_norm": 0.09692193567752838, + "learning_rate": 9.039275075721545e-06, + "loss": 8.6194, + "step": 140060 + }, + { + "epoch": 0.6994931209268646, + "grad_norm": 0.09942319244146347, + "learning_rate": 9.037773160779994e-06, + "loss": 8.6313, + "step": 140070 + }, + { + "epoch": 0.6995430597518041, + "grad_norm": 0.0971224308013916, + "learning_rate": 9.036271245838444e-06, + "loss": 8.6337, + "step": 140080 + }, + { + "epoch": 0.6995929985767435, + "grad_norm": 0.09974560886621475, + "learning_rate": 9.034769330896894e-06, + "loss": 8.6208, + "step": 140090 + }, + { + "epoch": 0.6996429374016829, + "grad_norm": 0.09148532152175903, + "learning_rate": 9.033267415955344e-06, + "loss": 8.6361, + "step": 140100 + }, + { + "epoch": 0.6996928762266224, + "grad_norm": 0.09783138334751129, + "learning_rate": 9.031765501013793e-06, + "loss": 8.6281, + "step": 140110 + }, + { + "epoch": 0.6997428150515619, + "grad_norm": 0.09642284363508224, + "learning_rate": 9.030263586072241e-06, + "loss": 8.6295, + "step": 140120 + }, + { + "epoch": 0.6997927538765013, + "grad_norm": 0.088691346347332, + "learning_rate": 9.028761671130692e-06, + "loss": 8.6492, + "step": 140130 + }, + { + "epoch": 0.6998426927014407, + "grad_norm": 0.09170258790254593, + "learning_rate": 9.027259756189142e-06, + "loss": 8.6167, + "step": 140140 + }, + { + "epoch": 0.6998926315263801, + "grad_norm": 0.09011033177375793, + "learning_rate": 9.025757841247592e-06, + "loss": 8.6359, + "step": 140150 + }, + { + "epoch": 0.6999425703513197, + "grad_norm": 0.09351085126399994, + "learning_rate": 9.02425592630604e-06, + "loss": 8.6323, + "step": 140160 + }, + { + "epoch": 0.6999925091762591, + "grad_norm": 0.09307374805212021, + "learning_rate": 9.022754011364489e-06, + "loss": 8.6389, + "step": 140170 + }, + { + "epoch": 0.7000424480011985, + "grad_norm": 0.09782139211893082, + "learning_rate": 9.021252096422939e-06, + "loss": 8.6427, + "step": 140180 + }, + { + "epoch": 0.700092386826138, + "grad_norm": 0.09016304463148117, + "learning_rate": 9.01975018148139e-06, + "loss": 8.6376, + "step": 140190 + }, + { + "epoch": 0.7001423256510775, + "grad_norm": 0.0927334874868393, + "learning_rate": 9.01824826653984e-06, + "loss": 8.6239, + "step": 140200 + }, + { + "epoch": 0.7001922644760169, + "grad_norm": 0.09416099637746811, + "learning_rate": 9.016746351598288e-06, + "loss": 8.6107, + "step": 140210 + }, + { + "epoch": 0.7002422033009563, + "grad_norm": 0.09217461943626404, + "learning_rate": 9.015244436656736e-06, + "loss": 8.6225, + "step": 140220 + }, + { + "epoch": 0.7002921421258957, + "grad_norm": 0.08289490640163422, + "learning_rate": 9.013742521715187e-06, + "loss": 8.6443, + "step": 140230 + }, + { + "epoch": 0.7003420809508353, + "grad_norm": 0.08873941749334335, + "learning_rate": 9.012240606773637e-06, + "loss": 8.6334, + "step": 140240 + }, + { + "epoch": 0.7003920197757747, + "grad_norm": 0.09045631438493729, + "learning_rate": 9.010738691832087e-06, + "loss": 8.631, + "step": 140250 + }, + { + "epoch": 0.7004419586007141, + "grad_norm": 0.08844128996133804, + "learning_rate": 9.009236776890537e-06, + "loss": 8.6378, + "step": 140260 + }, + { + "epoch": 0.7004918974256535, + "grad_norm": 0.09297400712966919, + "learning_rate": 9.007734861948984e-06, + "loss": 8.6214, + "step": 140270 + }, + { + "epoch": 0.7005418362505931, + "grad_norm": 0.09032850712537766, + "learning_rate": 9.006232947007434e-06, + "loss": 8.6302, + "step": 140280 + }, + { + "epoch": 0.7005917750755325, + "grad_norm": 0.09603878855705261, + "learning_rate": 9.004731032065884e-06, + "loss": 8.6308, + "step": 140290 + }, + { + "epoch": 0.7006417139004719, + "grad_norm": 0.09244571626186371, + "learning_rate": 9.003229117124334e-06, + "loss": 8.6455, + "step": 140300 + }, + { + "epoch": 0.7006916527254113, + "grad_norm": 0.09039461612701416, + "learning_rate": 9.001727202182785e-06, + "loss": 8.6357, + "step": 140310 + }, + { + "epoch": 0.7007415915503509, + "grad_norm": 0.09459111839532852, + "learning_rate": 9.000225287241231e-06, + "loss": 8.6303, + "step": 140320 + }, + { + "epoch": 0.7007915303752903, + "grad_norm": 0.09677302837371826, + "learning_rate": 8.998723372299682e-06, + "loss": 8.6394, + "step": 140330 + }, + { + "epoch": 0.7008414692002297, + "grad_norm": 0.08825703710317612, + "learning_rate": 8.997221457358132e-06, + "loss": 8.6318, + "step": 140340 + }, + { + "epoch": 0.7008914080251691, + "grad_norm": 0.09611725807189941, + "learning_rate": 8.995719542416582e-06, + "loss": 8.6359, + "step": 140350 + }, + { + "epoch": 0.7009413468501087, + "grad_norm": 0.09157848358154297, + "learning_rate": 8.994217627475032e-06, + "loss": 8.6286, + "step": 140360 + }, + { + "epoch": 0.7009912856750481, + "grad_norm": 0.08757877349853516, + "learning_rate": 8.992715712533479e-06, + "loss": 8.639, + "step": 140370 + }, + { + "epoch": 0.7010412244999875, + "grad_norm": 0.09328636527061462, + "learning_rate": 8.991213797591929e-06, + "loss": 8.6316, + "step": 140380 + }, + { + "epoch": 0.7010911633249269, + "grad_norm": 0.08965921401977539, + "learning_rate": 8.98971188265038e-06, + "loss": 8.624, + "step": 140390 + }, + { + "epoch": 0.7011411021498665, + "grad_norm": 0.0938333123922348, + "learning_rate": 8.98820996770883e-06, + "loss": 8.6466, + "step": 140400 + }, + { + "epoch": 0.7011910409748059, + "grad_norm": 0.08751453459262848, + "learning_rate": 8.98670805276728e-06, + "loss": 8.6313, + "step": 140410 + }, + { + "epoch": 0.7012409797997453, + "grad_norm": 0.09409631788730621, + "learning_rate": 8.985206137825728e-06, + "loss": 8.6354, + "step": 140420 + }, + { + "epoch": 0.7012909186246847, + "grad_norm": 0.09224843233823776, + "learning_rate": 8.983704222884177e-06, + "loss": 8.646, + "step": 140430 + }, + { + "epoch": 0.7013408574496242, + "grad_norm": 0.08712205290794373, + "learning_rate": 8.982202307942627e-06, + "loss": 8.6293, + "step": 140440 + }, + { + "epoch": 0.7013907962745637, + "grad_norm": 0.08894158154726028, + "learning_rate": 8.980700393001077e-06, + "loss": 8.6525, + "step": 140450 + }, + { + "epoch": 0.7014407350995031, + "grad_norm": 0.09181137382984161, + "learning_rate": 8.979198478059527e-06, + "loss": 8.6246, + "step": 140460 + }, + { + "epoch": 0.7014906739244425, + "grad_norm": 0.08507677912712097, + "learning_rate": 8.977696563117976e-06, + "loss": 8.6357, + "step": 140470 + }, + { + "epoch": 0.701540612749382, + "grad_norm": 0.09141664952039719, + "learning_rate": 8.976194648176424e-06, + "loss": 8.6319, + "step": 140480 + }, + { + "epoch": 0.7015905515743215, + "grad_norm": 0.09606952220201492, + "learning_rate": 8.974692733234874e-06, + "loss": 8.6253, + "step": 140490 + }, + { + "epoch": 0.7016404903992609, + "grad_norm": 0.0922846719622612, + "learning_rate": 8.973190818293324e-06, + "loss": 8.6208, + "step": 140500 + }, + { + "epoch": 0.7016904292242003, + "grad_norm": 0.0884494036436081, + "learning_rate": 8.971688903351775e-06, + "loss": 8.6359, + "step": 140510 + }, + { + "epoch": 0.7017403680491398, + "grad_norm": 0.09224381297826767, + "learning_rate": 8.970186988410223e-06, + "loss": 8.6228, + "step": 140520 + }, + { + "epoch": 0.7017903068740793, + "grad_norm": 0.09082509577274323, + "learning_rate": 8.968685073468672e-06, + "loss": 8.6334, + "step": 140530 + }, + { + "epoch": 0.7018402456990187, + "grad_norm": 0.0936235710978508, + "learning_rate": 8.967183158527122e-06, + "loss": 8.6158, + "step": 140540 + }, + { + "epoch": 0.7018901845239581, + "grad_norm": 0.08928533643484116, + "learning_rate": 8.965681243585572e-06, + "loss": 8.6382, + "step": 140550 + }, + { + "epoch": 0.7019401233488975, + "grad_norm": 0.08770529180765152, + "learning_rate": 8.964179328644022e-06, + "loss": 8.6297, + "step": 140560 + }, + { + "epoch": 0.7019900621738371, + "grad_norm": 0.08795421570539474, + "learning_rate": 8.96267741370247e-06, + "loss": 8.6302, + "step": 140570 + }, + { + "epoch": 0.7020400009987765, + "grad_norm": 0.09204691648483276, + "learning_rate": 8.961175498760919e-06, + "loss": 8.6417, + "step": 140580 + }, + { + "epoch": 0.7020899398237159, + "grad_norm": 0.09336613863706589, + "learning_rate": 8.95967358381937e-06, + "loss": 8.6493, + "step": 140590 + }, + { + "epoch": 0.7021398786486553, + "grad_norm": 0.09377633780241013, + "learning_rate": 8.95817166887782e-06, + "loss": 8.6375, + "step": 140600 + }, + { + "epoch": 0.7021898174735949, + "grad_norm": 0.0860079750418663, + "learning_rate": 8.95666975393627e-06, + "loss": 8.6426, + "step": 140610 + }, + { + "epoch": 0.7022397562985343, + "grad_norm": 0.0926608294248581, + "learning_rate": 8.95516783899472e-06, + "loss": 8.6271, + "step": 140620 + }, + { + "epoch": 0.7022896951234737, + "grad_norm": 0.09390340000391006, + "learning_rate": 8.953665924053168e-06, + "loss": 8.6388, + "step": 140630 + }, + { + "epoch": 0.7023396339484131, + "grad_norm": 0.09352290630340576, + "learning_rate": 8.952164009111617e-06, + "loss": 8.6309, + "step": 140640 + }, + { + "epoch": 0.7023895727733527, + "grad_norm": 0.09155908972024918, + "learning_rate": 8.950662094170067e-06, + "loss": 8.6282, + "step": 140650 + }, + { + "epoch": 0.7024395115982921, + "grad_norm": 0.08889105916023254, + "learning_rate": 8.949160179228517e-06, + "loss": 8.6313, + "step": 140660 + }, + { + "epoch": 0.7024894504232315, + "grad_norm": 0.0867091491818428, + "learning_rate": 8.947658264286967e-06, + "loss": 8.6344, + "step": 140670 + }, + { + "epoch": 0.7025393892481709, + "grad_norm": 0.08940424025058746, + "learning_rate": 8.946156349345416e-06, + "loss": 8.641, + "step": 140680 + }, + { + "epoch": 0.7025893280731105, + "grad_norm": 0.09525980055332184, + "learning_rate": 8.944654434403864e-06, + "loss": 8.6463, + "step": 140690 + }, + { + "epoch": 0.7026392668980499, + "grad_norm": 0.08965422958135605, + "learning_rate": 8.943152519462314e-06, + "loss": 8.6191, + "step": 140700 + }, + { + "epoch": 0.7026892057229893, + "grad_norm": 0.09086750447750092, + "learning_rate": 8.941650604520765e-06, + "loss": 8.6479, + "step": 140710 + }, + { + "epoch": 0.7027391445479287, + "grad_norm": 0.08906187117099762, + "learning_rate": 8.940148689579215e-06, + "loss": 8.6292, + "step": 140720 + }, + { + "epoch": 0.7027890833728683, + "grad_norm": 0.09022345393896103, + "learning_rate": 8.938646774637663e-06, + "loss": 8.6404, + "step": 140730 + }, + { + "epoch": 0.7028390221978077, + "grad_norm": 0.08862177282571793, + "learning_rate": 8.937144859696112e-06, + "loss": 8.63, + "step": 140740 + }, + { + "epoch": 0.7028889610227471, + "grad_norm": 0.09106519818305969, + "learning_rate": 8.935642944754562e-06, + "loss": 8.6177, + "step": 140750 + }, + { + "epoch": 0.7029388998476865, + "grad_norm": 0.09161339700222015, + "learning_rate": 8.934141029813012e-06, + "loss": 8.6267, + "step": 140760 + }, + { + "epoch": 0.7029888386726261, + "grad_norm": 0.0945722833275795, + "learning_rate": 8.932639114871462e-06, + "loss": 8.6188, + "step": 140770 + }, + { + "epoch": 0.7030387774975655, + "grad_norm": 0.0907975360751152, + "learning_rate": 8.93113719992991e-06, + "loss": 8.6323, + "step": 140780 + }, + { + "epoch": 0.7030887163225049, + "grad_norm": 0.08957850933074951, + "learning_rate": 8.929635284988361e-06, + "loss": 8.6396, + "step": 140790 + }, + { + "epoch": 0.7031386551474443, + "grad_norm": 0.0977301150560379, + "learning_rate": 8.92813337004681e-06, + "loss": 8.6227, + "step": 140800 + }, + { + "epoch": 0.7031885939723839, + "grad_norm": 0.09034769982099533, + "learning_rate": 8.92663145510526e-06, + "loss": 8.6368, + "step": 140810 + }, + { + "epoch": 0.7032385327973233, + "grad_norm": 0.0880945548415184, + "learning_rate": 8.92512954016371e-06, + "loss": 8.6343, + "step": 140820 + }, + { + "epoch": 0.7032884716222627, + "grad_norm": 0.09109944850206375, + "learning_rate": 8.923627625222158e-06, + "loss": 8.6357, + "step": 140830 + }, + { + "epoch": 0.7033384104472021, + "grad_norm": 0.0931500494480133, + "learning_rate": 8.922125710280608e-06, + "loss": 8.6303, + "step": 140840 + }, + { + "epoch": 0.7033883492721417, + "grad_norm": 0.09611831605434418, + "learning_rate": 8.920623795339057e-06, + "loss": 8.62, + "step": 140850 + }, + { + "epoch": 0.7034382880970811, + "grad_norm": 0.08877561241388321, + "learning_rate": 8.919121880397507e-06, + "loss": 8.6357, + "step": 140860 + }, + { + "epoch": 0.7034882269220205, + "grad_norm": 0.0883457213640213, + "learning_rate": 8.917619965455957e-06, + "loss": 8.6247, + "step": 140870 + }, + { + "epoch": 0.7035381657469599, + "grad_norm": 0.08729522675275803, + "learning_rate": 8.916118050514406e-06, + "loss": 8.6459, + "step": 140880 + }, + { + "epoch": 0.7035881045718995, + "grad_norm": 0.09188755601644516, + "learning_rate": 8.914616135572856e-06, + "loss": 8.6239, + "step": 140890 + }, + { + "epoch": 0.7036380433968389, + "grad_norm": 0.09601496160030365, + "learning_rate": 8.913114220631304e-06, + "loss": 8.6126, + "step": 140900 + }, + { + "epoch": 0.7036879822217783, + "grad_norm": 0.0922279953956604, + "learning_rate": 8.911612305689755e-06, + "loss": 8.6379, + "step": 140910 + }, + { + "epoch": 0.7037379210467177, + "grad_norm": 0.08827944844961166, + "learning_rate": 8.910110390748205e-06, + "loss": 8.6256, + "step": 140920 + }, + { + "epoch": 0.7037878598716573, + "grad_norm": 0.09115996211767197, + "learning_rate": 8.908608475806653e-06, + "loss": 8.6132, + "step": 140930 + }, + { + "epoch": 0.7038377986965967, + "grad_norm": 0.09394663572311401, + "learning_rate": 8.907106560865103e-06, + "loss": 8.6283, + "step": 140940 + }, + { + "epoch": 0.7038877375215361, + "grad_norm": 0.08945653587579727, + "learning_rate": 8.905604645923554e-06, + "loss": 8.6324, + "step": 140950 + }, + { + "epoch": 0.7039376763464755, + "grad_norm": 0.09907026588916779, + "learning_rate": 8.904102730982002e-06, + "loss": 8.6333, + "step": 140960 + }, + { + "epoch": 0.7039876151714151, + "grad_norm": 0.09264247864484787, + "learning_rate": 8.902600816040452e-06, + "loss": 8.6323, + "step": 140970 + }, + { + "epoch": 0.7040375539963545, + "grad_norm": 0.08847159892320633, + "learning_rate": 8.9010989010989e-06, + "loss": 8.6337, + "step": 140980 + }, + { + "epoch": 0.7040874928212939, + "grad_norm": 0.08942971378564835, + "learning_rate": 8.899596986157351e-06, + "loss": 8.6253, + "step": 140990 + }, + { + "epoch": 0.7041374316462333, + "grad_norm": 0.0965491458773613, + "learning_rate": 8.898095071215801e-06, + "loss": 8.6361, + "step": 141000 + }, + { + "epoch": 0.7041873704711729, + "grad_norm": 0.09533529728651047, + "learning_rate": 8.89659315627425e-06, + "loss": 8.6196, + "step": 141010 + }, + { + "epoch": 0.7042373092961123, + "grad_norm": 0.09187507629394531, + "learning_rate": 8.8950912413327e-06, + "loss": 8.6384, + "step": 141020 + }, + { + "epoch": 0.7042872481210517, + "grad_norm": 0.09570582956075668, + "learning_rate": 8.893589326391148e-06, + "loss": 8.6171, + "step": 141030 + }, + { + "epoch": 0.7043371869459911, + "grad_norm": 0.09152864664793015, + "learning_rate": 8.892087411449598e-06, + "loss": 8.6202, + "step": 141040 + }, + { + "epoch": 0.7043871257709307, + "grad_norm": 0.08897831290960312, + "learning_rate": 8.890585496508049e-06, + "loss": 8.6357, + "step": 141050 + }, + { + "epoch": 0.7044370645958701, + "grad_norm": 0.09181348979473114, + "learning_rate": 8.889083581566497e-06, + "loss": 8.6106, + "step": 141060 + }, + { + "epoch": 0.7044870034208095, + "grad_norm": 0.09355124831199646, + "learning_rate": 8.887581666624947e-06, + "loss": 8.631, + "step": 141070 + }, + { + "epoch": 0.7045369422457489, + "grad_norm": 0.08978011459112167, + "learning_rate": 8.886079751683396e-06, + "loss": 8.6163, + "step": 141080 + }, + { + "epoch": 0.7045868810706885, + "grad_norm": 0.09040986001491547, + "learning_rate": 8.884577836741846e-06, + "loss": 8.6161, + "step": 141090 + }, + { + "epoch": 0.7046368198956279, + "grad_norm": 0.09216012805700302, + "learning_rate": 8.883075921800296e-06, + "loss": 8.6069, + "step": 141100 + }, + { + "epoch": 0.7046867587205673, + "grad_norm": 0.09498348832130432, + "learning_rate": 8.881574006858746e-06, + "loss": 8.6323, + "step": 141110 + }, + { + "epoch": 0.7047366975455067, + "grad_norm": 0.09142297506332397, + "learning_rate": 8.880072091917195e-06, + "loss": 8.6146, + "step": 141120 + }, + { + "epoch": 0.7047866363704463, + "grad_norm": 0.08392765372991562, + "learning_rate": 8.878570176975643e-06, + "loss": 8.6236, + "step": 141130 + }, + { + "epoch": 0.7048365751953857, + "grad_norm": 0.08954204618930817, + "learning_rate": 8.877068262034093e-06, + "loss": 8.6459, + "step": 141140 + }, + { + "epoch": 0.7048865140203251, + "grad_norm": 0.09618078172206879, + "learning_rate": 8.875566347092544e-06, + "loss": 8.6317, + "step": 141150 + }, + { + "epoch": 0.7049364528452645, + "grad_norm": 0.09143991023302078, + "learning_rate": 8.874064432150994e-06, + "loss": 8.6466, + "step": 141160 + }, + { + "epoch": 0.704986391670204, + "grad_norm": 0.09367083013057709, + "learning_rate": 8.872562517209442e-06, + "loss": 8.6332, + "step": 141170 + }, + { + "epoch": 0.7050363304951435, + "grad_norm": 0.09456432610750198, + "learning_rate": 8.87106060226789e-06, + "loss": 8.6154, + "step": 141180 + }, + { + "epoch": 0.7050862693200829, + "grad_norm": 0.09094260632991791, + "learning_rate": 8.869558687326341e-06, + "loss": 8.6337, + "step": 141190 + }, + { + "epoch": 0.7051362081450223, + "grad_norm": 0.09079168736934662, + "learning_rate": 8.868056772384791e-06, + "loss": 8.6399, + "step": 141200 + }, + { + "epoch": 0.7051861469699618, + "grad_norm": 0.08768593519926071, + "learning_rate": 8.866554857443241e-06, + "loss": 8.6241, + "step": 141210 + }, + { + "epoch": 0.7052360857949013, + "grad_norm": 0.09279026836156845, + "learning_rate": 8.86505294250169e-06, + "loss": 8.6303, + "step": 141220 + }, + { + "epoch": 0.7052860246198407, + "grad_norm": 0.09045767784118652, + "learning_rate": 8.863551027560138e-06, + "loss": 8.6399, + "step": 141230 + }, + { + "epoch": 0.7053359634447801, + "grad_norm": 0.09411778301000595, + "learning_rate": 8.862049112618588e-06, + "loss": 8.6315, + "step": 141240 + }, + { + "epoch": 0.7053859022697196, + "grad_norm": 0.08939050883054733, + "learning_rate": 8.860547197677039e-06, + "loss": 8.6213, + "step": 141250 + }, + { + "epoch": 0.7054358410946591, + "grad_norm": 0.08885614573955536, + "learning_rate": 8.859045282735489e-06, + "loss": 8.6282, + "step": 141260 + }, + { + "epoch": 0.7054857799195985, + "grad_norm": 0.09590033441781998, + "learning_rate": 8.857543367793939e-06, + "loss": 8.6296, + "step": 141270 + }, + { + "epoch": 0.7055357187445379, + "grad_norm": 0.09497443586587906, + "learning_rate": 8.856041452852386e-06, + "loss": 8.6256, + "step": 141280 + }, + { + "epoch": 0.7055856575694774, + "grad_norm": 0.09108743816614151, + "learning_rate": 8.854539537910836e-06, + "loss": 8.6314, + "step": 141290 + }, + { + "epoch": 0.7056355963944169, + "grad_norm": 0.08908271044492722, + "learning_rate": 8.853037622969286e-06, + "loss": 8.6233, + "step": 141300 + }, + { + "epoch": 0.7056855352193563, + "grad_norm": 0.10099402815103531, + "learning_rate": 8.851535708027736e-06, + "loss": 8.6468, + "step": 141310 + }, + { + "epoch": 0.7057354740442957, + "grad_norm": 0.09605363756418228, + "learning_rate": 8.850033793086187e-06, + "loss": 8.6166, + "step": 141320 + }, + { + "epoch": 0.7057854128692352, + "grad_norm": 0.09077009558677673, + "learning_rate": 8.848531878144633e-06, + "loss": 8.6215, + "step": 141330 + }, + { + "epoch": 0.7058353516941747, + "grad_norm": 0.0913512110710144, + "learning_rate": 8.847029963203083e-06, + "loss": 8.6587, + "step": 141340 + }, + { + "epoch": 0.7058852905191141, + "grad_norm": 0.08867861330509186, + "learning_rate": 8.845528048261534e-06, + "loss": 8.6569, + "step": 141350 + }, + { + "epoch": 0.7059352293440535, + "grad_norm": 0.09407979995012283, + "learning_rate": 8.844026133319984e-06, + "loss": 8.606, + "step": 141360 + }, + { + "epoch": 0.705985168168993, + "grad_norm": 0.0922011062502861, + "learning_rate": 8.842524218378434e-06, + "loss": 8.6163, + "step": 141370 + }, + { + "epoch": 0.7060351069939325, + "grad_norm": 0.09292416274547577, + "learning_rate": 8.84102230343688e-06, + "loss": 8.6203, + "step": 141380 + }, + { + "epoch": 0.7060850458188719, + "grad_norm": 0.09504501521587372, + "learning_rate": 8.839520388495331e-06, + "loss": 8.6259, + "step": 141390 + }, + { + "epoch": 0.7061349846438113, + "grad_norm": 0.09113211929798126, + "learning_rate": 8.838018473553781e-06, + "loss": 8.6264, + "step": 141400 + }, + { + "epoch": 0.7061849234687507, + "grad_norm": 0.09200391918420792, + "learning_rate": 8.836516558612231e-06, + "loss": 8.6251, + "step": 141410 + }, + { + "epoch": 0.7062348622936903, + "grad_norm": 0.09039604663848877, + "learning_rate": 8.835014643670682e-06, + "loss": 8.6115, + "step": 141420 + }, + { + "epoch": 0.7062848011186297, + "grad_norm": 0.0915713682770729, + "learning_rate": 8.83351272872913e-06, + "loss": 8.6198, + "step": 141430 + }, + { + "epoch": 0.7063347399435691, + "grad_norm": 0.09520621597766876, + "learning_rate": 8.832010813787578e-06, + "loss": 8.6043, + "step": 141440 + }, + { + "epoch": 0.7063846787685085, + "grad_norm": 0.0928700864315033, + "learning_rate": 8.830508898846029e-06, + "loss": 8.61, + "step": 141450 + }, + { + "epoch": 0.706434617593448, + "grad_norm": 0.09517066925764084, + "learning_rate": 8.829006983904479e-06, + "loss": 8.6289, + "step": 141460 + }, + { + "epoch": 0.7064845564183875, + "grad_norm": 0.0947510227560997, + "learning_rate": 8.827505068962929e-06, + "loss": 8.6077, + "step": 141470 + }, + { + "epoch": 0.7065344952433269, + "grad_norm": 0.09221941232681274, + "learning_rate": 8.826003154021377e-06, + "loss": 8.6342, + "step": 141480 + }, + { + "epoch": 0.7065844340682663, + "grad_norm": 0.08994131535291672, + "learning_rate": 8.824501239079826e-06, + "loss": 8.6346, + "step": 141490 + }, + { + "epoch": 0.7066343728932059, + "grad_norm": 0.09264985471963882, + "learning_rate": 8.822999324138276e-06, + "loss": 8.6291, + "step": 141500 + }, + { + "epoch": 0.7066843117181453, + "grad_norm": 0.09057199954986572, + "learning_rate": 8.821497409196726e-06, + "loss": 8.6444, + "step": 141510 + }, + { + "epoch": 0.7067342505430847, + "grad_norm": 0.08880162984132767, + "learning_rate": 8.819995494255177e-06, + "loss": 8.6288, + "step": 141520 + }, + { + "epoch": 0.7067841893680241, + "grad_norm": 0.08788347989320755, + "learning_rate": 8.818493579313625e-06, + "loss": 8.6211, + "step": 141530 + }, + { + "epoch": 0.7068341281929637, + "grad_norm": 0.08884553611278534, + "learning_rate": 8.816991664372073e-06, + "loss": 8.6423, + "step": 141540 + }, + { + "epoch": 0.7068840670179031, + "grad_norm": 0.090285524725914, + "learning_rate": 8.815489749430524e-06, + "loss": 8.6358, + "step": 141550 + }, + { + "epoch": 0.7069340058428425, + "grad_norm": 0.09516801685094833, + "learning_rate": 8.813987834488974e-06, + "loss": 8.618, + "step": 141560 + }, + { + "epoch": 0.7069839446677819, + "grad_norm": 0.09445168077945709, + "learning_rate": 8.812485919547424e-06, + "loss": 8.6284, + "step": 141570 + }, + { + "epoch": 0.7070338834927214, + "grad_norm": 0.08974064886569977, + "learning_rate": 8.810984004605873e-06, + "loss": 8.612, + "step": 141580 + }, + { + "epoch": 0.7070838223176609, + "grad_norm": 0.09538102895021439, + "learning_rate": 8.809482089664323e-06, + "loss": 8.6337, + "step": 141590 + }, + { + "epoch": 0.7071337611426003, + "grad_norm": 0.08882886171340942, + "learning_rate": 8.807980174722771e-06, + "loss": 8.6367, + "step": 141600 + }, + { + "epoch": 0.7071836999675397, + "grad_norm": 0.09137466549873352, + "learning_rate": 8.806478259781221e-06, + "loss": 8.6261, + "step": 141610 + }, + { + "epoch": 0.7072336387924792, + "grad_norm": 0.08818534016609192, + "learning_rate": 8.804976344839672e-06, + "loss": 8.6378, + "step": 141620 + }, + { + "epoch": 0.7072835776174187, + "grad_norm": 0.09256480634212494, + "learning_rate": 8.80347442989812e-06, + "loss": 8.6342, + "step": 141630 + }, + { + "epoch": 0.7073335164423581, + "grad_norm": 0.08914809674024582, + "learning_rate": 8.80197251495657e-06, + "loss": 8.6384, + "step": 141640 + }, + { + "epoch": 0.7073834552672975, + "grad_norm": 0.09401541948318481, + "learning_rate": 8.800470600015019e-06, + "loss": 8.6253, + "step": 141650 + }, + { + "epoch": 0.707433394092237, + "grad_norm": 0.09287466108798981, + "learning_rate": 8.798968685073469e-06, + "loss": 8.6103, + "step": 141660 + }, + { + "epoch": 0.7074833329171765, + "grad_norm": 0.09313994646072388, + "learning_rate": 8.797466770131919e-06, + "loss": 8.6352, + "step": 141670 + }, + { + "epoch": 0.7075332717421159, + "grad_norm": 0.09282007813453674, + "learning_rate": 8.795964855190368e-06, + "loss": 8.6173, + "step": 141680 + }, + { + "epoch": 0.7075832105670553, + "grad_norm": 0.08810416609048843, + "learning_rate": 8.794462940248818e-06, + "loss": 8.6341, + "step": 141690 + }, + { + "epoch": 0.7076331493919948, + "grad_norm": 0.09486988186836243, + "learning_rate": 8.792961025307266e-06, + "loss": 8.6187, + "step": 141700 + }, + { + "epoch": 0.7076830882169343, + "grad_norm": 0.08923081308603287, + "learning_rate": 8.791459110365716e-06, + "loss": 8.6303, + "step": 141710 + }, + { + "epoch": 0.7077330270418737, + "grad_norm": 0.09007243067026138, + "learning_rate": 8.789957195424167e-06, + "loss": 8.6309, + "step": 141720 + }, + { + "epoch": 0.7077829658668131, + "grad_norm": 0.09199108928442001, + "learning_rate": 8.788455280482615e-06, + "loss": 8.6047, + "step": 141730 + }, + { + "epoch": 0.7078329046917526, + "grad_norm": 0.09315073490142822, + "learning_rate": 8.786953365541065e-06, + "loss": 8.639, + "step": 141740 + }, + { + "epoch": 0.7078828435166921, + "grad_norm": 0.09393049031496048, + "learning_rate": 8.785451450599515e-06, + "loss": 8.637, + "step": 141750 + }, + { + "epoch": 0.7079327823416315, + "grad_norm": 0.08970038592815399, + "learning_rate": 8.783949535657964e-06, + "loss": 8.6206, + "step": 141760 + }, + { + "epoch": 0.7079827211665709, + "grad_norm": 0.09079226106405258, + "learning_rate": 8.782447620716414e-06, + "loss": 8.6221, + "step": 141770 + }, + { + "epoch": 0.7080326599915104, + "grad_norm": 0.09092725813388824, + "learning_rate": 8.780945705774863e-06, + "loss": 8.6283, + "step": 141780 + }, + { + "epoch": 0.7080825988164499, + "grad_norm": 0.08902301639318466, + "learning_rate": 8.779443790833313e-06, + "loss": 8.6266, + "step": 141790 + }, + { + "epoch": 0.7081325376413893, + "grad_norm": 0.09129643440246582, + "learning_rate": 8.777941875891763e-06, + "loss": 8.6308, + "step": 141800 + }, + { + "epoch": 0.7081824764663287, + "grad_norm": 0.09005031734704971, + "learning_rate": 8.776439960950211e-06, + "loss": 8.6405, + "step": 141810 + }, + { + "epoch": 0.7082324152912682, + "grad_norm": 0.0907624140381813, + "learning_rate": 8.774938046008662e-06, + "loss": 8.6345, + "step": 141820 + }, + { + "epoch": 0.7082823541162077, + "grad_norm": 0.09212564677000046, + "learning_rate": 8.77343613106711e-06, + "loss": 8.6532, + "step": 141830 + }, + { + "epoch": 0.7083322929411471, + "grad_norm": 0.09632585197687149, + "learning_rate": 8.77193421612556e-06, + "loss": 8.611, + "step": 141840 + }, + { + "epoch": 0.7083822317660865, + "grad_norm": 0.09049876779317856, + "learning_rate": 8.77043230118401e-06, + "loss": 8.6288, + "step": 141850 + }, + { + "epoch": 0.708432170591026, + "grad_norm": 0.08971288055181503, + "learning_rate": 8.768930386242459e-06, + "loss": 8.6298, + "step": 141860 + }, + { + "epoch": 0.7084821094159655, + "grad_norm": 0.09906616806983948, + "learning_rate": 8.767428471300909e-06, + "loss": 8.6211, + "step": 141870 + }, + { + "epoch": 0.7085320482409049, + "grad_norm": 0.09421475976705551, + "learning_rate": 8.765926556359358e-06, + "loss": 8.6384, + "step": 141880 + }, + { + "epoch": 0.7085819870658443, + "grad_norm": 0.08919620513916016, + "learning_rate": 8.764424641417808e-06, + "loss": 8.6428, + "step": 141890 + }, + { + "epoch": 0.7086319258907838, + "grad_norm": 0.0901108905673027, + "learning_rate": 8.762922726476258e-06, + "loss": 8.6232, + "step": 141900 + }, + { + "epoch": 0.7086818647157233, + "grad_norm": 0.0895710289478302, + "learning_rate": 8.761420811534708e-06, + "loss": 8.6461, + "step": 141910 + }, + { + "epoch": 0.7087318035406627, + "grad_norm": 0.09124090522527695, + "learning_rate": 8.759918896593157e-06, + "loss": 8.6357, + "step": 141920 + }, + { + "epoch": 0.7087817423656021, + "grad_norm": 0.09022539854049683, + "learning_rate": 8.758416981651605e-06, + "loss": 8.622, + "step": 141930 + }, + { + "epoch": 0.7088316811905416, + "grad_norm": 0.08793357014656067, + "learning_rate": 8.756915066710055e-06, + "loss": 8.6265, + "step": 141940 + }, + { + "epoch": 0.708881620015481, + "grad_norm": 0.09152141213417053, + "learning_rate": 8.755413151768505e-06, + "loss": 8.6229, + "step": 141950 + }, + { + "epoch": 0.7089315588404205, + "grad_norm": 0.09446197748184204, + "learning_rate": 8.753911236826956e-06, + "loss": 8.6395, + "step": 141960 + }, + { + "epoch": 0.7089814976653599, + "grad_norm": 0.08696843683719635, + "learning_rate": 8.752409321885404e-06, + "loss": 8.6207, + "step": 141970 + }, + { + "epoch": 0.7090314364902994, + "grad_norm": 0.09359335899353027, + "learning_rate": 8.750907406943853e-06, + "loss": 8.6385, + "step": 141980 + }, + { + "epoch": 0.7090813753152388, + "grad_norm": 0.09001364558935165, + "learning_rate": 8.749405492002303e-06, + "loss": 8.6352, + "step": 141990 + }, + { + "epoch": 0.7091313141401783, + "grad_norm": 0.09298966825008392, + "learning_rate": 8.747903577060753e-06, + "loss": 8.6246, + "step": 142000 + }, + { + "epoch": 0.7091812529651177, + "grad_norm": 0.09238903969526291, + "learning_rate": 8.746401662119203e-06, + "loss": 8.6325, + "step": 142010 + }, + { + "epoch": 0.7092311917900572, + "grad_norm": 0.09266587346792221, + "learning_rate": 8.744899747177652e-06, + "loss": 8.6339, + "step": 142020 + }, + { + "epoch": 0.7092811306149966, + "grad_norm": 0.090972401201725, + "learning_rate": 8.7433978322361e-06, + "loss": 8.634, + "step": 142030 + }, + { + "epoch": 0.7093310694399361, + "grad_norm": 0.09120898693799973, + "learning_rate": 8.74189591729455e-06, + "loss": 8.6247, + "step": 142040 + }, + { + "epoch": 0.7093810082648755, + "grad_norm": 0.09463303536176682, + "learning_rate": 8.740394002353e-06, + "loss": 8.6374, + "step": 142050 + }, + { + "epoch": 0.709430947089815, + "grad_norm": 0.09146030992269516, + "learning_rate": 8.73889208741145e-06, + "loss": 8.6386, + "step": 142060 + }, + { + "epoch": 0.7094808859147544, + "grad_norm": 0.09224395453929901, + "learning_rate": 8.7373901724699e-06, + "loss": 8.6257, + "step": 142070 + }, + { + "epoch": 0.7095308247396939, + "grad_norm": 0.08982054889202118, + "learning_rate": 8.735888257528348e-06, + "loss": 8.6414, + "step": 142080 + }, + { + "epoch": 0.7095807635646333, + "grad_norm": 0.09617902338504791, + "learning_rate": 8.734386342586798e-06, + "loss": 8.6255, + "step": 142090 + }, + { + "epoch": 0.7096307023895728, + "grad_norm": 0.09528889507055283, + "learning_rate": 8.732884427645248e-06, + "loss": 8.622, + "step": 142100 + }, + { + "epoch": 0.7096806412145122, + "grad_norm": 0.0894269272685051, + "learning_rate": 8.731382512703698e-06, + "loss": 8.6288, + "step": 142110 + }, + { + "epoch": 0.7097305800394517, + "grad_norm": 0.09145845472812653, + "learning_rate": 8.729880597762148e-06, + "loss": 8.6278, + "step": 142120 + }, + { + "epoch": 0.7097805188643911, + "grad_norm": 0.09675304591655731, + "learning_rate": 8.728378682820595e-06, + "loss": 8.6259, + "step": 142130 + }, + { + "epoch": 0.7098304576893306, + "grad_norm": 0.09058880060911179, + "learning_rate": 8.726876767879045e-06, + "loss": 8.6513, + "step": 142140 + }, + { + "epoch": 0.70988039651427, + "grad_norm": 0.0966552197933197, + "learning_rate": 8.725374852937495e-06, + "loss": 8.61, + "step": 142150 + }, + { + "epoch": 0.7099303353392095, + "grad_norm": 0.08744119107723236, + "learning_rate": 8.723872937995946e-06, + "loss": 8.6147, + "step": 142160 + }, + { + "epoch": 0.7099802741641489, + "grad_norm": 0.09429807960987091, + "learning_rate": 8.722371023054396e-06, + "loss": 8.6291, + "step": 142170 + }, + { + "epoch": 0.7100302129890884, + "grad_norm": 0.08756644278764725, + "learning_rate": 8.720869108112843e-06, + "loss": 8.6386, + "step": 142180 + }, + { + "epoch": 0.7100801518140278, + "grad_norm": 0.08758307993412018, + "learning_rate": 8.719367193171293e-06, + "loss": 8.6153, + "step": 142190 + }, + { + "epoch": 0.7101300906389673, + "grad_norm": 0.09212186932563782, + "learning_rate": 8.717865278229743e-06, + "loss": 8.6224, + "step": 142200 + }, + { + "epoch": 0.7101800294639067, + "grad_norm": 0.09597406536340714, + "learning_rate": 8.716363363288193e-06, + "loss": 8.6243, + "step": 142210 + }, + { + "epoch": 0.7102299682888462, + "grad_norm": 0.09284794330596924, + "learning_rate": 8.714861448346643e-06, + "loss": 8.6202, + "step": 142220 + }, + { + "epoch": 0.7102799071137856, + "grad_norm": 0.09078753739595413, + "learning_rate": 8.713359533405092e-06, + "loss": 8.625, + "step": 142230 + }, + { + "epoch": 0.7103298459387251, + "grad_norm": 0.09475717693567276, + "learning_rate": 8.71185761846354e-06, + "loss": 8.6252, + "step": 142240 + }, + { + "epoch": 0.7103797847636645, + "grad_norm": 0.09465180337429047, + "learning_rate": 8.71035570352199e-06, + "loss": 8.6361, + "step": 142250 + }, + { + "epoch": 0.710429723588604, + "grad_norm": 0.09588777273893356, + "learning_rate": 8.70885378858044e-06, + "loss": 8.6176, + "step": 142260 + }, + { + "epoch": 0.7104796624135434, + "grad_norm": 0.09004874527454376, + "learning_rate": 8.70735187363889e-06, + "loss": 8.6325, + "step": 142270 + }, + { + "epoch": 0.7105296012384829, + "grad_norm": 0.09361999481916428, + "learning_rate": 8.70584995869734e-06, + "loss": 8.6366, + "step": 142280 + }, + { + "epoch": 0.7105795400634223, + "grad_norm": 0.09634486585855484, + "learning_rate": 8.704348043755788e-06, + "loss": 8.6259, + "step": 142290 + }, + { + "epoch": 0.7106294788883618, + "grad_norm": 0.0938863530755043, + "learning_rate": 8.702846128814238e-06, + "loss": 8.6259, + "step": 142300 + }, + { + "epoch": 0.7106794177133012, + "grad_norm": 0.09446729719638824, + "learning_rate": 8.701344213872688e-06, + "loss": 8.6237, + "step": 142310 + }, + { + "epoch": 0.7107293565382407, + "grad_norm": 0.09354673326015472, + "learning_rate": 8.699842298931138e-06, + "loss": 8.6212, + "step": 142320 + }, + { + "epoch": 0.7107792953631801, + "grad_norm": 0.08772012591362, + "learning_rate": 8.698340383989587e-06, + "loss": 8.6214, + "step": 142330 + }, + { + "epoch": 0.7108292341881196, + "grad_norm": 0.0878673866391182, + "learning_rate": 8.696838469048035e-06, + "loss": 8.6261, + "step": 142340 + }, + { + "epoch": 0.710879173013059, + "grad_norm": 0.08873125165700912, + "learning_rate": 8.695336554106485e-06, + "loss": 8.6183, + "step": 142350 + }, + { + "epoch": 0.7109291118379985, + "grad_norm": 0.0938340574502945, + "learning_rate": 8.693834639164936e-06, + "loss": 8.6124, + "step": 142360 + }, + { + "epoch": 0.7109790506629379, + "grad_norm": 0.09345322847366333, + "learning_rate": 8.692332724223386e-06, + "loss": 8.6146, + "step": 142370 + }, + { + "epoch": 0.7110289894878774, + "grad_norm": 0.09127715975046158, + "learning_rate": 8.690830809281834e-06, + "loss": 8.6477, + "step": 142380 + }, + { + "epoch": 0.7110789283128168, + "grad_norm": 0.09575607627630234, + "learning_rate": 8.689328894340284e-06, + "loss": 8.6153, + "step": 142390 + }, + { + "epoch": 0.7111288671377562, + "grad_norm": 0.08808735758066177, + "learning_rate": 8.687826979398733e-06, + "loss": 8.6387, + "step": 142400 + }, + { + "epoch": 0.7111788059626957, + "grad_norm": 0.09160853177309036, + "learning_rate": 8.686325064457183e-06, + "loss": 8.6374, + "step": 142410 + }, + { + "epoch": 0.7112287447876351, + "grad_norm": 0.09475651383399963, + "learning_rate": 8.684823149515633e-06, + "loss": 8.6342, + "step": 142420 + }, + { + "epoch": 0.7112786836125746, + "grad_norm": 0.08886786550283432, + "learning_rate": 8.683321234574082e-06, + "loss": 8.6129, + "step": 142430 + }, + { + "epoch": 0.711328622437514, + "grad_norm": 0.0918954536318779, + "learning_rate": 8.681819319632532e-06, + "loss": 8.6083, + "step": 142440 + }, + { + "epoch": 0.7113785612624535, + "grad_norm": 0.09870171546936035, + "learning_rate": 8.68031740469098e-06, + "loss": 8.6117, + "step": 142450 + }, + { + "epoch": 0.7114285000873929, + "grad_norm": 0.09634058177471161, + "learning_rate": 8.67881548974943e-06, + "loss": 8.6249, + "step": 142460 + }, + { + "epoch": 0.7114784389123324, + "grad_norm": 0.09128417074680328, + "learning_rate": 8.67731357480788e-06, + "loss": 8.6259, + "step": 142470 + }, + { + "epoch": 0.7115283777372718, + "grad_norm": 0.08941973000764847, + "learning_rate": 8.67581165986633e-06, + "loss": 8.6221, + "step": 142480 + }, + { + "epoch": 0.7115783165622113, + "grad_norm": 0.10016234964132309, + "learning_rate": 8.67430974492478e-06, + "loss": 8.633, + "step": 142490 + }, + { + "epoch": 0.7116282553871507, + "grad_norm": 0.08851133286952972, + "learning_rate": 8.672807829983228e-06, + "loss": 8.6333, + "step": 142500 + }, + { + "epoch": 0.7116781942120902, + "grad_norm": 0.08761074393987656, + "learning_rate": 8.671305915041678e-06, + "loss": 8.6358, + "step": 142510 + }, + { + "epoch": 0.7117281330370296, + "grad_norm": 0.0912003293633461, + "learning_rate": 8.669804000100128e-06, + "loss": 8.6103, + "step": 142520 + }, + { + "epoch": 0.7117780718619691, + "grad_norm": 0.09523065388202667, + "learning_rate": 8.668302085158577e-06, + "loss": 8.6148, + "step": 142530 + }, + { + "epoch": 0.7118280106869085, + "grad_norm": 0.09399678558111191, + "learning_rate": 8.666800170217027e-06, + "loss": 8.622, + "step": 142540 + }, + { + "epoch": 0.711877949511848, + "grad_norm": 0.09112656861543655, + "learning_rate": 8.665298255275477e-06, + "loss": 8.6207, + "step": 142550 + }, + { + "epoch": 0.7119278883367874, + "grad_norm": 0.08925676345825195, + "learning_rate": 8.663796340333926e-06, + "loss": 8.6353, + "step": 142560 + }, + { + "epoch": 0.7119778271617269, + "grad_norm": 0.09203901141881943, + "learning_rate": 8.662294425392376e-06, + "loss": 8.6239, + "step": 142570 + }, + { + "epoch": 0.7120277659866663, + "grad_norm": 0.08438628166913986, + "learning_rate": 8.660792510450824e-06, + "loss": 8.6296, + "step": 142580 + }, + { + "epoch": 0.7120777048116058, + "grad_norm": 0.08993806689977646, + "learning_rate": 8.659290595509274e-06, + "loss": 8.6178, + "step": 142590 + }, + { + "epoch": 0.7121276436365452, + "grad_norm": 0.08885114639997482, + "learning_rate": 8.657788680567725e-06, + "loss": 8.6262, + "step": 142600 + }, + { + "epoch": 0.7121775824614847, + "grad_norm": 0.09332863986492157, + "learning_rate": 8.656286765626173e-06, + "loss": 8.6335, + "step": 142610 + }, + { + "epoch": 0.7122275212864241, + "grad_norm": 0.09431906044483185, + "learning_rate": 8.654784850684623e-06, + "loss": 8.6263, + "step": 142620 + }, + { + "epoch": 0.7122774601113636, + "grad_norm": 0.09060939401388168, + "learning_rate": 8.653282935743072e-06, + "loss": 8.621, + "step": 142630 + }, + { + "epoch": 0.712327398936303, + "grad_norm": 0.09892533719539642, + "learning_rate": 8.651781020801522e-06, + "loss": 8.6007, + "step": 142640 + }, + { + "epoch": 0.7123773377612425, + "grad_norm": 0.09287749230861664, + "learning_rate": 8.650279105859972e-06, + "loss": 8.6396, + "step": 142650 + }, + { + "epoch": 0.7124272765861819, + "grad_norm": 0.09754089266061783, + "learning_rate": 8.64877719091842e-06, + "loss": 8.6199, + "step": 142660 + }, + { + "epoch": 0.7124772154111214, + "grad_norm": 0.09147115051746368, + "learning_rate": 8.64727527597687e-06, + "loss": 8.6157, + "step": 142670 + }, + { + "epoch": 0.7125271542360608, + "grad_norm": 0.09473909437656403, + "learning_rate": 8.64577336103532e-06, + "loss": 8.6091, + "step": 142680 + }, + { + "epoch": 0.7125770930610003, + "grad_norm": 0.08881004899740219, + "learning_rate": 8.64427144609377e-06, + "loss": 8.6372, + "step": 142690 + }, + { + "epoch": 0.7126270318859397, + "grad_norm": 0.08714466542005539, + "learning_rate": 8.64276953115222e-06, + "loss": 8.6302, + "step": 142700 + }, + { + "epoch": 0.7126769707108792, + "grad_norm": 0.08891968429088593, + "learning_rate": 8.64126761621067e-06, + "loss": 8.6191, + "step": 142710 + }, + { + "epoch": 0.7127269095358186, + "grad_norm": 0.09196534752845764, + "learning_rate": 8.639765701269118e-06, + "loss": 8.62, + "step": 142720 + }, + { + "epoch": 0.712776848360758, + "grad_norm": 0.08720948547124863, + "learning_rate": 8.638263786327568e-06, + "loss": 8.6318, + "step": 142730 + }, + { + "epoch": 0.7128267871856975, + "grad_norm": 0.0971934124827385, + "learning_rate": 8.636761871386017e-06, + "loss": 8.6121, + "step": 142740 + }, + { + "epoch": 0.712876726010637, + "grad_norm": 0.09166713804006577, + "learning_rate": 8.635259956444467e-06, + "loss": 8.6297, + "step": 142750 + }, + { + "epoch": 0.7129266648355764, + "grad_norm": 0.09517423063516617, + "learning_rate": 8.633758041502917e-06, + "loss": 8.6293, + "step": 142760 + }, + { + "epoch": 0.7129766036605159, + "grad_norm": 0.09297715872526169, + "learning_rate": 8.632256126561366e-06, + "loss": 8.6296, + "step": 142770 + }, + { + "epoch": 0.7130265424854553, + "grad_norm": 0.09390052407979965, + "learning_rate": 8.630754211619816e-06, + "loss": 8.6186, + "step": 142780 + }, + { + "epoch": 0.7130764813103948, + "grad_norm": 0.09712127596139908, + "learning_rate": 8.629252296678264e-06, + "loss": 8.6155, + "step": 142790 + }, + { + "epoch": 0.7131264201353342, + "grad_norm": 0.0925319716334343, + "learning_rate": 8.627750381736715e-06, + "loss": 8.6215, + "step": 142800 + }, + { + "epoch": 0.7131763589602736, + "grad_norm": 0.09186070412397385, + "learning_rate": 8.626248466795165e-06, + "loss": 8.6158, + "step": 142810 + }, + { + "epoch": 0.7132262977852131, + "grad_norm": 0.09534446150064468, + "learning_rate": 8.624746551853613e-06, + "loss": 8.624, + "step": 142820 + }, + { + "epoch": 0.7132762366101526, + "grad_norm": 0.09688442945480347, + "learning_rate": 8.623244636912063e-06, + "loss": 8.5915, + "step": 142830 + }, + { + "epoch": 0.713326175435092, + "grad_norm": 0.0908428207039833, + "learning_rate": 8.621742721970512e-06, + "loss": 8.6246, + "step": 142840 + }, + { + "epoch": 0.7133761142600314, + "grad_norm": 0.08914895355701447, + "learning_rate": 8.620240807028962e-06, + "loss": 8.6274, + "step": 142850 + }, + { + "epoch": 0.7134260530849709, + "grad_norm": 0.0967710018157959, + "learning_rate": 8.618738892087412e-06, + "loss": 8.6185, + "step": 142860 + }, + { + "epoch": 0.7134759919099104, + "grad_norm": 0.09269560873508453, + "learning_rate": 8.617236977145863e-06, + "loss": 8.6148, + "step": 142870 + }, + { + "epoch": 0.7135259307348498, + "grad_norm": 0.09392331540584564, + "learning_rate": 8.615735062204311e-06, + "loss": 8.6251, + "step": 142880 + }, + { + "epoch": 0.7135758695597892, + "grad_norm": 0.08788944780826569, + "learning_rate": 8.61423314726276e-06, + "loss": 8.6238, + "step": 142890 + }, + { + "epoch": 0.7136258083847287, + "grad_norm": 0.09034030884504318, + "learning_rate": 8.61273123232121e-06, + "loss": 8.622, + "step": 142900 + }, + { + "epoch": 0.7136757472096682, + "grad_norm": 0.08973328024148941, + "learning_rate": 8.61122931737966e-06, + "loss": 8.6273, + "step": 142910 + }, + { + "epoch": 0.7137256860346076, + "grad_norm": 0.08710794895887375, + "learning_rate": 8.60972740243811e-06, + "loss": 8.6138, + "step": 142920 + }, + { + "epoch": 0.713775624859547, + "grad_norm": 0.09544447809457779, + "learning_rate": 8.608225487496558e-06, + "loss": 8.6187, + "step": 142930 + }, + { + "epoch": 0.7138255636844865, + "grad_norm": 0.09114502370357513, + "learning_rate": 8.606723572555007e-06, + "loss": 8.6234, + "step": 142940 + }, + { + "epoch": 0.713875502509426, + "grad_norm": 0.09414034336805344, + "learning_rate": 8.605221657613457e-06, + "loss": 8.6191, + "step": 142950 + }, + { + "epoch": 0.7139254413343654, + "grad_norm": 0.08966190367937088, + "learning_rate": 8.603719742671907e-06, + "loss": 8.6134, + "step": 142960 + }, + { + "epoch": 0.7139753801593048, + "grad_norm": 0.08923894166946411, + "learning_rate": 8.602217827730358e-06, + "loss": 8.6135, + "step": 142970 + }, + { + "epoch": 0.7140253189842443, + "grad_norm": 0.09734483063220978, + "learning_rate": 8.600715912788806e-06, + "loss": 8.6281, + "step": 142980 + }, + { + "epoch": 0.7140752578091838, + "grad_norm": 0.09202791005373001, + "learning_rate": 8.599213997847254e-06, + "loss": 8.6208, + "step": 142990 + }, + { + "epoch": 0.7141251966341232, + "grad_norm": 0.09096809476613998, + "learning_rate": 8.597712082905705e-06, + "loss": 8.6286, + "step": 143000 + }, + { + "epoch": 0.7141751354590626, + "grad_norm": 0.09096328914165497, + "learning_rate": 8.596210167964155e-06, + "loss": 8.6242, + "step": 143010 + }, + { + "epoch": 0.7142250742840021, + "grad_norm": 0.09118296205997467, + "learning_rate": 8.594708253022605e-06, + "loss": 8.6227, + "step": 143020 + }, + { + "epoch": 0.7142750131089416, + "grad_norm": 0.09323076903820038, + "learning_rate": 8.593206338081055e-06, + "loss": 8.6223, + "step": 143030 + }, + { + "epoch": 0.714324951933881, + "grad_norm": 0.08754134923219681, + "learning_rate": 8.591704423139502e-06, + "loss": 8.6249, + "step": 143040 + }, + { + "epoch": 0.7143748907588204, + "grad_norm": 0.09009493887424469, + "learning_rate": 8.590202508197952e-06, + "loss": 8.6254, + "step": 143050 + }, + { + "epoch": 0.7144248295837599, + "grad_norm": 0.09118343889713287, + "learning_rate": 8.588700593256402e-06, + "loss": 8.6078, + "step": 143060 + }, + { + "epoch": 0.7144747684086994, + "grad_norm": 0.0941152274608612, + "learning_rate": 8.587198678314853e-06, + "loss": 8.6298, + "step": 143070 + }, + { + "epoch": 0.7145247072336388, + "grad_norm": 0.08993294835090637, + "learning_rate": 8.585696763373303e-06, + "loss": 8.6334, + "step": 143080 + }, + { + "epoch": 0.7145746460585782, + "grad_norm": 0.0892404243350029, + "learning_rate": 8.58419484843175e-06, + "loss": 8.5978, + "step": 143090 + }, + { + "epoch": 0.7146245848835177, + "grad_norm": 0.08918585628271103, + "learning_rate": 8.5826929334902e-06, + "loss": 8.6161, + "step": 143100 + }, + { + "epoch": 0.7146745237084572, + "grad_norm": 0.08921711891889572, + "learning_rate": 8.58119101854865e-06, + "loss": 8.6231, + "step": 143110 + }, + { + "epoch": 0.7147244625333966, + "grad_norm": 0.10099107027053833, + "learning_rate": 8.5796891036071e-06, + "loss": 8.6362, + "step": 143120 + }, + { + "epoch": 0.714774401358336, + "grad_norm": 0.09065527468919754, + "learning_rate": 8.57818718866555e-06, + "loss": 8.6122, + "step": 143130 + }, + { + "epoch": 0.7148243401832755, + "grad_norm": 0.09168516844511032, + "learning_rate": 8.576685273723997e-06, + "loss": 8.6107, + "step": 143140 + }, + { + "epoch": 0.714874279008215, + "grad_norm": 0.0854279100894928, + "learning_rate": 8.575183358782447e-06, + "loss": 8.6273, + "step": 143150 + }, + { + "epoch": 0.7149242178331544, + "grad_norm": 0.08854588121175766, + "learning_rate": 8.573681443840897e-06, + "loss": 8.6061, + "step": 143160 + }, + { + "epoch": 0.7149741566580938, + "grad_norm": 0.0906798392534256, + "learning_rate": 8.572179528899348e-06, + "loss": 8.6264, + "step": 143170 + }, + { + "epoch": 0.7150240954830333, + "grad_norm": 0.09377674758434296, + "learning_rate": 8.570677613957798e-06, + "loss": 8.6236, + "step": 143180 + }, + { + "epoch": 0.7150740343079728, + "grad_norm": 0.09252772480249405, + "learning_rate": 8.569175699016246e-06, + "loss": 8.6202, + "step": 143190 + }, + { + "epoch": 0.7151239731329122, + "grad_norm": 0.09276933968067169, + "learning_rate": 8.567673784074695e-06, + "loss": 8.6067, + "step": 143200 + }, + { + "epoch": 0.7151739119578516, + "grad_norm": 0.09133142232894897, + "learning_rate": 8.566171869133145e-06, + "loss": 8.6154, + "step": 143210 + }, + { + "epoch": 0.715223850782791, + "grad_norm": 0.09052088111639023, + "learning_rate": 8.564669954191595e-06, + "loss": 8.6426, + "step": 143220 + }, + { + "epoch": 0.7152737896077306, + "grad_norm": 0.0909975916147232, + "learning_rate": 8.563168039250045e-06, + "loss": 8.6208, + "step": 143230 + }, + { + "epoch": 0.71532372843267, + "grad_norm": 0.08892220258712769, + "learning_rate": 8.561666124308494e-06, + "loss": 8.61, + "step": 143240 + }, + { + "epoch": 0.7153736672576094, + "grad_norm": 0.09050557017326355, + "learning_rate": 8.560164209366942e-06, + "loss": 8.6121, + "step": 143250 + }, + { + "epoch": 0.7154236060825488, + "grad_norm": 0.09054877609014511, + "learning_rate": 8.558662294425392e-06, + "loss": 8.6173, + "step": 143260 + }, + { + "epoch": 0.7154735449074884, + "grad_norm": 0.09878473728895187, + "learning_rate": 8.557160379483843e-06, + "loss": 8.611, + "step": 143270 + }, + { + "epoch": 0.7155234837324278, + "grad_norm": 0.08595815300941467, + "learning_rate": 8.555658464542293e-06, + "loss": 8.6321, + "step": 143280 + }, + { + "epoch": 0.7155734225573672, + "grad_norm": 0.0915842056274414, + "learning_rate": 8.554156549600741e-06, + "loss": 8.6161, + "step": 143290 + }, + { + "epoch": 0.7156233613823066, + "grad_norm": 0.08792208880186081, + "learning_rate": 8.55265463465919e-06, + "loss": 8.6099, + "step": 143300 + }, + { + "epoch": 0.7156733002072462, + "grad_norm": 0.0902579203248024, + "learning_rate": 8.55115271971764e-06, + "loss": 8.6081, + "step": 143310 + }, + { + "epoch": 0.7157232390321856, + "grad_norm": 0.09018343687057495, + "learning_rate": 8.54965080477609e-06, + "loss": 8.6206, + "step": 143320 + }, + { + "epoch": 0.715773177857125, + "grad_norm": 0.09098510444164276, + "learning_rate": 8.54814888983454e-06, + "loss": 8.6374, + "step": 143330 + }, + { + "epoch": 0.7158231166820644, + "grad_norm": 0.09270698577165604, + "learning_rate": 8.546646974892989e-06, + "loss": 8.6342, + "step": 143340 + }, + { + "epoch": 0.715873055507004, + "grad_norm": 0.09691673517227173, + "learning_rate": 8.545145059951439e-06, + "loss": 8.6208, + "step": 143350 + }, + { + "epoch": 0.7159229943319434, + "grad_norm": 0.09676571935415268, + "learning_rate": 8.543643145009887e-06, + "loss": 8.6159, + "step": 143360 + }, + { + "epoch": 0.7159729331568828, + "grad_norm": 0.08486426621675491, + "learning_rate": 8.542141230068338e-06, + "loss": 8.6335, + "step": 143370 + }, + { + "epoch": 0.7160228719818222, + "grad_norm": 0.09273499995470047, + "learning_rate": 8.540639315126788e-06, + "loss": 8.62, + "step": 143380 + }, + { + "epoch": 0.7160728108067617, + "grad_norm": 0.09392505139112473, + "learning_rate": 8.539137400185236e-06, + "loss": 8.6318, + "step": 143390 + }, + { + "epoch": 0.7161227496317012, + "grad_norm": 0.08955458551645279, + "learning_rate": 8.537635485243686e-06, + "loss": 8.6154, + "step": 143400 + }, + { + "epoch": 0.7161726884566406, + "grad_norm": 0.08472466468811035, + "learning_rate": 8.536133570302135e-06, + "loss": 8.6181, + "step": 143410 + }, + { + "epoch": 0.71622262728158, + "grad_norm": 0.08813430368900299, + "learning_rate": 8.534631655360585e-06, + "loss": 8.6169, + "step": 143420 + }, + { + "epoch": 0.7162725661065195, + "grad_norm": 0.09203272312879562, + "learning_rate": 8.533129740419035e-06, + "loss": 8.6113, + "step": 143430 + }, + { + "epoch": 0.716322504931459, + "grad_norm": 0.08561133593320847, + "learning_rate": 8.531627825477484e-06, + "loss": 8.6069, + "step": 143440 + }, + { + "epoch": 0.7163724437563984, + "grad_norm": 0.08707552403211594, + "learning_rate": 8.530125910535934e-06, + "loss": 8.6089, + "step": 143450 + }, + { + "epoch": 0.7164223825813378, + "grad_norm": 0.09338618814945221, + "learning_rate": 8.528623995594382e-06, + "loss": 8.6026, + "step": 143460 + }, + { + "epoch": 0.7164723214062773, + "grad_norm": 0.08720285445451736, + "learning_rate": 8.527122080652833e-06, + "loss": 8.6415, + "step": 143470 + }, + { + "epoch": 0.7165222602312168, + "grad_norm": 0.09213987737894058, + "learning_rate": 8.525620165711283e-06, + "loss": 8.6261, + "step": 143480 + }, + { + "epoch": 0.7165721990561562, + "grad_norm": 0.09045135974884033, + "learning_rate": 8.524118250769731e-06, + "loss": 8.6133, + "step": 143490 + }, + { + "epoch": 0.7166221378810956, + "grad_norm": 0.08416689932346344, + "learning_rate": 8.522616335828181e-06, + "loss": 8.6213, + "step": 143500 + }, + { + "epoch": 0.716672076706035, + "grad_norm": 0.09071792662143707, + "learning_rate": 8.52111442088663e-06, + "loss": 8.606, + "step": 143510 + }, + { + "epoch": 0.7167220155309746, + "grad_norm": 0.08766292035579681, + "learning_rate": 8.51961250594508e-06, + "loss": 8.6364, + "step": 143520 + }, + { + "epoch": 0.716771954355914, + "grad_norm": 0.09889094531536102, + "learning_rate": 8.51811059100353e-06, + "loss": 8.6084, + "step": 143530 + }, + { + "epoch": 0.7168218931808534, + "grad_norm": 0.09228768199682236, + "learning_rate": 8.516608676061979e-06, + "loss": 8.617, + "step": 143540 + }, + { + "epoch": 0.7168718320057929, + "grad_norm": 0.08915939927101135, + "learning_rate": 8.515106761120429e-06, + "loss": 8.6044, + "step": 143550 + }, + { + "epoch": 0.7169217708307324, + "grad_norm": 0.08671806752681732, + "learning_rate": 8.513604846178879e-06, + "loss": 8.6338, + "step": 143560 + }, + { + "epoch": 0.7169717096556718, + "grad_norm": 0.090422123670578, + "learning_rate": 8.512102931237328e-06, + "loss": 8.611, + "step": 143570 + }, + { + "epoch": 0.7170216484806112, + "grad_norm": 0.08737920969724655, + "learning_rate": 8.510601016295778e-06, + "loss": 8.6161, + "step": 143580 + }, + { + "epoch": 0.7170715873055507, + "grad_norm": 0.08856778591871262, + "learning_rate": 8.509099101354226e-06, + "loss": 8.63, + "step": 143590 + }, + { + "epoch": 0.7171215261304902, + "grad_norm": 0.0872621163725853, + "learning_rate": 8.507597186412676e-06, + "loss": 8.6203, + "step": 143600 + }, + { + "epoch": 0.7171714649554296, + "grad_norm": 0.08896245807409286, + "learning_rate": 8.506095271471127e-06, + "loss": 8.6228, + "step": 143610 + }, + { + "epoch": 0.717221403780369, + "grad_norm": 0.09879324585199356, + "learning_rate": 8.504593356529575e-06, + "loss": 8.6155, + "step": 143620 + }, + { + "epoch": 0.7172713426053084, + "grad_norm": 0.08767727017402649, + "learning_rate": 8.503091441588025e-06, + "loss": 8.6018, + "step": 143630 + }, + { + "epoch": 0.717321281430248, + "grad_norm": 0.0934598445892334, + "learning_rate": 8.501589526646474e-06, + "loss": 8.6157, + "step": 143640 + }, + { + "epoch": 0.7173712202551874, + "grad_norm": 0.09154853224754333, + "learning_rate": 8.500087611704924e-06, + "loss": 8.618, + "step": 143650 + }, + { + "epoch": 0.7174211590801268, + "grad_norm": 0.09346166253089905, + "learning_rate": 8.498585696763374e-06, + "loss": 8.6144, + "step": 143660 + }, + { + "epoch": 0.7174710979050662, + "grad_norm": 0.09311819821596146, + "learning_rate": 8.497083781821823e-06, + "loss": 8.6105, + "step": 143670 + }, + { + "epoch": 0.7175210367300058, + "grad_norm": 0.08571168035268784, + "learning_rate": 8.495581866880273e-06, + "loss": 8.6284, + "step": 143680 + }, + { + "epoch": 0.7175709755549452, + "grad_norm": 0.09284307062625885, + "learning_rate": 8.494079951938721e-06, + "loss": 8.6317, + "step": 143690 + }, + { + "epoch": 0.7176209143798846, + "grad_norm": 0.08840735256671906, + "learning_rate": 8.492578036997171e-06, + "loss": 8.6275, + "step": 143700 + }, + { + "epoch": 0.717670853204824, + "grad_norm": 0.09564638137817383, + "learning_rate": 8.491076122055622e-06, + "loss": 8.5992, + "step": 143710 + }, + { + "epoch": 0.7177207920297636, + "grad_norm": 0.09461285173892975, + "learning_rate": 8.489574207114072e-06, + "loss": 8.6209, + "step": 143720 + }, + { + "epoch": 0.717770730854703, + "grad_norm": 0.0862046629190445, + "learning_rate": 8.48807229217252e-06, + "loss": 8.6381, + "step": 143730 + }, + { + "epoch": 0.7178206696796424, + "grad_norm": 0.0956760123372078, + "learning_rate": 8.486570377230969e-06, + "loss": 8.6162, + "step": 143740 + }, + { + "epoch": 0.7178706085045818, + "grad_norm": 0.09319616854190826, + "learning_rate": 8.485068462289419e-06, + "loss": 8.617, + "step": 143750 + }, + { + "epoch": 0.7179205473295214, + "grad_norm": 0.08868087083101273, + "learning_rate": 8.483566547347869e-06, + "loss": 8.6159, + "step": 143760 + }, + { + "epoch": 0.7179704861544608, + "grad_norm": 0.0888964906334877, + "learning_rate": 8.48206463240632e-06, + "loss": 8.6165, + "step": 143770 + }, + { + "epoch": 0.7180204249794002, + "grad_norm": 0.0897190198302269, + "learning_rate": 8.480562717464768e-06, + "loss": 8.6291, + "step": 143780 + }, + { + "epoch": 0.7180703638043396, + "grad_norm": 0.0912693589925766, + "learning_rate": 8.479060802523216e-06, + "loss": 8.6018, + "step": 143790 + }, + { + "epoch": 0.7181203026292792, + "grad_norm": 0.09534019976854324, + "learning_rate": 8.477558887581666e-06, + "loss": 8.6268, + "step": 143800 + }, + { + "epoch": 0.7181702414542186, + "grad_norm": 0.0912948027253151, + "learning_rate": 8.476056972640117e-06, + "loss": 8.6165, + "step": 143810 + }, + { + "epoch": 0.718220180279158, + "grad_norm": 0.09484043717384338, + "learning_rate": 8.474555057698567e-06, + "loss": 8.6171, + "step": 143820 + }, + { + "epoch": 0.7182701191040974, + "grad_norm": 0.09343184530735016, + "learning_rate": 8.473053142757015e-06, + "loss": 8.6158, + "step": 143830 + }, + { + "epoch": 0.718320057929037, + "grad_norm": 0.09978407621383667, + "learning_rate": 8.471551227815464e-06, + "loss": 8.6112, + "step": 143840 + }, + { + "epoch": 0.7183699967539764, + "grad_norm": 0.09003512561321259, + "learning_rate": 8.470049312873914e-06, + "loss": 8.6293, + "step": 143850 + }, + { + "epoch": 0.7184199355789158, + "grad_norm": 0.0940660908818245, + "learning_rate": 8.468547397932364e-06, + "loss": 8.6131, + "step": 143860 + }, + { + "epoch": 0.7184698744038552, + "grad_norm": 0.09280209988355637, + "learning_rate": 8.467045482990814e-06, + "loss": 8.6309, + "step": 143870 + }, + { + "epoch": 0.7185198132287948, + "grad_norm": 0.0923769474029541, + "learning_rate": 8.465543568049264e-06, + "loss": 8.6371, + "step": 143880 + }, + { + "epoch": 0.7185697520537342, + "grad_norm": 0.09040191769599915, + "learning_rate": 8.464041653107711e-06, + "loss": 8.6111, + "step": 143890 + }, + { + "epoch": 0.7186196908786736, + "grad_norm": 0.09328994154930115, + "learning_rate": 8.462539738166161e-06, + "loss": 8.6138, + "step": 143900 + }, + { + "epoch": 0.718669629703613, + "grad_norm": 0.0912991613149643, + "learning_rate": 8.461037823224612e-06, + "loss": 8.601, + "step": 143910 + }, + { + "epoch": 0.7187195685285526, + "grad_norm": 0.09061001241207123, + "learning_rate": 8.459535908283062e-06, + "loss": 8.6132, + "step": 143920 + }, + { + "epoch": 0.718769507353492, + "grad_norm": 0.09003961086273193, + "learning_rate": 8.458033993341512e-06, + "loss": 8.6153, + "step": 143930 + }, + { + "epoch": 0.7188194461784314, + "grad_norm": 0.09332337230443954, + "learning_rate": 8.456532078399959e-06, + "loss": 8.6154, + "step": 143940 + }, + { + "epoch": 0.7188693850033708, + "grad_norm": 0.09105270355939865, + "learning_rate": 8.455030163458409e-06, + "loss": 8.6062, + "step": 143950 + }, + { + "epoch": 0.7189193238283104, + "grad_norm": 0.0938153937458992, + "learning_rate": 8.453528248516859e-06, + "loss": 8.6181, + "step": 143960 + }, + { + "epoch": 0.7189692626532498, + "grad_norm": 0.08828999102115631, + "learning_rate": 8.45202633357531e-06, + "loss": 8.6192, + "step": 143970 + }, + { + "epoch": 0.7190192014781892, + "grad_norm": 0.09276334941387177, + "learning_rate": 8.45052441863376e-06, + "loss": 8.609, + "step": 143980 + }, + { + "epoch": 0.7190691403031286, + "grad_norm": 0.09385494142770767, + "learning_rate": 8.449022503692206e-06, + "loss": 8.6149, + "step": 143990 + }, + { + "epoch": 0.7191190791280682, + "grad_norm": 0.09120655804872513, + "learning_rate": 8.447520588750656e-06, + "loss": 8.6296, + "step": 144000 + }, + { + "epoch": 0.7191690179530076, + "grad_norm": 0.08751309663057327, + "learning_rate": 8.446018673809107e-06, + "loss": 8.6187, + "step": 144010 + }, + { + "epoch": 0.719218956777947, + "grad_norm": 0.08984875679016113, + "learning_rate": 8.444516758867557e-06, + "loss": 8.627, + "step": 144020 + }, + { + "epoch": 0.7192688956028864, + "grad_norm": 0.08760925382375717, + "learning_rate": 8.443014843926007e-06, + "loss": 8.6132, + "step": 144030 + }, + { + "epoch": 0.719318834427826, + "grad_norm": 0.09052575379610062, + "learning_rate": 8.441512928984455e-06, + "loss": 8.6184, + "step": 144040 + }, + { + "epoch": 0.7193687732527654, + "grad_norm": 0.08988479524850845, + "learning_rate": 8.440011014042904e-06, + "loss": 8.6226, + "step": 144050 + }, + { + "epoch": 0.7194187120777048, + "grad_norm": 0.0874130055308342, + "learning_rate": 8.438509099101354e-06, + "loss": 8.6209, + "step": 144060 + }, + { + "epoch": 0.7194686509026442, + "grad_norm": 0.09357550740242004, + "learning_rate": 8.437007184159804e-06, + "loss": 8.5982, + "step": 144070 + }, + { + "epoch": 0.7195185897275838, + "grad_norm": 0.0861838087439537, + "learning_rate": 8.435505269218254e-06, + "loss": 8.6101, + "step": 144080 + }, + { + "epoch": 0.7195685285525232, + "grad_norm": 0.0887862890958786, + "learning_rate": 8.434003354276703e-06, + "loss": 8.6, + "step": 144090 + }, + { + "epoch": 0.7196184673774626, + "grad_norm": 0.09258834272623062, + "learning_rate": 8.432501439335151e-06, + "loss": 8.6318, + "step": 144100 + }, + { + "epoch": 0.719668406202402, + "grad_norm": 0.0883084088563919, + "learning_rate": 8.430999524393602e-06, + "loss": 8.6222, + "step": 144110 + }, + { + "epoch": 0.7197183450273416, + "grad_norm": 0.09339895844459534, + "learning_rate": 8.429497609452052e-06, + "loss": 8.6054, + "step": 144120 + }, + { + "epoch": 0.719768283852281, + "grad_norm": 0.0934942364692688, + "learning_rate": 8.427995694510502e-06, + "loss": 8.6172, + "step": 144130 + }, + { + "epoch": 0.7198182226772204, + "grad_norm": 0.09863001853227615, + "learning_rate": 8.42649377956895e-06, + "loss": 8.6149, + "step": 144140 + }, + { + "epoch": 0.7198681615021598, + "grad_norm": 0.08822454512119293, + "learning_rate": 8.424991864627399e-06, + "loss": 8.614, + "step": 144150 + }, + { + "epoch": 0.7199181003270994, + "grad_norm": 0.08899369835853577, + "learning_rate": 8.423489949685849e-06, + "loss": 8.6169, + "step": 144160 + }, + { + "epoch": 0.7199680391520388, + "grad_norm": 0.08983699232339859, + "learning_rate": 8.4219880347443e-06, + "loss": 8.606, + "step": 144170 + }, + { + "epoch": 0.7200179779769782, + "grad_norm": 0.092051200568676, + "learning_rate": 8.42048611980275e-06, + "loss": 8.6196, + "step": 144180 + }, + { + "epoch": 0.7200679168019176, + "grad_norm": 0.09895046800374985, + "learning_rate": 8.418984204861198e-06, + "loss": 8.6161, + "step": 144190 + }, + { + "epoch": 0.7201178556268572, + "grad_norm": 0.09208567440509796, + "learning_rate": 8.417482289919648e-06, + "loss": 8.6099, + "step": 144200 + }, + { + "epoch": 0.7201677944517966, + "grad_norm": 0.08995640277862549, + "learning_rate": 8.415980374978097e-06, + "loss": 8.626, + "step": 144210 + }, + { + "epoch": 0.720217733276736, + "grad_norm": 0.08942251652479172, + "learning_rate": 8.414478460036547e-06, + "loss": 8.614, + "step": 144220 + }, + { + "epoch": 0.7202676721016754, + "grad_norm": 0.08955737203359604, + "learning_rate": 8.412976545094997e-06, + "loss": 8.6169, + "step": 144230 + }, + { + "epoch": 0.720317610926615, + "grad_norm": 0.08591482788324356, + "learning_rate": 8.411474630153445e-06, + "loss": 8.5999, + "step": 144240 + }, + { + "epoch": 0.7203675497515544, + "grad_norm": 0.09663660079240799, + "learning_rate": 8.409972715211896e-06, + "loss": 8.6279, + "step": 144250 + }, + { + "epoch": 0.7204174885764938, + "grad_norm": 0.10267946124076843, + "learning_rate": 8.408470800270344e-06, + "loss": 8.5984, + "step": 144260 + }, + { + "epoch": 0.7204674274014332, + "grad_norm": 0.08785830438137054, + "learning_rate": 8.406968885328794e-06, + "loss": 8.6214, + "step": 144270 + }, + { + "epoch": 0.7205173662263727, + "grad_norm": 0.0935339480638504, + "learning_rate": 8.405466970387244e-06, + "loss": 8.6095, + "step": 144280 + }, + { + "epoch": 0.7205673050513122, + "grad_norm": 0.08838778734207153, + "learning_rate": 8.403965055445693e-06, + "loss": 8.6216, + "step": 144290 + }, + { + "epoch": 0.7206172438762516, + "grad_norm": 0.09756764769554138, + "learning_rate": 8.402463140504143e-06, + "loss": 8.61, + "step": 144300 + }, + { + "epoch": 0.720667182701191, + "grad_norm": 0.0923360213637352, + "learning_rate": 8.400961225562592e-06, + "loss": 8.5908, + "step": 144310 + }, + { + "epoch": 0.7207171215261305, + "grad_norm": 0.08914266526699066, + "learning_rate": 8.399459310621042e-06, + "loss": 8.5996, + "step": 144320 + }, + { + "epoch": 0.72076706035107, + "grad_norm": 0.09269039332866669, + "learning_rate": 8.397957395679492e-06, + "loss": 8.63, + "step": 144330 + }, + { + "epoch": 0.7208169991760094, + "grad_norm": 0.09222870320081711, + "learning_rate": 8.39645548073794e-06, + "loss": 8.6083, + "step": 144340 + }, + { + "epoch": 0.7208669380009488, + "grad_norm": 0.09075245261192322, + "learning_rate": 8.39495356579639e-06, + "loss": 8.6032, + "step": 144350 + }, + { + "epoch": 0.7209168768258883, + "grad_norm": 0.0898914784193039, + "learning_rate": 8.39345165085484e-06, + "loss": 8.5895, + "step": 144360 + }, + { + "epoch": 0.7209668156508278, + "grad_norm": 0.08764638006687164, + "learning_rate": 8.39194973591329e-06, + "loss": 8.5997, + "step": 144370 + }, + { + "epoch": 0.7210167544757672, + "grad_norm": 0.08820385485887527, + "learning_rate": 8.39044782097174e-06, + "loss": 8.6189, + "step": 144380 + }, + { + "epoch": 0.7210666933007066, + "grad_norm": 0.08908724784851074, + "learning_rate": 8.388945906030188e-06, + "loss": 8.6078, + "step": 144390 + }, + { + "epoch": 0.721116632125646, + "grad_norm": 0.0920150950551033, + "learning_rate": 8.387443991088638e-06, + "loss": 8.6197, + "step": 144400 + }, + { + "epoch": 0.7211665709505856, + "grad_norm": 0.09331239759922028, + "learning_rate": 8.385942076147088e-06, + "loss": 8.6267, + "step": 144410 + }, + { + "epoch": 0.721216509775525, + "grad_norm": 0.08835045248270035, + "learning_rate": 8.384440161205537e-06, + "loss": 8.599, + "step": 144420 + }, + { + "epoch": 0.7212664486004644, + "grad_norm": 0.0986829549074173, + "learning_rate": 8.382938246263987e-06, + "loss": 8.6299, + "step": 144430 + }, + { + "epoch": 0.7213163874254038, + "grad_norm": 0.09488464146852493, + "learning_rate": 8.381436331322435e-06, + "loss": 8.6146, + "step": 144440 + }, + { + "epoch": 0.7213663262503434, + "grad_norm": 0.09445064514875412, + "learning_rate": 8.379934416380886e-06, + "loss": 8.6191, + "step": 144450 + }, + { + "epoch": 0.7214162650752828, + "grad_norm": 0.10629602521657944, + "learning_rate": 8.378432501439336e-06, + "loss": 8.6149, + "step": 144460 + }, + { + "epoch": 0.7214662039002222, + "grad_norm": 0.09508983045816422, + "learning_rate": 8.376930586497784e-06, + "loss": 8.6109, + "step": 144470 + }, + { + "epoch": 0.7215161427251616, + "grad_norm": 0.0893370732665062, + "learning_rate": 8.375428671556234e-06, + "loss": 8.6295, + "step": 144480 + }, + { + "epoch": 0.7215660815501012, + "grad_norm": 0.09526433795690536, + "learning_rate": 8.373926756614683e-06, + "loss": 8.6181, + "step": 144490 + }, + { + "epoch": 0.7216160203750406, + "grad_norm": 0.08652014285326004, + "learning_rate": 8.372424841673133e-06, + "loss": 8.6274, + "step": 144500 + }, + { + "epoch": 0.72166595919998, + "grad_norm": 0.09043004363775253, + "learning_rate": 8.370922926731583e-06, + "loss": 8.6233, + "step": 144510 + }, + { + "epoch": 0.7217158980249194, + "grad_norm": 0.09349585324525833, + "learning_rate": 8.369421011790034e-06, + "loss": 8.6119, + "step": 144520 + }, + { + "epoch": 0.721765836849859, + "grad_norm": 0.09839218109846115, + "learning_rate": 8.367919096848482e-06, + "loss": 8.5964, + "step": 144530 + }, + { + "epoch": 0.7218157756747984, + "grad_norm": 0.09267604351043701, + "learning_rate": 8.36641718190693e-06, + "loss": 8.6024, + "step": 144540 + }, + { + "epoch": 0.7218657144997378, + "grad_norm": 0.08900393545627594, + "learning_rate": 8.36491526696538e-06, + "loss": 8.6205, + "step": 144550 + }, + { + "epoch": 0.7219156533246772, + "grad_norm": 0.08693215250968933, + "learning_rate": 8.36341335202383e-06, + "loss": 8.6103, + "step": 144560 + }, + { + "epoch": 0.7219655921496168, + "grad_norm": 0.09515117853879929, + "learning_rate": 8.361911437082281e-06, + "loss": 8.6159, + "step": 144570 + }, + { + "epoch": 0.7220155309745562, + "grad_norm": 0.08883559703826904, + "learning_rate": 8.36040952214073e-06, + "loss": 8.6283, + "step": 144580 + }, + { + "epoch": 0.7220654697994956, + "grad_norm": 0.0927378311753273, + "learning_rate": 8.358907607199178e-06, + "loss": 8.6171, + "step": 144590 + }, + { + "epoch": 0.722115408624435, + "grad_norm": 0.09397240728139877, + "learning_rate": 8.357405692257628e-06, + "loss": 8.6131, + "step": 144600 + }, + { + "epoch": 0.7221653474493746, + "grad_norm": 0.0927707701921463, + "learning_rate": 8.355903777316078e-06, + "loss": 8.6159, + "step": 144610 + }, + { + "epoch": 0.722215286274314, + "grad_norm": 0.09005044400691986, + "learning_rate": 8.354401862374529e-06, + "loss": 8.6134, + "step": 144620 + }, + { + "epoch": 0.7222652250992534, + "grad_norm": 0.08939428627490997, + "learning_rate": 8.352899947432977e-06, + "loss": 8.6081, + "step": 144630 + }, + { + "epoch": 0.7223151639241928, + "grad_norm": 0.09121254086494446, + "learning_rate": 8.351398032491425e-06, + "loss": 8.6085, + "step": 144640 + }, + { + "epoch": 0.7223651027491323, + "grad_norm": 0.08655776083469391, + "learning_rate": 8.349896117549876e-06, + "loss": 8.6199, + "step": 144650 + }, + { + "epoch": 0.7224150415740718, + "grad_norm": 0.09415041655302048, + "learning_rate": 8.348394202608326e-06, + "loss": 8.605, + "step": 144660 + }, + { + "epoch": 0.7224649803990112, + "grad_norm": 0.09268126636743546, + "learning_rate": 8.346892287666776e-06, + "loss": 8.6162, + "step": 144670 + }, + { + "epoch": 0.7225149192239506, + "grad_norm": 0.1028081476688385, + "learning_rate": 8.345390372725226e-06, + "loss": 8.6293, + "step": 144680 + }, + { + "epoch": 0.7225648580488901, + "grad_norm": 0.09145328402519226, + "learning_rate": 8.343888457783673e-06, + "loss": 8.6075, + "step": 144690 + }, + { + "epoch": 0.7226147968738296, + "grad_norm": 0.08995439857244492, + "learning_rate": 8.342386542842123e-06, + "loss": 8.5961, + "step": 144700 + }, + { + "epoch": 0.722664735698769, + "grad_norm": 0.08796056360006332, + "learning_rate": 8.340884627900573e-06, + "loss": 8.6194, + "step": 144710 + }, + { + "epoch": 0.7227146745237084, + "grad_norm": 0.0950382873415947, + "learning_rate": 8.339382712959024e-06, + "loss": 8.6196, + "step": 144720 + }, + { + "epoch": 0.7227646133486479, + "grad_norm": 0.09233977645635605, + "learning_rate": 8.337880798017474e-06, + "loss": 8.6228, + "step": 144730 + }, + { + "epoch": 0.7228145521735874, + "grad_norm": 0.09013422578573227, + "learning_rate": 8.33637888307592e-06, + "loss": 8.6222, + "step": 144740 + }, + { + "epoch": 0.7228644909985268, + "grad_norm": 0.0920189619064331, + "learning_rate": 8.33487696813437e-06, + "loss": 8.6278, + "step": 144750 + }, + { + "epoch": 0.7229144298234662, + "grad_norm": 0.09212841838598251, + "learning_rate": 8.333375053192821e-06, + "loss": 8.6109, + "step": 144760 + }, + { + "epoch": 0.7229643686484057, + "grad_norm": 0.09851231426000595, + "learning_rate": 8.331873138251271e-06, + "loss": 8.6134, + "step": 144770 + }, + { + "epoch": 0.7230143074733452, + "grad_norm": 0.09373413026332855, + "learning_rate": 8.330371223309721e-06, + "loss": 8.5994, + "step": 144780 + }, + { + "epoch": 0.7230642462982846, + "grad_norm": 0.09392852336168289, + "learning_rate": 8.328869308368168e-06, + "loss": 8.628, + "step": 144790 + }, + { + "epoch": 0.723114185123224, + "grad_norm": 0.09037362039089203, + "learning_rate": 8.327367393426618e-06, + "loss": 8.6152, + "step": 144800 + }, + { + "epoch": 0.7231641239481635, + "grad_norm": 0.08910314738750458, + "learning_rate": 8.325865478485068e-06, + "loss": 8.5945, + "step": 144810 + }, + { + "epoch": 0.723214062773103, + "grad_norm": 0.08930026739835739, + "learning_rate": 8.324363563543519e-06, + "loss": 8.61, + "step": 144820 + }, + { + "epoch": 0.7232640015980424, + "grad_norm": 0.09371279925107956, + "learning_rate": 8.322861648601969e-06, + "loss": 8.6156, + "step": 144830 + }, + { + "epoch": 0.7233139404229818, + "grad_norm": 0.0969967246055603, + "learning_rate": 8.321359733660417e-06, + "loss": 8.6191, + "step": 144840 + }, + { + "epoch": 0.7233638792479213, + "grad_norm": 0.0957605391740799, + "learning_rate": 8.319857818718866e-06, + "loss": 8.6104, + "step": 144850 + }, + { + "epoch": 0.7234138180728608, + "grad_norm": 0.09280975908041, + "learning_rate": 8.318355903777316e-06, + "loss": 8.6152, + "step": 144860 + }, + { + "epoch": 0.7234637568978002, + "grad_norm": 0.09555871039628983, + "learning_rate": 8.316853988835766e-06, + "loss": 8.6135, + "step": 144870 + }, + { + "epoch": 0.7235136957227396, + "grad_norm": 0.08848311007022858, + "learning_rate": 8.315352073894216e-06, + "loss": 8.6073, + "step": 144880 + }, + { + "epoch": 0.7235636345476791, + "grad_norm": 0.09260237962007523, + "learning_rate": 8.313850158952665e-06, + "loss": 8.613, + "step": 144890 + }, + { + "epoch": 0.7236135733726186, + "grad_norm": 0.08797964453697205, + "learning_rate": 8.312348244011113e-06, + "loss": 8.6129, + "step": 144900 + }, + { + "epoch": 0.723663512197558, + "grad_norm": 0.09759566187858582, + "learning_rate": 8.310846329069563e-06, + "loss": 8.6059, + "step": 144910 + }, + { + "epoch": 0.7237134510224974, + "grad_norm": 0.09639012068510056, + "learning_rate": 8.309344414128014e-06, + "loss": 8.607, + "step": 144920 + }, + { + "epoch": 0.7237633898474369, + "grad_norm": 0.09098049253225327, + "learning_rate": 8.307842499186464e-06, + "loss": 8.6158, + "step": 144930 + }, + { + "epoch": 0.7238133286723764, + "grad_norm": 0.08897264301776886, + "learning_rate": 8.306340584244912e-06, + "loss": 8.6348, + "step": 144940 + }, + { + "epoch": 0.7238632674973158, + "grad_norm": 0.09100917726755142, + "learning_rate": 8.30483866930336e-06, + "loss": 8.6146, + "step": 144950 + }, + { + "epoch": 0.7239132063222552, + "grad_norm": 0.0964989885687828, + "learning_rate": 8.303336754361811e-06, + "loss": 8.6144, + "step": 144960 + }, + { + "epoch": 0.7239631451471947, + "grad_norm": 0.10053501278162003, + "learning_rate": 8.301834839420261e-06, + "loss": 8.6179, + "step": 144970 + }, + { + "epoch": 0.7240130839721342, + "grad_norm": 0.09664203226566315, + "learning_rate": 8.300332924478711e-06, + "loss": 8.6196, + "step": 144980 + }, + { + "epoch": 0.7240630227970736, + "grad_norm": 0.08678041398525238, + "learning_rate": 8.298831009537161e-06, + "loss": 8.6115, + "step": 144990 + }, + { + "epoch": 0.724112961622013, + "grad_norm": 0.09090323746204376, + "learning_rate": 8.29732909459561e-06, + "loss": 8.6013, + "step": 145000 + }, + { + "epoch": 0.7241629004469525, + "grad_norm": 0.09203115105628967, + "learning_rate": 8.295827179654058e-06, + "loss": 8.6139, + "step": 145010 + }, + { + "epoch": 0.724212839271892, + "grad_norm": 0.0934581458568573, + "learning_rate": 8.294325264712509e-06, + "loss": 8.6223, + "step": 145020 + }, + { + "epoch": 0.7242627780968314, + "grad_norm": 0.09179078042507172, + "learning_rate": 8.292823349770959e-06, + "loss": 8.6099, + "step": 145030 + }, + { + "epoch": 0.7243127169217708, + "grad_norm": 0.09519582986831665, + "learning_rate": 8.291321434829409e-06, + "loss": 8.6186, + "step": 145040 + }, + { + "epoch": 0.7243626557467103, + "grad_norm": 0.08919553458690643, + "learning_rate": 8.289819519887857e-06, + "loss": 8.607, + "step": 145050 + }, + { + "epoch": 0.7244125945716497, + "grad_norm": 0.09014479070901871, + "learning_rate": 8.288317604946306e-06, + "loss": 8.5982, + "step": 145060 + }, + { + "epoch": 0.7244625333965892, + "grad_norm": 0.0924357920885086, + "learning_rate": 8.286815690004756e-06, + "loss": 8.6198, + "step": 145070 + }, + { + "epoch": 0.7245124722215286, + "grad_norm": 0.09133771061897278, + "learning_rate": 8.285313775063206e-06, + "loss": 8.6248, + "step": 145080 + }, + { + "epoch": 0.7245624110464681, + "grad_norm": 0.08713619410991669, + "learning_rate": 8.283811860121656e-06, + "loss": 8.6429, + "step": 145090 + }, + { + "epoch": 0.7246123498714075, + "grad_norm": 0.09331973642110825, + "learning_rate": 8.282309945180105e-06, + "loss": 8.6161, + "step": 145100 + }, + { + "epoch": 0.724662288696347, + "grad_norm": 0.08959534019231796, + "learning_rate": 8.280808030238553e-06, + "loss": 8.6193, + "step": 145110 + }, + { + "epoch": 0.7247122275212864, + "grad_norm": 0.09544025361537933, + "learning_rate": 8.279306115297004e-06, + "loss": 8.6113, + "step": 145120 + }, + { + "epoch": 0.7247621663462259, + "grad_norm": 0.08845238387584686, + "learning_rate": 8.277804200355454e-06, + "loss": 8.6096, + "step": 145130 + }, + { + "epoch": 0.7248121051711653, + "grad_norm": 0.09223693609237671, + "learning_rate": 8.276302285413904e-06, + "loss": 8.606, + "step": 145140 + }, + { + "epoch": 0.7248620439961048, + "grad_norm": 0.09197333455085754, + "learning_rate": 8.274800370472352e-06, + "loss": 8.6065, + "step": 145150 + }, + { + "epoch": 0.7249119828210442, + "grad_norm": 0.0903843566775322, + "learning_rate": 8.273298455530803e-06, + "loss": 8.6257, + "step": 145160 + }, + { + "epoch": 0.7249619216459837, + "grad_norm": 0.08799325674772263, + "learning_rate": 8.271796540589251e-06, + "loss": 8.6078, + "step": 145170 + }, + { + "epoch": 0.7250118604709231, + "grad_norm": 0.09171675890684128, + "learning_rate": 8.270294625647701e-06, + "loss": 8.6055, + "step": 145180 + }, + { + "epoch": 0.7250617992958626, + "grad_norm": 0.09235136955976486, + "learning_rate": 8.268792710706151e-06, + "loss": 8.5921, + "step": 145190 + }, + { + "epoch": 0.725111738120802, + "grad_norm": 0.09448470175266266, + "learning_rate": 8.2672907957646e-06, + "loss": 8.6096, + "step": 145200 + }, + { + "epoch": 0.7251616769457415, + "grad_norm": 0.09224043041467667, + "learning_rate": 8.26578888082305e-06, + "loss": 8.6311, + "step": 145210 + }, + { + "epoch": 0.7252116157706809, + "grad_norm": 0.09602584689855576, + "learning_rate": 8.264286965881499e-06, + "loss": 8.6032, + "step": 145220 + }, + { + "epoch": 0.7252615545956204, + "grad_norm": 0.09572752565145493, + "learning_rate": 8.262785050939949e-06, + "loss": 8.6264, + "step": 145230 + }, + { + "epoch": 0.7253114934205598, + "grad_norm": 0.08928846567869186, + "learning_rate": 8.261283135998399e-06, + "loss": 8.6099, + "step": 145240 + }, + { + "epoch": 0.7253614322454993, + "grad_norm": 0.09179164469242096, + "learning_rate": 8.259781221056847e-06, + "loss": 8.6001, + "step": 145250 + }, + { + "epoch": 0.7254113710704387, + "grad_norm": 0.09306824952363968, + "learning_rate": 8.258279306115298e-06, + "loss": 8.6105, + "step": 145260 + }, + { + "epoch": 0.7254613098953782, + "grad_norm": 0.08858099579811096, + "learning_rate": 8.256777391173746e-06, + "loss": 8.6093, + "step": 145270 + }, + { + "epoch": 0.7255112487203176, + "grad_norm": 0.09503430128097534, + "learning_rate": 8.255275476232196e-06, + "loss": 8.5933, + "step": 145280 + }, + { + "epoch": 0.7255611875452571, + "grad_norm": 0.0941505953669548, + "learning_rate": 8.253773561290646e-06, + "loss": 8.6164, + "step": 145290 + }, + { + "epoch": 0.7256111263701965, + "grad_norm": 0.0895562395453453, + "learning_rate": 8.252271646349095e-06, + "loss": 8.6136, + "step": 145300 + }, + { + "epoch": 0.725661065195136, + "grad_norm": 0.0945998951792717, + "learning_rate": 8.250769731407545e-06, + "loss": 8.6132, + "step": 145310 + }, + { + "epoch": 0.7257110040200754, + "grad_norm": 0.09146866202354431, + "learning_rate": 8.249267816465995e-06, + "loss": 8.6185, + "step": 145320 + }, + { + "epoch": 0.7257609428450149, + "grad_norm": 0.09208477288484573, + "learning_rate": 8.247765901524444e-06, + "loss": 8.6098, + "step": 145330 + }, + { + "epoch": 0.7258108816699543, + "grad_norm": 0.10175745189189911, + "learning_rate": 8.246263986582894e-06, + "loss": 8.607, + "step": 145340 + }, + { + "epoch": 0.7258608204948938, + "grad_norm": 0.08928140252828598, + "learning_rate": 8.244762071641342e-06, + "loss": 8.611, + "step": 145350 + }, + { + "epoch": 0.7259107593198332, + "grad_norm": 0.08949202299118042, + "learning_rate": 8.243260156699793e-06, + "loss": 8.6125, + "step": 145360 + }, + { + "epoch": 0.7259606981447726, + "grad_norm": 0.10165835916996002, + "learning_rate": 8.241758241758243e-06, + "loss": 8.6136, + "step": 145370 + }, + { + "epoch": 0.7260106369697121, + "grad_norm": 0.09630440920591354, + "learning_rate": 8.240256326816691e-06, + "loss": 8.6119, + "step": 145380 + }, + { + "epoch": 0.7260605757946516, + "grad_norm": 0.09105534851551056, + "learning_rate": 8.238754411875141e-06, + "loss": 8.6037, + "step": 145390 + }, + { + "epoch": 0.726110514619591, + "grad_norm": 0.09389536082744598, + "learning_rate": 8.23725249693359e-06, + "loss": 8.5986, + "step": 145400 + }, + { + "epoch": 0.7261604534445304, + "grad_norm": 0.08979053050279617, + "learning_rate": 8.23575058199204e-06, + "loss": 8.6063, + "step": 145410 + }, + { + "epoch": 0.7262103922694699, + "grad_norm": 0.09311927855014801, + "learning_rate": 8.23424866705049e-06, + "loss": 8.6059, + "step": 145420 + }, + { + "epoch": 0.7262603310944094, + "grad_norm": 0.08796468377113342, + "learning_rate": 8.232746752108939e-06, + "loss": 8.6218, + "step": 145430 + }, + { + "epoch": 0.7263102699193488, + "grad_norm": 0.0910877138376236, + "learning_rate": 8.231244837167389e-06, + "loss": 8.611, + "step": 145440 + }, + { + "epoch": 0.7263602087442882, + "grad_norm": 0.09477691352367401, + "learning_rate": 8.229742922225837e-06, + "loss": 8.6215, + "step": 145450 + }, + { + "epoch": 0.7264101475692277, + "grad_norm": 0.0948781967163086, + "learning_rate": 8.228241007284288e-06, + "loss": 8.5978, + "step": 145460 + }, + { + "epoch": 0.7264600863941671, + "grad_norm": 0.08527876436710358, + "learning_rate": 8.226739092342738e-06, + "loss": 8.6191, + "step": 145470 + }, + { + "epoch": 0.7265100252191066, + "grad_norm": 0.09014835208654404, + "learning_rate": 8.225237177401188e-06, + "loss": 8.6033, + "step": 145480 + }, + { + "epoch": 0.726559964044046, + "grad_norm": 0.09688574820756912, + "learning_rate": 8.223735262459636e-06, + "loss": 8.6061, + "step": 145490 + }, + { + "epoch": 0.7266099028689855, + "grad_norm": 0.08917857706546783, + "learning_rate": 8.222233347518085e-06, + "loss": 8.6284, + "step": 145500 + }, + { + "epoch": 0.726659841693925, + "grad_norm": 0.09284509718418121, + "learning_rate": 8.220731432576535e-06, + "loss": 8.6115, + "step": 145510 + }, + { + "epoch": 0.7267097805188644, + "grad_norm": 0.08809034526348114, + "learning_rate": 8.219229517634985e-06, + "loss": 8.6158, + "step": 145520 + }, + { + "epoch": 0.7267597193438038, + "grad_norm": 0.09083345532417297, + "learning_rate": 8.217727602693435e-06, + "loss": 8.6272, + "step": 145530 + }, + { + "epoch": 0.7268096581687433, + "grad_norm": 0.08907760679721832, + "learning_rate": 8.216225687751884e-06, + "loss": 8.6008, + "step": 145540 + }, + { + "epoch": 0.7268595969936827, + "grad_norm": 0.08739221096038818, + "learning_rate": 8.214723772810332e-06, + "loss": 8.6271, + "step": 145550 + }, + { + "epoch": 0.7269095358186222, + "grad_norm": 0.0876481756567955, + "learning_rate": 8.213221857868783e-06, + "loss": 8.6061, + "step": 145560 + }, + { + "epoch": 0.7269594746435616, + "grad_norm": 0.08937806636095047, + "learning_rate": 8.211719942927233e-06, + "loss": 8.629, + "step": 145570 + }, + { + "epoch": 0.7270094134685011, + "grad_norm": 0.08746001869440079, + "learning_rate": 8.210218027985683e-06, + "loss": 8.6011, + "step": 145580 + }, + { + "epoch": 0.7270593522934405, + "grad_norm": 0.09080873429775238, + "learning_rate": 8.208716113044131e-06, + "loss": 8.6195, + "step": 145590 + }, + { + "epoch": 0.72710929111838, + "grad_norm": 0.09297414124011993, + "learning_rate": 8.20721419810258e-06, + "loss": 8.6009, + "step": 145600 + }, + { + "epoch": 0.7271592299433194, + "grad_norm": 0.09286045283079147, + "learning_rate": 8.20571228316103e-06, + "loss": 8.6177, + "step": 145610 + }, + { + "epoch": 0.7272091687682589, + "grad_norm": 0.08479689061641693, + "learning_rate": 8.20421036821948e-06, + "loss": 8.6263, + "step": 145620 + }, + { + "epoch": 0.7272591075931983, + "grad_norm": 0.09006743133068085, + "learning_rate": 8.20270845327793e-06, + "loss": 8.598, + "step": 145630 + }, + { + "epoch": 0.7273090464181378, + "grad_norm": 0.09547232836484909, + "learning_rate": 8.20120653833638e-06, + "loss": 8.5996, + "step": 145640 + }, + { + "epoch": 0.7273589852430772, + "grad_norm": 0.0883583128452301, + "learning_rate": 8.199704623394827e-06, + "loss": 8.6021, + "step": 145650 + }, + { + "epoch": 0.7274089240680167, + "grad_norm": 0.09698734432458878, + "learning_rate": 8.198202708453278e-06, + "loss": 8.6079, + "step": 145660 + }, + { + "epoch": 0.7274588628929561, + "grad_norm": 0.09151437133550644, + "learning_rate": 8.196700793511728e-06, + "loss": 8.6057, + "step": 145670 + }, + { + "epoch": 0.7275088017178956, + "grad_norm": 0.09400162845849991, + "learning_rate": 8.195198878570178e-06, + "loss": 8.6039, + "step": 145680 + }, + { + "epoch": 0.727558740542835, + "grad_norm": 0.08918856829404831, + "learning_rate": 8.193696963628628e-06, + "loss": 8.6045, + "step": 145690 + }, + { + "epoch": 0.7276086793677745, + "grad_norm": 0.09551417827606201, + "learning_rate": 8.192195048687075e-06, + "loss": 8.6184, + "step": 145700 + }, + { + "epoch": 0.7276586181927139, + "grad_norm": 0.09140796959400177, + "learning_rate": 8.190693133745525e-06, + "loss": 8.5989, + "step": 145710 + }, + { + "epoch": 0.7277085570176534, + "grad_norm": 0.09093940258026123, + "learning_rate": 8.189191218803975e-06, + "loss": 8.6054, + "step": 145720 + }, + { + "epoch": 0.7277584958425928, + "grad_norm": 0.08896015584468842, + "learning_rate": 8.187689303862425e-06, + "loss": 8.6201, + "step": 145730 + }, + { + "epoch": 0.7278084346675323, + "grad_norm": 0.08995377272367477, + "learning_rate": 8.186187388920876e-06, + "loss": 8.6244, + "step": 145740 + }, + { + "epoch": 0.7278583734924717, + "grad_norm": 0.08884330838918686, + "learning_rate": 8.184685473979322e-06, + "loss": 8.6063, + "step": 145750 + }, + { + "epoch": 0.7279083123174112, + "grad_norm": 0.09230002015829086, + "learning_rate": 8.183183559037773e-06, + "loss": 8.6138, + "step": 145760 + }, + { + "epoch": 0.7279582511423506, + "grad_norm": 0.0911339670419693, + "learning_rate": 8.181681644096223e-06, + "loss": 8.5995, + "step": 145770 + }, + { + "epoch": 0.7280081899672901, + "grad_norm": 0.08715797960758209, + "learning_rate": 8.180179729154673e-06, + "loss": 8.613, + "step": 145780 + }, + { + "epoch": 0.7280581287922295, + "grad_norm": 0.09273428469896317, + "learning_rate": 8.178677814213123e-06, + "loss": 8.6247, + "step": 145790 + }, + { + "epoch": 0.728108067617169, + "grad_norm": 0.09207670390605927, + "learning_rate": 8.177175899271572e-06, + "loss": 8.6245, + "step": 145800 + }, + { + "epoch": 0.7281580064421084, + "grad_norm": 0.09679616987705231, + "learning_rate": 8.17567398433002e-06, + "loss": 8.6092, + "step": 145810 + }, + { + "epoch": 0.7282079452670479, + "grad_norm": 0.09426999092102051, + "learning_rate": 8.17417206938847e-06, + "loss": 8.6225, + "step": 145820 + }, + { + "epoch": 0.7282578840919873, + "grad_norm": 0.09463541954755783, + "learning_rate": 8.17267015444692e-06, + "loss": 8.6008, + "step": 145830 + }, + { + "epoch": 0.7283078229169268, + "grad_norm": 0.09443298727273941, + "learning_rate": 8.17116823950537e-06, + "loss": 8.5991, + "step": 145840 + }, + { + "epoch": 0.7283577617418662, + "grad_norm": 0.09195612370967865, + "learning_rate": 8.169666324563819e-06, + "loss": 8.5961, + "step": 145850 + }, + { + "epoch": 0.7284077005668057, + "grad_norm": 0.08928360790014267, + "learning_rate": 8.168164409622268e-06, + "loss": 8.634, + "step": 145860 + }, + { + "epoch": 0.7284576393917451, + "grad_norm": 0.09235423803329468, + "learning_rate": 8.166662494680718e-06, + "loss": 8.6134, + "step": 145870 + }, + { + "epoch": 0.7285075782166845, + "grad_norm": 0.09218775480985641, + "learning_rate": 8.165160579739168e-06, + "loss": 8.6084, + "step": 145880 + }, + { + "epoch": 0.728557517041624, + "grad_norm": 0.09094615280628204, + "learning_rate": 8.163658664797618e-06, + "loss": 8.6155, + "step": 145890 + }, + { + "epoch": 0.7286074558665635, + "grad_norm": 0.08593304455280304, + "learning_rate": 8.162156749856067e-06, + "loss": 8.6124, + "step": 145900 + }, + { + "epoch": 0.7286573946915029, + "grad_norm": 0.09099908173084259, + "learning_rate": 8.160654834914515e-06, + "loss": 8.6163, + "step": 145910 + }, + { + "epoch": 0.7287073335164423, + "grad_norm": 0.0902167484164238, + "learning_rate": 8.159152919972965e-06, + "loss": 8.6227, + "step": 145920 + }, + { + "epoch": 0.7287572723413818, + "grad_norm": 0.09443819522857666, + "learning_rate": 8.157651005031415e-06, + "loss": 8.6165, + "step": 145930 + }, + { + "epoch": 0.7288072111663213, + "grad_norm": 0.09116199612617493, + "learning_rate": 8.156149090089866e-06, + "loss": 8.6201, + "step": 145940 + }, + { + "epoch": 0.7288571499912607, + "grad_norm": 0.09759961068630219, + "learning_rate": 8.154647175148314e-06, + "loss": 8.6189, + "step": 145950 + }, + { + "epoch": 0.7289070888162001, + "grad_norm": 0.09084867686033249, + "learning_rate": 8.153145260206764e-06, + "loss": 8.5922, + "step": 145960 + }, + { + "epoch": 0.7289570276411396, + "grad_norm": 0.09356144815683365, + "learning_rate": 8.151643345265213e-06, + "loss": 8.6069, + "step": 145970 + }, + { + "epoch": 0.7290069664660791, + "grad_norm": 0.08888962864875793, + "learning_rate": 8.150141430323663e-06, + "loss": 8.6001, + "step": 145980 + }, + { + "epoch": 0.7290569052910185, + "grad_norm": 0.08829820901155472, + "learning_rate": 8.148639515382113e-06, + "loss": 8.6117, + "step": 145990 + }, + { + "epoch": 0.7291068441159579, + "grad_norm": 0.092521533370018, + "learning_rate": 8.147137600440562e-06, + "loss": 8.6116, + "step": 146000 + }, + { + "epoch": 0.7291567829408974, + "grad_norm": 0.09116620570421219, + "learning_rate": 8.145635685499012e-06, + "loss": 8.6103, + "step": 146010 + }, + { + "epoch": 0.7292067217658369, + "grad_norm": 0.09370893239974976, + "learning_rate": 8.14413377055746e-06, + "loss": 8.5975, + "step": 146020 + }, + { + "epoch": 0.7292566605907763, + "grad_norm": 0.09606023132801056, + "learning_rate": 8.14263185561591e-06, + "loss": 8.6029, + "step": 146030 + }, + { + "epoch": 0.7293065994157157, + "grad_norm": 0.08850355446338654, + "learning_rate": 8.14112994067436e-06, + "loss": 8.6077, + "step": 146040 + }, + { + "epoch": 0.7293565382406552, + "grad_norm": 0.09388591349124908, + "learning_rate": 8.13962802573281e-06, + "loss": 8.6063, + "step": 146050 + }, + { + "epoch": 0.7294064770655947, + "grad_norm": 0.09386676549911499, + "learning_rate": 8.13812611079126e-06, + "loss": 8.5971, + "step": 146060 + }, + { + "epoch": 0.7294564158905341, + "grad_norm": 0.09026592969894409, + "learning_rate": 8.136624195849708e-06, + "loss": 8.5921, + "step": 146070 + }, + { + "epoch": 0.7295063547154735, + "grad_norm": 0.09007930010557175, + "learning_rate": 8.135122280908158e-06, + "loss": 8.6017, + "step": 146080 + }, + { + "epoch": 0.729556293540413, + "grad_norm": 0.0955241322517395, + "learning_rate": 8.133620365966608e-06, + "loss": 8.6081, + "step": 146090 + }, + { + "epoch": 0.7296062323653525, + "grad_norm": 0.09348488599061966, + "learning_rate": 8.132118451025057e-06, + "loss": 8.5981, + "step": 146100 + }, + { + "epoch": 0.7296561711902919, + "grad_norm": 0.08962753415107727, + "learning_rate": 8.130616536083507e-06, + "loss": 8.6063, + "step": 146110 + }, + { + "epoch": 0.7297061100152313, + "grad_norm": 0.09236380457878113, + "learning_rate": 8.129114621141957e-06, + "loss": 8.5859, + "step": 146120 + }, + { + "epoch": 0.7297560488401708, + "grad_norm": 0.08918324112892151, + "learning_rate": 8.127612706200406e-06, + "loss": 8.6115, + "step": 146130 + }, + { + "epoch": 0.7298059876651103, + "grad_norm": 0.090632364153862, + "learning_rate": 8.126110791258856e-06, + "loss": 8.6016, + "step": 146140 + }, + { + "epoch": 0.7298559264900497, + "grad_norm": 0.09161046892404556, + "learning_rate": 8.124608876317304e-06, + "loss": 8.5882, + "step": 146150 + }, + { + "epoch": 0.7299058653149891, + "grad_norm": 0.09371636062860489, + "learning_rate": 8.123106961375754e-06, + "loss": 8.5995, + "step": 146160 + }, + { + "epoch": 0.7299558041399286, + "grad_norm": 0.08753936737775803, + "learning_rate": 8.121605046434205e-06, + "loss": 8.5968, + "step": 146170 + }, + { + "epoch": 0.7300057429648681, + "grad_norm": 0.0922481119632721, + "learning_rate": 8.120103131492653e-06, + "loss": 8.5951, + "step": 146180 + }, + { + "epoch": 0.7300556817898075, + "grad_norm": 0.0926506519317627, + "learning_rate": 8.118601216551103e-06, + "loss": 8.603, + "step": 146190 + }, + { + "epoch": 0.7301056206147469, + "grad_norm": 0.08759573101997375, + "learning_rate": 8.117099301609552e-06, + "loss": 8.6076, + "step": 146200 + }, + { + "epoch": 0.7301555594396864, + "grad_norm": 0.09604066610336304, + "learning_rate": 8.115597386668002e-06, + "loss": 8.6029, + "step": 146210 + }, + { + "epoch": 0.7302054982646259, + "grad_norm": 0.08609198778867722, + "learning_rate": 8.114095471726452e-06, + "loss": 8.6056, + "step": 146220 + }, + { + "epoch": 0.7302554370895653, + "grad_norm": 0.08941318094730377, + "learning_rate": 8.1125935567849e-06, + "loss": 8.6103, + "step": 146230 + }, + { + "epoch": 0.7303053759145047, + "grad_norm": 0.09113612025976181, + "learning_rate": 8.11109164184335e-06, + "loss": 8.6069, + "step": 146240 + }, + { + "epoch": 0.7303553147394442, + "grad_norm": 0.09444758296012878, + "learning_rate": 8.1095897269018e-06, + "loss": 8.6201, + "step": 146250 + }, + { + "epoch": 0.7304052535643837, + "grad_norm": 0.0867748036980629, + "learning_rate": 8.10808781196025e-06, + "loss": 8.6056, + "step": 146260 + }, + { + "epoch": 0.7304551923893231, + "grad_norm": 0.09332028776407242, + "learning_rate": 8.1065858970187e-06, + "loss": 8.6122, + "step": 146270 + }, + { + "epoch": 0.7305051312142625, + "grad_norm": 0.09203875064849854, + "learning_rate": 8.10508398207715e-06, + "loss": 8.6157, + "step": 146280 + }, + { + "epoch": 0.730555070039202, + "grad_norm": 0.09554773569107056, + "learning_rate": 8.103582067135598e-06, + "loss": 8.5996, + "step": 146290 + }, + { + "epoch": 0.7306050088641415, + "grad_norm": 0.09731141477823257, + "learning_rate": 8.102080152194047e-06, + "loss": 8.5881, + "step": 146300 + }, + { + "epoch": 0.7306549476890809, + "grad_norm": 0.0938003659248352, + "learning_rate": 8.100578237252497e-06, + "loss": 8.6107, + "step": 146310 + }, + { + "epoch": 0.7307048865140203, + "grad_norm": 0.09249870479106903, + "learning_rate": 8.099076322310947e-06, + "loss": 8.6074, + "step": 146320 + }, + { + "epoch": 0.7307548253389597, + "grad_norm": 0.08894717693328857, + "learning_rate": 8.097574407369397e-06, + "loss": 8.6157, + "step": 146330 + }, + { + "epoch": 0.7308047641638993, + "grad_norm": 0.08928055316209793, + "learning_rate": 8.096072492427846e-06, + "loss": 8.611, + "step": 146340 + }, + { + "epoch": 0.7308547029888387, + "grad_norm": 0.08695624768733978, + "learning_rate": 8.094570577486294e-06, + "loss": 8.5939, + "step": 146350 + }, + { + "epoch": 0.7309046418137781, + "grad_norm": 0.09484840929508209, + "learning_rate": 8.093068662544744e-06, + "loss": 8.6148, + "step": 146360 + }, + { + "epoch": 0.7309545806387175, + "grad_norm": 0.09645149856805801, + "learning_rate": 8.091566747603195e-06, + "loss": 8.6244, + "step": 146370 + }, + { + "epoch": 0.731004519463657, + "grad_norm": 0.09232309460639954, + "learning_rate": 8.090064832661645e-06, + "loss": 8.6222, + "step": 146380 + }, + { + "epoch": 0.7310544582885965, + "grad_norm": 0.0919228121638298, + "learning_rate": 8.088562917720093e-06, + "loss": 8.5962, + "step": 146390 + }, + { + "epoch": 0.7311043971135359, + "grad_norm": 0.09435618668794632, + "learning_rate": 8.087061002778542e-06, + "loss": 8.62, + "step": 146400 + }, + { + "epoch": 0.7311543359384753, + "grad_norm": 0.09660220891237259, + "learning_rate": 8.085559087836992e-06, + "loss": 8.5962, + "step": 146410 + }, + { + "epoch": 0.7312042747634148, + "grad_norm": 0.0912780910730362, + "learning_rate": 8.084057172895442e-06, + "loss": 8.6183, + "step": 146420 + }, + { + "epoch": 0.7312542135883543, + "grad_norm": 0.09284259378910065, + "learning_rate": 8.082555257953892e-06, + "loss": 8.61, + "step": 146430 + }, + { + "epoch": 0.7313041524132937, + "grad_norm": 0.09274186193943024, + "learning_rate": 8.08105334301234e-06, + "loss": 8.6186, + "step": 146440 + }, + { + "epoch": 0.7313540912382331, + "grad_norm": 0.09855927526950836, + "learning_rate": 8.07955142807079e-06, + "loss": 8.5975, + "step": 146450 + }, + { + "epoch": 0.7314040300631726, + "grad_norm": 0.0931464433670044, + "learning_rate": 8.07804951312924e-06, + "loss": 8.6029, + "step": 146460 + }, + { + "epoch": 0.7314539688881121, + "grad_norm": 0.09151794761419296, + "learning_rate": 8.07654759818769e-06, + "loss": 8.596, + "step": 146470 + }, + { + "epoch": 0.7315039077130515, + "grad_norm": 0.08983919769525528, + "learning_rate": 8.07504568324614e-06, + "loss": 8.6227, + "step": 146480 + }, + { + "epoch": 0.7315538465379909, + "grad_norm": 0.09787040948867798, + "learning_rate": 8.07354376830459e-06, + "loss": 8.6004, + "step": 146490 + }, + { + "epoch": 0.7316037853629304, + "grad_norm": 0.09325719624757767, + "learning_rate": 8.072041853363037e-06, + "loss": 8.612, + "step": 146500 + }, + { + "epoch": 0.7316537241878699, + "grad_norm": 0.09518974274396896, + "learning_rate": 8.070539938421487e-06, + "loss": 8.6086, + "step": 146510 + }, + { + "epoch": 0.7317036630128093, + "grad_norm": 0.09281483292579651, + "learning_rate": 8.069038023479937e-06, + "loss": 8.6059, + "step": 146520 + }, + { + "epoch": 0.7317536018377487, + "grad_norm": 0.09161432087421417, + "learning_rate": 8.067536108538387e-06, + "loss": 8.6162, + "step": 146530 + }, + { + "epoch": 0.7318035406626882, + "grad_norm": 0.08932630717754364, + "learning_rate": 8.066034193596837e-06, + "loss": 8.6122, + "step": 146540 + }, + { + "epoch": 0.7318534794876277, + "grad_norm": 0.10480964183807373, + "learning_rate": 8.064532278655284e-06, + "loss": 8.5951, + "step": 146550 + }, + { + "epoch": 0.7319034183125671, + "grad_norm": 0.09288931638002396, + "learning_rate": 8.063030363713734e-06, + "loss": 8.6034, + "step": 146560 + }, + { + "epoch": 0.7319533571375065, + "grad_norm": 0.09532026201486588, + "learning_rate": 8.061528448772185e-06, + "loss": 8.6096, + "step": 146570 + }, + { + "epoch": 0.732003295962446, + "grad_norm": 0.09342137724161148, + "learning_rate": 8.060026533830635e-06, + "loss": 8.5875, + "step": 146580 + }, + { + "epoch": 0.7320532347873855, + "grad_norm": 0.091585174202919, + "learning_rate": 8.058524618889085e-06, + "loss": 8.61, + "step": 146590 + }, + { + "epoch": 0.7321031736123249, + "grad_norm": 0.08510378003120422, + "learning_rate": 8.057022703947532e-06, + "loss": 8.6082, + "step": 146600 + }, + { + "epoch": 0.7321531124372643, + "grad_norm": 0.0953449234366417, + "learning_rate": 8.055520789005982e-06, + "loss": 8.6204, + "step": 146610 + }, + { + "epoch": 0.7322030512622038, + "grad_norm": 0.0904490277171135, + "learning_rate": 8.054018874064432e-06, + "loss": 8.6037, + "step": 146620 + }, + { + "epoch": 0.7322529900871433, + "grad_norm": 0.09040288627147675, + "learning_rate": 8.052516959122882e-06, + "loss": 8.6055, + "step": 146630 + }, + { + "epoch": 0.7323029289120827, + "grad_norm": 0.09579188376665115, + "learning_rate": 8.051015044181332e-06, + "loss": 8.6217, + "step": 146640 + }, + { + "epoch": 0.7323528677370221, + "grad_norm": 0.09186054021120071, + "learning_rate": 8.049513129239781e-06, + "loss": 8.5966, + "step": 146650 + }, + { + "epoch": 0.7324028065619616, + "grad_norm": 0.08724722266197205, + "learning_rate": 8.04801121429823e-06, + "loss": 8.6201, + "step": 146660 + }, + { + "epoch": 0.7324527453869011, + "grad_norm": 0.09225723147392273, + "learning_rate": 8.04650929935668e-06, + "loss": 8.604, + "step": 146670 + }, + { + "epoch": 0.7325026842118405, + "grad_norm": 0.0851278230547905, + "learning_rate": 8.04500738441513e-06, + "loss": 8.6148, + "step": 146680 + }, + { + "epoch": 0.7325526230367799, + "grad_norm": 0.09377813339233398, + "learning_rate": 8.04350546947358e-06, + "loss": 8.6004, + "step": 146690 + }, + { + "epoch": 0.7326025618617193, + "grad_norm": 0.0915536880493164, + "learning_rate": 8.042003554532028e-06, + "loss": 8.6146, + "step": 146700 + }, + { + "epoch": 0.7326525006866589, + "grad_norm": 0.09648875147104263, + "learning_rate": 8.040501639590477e-06, + "loss": 8.6067, + "step": 146710 + }, + { + "epoch": 0.7327024395115983, + "grad_norm": 0.09112456440925598, + "learning_rate": 8.038999724648927e-06, + "loss": 8.6042, + "step": 146720 + }, + { + "epoch": 0.7327523783365377, + "grad_norm": 0.08740793168544769, + "learning_rate": 8.037497809707377e-06, + "loss": 8.6129, + "step": 146730 + }, + { + "epoch": 0.7328023171614771, + "grad_norm": 0.09064393490552902, + "learning_rate": 8.035995894765827e-06, + "loss": 8.6168, + "step": 146740 + }, + { + "epoch": 0.7328522559864167, + "grad_norm": 0.09343009442090988, + "learning_rate": 8.034493979824276e-06, + "loss": 8.5964, + "step": 146750 + }, + { + "epoch": 0.7329021948113561, + "grad_norm": 0.10053983330726624, + "learning_rate": 8.032992064882724e-06, + "loss": 8.5884, + "step": 146760 + }, + { + "epoch": 0.7329521336362955, + "grad_norm": 0.09632902592420578, + "learning_rate": 8.031490149941175e-06, + "loss": 8.6121, + "step": 146770 + }, + { + "epoch": 0.7330020724612349, + "grad_norm": 0.09040207415819168, + "learning_rate": 8.029988234999625e-06, + "loss": 8.6016, + "step": 146780 + }, + { + "epoch": 0.7330520112861745, + "grad_norm": 0.09205528348684311, + "learning_rate": 8.028486320058075e-06, + "loss": 8.5859, + "step": 146790 + }, + { + "epoch": 0.7331019501111139, + "grad_norm": 0.09228353202342987, + "learning_rate": 8.026984405116523e-06, + "loss": 8.6206, + "step": 146800 + }, + { + "epoch": 0.7331518889360533, + "grad_norm": 0.08614012598991394, + "learning_rate": 8.025482490174974e-06, + "loss": 8.6226, + "step": 146810 + }, + { + "epoch": 0.7332018277609927, + "grad_norm": 0.0924997478723526, + "learning_rate": 8.023980575233422e-06, + "loss": 8.5959, + "step": 146820 + }, + { + "epoch": 0.7332517665859323, + "grad_norm": 0.09448036551475525, + "learning_rate": 8.022478660291872e-06, + "loss": 8.6089, + "step": 146830 + }, + { + "epoch": 0.7333017054108717, + "grad_norm": 0.09214811772108078, + "learning_rate": 8.020976745350322e-06, + "loss": 8.5967, + "step": 146840 + }, + { + "epoch": 0.7333516442358111, + "grad_norm": 0.10002180933952332, + "learning_rate": 8.019474830408771e-06, + "loss": 8.5945, + "step": 146850 + }, + { + "epoch": 0.7334015830607505, + "grad_norm": 0.08861683309078217, + "learning_rate": 8.017972915467221e-06, + "loss": 8.5851, + "step": 146860 + }, + { + "epoch": 0.7334515218856901, + "grad_norm": 0.09217624366283417, + "learning_rate": 8.01647100052567e-06, + "loss": 8.6059, + "step": 146870 + }, + { + "epoch": 0.7335014607106295, + "grad_norm": 0.0931958556175232, + "learning_rate": 8.01496908558412e-06, + "loss": 8.6036, + "step": 146880 + }, + { + "epoch": 0.7335513995355689, + "grad_norm": 0.09389421343803406, + "learning_rate": 8.01346717064257e-06, + "loss": 8.6066, + "step": 146890 + }, + { + "epoch": 0.7336013383605083, + "grad_norm": 0.08797122538089752, + "learning_rate": 8.011965255701018e-06, + "loss": 8.6272, + "step": 146900 + }, + { + "epoch": 0.7336512771854479, + "grad_norm": 0.09919506311416626, + "learning_rate": 8.010463340759469e-06, + "loss": 8.5877, + "step": 146910 + }, + { + "epoch": 0.7337012160103873, + "grad_norm": 0.09523117542266846, + "learning_rate": 8.008961425817917e-06, + "loss": 8.5809, + "step": 146920 + }, + { + "epoch": 0.7337511548353267, + "grad_norm": 0.0903659462928772, + "learning_rate": 8.007459510876367e-06, + "loss": 8.6084, + "step": 146930 + }, + { + "epoch": 0.7338010936602661, + "grad_norm": 0.09395240247249603, + "learning_rate": 8.005957595934817e-06, + "loss": 8.59, + "step": 146940 + }, + { + "epoch": 0.7338510324852057, + "grad_norm": 0.0915970653295517, + "learning_rate": 8.004455680993266e-06, + "loss": 8.6173, + "step": 146950 + }, + { + "epoch": 0.7339009713101451, + "grad_norm": 0.10002275556325912, + "learning_rate": 8.002953766051716e-06, + "loss": 8.5978, + "step": 146960 + }, + { + "epoch": 0.7339509101350845, + "grad_norm": 0.10154590010643005, + "learning_rate": 8.001451851110166e-06, + "loss": 8.5999, + "step": 146970 + }, + { + "epoch": 0.7340008489600239, + "grad_norm": 0.09146125614643097, + "learning_rate": 7.999949936168615e-06, + "loss": 8.6126, + "step": 146980 + }, + { + "epoch": 0.7340507877849635, + "grad_norm": 0.09198717772960663, + "learning_rate": 7.998448021227065e-06, + "loss": 8.6202, + "step": 146990 + }, + { + "epoch": 0.7341007266099029, + "grad_norm": 0.09776240587234497, + "learning_rate": 7.996946106285513e-06, + "loss": 8.6133, + "step": 147000 + }, + { + "epoch": 0.7341506654348423, + "grad_norm": 0.09213253110647202, + "learning_rate": 7.995444191343964e-06, + "loss": 8.6106, + "step": 147010 + }, + { + "epoch": 0.7342006042597817, + "grad_norm": 0.09108315408229828, + "learning_rate": 7.993942276402414e-06, + "loss": 8.6227, + "step": 147020 + }, + { + "epoch": 0.7342505430847213, + "grad_norm": 0.08943723142147064, + "learning_rate": 7.992440361460862e-06, + "loss": 8.6152, + "step": 147030 + }, + { + "epoch": 0.7343004819096607, + "grad_norm": 0.08865174651145935, + "learning_rate": 7.990938446519312e-06, + "loss": 8.591, + "step": 147040 + }, + { + "epoch": 0.7343504207346001, + "grad_norm": 0.0994340255856514, + "learning_rate": 7.989436531577763e-06, + "loss": 8.6018, + "step": 147050 + }, + { + "epoch": 0.7344003595595395, + "grad_norm": 0.09007614850997925, + "learning_rate": 7.987934616636211e-06, + "loss": 8.5896, + "step": 147060 + }, + { + "epoch": 0.7344502983844791, + "grad_norm": 0.08295390009880066, + "learning_rate": 7.986432701694661e-06, + "loss": 8.6055, + "step": 147070 + }, + { + "epoch": 0.7345002372094185, + "grad_norm": 0.09356316924095154, + "learning_rate": 7.98493078675311e-06, + "loss": 8.6058, + "step": 147080 + }, + { + "epoch": 0.7345501760343579, + "grad_norm": 0.09198088943958282, + "learning_rate": 7.98342887181156e-06, + "loss": 8.6089, + "step": 147090 + }, + { + "epoch": 0.7346001148592973, + "grad_norm": 0.09037932008504868, + "learning_rate": 7.98192695687001e-06, + "loss": 8.6066, + "step": 147100 + }, + { + "epoch": 0.7346500536842369, + "grad_norm": 0.0916791781783104, + "learning_rate": 7.980425041928459e-06, + "loss": 8.6022, + "step": 147110 + }, + { + "epoch": 0.7346999925091763, + "grad_norm": 0.09494098275899887, + "learning_rate": 7.978923126986909e-06, + "loss": 8.5989, + "step": 147120 + }, + { + "epoch": 0.7347499313341157, + "grad_norm": 0.0917518213391304, + "learning_rate": 7.977421212045359e-06, + "loss": 8.5957, + "step": 147130 + }, + { + "epoch": 0.7347998701590551, + "grad_norm": 0.08974310755729675, + "learning_rate": 7.975919297103807e-06, + "loss": 8.6081, + "step": 147140 + }, + { + "epoch": 0.7348498089839947, + "grad_norm": 0.09109298884868622, + "learning_rate": 7.974417382162258e-06, + "loss": 8.5945, + "step": 147150 + }, + { + "epoch": 0.7348997478089341, + "grad_norm": 0.08790141344070435, + "learning_rate": 7.972915467220706e-06, + "loss": 8.6105, + "step": 147160 + }, + { + "epoch": 0.7349496866338735, + "grad_norm": 0.09072345495223999, + "learning_rate": 7.971413552279156e-06, + "loss": 8.6066, + "step": 147170 + }, + { + "epoch": 0.7349996254588129, + "grad_norm": 0.08956841379404068, + "learning_rate": 7.969911637337606e-06, + "loss": 8.6191, + "step": 147180 + }, + { + "epoch": 0.7350495642837525, + "grad_norm": 0.09307093918323517, + "learning_rate": 7.968409722396055e-06, + "loss": 8.6011, + "step": 147190 + }, + { + "epoch": 0.7350995031086919, + "grad_norm": 0.09060484170913696, + "learning_rate": 7.966907807454505e-06, + "loss": 8.6013, + "step": 147200 + }, + { + "epoch": 0.7351494419336313, + "grad_norm": 0.08914856612682343, + "learning_rate": 7.965405892512954e-06, + "loss": 8.6046, + "step": 147210 + }, + { + "epoch": 0.7351993807585707, + "grad_norm": 0.0949515700340271, + "learning_rate": 7.963903977571404e-06, + "loss": 8.6229, + "step": 147220 + }, + { + "epoch": 0.7352493195835103, + "grad_norm": 0.09112024307250977, + "learning_rate": 7.962402062629854e-06, + "loss": 8.6261, + "step": 147230 + }, + { + "epoch": 0.7352992584084497, + "grad_norm": 0.0883699506521225, + "learning_rate": 7.960900147688302e-06, + "loss": 8.612, + "step": 147240 + }, + { + "epoch": 0.7353491972333891, + "grad_norm": 0.094344362616539, + "learning_rate": 7.959398232746753e-06, + "loss": 8.61, + "step": 147250 + }, + { + "epoch": 0.7353991360583285, + "grad_norm": 0.08884866535663605, + "learning_rate": 7.957896317805201e-06, + "loss": 8.6119, + "step": 147260 + }, + { + "epoch": 0.735449074883268, + "grad_norm": 0.09071295708417892, + "learning_rate": 7.956394402863651e-06, + "loss": 8.6011, + "step": 147270 + }, + { + "epoch": 0.7354990137082075, + "grad_norm": 0.09404217451810837, + "learning_rate": 7.954892487922101e-06, + "loss": 8.5964, + "step": 147280 + }, + { + "epoch": 0.7355489525331469, + "grad_norm": 0.0958835780620575, + "learning_rate": 7.953390572980552e-06, + "loss": 8.5959, + "step": 147290 + }, + { + "epoch": 0.7355988913580863, + "grad_norm": 0.09104613214731216, + "learning_rate": 7.951888658039e-06, + "loss": 8.5849, + "step": 147300 + }, + { + "epoch": 0.7356488301830258, + "grad_norm": 0.0909975990653038, + "learning_rate": 7.950386743097449e-06, + "loss": 8.6005, + "step": 147310 + }, + { + "epoch": 0.7356987690079653, + "grad_norm": 0.09111376851797104, + "learning_rate": 7.948884828155899e-06, + "loss": 8.6123, + "step": 147320 + }, + { + "epoch": 0.7357487078329047, + "grad_norm": 0.08861295878887177, + "learning_rate": 7.947382913214349e-06, + "loss": 8.5943, + "step": 147330 + }, + { + "epoch": 0.7357986466578441, + "grad_norm": 0.09001336991786957, + "learning_rate": 7.9458809982728e-06, + "loss": 8.5862, + "step": 147340 + }, + { + "epoch": 0.7358485854827835, + "grad_norm": 0.0889565646648407, + "learning_rate": 7.944379083331248e-06, + "loss": 8.5973, + "step": 147350 + }, + { + "epoch": 0.7358985243077231, + "grad_norm": 0.09270651638507843, + "learning_rate": 7.942877168389696e-06, + "loss": 8.5932, + "step": 147360 + }, + { + "epoch": 0.7359484631326625, + "grad_norm": 0.09283141791820526, + "learning_rate": 7.941375253448146e-06, + "loss": 8.595, + "step": 147370 + }, + { + "epoch": 0.7359984019576019, + "grad_norm": 0.09074781090021133, + "learning_rate": 7.939873338506596e-06, + "loss": 8.6006, + "step": 147380 + }, + { + "epoch": 0.7360483407825413, + "grad_norm": 0.09841238707304001, + "learning_rate": 7.938371423565047e-06, + "loss": 8.6104, + "step": 147390 + }, + { + "epoch": 0.7360982796074809, + "grad_norm": 0.09481830149888992, + "learning_rate": 7.936869508623495e-06, + "loss": 8.6014, + "step": 147400 + }, + { + "epoch": 0.7361482184324203, + "grad_norm": 0.08906048536300659, + "learning_rate": 7.935367593681944e-06, + "loss": 8.6178, + "step": 147410 + }, + { + "epoch": 0.7361981572573597, + "grad_norm": 0.08914127200841904, + "learning_rate": 7.933865678740394e-06, + "loss": 8.6059, + "step": 147420 + }, + { + "epoch": 0.7362480960822991, + "grad_norm": 0.09480444341897964, + "learning_rate": 7.932363763798844e-06, + "loss": 8.5944, + "step": 147430 + }, + { + "epoch": 0.7362980349072387, + "grad_norm": 0.09161514043807983, + "learning_rate": 7.930861848857294e-06, + "loss": 8.6164, + "step": 147440 + }, + { + "epoch": 0.7363479737321781, + "grad_norm": 0.09558946639299393, + "learning_rate": 7.929359933915744e-06, + "loss": 8.5894, + "step": 147450 + }, + { + "epoch": 0.7363979125571175, + "grad_norm": 0.09459544718265533, + "learning_rate": 7.927858018974191e-06, + "loss": 8.598, + "step": 147460 + }, + { + "epoch": 0.7364478513820569, + "grad_norm": 0.09031836688518524, + "learning_rate": 7.926356104032641e-06, + "loss": 8.6046, + "step": 147470 + }, + { + "epoch": 0.7364977902069965, + "grad_norm": 0.0889408141374588, + "learning_rate": 7.924854189091091e-06, + "loss": 8.6006, + "step": 147480 + }, + { + "epoch": 0.7365477290319359, + "grad_norm": 0.09583480656147003, + "learning_rate": 7.923352274149542e-06, + "loss": 8.6201, + "step": 147490 + }, + { + "epoch": 0.7365976678568753, + "grad_norm": 0.09124153107404709, + "learning_rate": 7.921850359207992e-06, + "loss": 8.606, + "step": 147500 + }, + { + "epoch": 0.7366476066818147, + "grad_norm": 0.09104292094707489, + "learning_rate": 7.920348444266439e-06, + "loss": 8.5772, + "step": 147510 + }, + { + "epoch": 0.7366975455067543, + "grad_norm": 0.09210095554590225, + "learning_rate": 7.918846529324889e-06, + "loss": 8.6044, + "step": 147520 + }, + { + "epoch": 0.7367474843316937, + "grad_norm": 0.08999733626842499, + "learning_rate": 7.917344614383339e-06, + "loss": 8.6077, + "step": 147530 + }, + { + "epoch": 0.7367974231566331, + "grad_norm": 0.09261172264814377, + "learning_rate": 7.91584269944179e-06, + "loss": 8.5944, + "step": 147540 + }, + { + "epoch": 0.7368473619815725, + "grad_norm": 0.0916711613535881, + "learning_rate": 7.91434078450024e-06, + "loss": 8.5811, + "step": 147550 + }, + { + "epoch": 0.7368973008065121, + "grad_norm": 0.09489434212446213, + "learning_rate": 7.912838869558686e-06, + "loss": 8.6007, + "step": 147560 + }, + { + "epoch": 0.7369472396314515, + "grad_norm": 0.09215337038040161, + "learning_rate": 7.911336954617136e-06, + "loss": 8.6051, + "step": 147570 + }, + { + "epoch": 0.7369971784563909, + "grad_norm": 0.09621989727020264, + "learning_rate": 7.909835039675586e-06, + "loss": 8.6042, + "step": 147580 + }, + { + "epoch": 0.7370471172813303, + "grad_norm": 0.09417767822742462, + "learning_rate": 7.908333124734037e-06, + "loss": 8.5984, + "step": 147590 + }, + { + "epoch": 0.7370970561062699, + "grad_norm": 0.09243947267532349, + "learning_rate": 7.906831209792487e-06, + "loss": 8.6154, + "step": 147600 + }, + { + "epoch": 0.7371469949312093, + "grad_norm": 0.09667365252971649, + "learning_rate": 7.905329294850935e-06, + "loss": 8.6127, + "step": 147610 + }, + { + "epoch": 0.7371969337561487, + "grad_norm": 0.09225791692733765, + "learning_rate": 7.903827379909384e-06, + "loss": 8.5939, + "step": 147620 + }, + { + "epoch": 0.7372468725810881, + "grad_norm": 0.09003566205501556, + "learning_rate": 7.902325464967834e-06, + "loss": 8.5911, + "step": 147630 + }, + { + "epoch": 0.7372968114060277, + "grad_norm": 0.09092370420694351, + "learning_rate": 7.900823550026284e-06, + "loss": 8.5904, + "step": 147640 + }, + { + "epoch": 0.7373467502309671, + "grad_norm": 0.09510780870914459, + "learning_rate": 7.899321635084734e-06, + "loss": 8.6026, + "step": 147650 + }, + { + "epoch": 0.7373966890559065, + "grad_norm": 0.09396243095397949, + "learning_rate": 7.897819720143183e-06, + "loss": 8.6125, + "step": 147660 + }, + { + "epoch": 0.7374466278808459, + "grad_norm": 0.09083011746406555, + "learning_rate": 7.896317805201631e-06, + "loss": 8.5926, + "step": 147670 + }, + { + "epoch": 0.7374965667057855, + "grad_norm": 0.10038017481565475, + "learning_rate": 7.894815890260082e-06, + "loss": 8.6012, + "step": 147680 + }, + { + "epoch": 0.7375465055307249, + "grad_norm": 0.0896848812699318, + "learning_rate": 7.893313975318532e-06, + "loss": 8.5961, + "step": 147690 + }, + { + "epoch": 0.7375964443556643, + "grad_norm": 0.09542018920183182, + "learning_rate": 7.891812060376982e-06, + "loss": 8.6294, + "step": 147700 + }, + { + "epoch": 0.7376463831806037, + "grad_norm": 0.0885300263762474, + "learning_rate": 7.89031014543543e-06, + "loss": 8.607, + "step": 147710 + }, + { + "epoch": 0.7376963220055432, + "grad_norm": 0.0911983922123909, + "learning_rate": 7.888808230493879e-06, + "loss": 8.5836, + "step": 147720 + }, + { + "epoch": 0.7377462608304827, + "grad_norm": 0.08745992183685303, + "learning_rate": 7.887306315552329e-06, + "loss": 8.5998, + "step": 147730 + }, + { + "epoch": 0.7377961996554221, + "grad_norm": 0.09371349215507507, + "learning_rate": 7.88580440061078e-06, + "loss": 8.5988, + "step": 147740 + }, + { + "epoch": 0.7378461384803615, + "grad_norm": 0.09324972331523895, + "learning_rate": 7.88430248566923e-06, + "loss": 8.5987, + "step": 147750 + }, + { + "epoch": 0.737896077305301, + "grad_norm": 0.09328649193048477, + "learning_rate": 7.882800570727678e-06, + "loss": 8.6026, + "step": 147760 + }, + { + "epoch": 0.7379460161302405, + "grad_norm": 0.0888761654496193, + "learning_rate": 7.881298655786128e-06, + "loss": 8.6012, + "step": 147770 + }, + { + "epoch": 0.7379959549551799, + "grad_norm": 0.09246739745140076, + "learning_rate": 7.879796740844577e-06, + "loss": 8.6072, + "step": 147780 + }, + { + "epoch": 0.7380458937801193, + "grad_norm": 0.09637174010276794, + "learning_rate": 7.878294825903027e-06, + "loss": 8.6062, + "step": 147790 + }, + { + "epoch": 0.7380958326050588, + "grad_norm": 0.08997974544763565, + "learning_rate": 7.876792910961477e-06, + "loss": 8.5919, + "step": 147800 + }, + { + "epoch": 0.7381457714299983, + "grad_norm": 0.08694270998239517, + "learning_rate": 7.875290996019925e-06, + "loss": 8.5971, + "step": 147810 + }, + { + "epoch": 0.7381957102549377, + "grad_norm": 0.09957410395145416, + "learning_rate": 7.873789081078376e-06, + "loss": 8.5931, + "step": 147820 + }, + { + "epoch": 0.7382456490798771, + "grad_norm": 0.09661848098039627, + "learning_rate": 7.872287166136824e-06, + "loss": 8.6017, + "step": 147830 + }, + { + "epoch": 0.7382955879048166, + "grad_norm": 0.09177085012197495, + "learning_rate": 7.870785251195274e-06, + "loss": 8.6021, + "step": 147840 + }, + { + "epoch": 0.7383455267297561, + "grad_norm": 0.0871456116437912, + "learning_rate": 7.869283336253724e-06, + "loss": 8.5894, + "step": 147850 + }, + { + "epoch": 0.7383954655546955, + "grad_norm": 0.09130498766899109, + "learning_rate": 7.867781421312173e-06, + "loss": 8.5875, + "step": 147860 + }, + { + "epoch": 0.7384454043796349, + "grad_norm": 0.09617603570222855, + "learning_rate": 7.866279506370623e-06, + "loss": 8.6145, + "step": 147870 + }, + { + "epoch": 0.7384953432045744, + "grad_norm": 0.08777206391096115, + "learning_rate": 7.864777591429072e-06, + "loss": 8.6098, + "step": 147880 + }, + { + "epoch": 0.7385452820295139, + "grad_norm": 0.08969053626060486, + "learning_rate": 7.863275676487522e-06, + "loss": 8.5935, + "step": 147890 + }, + { + "epoch": 0.7385952208544533, + "grad_norm": 0.091539166867733, + "learning_rate": 7.861773761545972e-06, + "loss": 8.5958, + "step": 147900 + }, + { + "epoch": 0.7386451596793927, + "grad_norm": 0.0880018100142479, + "learning_rate": 7.86027184660442e-06, + "loss": 8.6105, + "step": 147910 + }, + { + "epoch": 0.7386950985043322, + "grad_norm": 0.09044984728097916, + "learning_rate": 7.85876993166287e-06, + "loss": 8.6065, + "step": 147920 + }, + { + "epoch": 0.7387450373292717, + "grad_norm": 0.0911531150341034, + "learning_rate": 7.85726801672132e-06, + "loss": 8.6034, + "step": 147930 + }, + { + "epoch": 0.7387949761542111, + "grad_norm": 0.09581080824136734, + "learning_rate": 7.85576610177977e-06, + "loss": 8.5987, + "step": 147940 + }, + { + "epoch": 0.7388449149791505, + "grad_norm": 0.08814167231321335, + "learning_rate": 7.85426418683822e-06, + "loss": 8.604, + "step": 147950 + }, + { + "epoch": 0.73889485380409, + "grad_norm": 0.09530513733625412, + "learning_rate": 7.852762271896668e-06, + "loss": 8.5862, + "step": 147960 + }, + { + "epoch": 0.7389447926290295, + "grad_norm": 0.08761637657880783, + "learning_rate": 7.851260356955118e-06, + "loss": 8.6251, + "step": 147970 + }, + { + "epoch": 0.7389947314539689, + "grad_norm": 0.09182920306921005, + "learning_rate": 7.849758442013568e-06, + "loss": 8.6002, + "step": 147980 + }, + { + "epoch": 0.7390446702789083, + "grad_norm": 0.0961126983165741, + "learning_rate": 7.848256527072017e-06, + "loss": 8.6096, + "step": 147990 + }, + { + "epoch": 0.7390946091038478, + "grad_norm": 0.09371432662010193, + "learning_rate": 7.846754612130467e-06, + "loss": 8.616, + "step": 148000 + }, + { + "epoch": 0.7391445479287873, + "grad_norm": 0.08760569244623184, + "learning_rate": 7.845252697188915e-06, + "loss": 8.6174, + "step": 148010 + }, + { + "epoch": 0.7391944867537267, + "grad_norm": 0.09141170978546143, + "learning_rate": 7.843750782247366e-06, + "loss": 8.6018, + "step": 148020 + }, + { + "epoch": 0.7392444255786661, + "grad_norm": 0.09060084819793701, + "learning_rate": 7.842248867305816e-06, + "loss": 8.5826, + "step": 148030 + }, + { + "epoch": 0.7392943644036056, + "grad_norm": 0.09274831414222717, + "learning_rate": 7.840746952364264e-06, + "loss": 8.6069, + "step": 148040 + }, + { + "epoch": 0.739344303228545, + "grad_norm": 0.09324593096971512, + "learning_rate": 7.839245037422714e-06, + "loss": 8.6036, + "step": 148050 + }, + { + "epoch": 0.7393942420534845, + "grad_norm": 0.09713485836982727, + "learning_rate": 7.837743122481163e-06, + "loss": 8.6138, + "step": 148060 + }, + { + "epoch": 0.7394441808784239, + "grad_norm": 0.08883433789014816, + "learning_rate": 7.836241207539613e-06, + "loss": 8.6116, + "step": 148070 + }, + { + "epoch": 0.7394941197033634, + "grad_norm": 0.09417309612035751, + "learning_rate": 7.834739292598063e-06, + "loss": 8.5914, + "step": 148080 + }, + { + "epoch": 0.7395440585283029, + "grad_norm": 0.09347934275865555, + "learning_rate": 7.833237377656513e-06, + "loss": 8.6136, + "step": 148090 + }, + { + "epoch": 0.7395939973532423, + "grad_norm": 0.09639857709407806, + "learning_rate": 7.831735462714962e-06, + "loss": 8.5874, + "step": 148100 + }, + { + "epoch": 0.7396439361781817, + "grad_norm": 0.09195045381784439, + "learning_rate": 7.83023354777341e-06, + "loss": 8.5943, + "step": 148110 + }, + { + "epoch": 0.7396938750031212, + "grad_norm": 0.09634042531251907, + "learning_rate": 7.82873163283186e-06, + "loss": 8.6032, + "step": 148120 + }, + { + "epoch": 0.7397438138280606, + "grad_norm": 0.08561528474092484, + "learning_rate": 7.82722971789031e-06, + "loss": 8.5971, + "step": 148130 + }, + { + "epoch": 0.7397937526530001, + "grad_norm": 0.08745627850294113, + "learning_rate": 7.825727802948761e-06, + "loss": 8.5908, + "step": 148140 + }, + { + "epoch": 0.7398436914779395, + "grad_norm": 0.09139811247587204, + "learning_rate": 7.82422588800721e-06, + "loss": 8.5864, + "step": 148150 + }, + { + "epoch": 0.739893630302879, + "grad_norm": 0.09640243649482727, + "learning_rate": 7.822723973065658e-06, + "loss": 8.599, + "step": 148160 + }, + { + "epoch": 0.7399435691278184, + "grad_norm": 0.09190690517425537, + "learning_rate": 7.821222058124108e-06, + "loss": 8.6074, + "step": 148170 + }, + { + "epoch": 0.7399935079527579, + "grad_norm": 0.08793210238218307, + "learning_rate": 7.819720143182558e-06, + "loss": 8.602, + "step": 148180 + }, + { + "epoch": 0.7400434467776973, + "grad_norm": 0.09305689483880997, + "learning_rate": 7.818218228241008e-06, + "loss": 8.5939, + "step": 148190 + }, + { + "epoch": 0.7400933856026368, + "grad_norm": 0.09034860879182816, + "learning_rate": 7.816716313299457e-06, + "loss": 8.6036, + "step": 148200 + }, + { + "epoch": 0.7401433244275762, + "grad_norm": 0.09198985993862152, + "learning_rate": 7.815214398357905e-06, + "loss": 8.5912, + "step": 148210 + }, + { + "epoch": 0.7401932632525157, + "grad_norm": 0.0869131088256836, + "learning_rate": 7.813712483416356e-06, + "loss": 8.6085, + "step": 148220 + }, + { + "epoch": 0.7402432020774551, + "grad_norm": 0.09751661121845245, + "learning_rate": 7.812210568474806e-06, + "loss": 8.6094, + "step": 148230 + }, + { + "epoch": 0.7402931409023946, + "grad_norm": 0.09016689658164978, + "learning_rate": 7.810708653533256e-06, + "loss": 8.6164, + "step": 148240 + }, + { + "epoch": 0.740343079727334, + "grad_norm": 0.09467813372612, + "learning_rate": 7.809206738591706e-06, + "loss": 8.616, + "step": 148250 + }, + { + "epoch": 0.7403930185522735, + "grad_norm": 0.09038371592760086, + "learning_rate": 7.807704823650153e-06, + "loss": 8.6001, + "step": 148260 + }, + { + "epoch": 0.7404429573772129, + "grad_norm": 0.09057788550853729, + "learning_rate": 7.806202908708603e-06, + "loss": 8.6125, + "step": 148270 + }, + { + "epoch": 0.7404928962021524, + "grad_norm": 0.09068594127893448, + "learning_rate": 7.804700993767053e-06, + "loss": 8.5882, + "step": 148280 + }, + { + "epoch": 0.7405428350270918, + "grad_norm": 0.09720471501350403, + "learning_rate": 7.803199078825503e-06, + "loss": 8.6165, + "step": 148290 + }, + { + "epoch": 0.7405927738520313, + "grad_norm": 0.09376747906208038, + "learning_rate": 7.801697163883954e-06, + "loss": 8.5862, + "step": 148300 + }, + { + "epoch": 0.7406427126769707, + "grad_norm": 0.09301353991031647, + "learning_rate": 7.8001952489424e-06, + "loss": 8.5996, + "step": 148310 + }, + { + "epoch": 0.7406926515019102, + "grad_norm": 0.09005246311426163, + "learning_rate": 7.79869333400085e-06, + "loss": 8.6072, + "step": 148320 + }, + { + "epoch": 0.7407425903268496, + "grad_norm": 0.09260305762290955, + "learning_rate": 7.7971914190593e-06, + "loss": 8.59, + "step": 148330 + }, + { + "epoch": 0.7407925291517891, + "grad_norm": 0.09277404844760895, + "learning_rate": 7.795689504117751e-06, + "loss": 8.618, + "step": 148340 + }, + { + "epoch": 0.7408424679767285, + "grad_norm": 0.09078875184059143, + "learning_rate": 7.794187589176201e-06, + "loss": 8.612, + "step": 148350 + }, + { + "epoch": 0.7408924068016679, + "grad_norm": 0.0957048162817955, + "learning_rate": 7.792685674234648e-06, + "loss": 8.5944, + "step": 148360 + }, + { + "epoch": 0.7409423456266074, + "grad_norm": 0.08615356683731079, + "learning_rate": 7.791183759293098e-06, + "loss": 8.6149, + "step": 148370 + }, + { + "epoch": 0.7409922844515469, + "grad_norm": 0.0927518680691719, + "learning_rate": 7.789681844351548e-06, + "loss": 8.5918, + "step": 148380 + }, + { + "epoch": 0.7410422232764863, + "grad_norm": 0.09268731623888016, + "learning_rate": 7.788179929409998e-06, + "loss": 8.5982, + "step": 148390 + }, + { + "epoch": 0.7410921621014257, + "grad_norm": 0.09085573256015778, + "learning_rate": 7.786678014468449e-06, + "loss": 8.6049, + "step": 148400 + }, + { + "epoch": 0.7411421009263652, + "grad_norm": 0.09364666044712067, + "learning_rate": 7.785176099526897e-06, + "loss": 8.5953, + "step": 148410 + }, + { + "epoch": 0.7411920397513047, + "grad_norm": 0.09044606983661652, + "learning_rate": 7.783674184585346e-06, + "loss": 8.5928, + "step": 148420 + }, + { + "epoch": 0.7412419785762441, + "grad_norm": 0.08722547441720963, + "learning_rate": 7.782172269643796e-06, + "loss": 8.5983, + "step": 148430 + }, + { + "epoch": 0.7412919174011835, + "grad_norm": 0.0928499698638916, + "learning_rate": 7.780670354702246e-06, + "loss": 8.615, + "step": 148440 + }, + { + "epoch": 0.741341856226123, + "grad_norm": 0.09413442015647888, + "learning_rate": 7.779168439760696e-06, + "loss": 8.6119, + "step": 148450 + }, + { + "epoch": 0.7413917950510625, + "grad_norm": 0.09118451923131943, + "learning_rate": 7.777666524819145e-06, + "loss": 8.6034, + "step": 148460 + }, + { + "epoch": 0.7414417338760019, + "grad_norm": 0.08917475491762161, + "learning_rate": 7.776164609877593e-06, + "loss": 8.5968, + "step": 148470 + }, + { + "epoch": 0.7414916727009413, + "grad_norm": 0.09661485254764557, + "learning_rate": 7.774662694936043e-06, + "loss": 8.589, + "step": 148480 + }, + { + "epoch": 0.7415416115258808, + "grad_norm": 0.0894039124250412, + "learning_rate": 7.773160779994493e-06, + "loss": 8.5998, + "step": 148490 + }, + { + "epoch": 0.7415915503508202, + "grad_norm": 0.09486133605241776, + "learning_rate": 7.771658865052944e-06, + "loss": 8.6045, + "step": 148500 + }, + { + "epoch": 0.7416414891757597, + "grad_norm": 0.09382303059101105, + "learning_rate": 7.770156950111392e-06, + "loss": 8.5874, + "step": 148510 + }, + { + "epoch": 0.7416914280006991, + "grad_norm": 0.0903899222612381, + "learning_rate": 7.76865503516984e-06, + "loss": 8.5879, + "step": 148520 + }, + { + "epoch": 0.7417413668256386, + "grad_norm": 0.08889366686344147, + "learning_rate": 7.76715312022829e-06, + "loss": 8.5956, + "step": 148530 + }, + { + "epoch": 0.741791305650578, + "grad_norm": 0.09097573906183243, + "learning_rate": 7.765651205286741e-06, + "loss": 8.5917, + "step": 148540 + }, + { + "epoch": 0.7418412444755175, + "grad_norm": 0.09453503787517548, + "learning_rate": 7.764149290345191e-06, + "loss": 8.5922, + "step": 148550 + }, + { + "epoch": 0.7418911833004569, + "grad_norm": 0.09525911509990692, + "learning_rate": 7.76264737540364e-06, + "loss": 8.6012, + "step": 148560 + }, + { + "epoch": 0.7419411221253964, + "grad_norm": 0.08901818841695786, + "learning_rate": 7.76114546046209e-06, + "loss": 8.6012, + "step": 148570 + }, + { + "epoch": 0.7419910609503358, + "grad_norm": 0.09080463647842407, + "learning_rate": 7.759643545520538e-06, + "loss": 8.6011, + "step": 148580 + }, + { + "epoch": 0.7420409997752753, + "grad_norm": 0.08285506069660187, + "learning_rate": 7.758141630578988e-06, + "loss": 8.5975, + "step": 148590 + }, + { + "epoch": 0.7420909386002147, + "grad_norm": 0.09270590543746948, + "learning_rate": 7.756639715637439e-06, + "loss": 8.6014, + "step": 148600 + }, + { + "epoch": 0.7421408774251542, + "grad_norm": 0.09853670746088028, + "learning_rate": 7.755137800695887e-06, + "loss": 8.5918, + "step": 148610 + }, + { + "epoch": 0.7421908162500936, + "grad_norm": 0.09975389391183853, + "learning_rate": 7.753635885754337e-06, + "loss": 8.601, + "step": 148620 + }, + { + "epoch": 0.7422407550750331, + "grad_norm": 0.09311430156230927, + "learning_rate": 7.752133970812786e-06, + "loss": 8.5938, + "step": 148630 + }, + { + "epoch": 0.7422906938999725, + "grad_norm": 0.09312795102596283, + "learning_rate": 7.750632055871236e-06, + "loss": 8.5892, + "step": 148640 + }, + { + "epoch": 0.742340632724912, + "grad_norm": 0.09401096403598785, + "learning_rate": 7.749130140929686e-06, + "loss": 8.6029, + "step": 148650 + }, + { + "epoch": 0.7423905715498514, + "grad_norm": 0.09024547040462494, + "learning_rate": 7.747628225988135e-06, + "loss": 8.5963, + "step": 148660 + }, + { + "epoch": 0.7424405103747909, + "grad_norm": 0.09483632445335388, + "learning_rate": 7.746126311046585e-06, + "loss": 8.611, + "step": 148670 + }, + { + "epoch": 0.7424904491997303, + "grad_norm": 0.08980057388544083, + "learning_rate": 7.744624396105033e-06, + "loss": 8.5968, + "step": 148680 + }, + { + "epoch": 0.7425403880246698, + "grad_norm": 0.09561526775360107, + "learning_rate": 7.743122481163483e-06, + "loss": 8.6023, + "step": 148690 + }, + { + "epoch": 0.7425903268496092, + "grad_norm": 0.09579294919967651, + "learning_rate": 7.741620566221934e-06, + "loss": 8.5962, + "step": 148700 + }, + { + "epoch": 0.7426402656745487, + "grad_norm": 0.09053392708301544, + "learning_rate": 7.740118651280382e-06, + "loss": 8.5915, + "step": 148710 + }, + { + "epoch": 0.7426902044994881, + "grad_norm": 0.08874721825122833, + "learning_rate": 7.738616736338832e-06, + "loss": 8.6017, + "step": 148720 + }, + { + "epoch": 0.7427401433244276, + "grad_norm": 0.09630593657493591, + "learning_rate": 7.737114821397282e-06, + "loss": 8.6011, + "step": 148730 + }, + { + "epoch": 0.742790082149367, + "grad_norm": 0.0932571217417717, + "learning_rate": 7.735612906455731e-06, + "loss": 8.6088, + "step": 148740 + }, + { + "epoch": 0.7428400209743065, + "grad_norm": 0.09587745368480682, + "learning_rate": 7.734110991514181e-06, + "loss": 8.6054, + "step": 148750 + }, + { + "epoch": 0.7428899597992459, + "grad_norm": 0.08768852055072784, + "learning_rate": 7.73260907657263e-06, + "loss": 8.5966, + "step": 148760 + }, + { + "epoch": 0.7429398986241854, + "grad_norm": 0.08625376224517822, + "learning_rate": 7.73110716163108e-06, + "loss": 8.5867, + "step": 148770 + }, + { + "epoch": 0.7429898374491248, + "grad_norm": 0.09649079293012619, + "learning_rate": 7.72960524668953e-06, + "loss": 8.6001, + "step": 148780 + }, + { + "epoch": 0.7430397762740643, + "grad_norm": 0.0902811661362648, + "learning_rate": 7.728103331747978e-06, + "loss": 8.5862, + "step": 148790 + }, + { + "epoch": 0.7430897150990037, + "grad_norm": 0.08683760464191437, + "learning_rate": 7.726601416806429e-06, + "loss": 8.5762, + "step": 148800 + }, + { + "epoch": 0.7431396539239432, + "grad_norm": 0.09725631028413773, + "learning_rate": 7.725099501864877e-06, + "loss": 8.6059, + "step": 148810 + }, + { + "epoch": 0.7431895927488826, + "grad_norm": 0.09072460979223251, + "learning_rate": 7.723597586923327e-06, + "loss": 8.5995, + "step": 148820 + }, + { + "epoch": 0.743239531573822, + "grad_norm": 0.0900563895702362, + "learning_rate": 7.722095671981777e-06, + "loss": 8.6039, + "step": 148830 + }, + { + "epoch": 0.7432894703987615, + "grad_norm": 0.08766742050647736, + "learning_rate": 7.720593757040226e-06, + "loss": 8.5947, + "step": 148840 + }, + { + "epoch": 0.743339409223701, + "grad_norm": 0.09723313897848129, + "learning_rate": 7.719091842098676e-06, + "loss": 8.6027, + "step": 148850 + }, + { + "epoch": 0.7433893480486404, + "grad_norm": 0.09286182373762131, + "learning_rate": 7.717589927157125e-06, + "loss": 8.5978, + "step": 148860 + }, + { + "epoch": 0.7434392868735799, + "grad_norm": 0.09512756764888763, + "learning_rate": 7.716088012215575e-06, + "loss": 8.5881, + "step": 148870 + }, + { + "epoch": 0.7434892256985193, + "grad_norm": 0.09215110540390015, + "learning_rate": 7.714586097274025e-06, + "loss": 8.5987, + "step": 148880 + }, + { + "epoch": 0.7435391645234588, + "grad_norm": 0.09654014557600021, + "learning_rate": 7.713084182332475e-06, + "loss": 8.5833, + "step": 148890 + }, + { + "epoch": 0.7435891033483982, + "grad_norm": 0.09393315017223358, + "learning_rate": 7.711582267390924e-06, + "loss": 8.6084, + "step": 148900 + }, + { + "epoch": 0.7436390421733376, + "grad_norm": 0.09271156042814255, + "learning_rate": 7.710080352449372e-06, + "loss": 8.5837, + "step": 148910 + }, + { + "epoch": 0.7436889809982771, + "grad_norm": 0.08688397705554962, + "learning_rate": 7.708578437507822e-06, + "loss": 8.5953, + "step": 148920 + }, + { + "epoch": 0.7437389198232166, + "grad_norm": 0.08975444734096527, + "learning_rate": 7.707076522566272e-06, + "loss": 8.585, + "step": 148930 + }, + { + "epoch": 0.743788858648156, + "grad_norm": 0.09515532851219177, + "learning_rate": 7.705574607624723e-06, + "loss": 8.5907, + "step": 148940 + }, + { + "epoch": 0.7438387974730954, + "grad_norm": 0.08886223286390305, + "learning_rate": 7.704072692683171e-06, + "loss": 8.5786, + "step": 148950 + }, + { + "epoch": 0.7438887362980349, + "grad_norm": 0.09450791031122208, + "learning_rate": 7.70257077774162e-06, + "loss": 8.5943, + "step": 148960 + }, + { + "epoch": 0.7439386751229744, + "grad_norm": 0.08783065527677536, + "learning_rate": 7.70106886280007e-06, + "loss": 8.6139, + "step": 148970 + }, + { + "epoch": 0.7439886139479138, + "grad_norm": 0.0896720141172409, + "learning_rate": 7.69956694785852e-06, + "loss": 8.6009, + "step": 148980 + }, + { + "epoch": 0.7440385527728532, + "grad_norm": 0.08917935937643051, + "learning_rate": 7.69806503291697e-06, + "loss": 8.5988, + "step": 148990 + }, + { + "epoch": 0.7440884915977927, + "grad_norm": 0.09412242472171783, + "learning_rate": 7.696563117975419e-06, + "loss": 8.5932, + "step": 149000 + }, + { + "epoch": 0.7441384304227322, + "grad_norm": 0.09121372550725937, + "learning_rate": 7.695061203033867e-06, + "loss": 8.5708, + "step": 149010 + }, + { + "epoch": 0.7441883692476716, + "grad_norm": 0.09384799003601074, + "learning_rate": 7.693559288092317e-06, + "loss": 8.6003, + "step": 149020 + }, + { + "epoch": 0.744238308072611, + "grad_norm": 0.088929682970047, + "learning_rate": 7.692057373150767e-06, + "loss": 8.5807, + "step": 149030 + }, + { + "epoch": 0.7442882468975505, + "grad_norm": 0.09719398617744446, + "learning_rate": 7.690555458209218e-06, + "loss": 8.5885, + "step": 149040 + }, + { + "epoch": 0.74433818572249, + "grad_norm": 0.09361552447080612, + "learning_rate": 7.689053543267668e-06, + "loss": 8.5899, + "step": 149050 + }, + { + "epoch": 0.7443881245474294, + "grad_norm": 0.08958575874567032, + "learning_rate": 7.687551628326115e-06, + "loss": 8.6113, + "step": 149060 + }, + { + "epoch": 0.7444380633723688, + "grad_norm": 0.09068027883768082, + "learning_rate": 7.686049713384565e-06, + "loss": 8.6056, + "step": 149070 + }, + { + "epoch": 0.7444880021973083, + "grad_norm": 0.09241528064012527, + "learning_rate": 7.684547798443015e-06, + "loss": 8.5827, + "step": 149080 + }, + { + "epoch": 0.7445379410222478, + "grad_norm": 0.09474475681781769, + "learning_rate": 7.683045883501465e-06, + "loss": 8.6088, + "step": 149090 + }, + { + "epoch": 0.7445878798471872, + "grad_norm": 0.09101393073797226, + "learning_rate": 7.681543968559915e-06, + "loss": 8.5973, + "step": 149100 + }, + { + "epoch": 0.7446378186721266, + "grad_norm": 0.09199006855487823, + "learning_rate": 7.680042053618362e-06, + "loss": 8.6017, + "step": 149110 + }, + { + "epoch": 0.7446877574970661, + "grad_norm": 0.09310968220233917, + "learning_rate": 7.678540138676812e-06, + "loss": 8.5962, + "step": 149120 + }, + { + "epoch": 0.7447376963220056, + "grad_norm": 0.08850269764661789, + "learning_rate": 7.677038223735262e-06, + "loss": 8.5832, + "step": 149130 + }, + { + "epoch": 0.744787635146945, + "grad_norm": 0.09304803609848022, + "learning_rate": 7.675536308793713e-06, + "loss": 8.5942, + "step": 149140 + }, + { + "epoch": 0.7448375739718844, + "grad_norm": 0.0940072312951088, + "learning_rate": 7.674034393852163e-06, + "loss": 8.6149, + "step": 149150 + }, + { + "epoch": 0.7448875127968239, + "grad_norm": 0.09144038707017899, + "learning_rate": 7.672532478910611e-06, + "loss": 8.5879, + "step": 149160 + }, + { + "epoch": 0.7449374516217634, + "grad_norm": 0.09090874344110489, + "learning_rate": 7.67103056396906e-06, + "loss": 8.595, + "step": 149170 + }, + { + "epoch": 0.7449873904467028, + "grad_norm": 0.10227791219949722, + "learning_rate": 7.66952864902751e-06, + "loss": 8.611, + "step": 149180 + }, + { + "epoch": 0.7450373292716422, + "grad_norm": 0.09328952431678772, + "learning_rate": 7.66802673408596e-06, + "loss": 8.574, + "step": 149190 + }, + { + "epoch": 0.7450872680965817, + "grad_norm": 0.09365438669919968, + "learning_rate": 7.66652481914441e-06, + "loss": 8.6266, + "step": 149200 + }, + { + "epoch": 0.7451372069215212, + "grad_norm": 0.0949469655752182, + "learning_rate": 7.665022904202859e-06, + "loss": 8.6018, + "step": 149210 + }, + { + "epoch": 0.7451871457464606, + "grad_norm": 0.0964326560497284, + "learning_rate": 7.663520989261307e-06, + "loss": 8.5941, + "step": 149220 + }, + { + "epoch": 0.7452370845714, + "grad_norm": 0.091887928545475, + "learning_rate": 7.662019074319758e-06, + "loss": 8.5932, + "step": 149230 + }, + { + "epoch": 0.7452870233963395, + "grad_norm": 0.09236712753772736, + "learning_rate": 7.660517159378208e-06, + "loss": 8.5758, + "step": 149240 + }, + { + "epoch": 0.745336962221279, + "grad_norm": 0.08846684545278549, + "learning_rate": 7.659015244436658e-06, + "loss": 8.5856, + "step": 149250 + }, + { + "epoch": 0.7453869010462184, + "grad_norm": 0.10501750558614731, + "learning_rate": 7.657513329495106e-06, + "loss": 8.5935, + "step": 149260 + }, + { + "epoch": 0.7454368398711578, + "grad_norm": 0.09326668828725815, + "learning_rate": 7.656011414553555e-06, + "loss": 8.598, + "step": 149270 + }, + { + "epoch": 0.7454867786960973, + "grad_norm": 0.09294133633375168, + "learning_rate": 7.654509499612005e-06, + "loss": 8.5952, + "step": 149280 + }, + { + "epoch": 0.7455367175210368, + "grad_norm": 0.09185370057821274, + "learning_rate": 7.653007584670455e-06, + "loss": 8.6152, + "step": 149290 + }, + { + "epoch": 0.7455866563459762, + "grad_norm": 0.09958349168300629, + "learning_rate": 7.651505669728905e-06, + "loss": 8.5855, + "step": 149300 + }, + { + "epoch": 0.7456365951709156, + "grad_norm": 0.1002863273024559, + "learning_rate": 7.650003754787356e-06, + "loss": 8.5972, + "step": 149310 + }, + { + "epoch": 0.745686533995855, + "grad_norm": 0.09235794842243195, + "learning_rate": 7.648501839845802e-06, + "loss": 8.5912, + "step": 149320 + }, + { + "epoch": 0.7457364728207945, + "grad_norm": 0.09120310097932816, + "learning_rate": 7.646999924904253e-06, + "loss": 8.5867, + "step": 149330 + }, + { + "epoch": 0.745786411645734, + "grad_norm": 0.09124782681465149, + "learning_rate": 7.645498009962703e-06, + "loss": 8.5841, + "step": 149340 + }, + { + "epoch": 0.7458363504706734, + "grad_norm": 0.09484837204217911, + "learning_rate": 7.643996095021153e-06, + "loss": 8.5788, + "step": 149350 + }, + { + "epoch": 0.7458862892956128, + "grad_norm": 0.08904207497835159, + "learning_rate": 7.642494180079603e-06, + "loss": 8.5973, + "step": 149360 + }, + { + "epoch": 0.7459362281205523, + "grad_norm": 0.08774899691343307, + "learning_rate": 7.64099226513805e-06, + "loss": 8.5849, + "step": 149370 + }, + { + "epoch": 0.7459861669454918, + "grad_norm": 0.09321718662977219, + "learning_rate": 7.6394903501965e-06, + "loss": 8.598, + "step": 149380 + }, + { + "epoch": 0.7460361057704312, + "grad_norm": 0.09559307992458344, + "learning_rate": 7.63798843525495e-06, + "loss": 8.6053, + "step": 149390 + }, + { + "epoch": 0.7460860445953706, + "grad_norm": 0.09065421670675278, + "learning_rate": 7.6364865203134e-06, + "loss": 8.5822, + "step": 149400 + }, + { + "epoch": 0.7461359834203101, + "grad_norm": 0.08813871443271637, + "learning_rate": 7.63498460537185e-06, + "loss": 8.6158, + "step": 149410 + }, + { + "epoch": 0.7461859222452496, + "grad_norm": 0.09453373402357101, + "learning_rate": 7.633482690430299e-06, + "loss": 8.595, + "step": 149420 + }, + { + "epoch": 0.746235861070189, + "grad_norm": 0.09520356357097626, + "learning_rate": 7.631980775488748e-06, + "loss": 8.6, + "step": 149430 + }, + { + "epoch": 0.7462857998951284, + "grad_norm": 0.09131111204624176, + "learning_rate": 7.630478860547198e-06, + "loss": 8.6118, + "step": 149440 + }, + { + "epoch": 0.7463357387200679, + "grad_norm": 0.09047097712755203, + "learning_rate": 7.628976945605647e-06, + "loss": 8.598, + "step": 149450 + }, + { + "epoch": 0.7463856775450074, + "grad_norm": 0.09119818359613419, + "learning_rate": 7.627475030664097e-06, + "loss": 8.6094, + "step": 149460 + }, + { + "epoch": 0.7464356163699468, + "grad_norm": 0.0884728953242302, + "learning_rate": 7.625973115722547e-06, + "loss": 8.6118, + "step": 149470 + }, + { + "epoch": 0.7464855551948862, + "grad_norm": 0.0959334447979927, + "learning_rate": 7.624471200780995e-06, + "loss": 8.5847, + "step": 149480 + }, + { + "epoch": 0.7465354940198257, + "grad_norm": 0.08748113363981247, + "learning_rate": 7.622969285839445e-06, + "loss": 8.5978, + "step": 149490 + }, + { + "epoch": 0.7465854328447652, + "grad_norm": 0.09139006584882736, + "learning_rate": 7.6214673708978945e-06, + "loss": 8.5974, + "step": 149500 + }, + { + "epoch": 0.7466353716697046, + "grad_norm": 0.089291051030159, + "learning_rate": 7.619965455956345e-06, + "loss": 8.5852, + "step": 149510 + }, + { + "epoch": 0.746685310494644, + "grad_norm": 0.0897984728217125, + "learning_rate": 7.618463541014795e-06, + "loss": 8.5904, + "step": 149520 + }, + { + "epoch": 0.7467352493195835, + "grad_norm": 0.09387893229722977, + "learning_rate": 7.6169616260732425e-06, + "loss": 8.5993, + "step": 149530 + }, + { + "epoch": 0.746785188144523, + "grad_norm": 0.09599009901285172, + "learning_rate": 7.615459711131693e-06, + "loss": 8.5884, + "step": 149540 + }, + { + "epoch": 0.7468351269694624, + "grad_norm": 0.10485220700502396, + "learning_rate": 7.613957796190142e-06, + "loss": 8.5845, + "step": 149550 + }, + { + "epoch": 0.7468850657944018, + "grad_norm": 0.09003337472677231, + "learning_rate": 7.612455881248592e-06, + "loss": 8.5792, + "step": 149560 + }, + { + "epoch": 0.7469350046193413, + "grad_norm": 0.08689581602811813, + "learning_rate": 7.610953966307042e-06, + "loss": 8.5919, + "step": 149570 + }, + { + "epoch": 0.7469849434442808, + "grad_norm": 0.09046222269535065, + "learning_rate": 7.609452051365492e-06, + "loss": 8.6032, + "step": 149580 + }, + { + "epoch": 0.7470348822692202, + "grad_norm": 0.0931020975112915, + "learning_rate": 7.60795013642394e-06, + "loss": 8.5898, + "step": 149590 + }, + { + "epoch": 0.7470848210941596, + "grad_norm": 0.09115353971719742, + "learning_rate": 7.6064482214823895e-06, + "loss": 8.5903, + "step": 149600 + }, + { + "epoch": 0.7471347599190991, + "grad_norm": 0.095846027135849, + "learning_rate": 7.60494630654084e-06, + "loss": 8.5989, + "step": 149610 + }, + { + "epoch": 0.7471846987440386, + "grad_norm": 0.0902191624045372, + "learning_rate": 7.60344439159929e-06, + "loss": 8.5844, + "step": 149620 + }, + { + "epoch": 0.747234637568978, + "grad_norm": 0.08753103017807007, + "learning_rate": 7.601942476657739e-06, + "loss": 8.5876, + "step": 149630 + }, + { + "epoch": 0.7472845763939174, + "grad_norm": 0.09205235540866852, + "learning_rate": 7.600440561716188e-06, + "loss": 8.6017, + "step": 149640 + }, + { + "epoch": 0.7473345152188569, + "grad_norm": 0.08869083225727081, + "learning_rate": 7.598938646774637e-06, + "loss": 8.5879, + "step": 149650 + }, + { + "epoch": 0.7473844540437964, + "grad_norm": 0.09355755895376205, + "learning_rate": 7.597436731833087e-06, + "loss": 8.6161, + "step": 149660 + }, + { + "epoch": 0.7474343928687358, + "grad_norm": 0.09638141095638275, + "learning_rate": 7.595934816891537e-06, + "loss": 8.5874, + "step": 149670 + }, + { + "epoch": 0.7474843316936752, + "grad_norm": 0.09386901557445526, + "learning_rate": 7.594432901949987e-06, + "loss": 8.596, + "step": 149680 + }, + { + "epoch": 0.7475342705186147, + "grad_norm": 0.08905579149723053, + "learning_rate": 7.592930987008435e-06, + "loss": 8.5965, + "step": 149690 + }, + { + "epoch": 0.7475842093435542, + "grad_norm": 0.09240230172872543, + "learning_rate": 7.5914290720668845e-06, + "loss": 8.5916, + "step": 149700 + }, + { + "epoch": 0.7476341481684936, + "grad_norm": 0.08725178986787796, + "learning_rate": 7.589927157125335e-06, + "loss": 8.61, + "step": 149710 + }, + { + "epoch": 0.747684086993433, + "grad_norm": 0.09127280861139297, + "learning_rate": 7.588425242183785e-06, + "loss": 8.5987, + "step": 149720 + }, + { + "epoch": 0.7477340258183724, + "grad_norm": 0.09402144700288773, + "learning_rate": 7.586923327242234e-06, + "loss": 8.6062, + "step": 149730 + }, + { + "epoch": 0.747783964643312, + "grad_norm": 0.08989568054676056, + "learning_rate": 7.585421412300684e-06, + "loss": 8.5849, + "step": 149740 + }, + { + "epoch": 0.7478339034682514, + "grad_norm": 0.09065549820661545, + "learning_rate": 7.583919497359133e-06, + "loss": 8.5967, + "step": 149750 + }, + { + "epoch": 0.7478838422931908, + "grad_norm": 0.09307155758142471, + "learning_rate": 7.582417582417582e-06, + "loss": 8.6004, + "step": 149760 + }, + { + "epoch": 0.7479337811181302, + "grad_norm": 0.09423384815454483, + "learning_rate": 7.580915667476032e-06, + "loss": 8.595, + "step": 149770 + }, + { + "epoch": 0.7479837199430698, + "grad_norm": 0.09108050912618637, + "learning_rate": 7.579413752534482e-06, + "loss": 8.5889, + "step": 149780 + }, + { + "epoch": 0.7480336587680092, + "grad_norm": 0.0906979888677597, + "learning_rate": 7.577911837592932e-06, + "loss": 8.5869, + "step": 149790 + }, + { + "epoch": 0.7480835975929486, + "grad_norm": 0.09189769625663757, + "learning_rate": 7.57640992265138e-06, + "loss": 8.5869, + "step": 149800 + }, + { + "epoch": 0.748133536417888, + "grad_norm": 0.09509114921092987, + "learning_rate": 7.57490800770983e-06, + "loss": 8.5979, + "step": 149810 + }, + { + "epoch": 0.7481834752428276, + "grad_norm": 0.09381816536188126, + "learning_rate": 7.57340609276828e-06, + "loss": 8.5848, + "step": 149820 + }, + { + "epoch": 0.748233414067767, + "grad_norm": 0.10044172406196594, + "learning_rate": 7.571904177826729e-06, + "loss": 8.568, + "step": 149830 + }, + { + "epoch": 0.7482833528927064, + "grad_norm": 0.09521984308958054, + "learning_rate": 7.570402262885179e-06, + "loss": 8.5959, + "step": 149840 + }, + { + "epoch": 0.7483332917176458, + "grad_norm": 0.09529656916856766, + "learning_rate": 7.568900347943628e-06, + "loss": 8.5873, + "step": 149850 + }, + { + "epoch": 0.7483832305425854, + "grad_norm": 0.09173472225666046, + "learning_rate": 7.567398433002077e-06, + "loss": 8.6029, + "step": 149860 + }, + { + "epoch": 0.7484331693675248, + "grad_norm": 0.0962473452091217, + "learning_rate": 7.565896518060527e-06, + "loss": 8.6064, + "step": 149870 + }, + { + "epoch": 0.7484831081924642, + "grad_norm": 0.0965263694524765, + "learning_rate": 7.564394603118977e-06, + "loss": 8.5685, + "step": 149880 + }, + { + "epoch": 0.7485330470174036, + "grad_norm": 0.09147603064775467, + "learning_rate": 7.562892688177427e-06, + "loss": 8.5998, + "step": 149890 + }, + { + "epoch": 0.7485829858423432, + "grad_norm": 0.09058742225170135, + "learning_rate": 7.561390773235877e-06, + "loss": 8.5803, + "step": 149900 + }, + { + "epoch": 0.7486329246672826, + "grad_norm": 0.09538568556308746, + "learning_rate": 7.559888858294325e-06, + "loss": 8.5839, + "step": 149910 + }, + { + "epoch": 0.748682863492222, + "grad_norm": 0.08751174807548523, + "learning_rate": 7.558386943352775e-06, + "loss": 8.5857, + "step": 149920 + }, + { + "epoch": 0.7487328023171614, + "grad_norm": 0.09170546382665634, + "learning_rate": 7.556885028411224e-06, + "loss": 8.5967, + "step": 149930 + }, + { + "epoch": 0.748782741142101, + "grad_norm": 0.09635576605796814, + "learning_rate": 7.5553831134696744e-06, + "loss": 8.59, + "step": 149940 + }, + { + "epoch": 0.7488326799670404, + "grad_norm": 0.09509419649839401, + "learning_rate": 7.553881198528125e-06, + "loss": 8.5917, + "step": 149950 + }, + { + "epoch": 0.7488826187919798, + "grad_norm": 0.08711056411266327, + "learning_rate": 7.552379283586572e-06, + "loss": 8.5994, + "step": 149960 + }, + { + "epoch": 0.7489325576169192, + "grad_norm": 0.0929313451051712, + "learning_rate": 7.550877368645022e-06, + "loss": 8.5855, + "step": 149970 + }, + { + "epoch": 0.7489824964418588, + "grad_norm": 0.09222406148910522, + "learning_rate": 7.549375453703472e-06, + "loss": 8.6066, + "step": 149980 + }, + { + "epoch": 0.7490324352667982, + "grad_norm": 0.0890507623553276, + "learning_rate": 7.547873538761922e-06, + "loss": 8.6042, + "step": 149990 + }, + { + "epoch": 0.7490823740917376, + "grad_norm": 0.08804387599229813, + "learning_rate": 7.546371623820372e-06, + "loss": 8.6063, + "step": 150000 + }, + { + "epoch": 0.749132312916677, + "grad_norm": 0.0897492840886116, + "learning_rate": 7.54486970887882e-06, + "loss": 8.5969, + "step": 150010 + }, + { + "epoch": 0.7491822517416166, + "grad_norm": 0.08836290240287781, + "learning_rate": 7.54336779393727e-06, + "loss": 8.5929, + "step": 150020 + }, + { + "epoch": 0.749232190566556, + "grad_norm": 0.09971511363983154, + "learning_rate": 7.541865878995719e-06, + "loss": 8.5952, + "step": 150030 + }, + { + "epoch": 0.7492821293914954, + "grad_norm": 0.09459482133388519, + "learning_rate": 7.5403639640541694e-06, + "loss": 8.5777, + "step": 150040 + }, + { + "epoch": 0.7493320682164348, + "grad_norm": 0.09455052018165588, + "learning_rate": 7.53886204911262e-06, + "loss": 8.5771, + "step": 150050 + }, + { + "epoch": 0.7493820070413744, + "grad_norm": 0.09091456979513168, + "learning_rate": 7.537360134171069e-06, + "loss": 8.6112, + "step": 150060 + }, + { + "epoch": 0.7494319458663138, + "grad_norm": 0.08837003260850906, + "learning_rate": 7.5358582192295174e-06, + "loss": 8.5919, + "step": 150070 + }, + { + "epoch": 0.7494818846912532, + "grad_norm": 0.09216312319040298, + "learning_rate": 7.534356304287967e-06, + "loss": 8.5865, + "step": 150080 + }, + { + "epoch": 0.7495318235161926, + "grad_norm": 0.09196959435939789, + "learning_rate": 7.532854389346417e-06, + "loss": 8.5715, + "step": 150090 + }, + { + "epoch": 0.7495817623411322, + "grad_norm": 0.0905485451221466, + "learning_rate": 7.531352474404867e-06, + "loss": 8.5851, + "step": 150100 + }, + { + "epoch": 0.7496317011660716, + "grad_norm": 0.0880269706249237, + "learning_rate": 7.5298505594633164e-06, + "loss": 8.5813, + "step": 150110 + }, + { + "epoch": 0.749681639991011, + "grad_norm": 0.08941976726055145, + "learning_rate": 7.528348644521765e-06, + "loss": 8.5816, + "step": 150120 + }, + { + "epoch": 0.7497315788159504, + "grad_norm": 0.08973130583763123, + "learning_rate": 7.526846729580214e-06, + "loss": 8.6029, + "step": 150130 + }, + { + "epoch": 0.74978151764089, + "grad_norm": 0.0887095034122467, + "learning_rate": 7.5253448146386644e-06, + "loss": 8.6152, + "step": 150140 + }, + { + "epoch": 0.7498314564658294, + "grad_norm": 0.09397480636835098, + "learning_rate": 7.523842899697115e-06, + "loss": 8.5888, + "step": 150150 + }, + { + "epoch": 0.7498813952907688, + "grad_norm": 0.09192110598087311, + "learning_rate": 7.522340984755564e-06, + "loss": 8.5769, + "step": 150160 + }, + { + "epoch": 0.7499313341157082, + "grad_norm": 0.09322792291641235, + "learning_rate": 7.5208390698140124e-06, + "loss": 8.5972, + "step": 150170 + }, + { + "epoch": 0.7499812729406478, + "grad_norm": 0.08950521796941757, + "learning_rate": 7.519337154872462e-06, + "loss": 8.5905, + "step": 150180 + }, + { + "epoch": 0.7500312117655872, + "grad_norm": 0.0880652666091919, + "learning_rate": 7.517835239930912e-06, + "loss": 8.6012, + "step": 150190 + }, + { + "epoch": 0.7500811505905266, + "grad_norm": 0.09776634722948074, + "learning_rate": 7.516333324989362e-06, + "loss": 8.5777, + "step": 150200 + }, + { + "epoch": 0.750131089415466, + "grad_norm": 0.09283629804849625, + "learning_rate": 7.5148314100478115e-06, + "loss": 8.5807, + "step": 150210 + }, + { + "epoch": 0.7501810282404056, + "grad_norm": 0.09522504359483719, + "learning_rate": 7.513329495106262e-06, + "loss": 8.576, + "step": 150220 + }, + { + "epoch": 0.750230967065345, + "grad_norm": 0.09157579392194748, + "learning_rate": 7.511827580164709e-06, + "loss": 8.5927, + "step": 150230 + }, + { + "epoch": 0.7502809058902844, + "grad_norm": 0.09480082988739014, + "learning_rate": 7.5103256652231594e-06, + "loss": 8.6008, + "step": 150240 + }, + { + "epoch": 0.7503308447152238, + "grad_norm": 0.0909399539232254, + "learning_rate": 7.50882375028161e-06, + "loss": 8.5823, + "step": 150250 + }, + { + "epoch": 0.7503807835401634, + "grad_norm": 0.09307267516851425, + "learning_rate": 7.507321835340059e-06, + "loss": 8.5907, + "step": 150260 + }, + { + "epoch": 0.7504307223651028, + "grad_norm": 0.08955024927854538, + "learning_rate": 7.505819920398509e-06, + "loss": 8.5954, + "step": 150270 + }, + { + "epoch": 0.7504806611900422, + "grad_norm": 0.09251957386732101, + "learning_rate": 7.504318005456957e-06, + "loss": 8.5802, + "step": 150280 + }, + { + "epoch": 0.7505306000149816, + "grad_norm": 0.09843941777944565, + "learning_rate": 7.502816090515407e-06, + "loss": 8.5834, + "step": 150290 + }, + { + "epoch": 0.7505805388399212, + "grad_norm": 0.09598127752542496, + "learning_rate": 7.501314175573857e-06, + "loss": 8.599, + "step": 150300 + }, + { + "epoch": 0.7506304776648606, + "grad_norm": 0.09445951879024506, + "learning_rate": 7.4998122606323065e-06, + "loss": 8.5844, + "step": 150310 + }, + { + "epoch": 0.7506804164898, + "grad_norm": 0.08694125711917877, + "learning_rate": 7.498310345690756e-06, + "loss": 8.606, + "step": 150320 + }, + { + "epoch": 0.7507303553147394, + "grad_norm": 0.08730349689722061, + "learning_rate": 7.496808430749205e-06, + "loss": 8.5941, + "step": 150330 + }, + { + "epoch": 0.7507802941396788, + "grad_norm": 0.0891655758023262, + "learning_rate": 7.495306515807655e-06, + "loss": 8.5755, + "step": 150340 + }, + { + "epoch": 0.7508302329646184, + "grad_norm": 0.08927493542432785, + "learning_rate": 7.493804600866105e-06, + "loss": 8.5948, + "step": 150350 + }, + { + "epoch": 0.7508801717895578, + "grad_norm": 0.09330999106168747, + "learning_rate": 7.492302685924554e-06, + "loss": 8.5993, + "step": 150360 + }, + { + "epoch": 0.7509301106144972, + "grad_norm": 0.09538490325212479, + "learning_rate": 7.490800770983003e-06, + "loss": 8.5914, + "step": 150370 + }, + { + "epoch": 0.7509800494394366, + "grad_norm": 0.09310005605220795, + "learning_rate": 7.489298856041453e-06, + "loss": 8.5874, + "step": 150380 + }, + { + "epoch": 0.7510299882643762, + "grad_norm": 0.0930032730102539, + "learning_rate": 7.487796941099903e-06, + "loss": 8.5895, + "step": 150390 + }, + { + "epoch": 0.7510799270893156, + "grad_norm": 0.08846841007471085, + "learning_rate": 7.486295026158352e-06, + "loss": 8.5724, + "step": 150400 + }, + { + "epoch": 0.751129865914255, + "grad_norm": 0.09779760241508484, + "learning_rate": 7.4847931112168015e-06, + "loss": 8.5736, + "step": 150410 + }, + { + "epoch": 0.7511798047391944, + "grad_norm": 0.10576785355806351, + "learning_rate": 7.483291196275252e-06, + "loss": 8.5726, + "step": 150420 + }, + { + "epoch": 0.751229743564134, + "grad_norm": 0.09451400488615036, + "learning_rate": 7.4817892813337e-06, + "loss": 8.5857, + "step": 150430 + }, + { + "epoch": 0.7512796823890734, + "grad_norm": 0.09466114640235901, + "learning_rate": 7.48028736639215e-06, + "loss": 8.597, + "step": 150440 + }, + { + "epoch": 0.7513296212140128, + "grad_norm": 0.08923625946044922, + "learning_rate": 7.4787854514506e-06, + "loss": 8.6096, + "step": 150450 + }, + { + "epoch": 0.7513795600389522, + "grad_norm": 0.08818081766366959, + "learning_rate": 7.477283536509049e-06, + "loss": 8.604, + "step": 150460 + }, + { + "epoch": 0.7514294988638918, + "grad_norm": 0.08820995688438416, + "learning_rate": 7.475781621567499e-06, + "loss": 8.5916, + "step": 150470 + }, + { + "epoch": 0.7514794376888312, + "grad_norm": 0.09002698957920074, + "learning_rate": 7.474279706625948e-06, + "loss": 8.5879, + "step": 150480 + }, + { + "epoch": 0.7515293765137706, + "grad_norm": 0.09170237928628922, + "learning_rate": 7.472777791684398e-06, + "loss": 8.5907, + "step": 150490 + }, + { + "epoch": 0.75157931533871, + "grad_norm": 0.09593456238508224, + "learning_rate": 7.471275876742848e-06, + "loss": 8.5872, + "step": 150500 + }, + { + "epoch": 0.7516292541636496, + "grad_norm": 0.09875084459781647, + "learning_rate": 7.4697739618012965e-06, + "loss": 8.5823, + "step": 150510 + }, + { + "epoch": 0.751679192988589, + "grad_norm": 0.09118742495775223, + "learning_rate": 7.468272046859747e-06, + "loss": 8.5823, + "step": 150520 + }, + { + "epoch": 0.7517291318135284, + "grad_norm": 0.09258361160755157, + "learning_rate": 7.466770131918195e-06, + "loss": 8.5972, + "step": 150530 + }, + { + "epoch": 0.7517790706384678, + "grad_norm": 0.08977973461151123, + "learning_rate": 7.465268216976645e-06, + "loss": 8.6059, + "step": 150540 + }, + { + "epoch": 0.7518290094634074, + "grad_norm": 0.092478908598423, + "learning_rate": 7.4637663020350955e-06, + "loss": 8.5935, + "step": 150550 + }, + { + "epoch": 0.7518789482883468, + "grad_norm": 0.09068572521209717, + "learning_rate": 7.462264387093544e-06, + "loss": 8.5762, + "step": 150560 + }, + { + "epoch": 0.7519288871132862, + "grad_norm": 0.09068078547716141, + "learning_rate": 7.460762472151994e-06, + "loss": 8.5839, + "step": 150570 + }, + { + "epoch": 0.7519788259382256, + "grad_norm": 0.09269540011882782, + "learning_rate": 7.4592605572104435e-06, + "loss": 8.5909, + "step": 150580 + }, + { + "epoch": 0.7520287647631652, + "grad_norm": 0.0924522653222084, + "learning_rate": 7.457758642268893e-06, + "loss": 8.5919, + "step": 150590 + }, + { + "epoch": 0.7520787035881046, + "grad_norm": 0.09281891584396362, + "learning_rate": 7.456256727327343e-06, + "loss": 8.5784, + "step": 150600 + }, + { + "epoch": 0.752128642413044, + "grad_norm": 0.08694092184305191, + "learning_rate": 7.4547548123857915e-06, + "loss": 8.5848, + "step": 150610 + }, + { + "epoch": 0.7521785812379834, + "grad_norm": 0.10442210733890533, + "learning_rate": 7.453252897444242e-06, + "loss": 8.5919, + "step": 150620 + }, + { + "epoch": 0.752228520062923, + "grad_norm": 0.09364094585180283, + "learning_rate": 7.451750982502691e-06, + "loss": 8.6023, + "step": 150630 + }, + { + "epoch": 0.7522784588878624, + "grad_norm": 0.08750166743993759, + "learning_rate": 7.45024906756114e-06, + "loss": 8.5849, + "step": 150640 + }, + { + "epoch": 0.7523283977128018, + "grad_norm": 0.08991952240467072, + "learning_rate": 7.4487471526195905e-06, + "loss": 8.5911, + "step": 150650 + }, + { + "epoch": 0.7523783365377412, + "grad_norm": 0.10013854503631592, + "learning_rate": 7.44724523767804e-06, + "loss": 8.5982, + "step": 150660 + }, + { + "epoch": 0.7524282753626808, + "grad_norm": 0.09347521513700485, + "learning_rate": 7.445743322736489e-06, + "loss": 8.5847, + "step": 150670 + }, + { + "epoch": 0.7524782141876202, + "grad_norm": 0.09399186819791794, + "learning_rate": 7.4442414077949385e-06, + "loss": 8.6024, + "step": 150680 + }, + { + "epoch": 0.7525281530125596, + "grad_norm": 0.09026849269866943, + "learning_rate": 7.442739492853388e-06, + "loss": 8.5992, + "step": 150690 + }, + { + "epoch": 0.752578091837499, + "grad_norm": 0.08900460600852966, + "learning_rate": 7.441237577911838e-06, + "loss": 8.5872, + "step": 150700 + }, + { + "epoch": 0.7526280306624386, + "grad_norm": 0.09363497048616409, + "learning_rate": 7.439735662970287e-06, + "loss": 8.5889, + "step": 150710 + }, + { + "epoch": 0.752677969487378, + "grad_norm": 0.09363177418708801, + "learning_rate": 7.438233748028737e-06, + "loss": 8.5861, + "step": 150720 + }, + { + "epoch": 0.7527279083123174, + "grad_norm": 0.08974208682775497, + "learning_rate": 7.436731833087186e-06, + "loss": 8.5841, + "step": 150730 + }, + { + "epoch": 0.7527778471372568, + "grad_norm": 0.09006848931312561, + "learning_rate": 7.435229918145636e-06, + "loss": 8.5754, + "step": 150740 + }, + { + "epoch": 0.7528277859621963, + "grad_norm": 0.08987919241189957, + "learning_rate": 7.4337280032040855e-06, + "loss": 8.5911, + "step": 150750 + }, + { + "epoch": 0.7528777247871358, + "grad_norm": 0.0890379548072815, + "learning_rate": 7.432226088262535e-06, + "loss": 8.5964, + "step": 150760 + }, + { + "epoch": 0.7529276636120752, + "grad_norm": 0.08632665872573853, + "learning_rate": 7.430724173320984e-06, + "loss": 8.5989, + "step": 150770 + }, + { + "epoch": 0.7529776024370146, + "grad_norm": 0.09325092285871506, + "learning_rate": 7.4292222583794335e-06, + "loss": 8.5849, + "step": 150780 + }, + { + "epoch": 0.7530275412619541, + "grad_norm": 0.09268093854188919, + "learning_rate": 7.427720343437884e-06, + "loss": 8.5921, + "step": 150790 + }, + { + "epoch": 0.7530774800868936, + "grad_norm": 0.09364005923271179, + "learning_rate": 7.426218428496333e-06, + "loss": 8.5925, + "step": 150800 + }, + { + "epoch": 0.753127418911833, + "grad_norm": 0.08780599385499954, + "learning_rate": 7.424716513554782e-06, + "loss": 8.601, + "step": 150810 + }, + { + "epoch": 0.7531773577367724, + "grad_norm": 0.0943470448255539, + "learning_rate": 7.4232145986132325e-06, + "loss": 8.6098, + "step": 150820 + }, + { + "epoch": 0.753227296561712, + "grad_norm": 0.08829768002033234, + "learning_rate": 7.421712683671681e-06, + "loss": 8.5951, + "step": 150830 + }, + { + "epoch": 0.7532772353866514, + "grad_norm": 0.0940447673201561, + "learning_rate": 7.420210768730131e-06, + "loss": 8.5996, + "step": 150840 + }, + { + "epoch": 0.7533271742115908, + "grad_norm": 0.09546596556901932, + "learning_rate": 7.4187088537885805e-06, + "loss": 8.5794, + "step": 150850 + }, + { + "epoch": 0.7533771130365302, + "grad_norm": 0.09206235408782959, + "learning_rate": 7.41720693884703e-06, + "loss": 8.5881, + "step": 150860 + }, + { + "epoch": 0.7534270518614697, + "grad_norm": 0.08884277194738388, + "learning_rate": 7.41570502390548e-06, + "loss": 8.6059, + "step": 150870 + }, + { + "epoch": 0.7534769906864092, + "grad_norm": 0.0894254744052887, + "learning_rate": 7.414203108963929e-06, + "loss": 8.6018, + "step": 150880 + }, + { + "epoch": 0.7535269295113486, + "grad_norm": 0.08869513869285583, + "learning_rate": 7.412701194022379e-06, + "loss": 8.5989, + "step": 150890 + }, + { + "epoch": 0.753576868336288, + "grad_norm": 0.08933467417955399, + "learning_rate": 7.411199279080829e-06, + "loss": 8.5957, + "step": 150900 + }, + { + "epoch": 0.7536268071612275, + "grad_norm": 0.09024026989936829, + "learning_rate": 7.409697364139277e-06, + "loss": 8.5688, + "step": 150910 + }, + { + "epoch": 0.753676745986167, + "grad_norm": 0.08978093415498734, + "learning_rate": 7.4081954491977275e-06, + "loss": 8.5984, + "step": 150920 + }, + { + "epoch": 0.7537266848111064, + "grad_norm": 0.09174445271492004, + "learning_rate": 7.406693534256177e-06, + "loss": 8.5912, + "step": 150930 + }, + { + "epoch": 0.7537766236360458, + "grad_norm": 0.09789343923330307, + "learning_rate": 7.405191619314626e-06, + "loss": 8.5811, + "step": 150940 + }, + { + "epoch": 0.7538265624609853, + "grad_norm": 0.09015634655952454, + "learning_rate": 7.403689704373076e-06, + "loss": 8.5899, + "step": 150950 + }, + { + "epoch": 0.7538765012859248, + "grad_norm": 0.08942896872758865, + "learning_rate": 7.402187789431525e-06, + "loss": 8.5947, + "step": 150960 + }, + { + "epoch": 0.7539264401108642, + "grad_norm": 0.09148923307657242, + "learning_rate": 7.400685874489975e-06, + "loss": 8.5759, + "step": 150970 + }, + { + "epoch": 0.7539763789358036, + "grad_norm": 0.09370266646146774, + "learning_rate": 7.399183959548425e-06, + "loss": 8.5746, + "step": 150980 + }, + { + "epoch": 0.7540263177607431, + "grad_norm": 0.09445227682590485, + "learning_rate": 7.397682044606874e-06, + "loss": 8.6121, + "step": 150990 + }, + { + "epoch": 0.7540762565856826, + "grad_norm": 0.08653920888900757, + "learning_rate": 7.396180129665324e-06, + "loss": 8.5719, + "step": 151000 + }, + { + "epoch": 0.754126195410622, + "grad_norm": 0.09342148154973984, + "learning_rate": 7.394678214723772e-06, + "loss": 8.5733, + "step": 151010 + }, + { + "epoch": 0.7541761342355614, + "grad_norm": 0.09228193759918213, + "learning_rate": 7.3931762997822225e-06, + "loss": 8.597, + "step": 151020 + }, + { + "epoch": 0.7542260730605009, + "grad_norm": 0.08755730837583542, + "learning_rate": 7.391674384840673e-06, + "loss": 8.5821, + "step": 151030 + }, + { + "epoch": 0.7542760118854404, + "grad_norm": 0.08993065357208252, + "learning_rate": 7.390172469899121e-06, + "loss": 8.6094, + "step": 151040 + }, + { + "epoch": 0.7543259507103798, + "grad_norm": 0.08330941945314407, + "learning_rate": 7.388670554957571e-06, + "loss": 8.6262, + "step": 151050 + }, + { + "epoch": 0.7543758895353192, + "grad_norm": 0.09145517647266388, + "learning_rate": 7.387168640016021e-06, + "loss": 8.5803, + "step": 151060 + }, + { + "epoch": 0.7544258283602587, + "grad_norm": 0.09170622378587723, + "learning_rate": 7.38566672507447e-06, + "loss": 8.568, + "step": 151070 + }, + { + "epoch": 0.7544757671851982, + "grad_norm": 0.08807757496833801, + "learning_rate": 7.38416481013292e-06, + "loss": 8.5906, + "step": 151080 + }, + { + "epoch": 0.7545257060101376, + "grad_norm": 0.09455586969852448, + "learning_rate": 7.382662895191369e-06, + "loss": 8.6016, + "step": 151090 + }, + { + "epoch": 0.754575644835077, + "grad_norm": 0.08837214857339859, + "learning_rate": 7.381160980249819e-06, + "loss": 8.5958, + "step": 151100 + }, + { + "epoch": 0.7546255836600165, + "grad_norm": 0.08906712383031845, + "learning_rate": 7.379659065308268e-06, + "loss": 8.5873, + "step": 151110 + }, + { + "epoch": 0.754675522484956, + "grad_norm": 0.10366376489400864, + "learning_rate": 7.3781571503667175e-06, + "loss": 8.5838, + "step": 151120 + }, + { + "epoch": 0.7547254613098954, + "grad_norm": 0.08967901021242142, + "learning_rate": 7.376655235425168e-06, + "loss": 8.5893, + "step": 151130 + }, + { + "epoch": 0.7547754001348348, + "grad_norm": 0.0880189910531044, + "learning_rate": 7.375153320483617e-06, + "loss": 8.5897, + "step": 151140 + }, + { + "epoch": 0.7548253389597743, + "grad_norm": 0.09614763408899307, + "learning_rate": 7.373651405542066e-06, + "loss": 8.5823, + "step": 151150 + }, + { + "epoch": 0.7548752777847137, + "grad_norm": 0.0998152643442154, + "learning_rate": 7.372149490600516e-06, + "loss": 8.5901, + "step": 151160 + }, + { + "epoch": 0.7549252166096532, + "grad_norm": 0.09459684044122696, + "learning_rate": 7.370647575658965e-06, + "loss": 8.5769, + "step": 151170 + }, + { + "epoch": 0.7549751554345926, + "grad_norm": 0.09051591157913208, + "learning_rate": 7.369145660717415e-06, + "loss": 8.593, + "step": 151180 + }, + { + "epoch": 0.7550250942595321, + "grad_norm": 0.0956014096736908, + "learning_rate": 7.3676437457758646e-06, + "loss": 8.5867, + "step": 151190 + }, + { + "epoch": 0.7550750330844715, + "grad_norm": 0.10061989724636078, + "learning_rate": 7.366141830834314e-06, + "loss": 8.5855, + "step": 151200 + }, + { + "epoch": 0.755124971909411, + "grad_norm": 0.09272131323814392, + "learning_rate": 7.364639915892763e-06, + "loss": 8.5966, + "step": 151210 + }, + { + "epoch": 0.7551749107343504, + "grad_norm": 0.09143613278865814, + "learning_rate": 7.363138000951213e-06, + "loss": 8.6061, + "step": 151220 + }, + { + "epoch": 0.7552248495592899, + "grad_norm": 0.09460750222206116, + "learning_rate": 7.361636086009663e-06, + "loss": 8.5901, + "step": 151230 + }, + { + "epoch": 0.7552747883842293, + "grad_norm": 0.09221066534519196, + "learning_rate": 7.360134171068112e-06, + "loss": 8.5844, + "step": 151240 + }, + { + "epoch": 0.7553247272091688, + "grad_norm": 0.09496848285198212, + "learning_rate": 7.358632256126561e-06, + "loss": 8.5875, + "step": 151250 + }, + { + "epoch": 0.7553746660341082, + "grad_norm": 0.0936916247010231, + "learning_rate": 7.357130341185011e-06, + "loss": 8.5976, + "step": 151260 + }, + { + "epoch": 0.7554246048590477, + "grad_norm": 0.09380991011857986, + "learning_rate": 7.355628426243461e-06, + "loss": 8.6017, + "step": 151270 + }, + { + "epoch": 0.7554745436839871, + "grad_norm": 0.09433276206254959, + "learning_rate": 7.35412651130191e-06, + "loss": 8.5803, + "step": 151280 + }, + { + "epoch": 0.7555244825089266, + "grad_norm": 0.08973414450883865, + "learning_rate": 7.3526245963603596e-06, + "loss": 8.5969, + "step": 151290 + }, + { + "epoch": 0.755574421333866, + "grad_norm": 0.09096003323793411, + "learning_rate": 7.35112268141881e-06, + "loss": 8.5919, + "step": 151300 + }, + { + "epoch": 0.7556243601588054, + "grad_norm": 0.0915827602148056, + "learning_rate": 7.349620766477258e-06, + "loss": 8.5893, + "step": 151310 + }, + { + "epoch": 0.7556742989837449, + "grad_norm": 0.09033535420894623, + "learning_rate": 7.348118851535708e-06, + "loss": 8.5979, + "step": 151320 + }, + { + "epoch": 0.7557242378086844, + "grad_norm": 0.0904705822467804, + "learning_rate": 7.346616936594158e-06, + "loss": 8.5985, + "step": 151330 + }, + { + "epoch": 0.7557741766336238, + "grad_norm": 0.0926397368311882, + "learning_rate": 7.345115021652607e-06, + "loss": 8.5686, + "step": 151340 + }, + { + "epoch": 0.7558241154585632, + "grad_norm": 0.09501095861196518, + "learning_rate": 7.343613106711057e-06, + "loss": 8.5688, + "step": 151350 + }, + { + "epoch": 0.7558740542835027, + "grad_norm": 0.09006702154874802, + "learning_rate": 7.342111191769506e-06, + "loss": 8.588, + "step": 151360 + }, + { + "epoch": 0.7559239931084422, + "grad_norm": 0.09147243946790695, + "learning_rate": 7.340609276827956e-06, + "loss": 8.5847, + "step": 151370 + }, + { + "epoch": 0.7559739319333816, + "grad_norm": 0.09361589699983597, + "learning_rate": 7.339107361886406e-06, + "loss": 8.5841, + "step": 151380 + }, + { + "epoch": 0.756023870758321, + "grad_norm": 0.0927942767739296, + "learning_rate": 7.3376054469448546e-06, + "loss": 8.6122, + "step": 151390 + }, + { + "epoch": 0.7560738095832605, + "grad_norm": 0.0871824249625206, + "learning_rate": 7.336103532003305e-06, + "loss": 8.5985, + "step": 151400 + }, + { + "epoch": 0.7561237484082, + "grad_norm": 0.09094393253326416, + "learning_rate": 7.334601617061753e-06, + "loss": 8.5906, + "step": 151410 + }, + { + "epoch": 0.7561736872331394, + "grad_norm": 0.08855657279491425, + "learning_rate": 7.333099702120203e-06, + "loss": 8.5768, + "step": 151420 + }, + { + "epoch": 0.7562236260580788, + "grad_norm": 0.09399604797363281, + "learning_rate": 7.331597787178654e-06, + "loss": 8.5941, + "step": 151430 + }, + { + "epoch": 0.7562735648830183, + "grad_norm": 0.09146516025066376, + "learning_rate": 7.330095872237102e-06, + "loss": 8.5875, + "step": 151440 + }, + { + "epoch": 0.7563235037079578, + "grad_norm": 0.09131018072366714, + "learning_rate": 7.328593957295552e-06, + "loss": 8.5829, + "step": 151450 + }, + { + "epoch": 0.7563734425328972, + "grad_norm": 0.09400021284818649, + "learning_rate": 7.327092042354002e-06, + "loss": 8.5939, + "step": 151460 + }, + { + "epoch": 0.7564233813578366, + "grad_norm": 0.08741091191768646, + "learning_rate": 7.325590127412451e-06, + "loss": 8.5914, + "step": 151470 + }, + { + "epoch": 0.7564733201827761, + "grad_norm": 0.09049748629331589, + "learning_rate": 7.324088212470901e-06, + "loss": 8.5944, + "step": 151480 + }, + { + "epoch": 0.7565232590077156, + "grad_norm": 0.09107566624879837, + "learning_rate": 7.32258629752935e-06, + "loss": 8.5838, + "step": 151490 + }, + { + "epoch": 0.756573197832655, + "grad_norm": 0.08954470604658127, + "learning_rate": 7.3210843825878e-06, + "loss": 8.5864, + "step": 151500 + }, + { + "epoch": 0.7566231366575944, + "grad_norm": 0.09204570204019547, + "learning_rate": 7.319582467646249e-06, + "loss": 8.5808, + "step": 151510 + }, + { + "epoch": 0.7566730754825339, + "grad_norm": 0.09428434073925018, + "learning_rate": 7.318080552704698e-06, + "loss": 8.5759, + "step": 151520 + }, + { + "epoch": 0.7567230143074734, + "grad_norm": 0.08692929893732071, + "learning_rate": 7.316578637763149e-06, + "loss": 8.5886, + "step": 151530 + }, + { + "epoch": 0.7567729531324128, + "grad_norm": 0.08495133370161057, + "learning_rate": 7.315076722821598e-06, + "loss": 8.5822, + "step": 151540 + }, + { + "epoch": 0.7568228919573522, + "grad_norm": 0.09004715830087662, + "learning_rate": 7.313574807880047e-06, + "loss": 8.5829, + "step": 151550 + }, + { + "epoch": 0.7568728307822917, + "grad_norm": 0.09108411520719528, + "learning_rate": 7.312072892938497e-06, + "loss": 8.5784, + "step": 151560 + }, + { + "epoch": 0.7569227696072311, + "grad_norm": 0.0907498225569725, + "learning_rate": 7.310570977996946e-06, + "loss": 8.5694, + "step": 151570 + }, + { + "epoch": 0.7569727084321706, + "grad_norm": 0.08905670791864395, + "learning_rate": 7.309069063055396e-06, + "loss": 8.5844, + "step": 151580 + }, + { + "epoch": 0.75702264725711, + "grad_norm": 0.08316656202077866, + "learning_rate": 7.3075671481138454e-06, + "loss": 8.5998, + "step": 151590 + }, + { + "epoch": 0.7570725860820495, + "grad_norm": 0.09294084459543228, + "learning_rate": 7.306065233172295e-06, + "loss": 8.5846, + "step": 151600 + }, + { + "epoch": 0.757122524906989, + "grad_norm": 0.08754400908946991, + "learning_rate": 7.304563318230744e-06, + "loss": 8.5693, + "step": 151610 + }, + { + "epoch": 0.7571724637319284, + "grad_norm": 0.09371070563793182, + "learning_rate": 7.303061403289194e-06, + "loss": 8.5871, + "step": 151620 + }, + { + "epoch": 0.7572224025568678, + "grad_norm": 0.08874963223934174, + "learning_rate": 7.301559488347644e-06, + "loss": 8.5855, + "step": 151630 + }, + { + "epoch": 0.7572723413818073, + "grad_norm": 0.09325557202100754, + "learning_rate": 7.300057573406093e-06, + "loss": 8.5884, + "step": 151640 + }, + { + "epoch": 0.7573222802067467, + "grad_norm": 0.0924559012055397, + "learning_rate": 7.298555658464542e-06, + "loss": 8.582, + "step": 151650 + }, + { + "epoch": 0.7573722190316862, + "grad_norm": 0.09226152300834656, + "learning_rate": 7.297053743522992e-06, + "loss": 8.5726, + "step": 151660 + }, + { + "epoch": 0.7574221578566256, + "grad_norm": 0.09244561940431595, + "learning_rate": 7.295551828581442e-06, + "loss": 8.5807, + "step": 151670 + }, + { + "epoch": 0.7574720966815651, + "grad_norm": 0.08866026997566223, + "learning_rate": 7.294049913639891e-06, + "loss": 8.6003, + "step": 151680 + }, + { + "epoch": 0.7575220355065045, + "grad_norm": 0.09373471140861511, + "learning_rate": 7.2925479986983404e-06, + "loss": 8.5914, + "step": 151690 + }, + { + "epoch": 0.757571974331444, + "grad_norm": 0.09084302186965942, + "learning_rate": 7.291046083756791e-06, + "loss": 8.5745, + "step": 151700 + }, + { + "epoch": 0.7576219131563834, + "grad_norm": 0.0983499139547348, + "learning_rate": 7.289544168815239e-06, + "loss": 8.5901, + "step": 151710 + }, + { + "epoch": 0.7576718519813229, + "grad_norm": 0.09153123944997787, + "learning_rate": 7.288042253873689e-06, + "loss": 8.5917, + "step": 151720 + }, + { + "epoch": 0.7577217908062623, + "grad_norm": 0.08924426138401031, + "learning_rate": 7.286540338932139e-06, + "loss": 8.5917, + "step": 151730 + }, + { + "epoch": 0.7577717296312018, + "grad_norm": 0.09073387831449509, + "learning_rate": 7.285038423990588e-06, + "loss": 8.5854, + "step": 151740 + }, + { + "epoch": 0.7578216684561412, + "grad_norm": 0.09264533221721649, + "learning_rate": 7.283536509049038e-06, + "loss": 8.5888, + "step": 151750 + }, + { + "epoch": 0.7578716072810807, + "grad_norm": 0.09185033291578293, + "learning_rate": 7.282034594107487e-06, + "loss": 8.585, + "step": 151760 + }, + { + "epoch": 0.7579215461060201, + "grad_norm": 0.09249900281429291, + "learning_rate": 7.280532679165937e-06, + "loss": 8.5612, + "step": 151770 + }, + { + "epoch": 0.7579714849309596, + "grad_norm": 0.08698815107345581, + "learning_rate": 7.279030764224387e-06, + "loss": 8.6089, + "step": 151780 + }, + { + "epoch": 0.758021423755899, + "grad_norm": 0.09104190021753311, + "learning_rate": 7.2775288492828355e-06, + "loss": 8.5981, + "step": 151790 + }, + { + "epoch": 0.7580713625808385, + "grad_norm": 0.08875631541013718, + "learning_rate": 7.276026934341286e-06, + "loss": 8.5945, + "step": 151800 + }, + { + "epoch": 0.7581213014057779, + "grad_norm": 0.08652567863464355, + "learning_rate": 7.274525019399734e-06, + "loss": 8.5882, + "step": 151810 + }, + { + "epoch": 0.7581712402307174, + "grad_norm": 0.0900021344423294, + "learning_rate": 7.273023104458184e-06, + "loss": 8.5922, + "step": 151820 + }, + { + "epoch": 0.7582211790556568, + "grad_norm": 0.09021075814962387, + "learning_rate": 7.2715211895166345e-06, + "loss": 8.5893, + "step": 151830 + }, + { + "epoch": 0.7582711178805963, + "grad_norm": 0.09226670861244202, + "learning_rate": 7.270019274575083e-06, + "loss": 8.5892, + "step": 151840 + }, + { + "epoch": 0.7583210567055357, + "grad_norm": 0.09223869442939758, + "learning_rate": 7.268517359633533e-06, + "loss": 8.5831, + "step": 151850 + }, + { + "epoch": 0.7583709955304752, + "grad_norm": 0.08831837028265, + "learning_rate": 7.2670154446919825e-06, + "loss": 8.5805, + "step": 151860 + }, + { + "epoch": 0.7584209343554146, + "grad_norm": 0.0934990644454956, + "learning_rate": 7.265513529750432e-06, + "loss": 8.5758, + "step": 151870 + }, + { + "epoch": 0.7584708731803541, + "grad_norm": 0.09644410014152527, + "learning_rate": 7.264011614808882e-06, + "loss": 8.5791, + "step": 151880 + }, + { + "epoch": 0.7585208120052935, + "grad_norm": 0.08798360824584961, + "learning_rate": 7.2625096998673305e-06, + "loss": 8.584, + "step": 151890 + }, + { + "epoch": 0.758570750830233, + "grad_norm": 0.08095114678144455, + "learning_rate": 7.261007784925781e-06, + "loss": 8.6034, + "step": 151900 + }, + { + "epoch": 0.7586206896551724, + "grad_norm": 0.09009264409542084, + "learning_rate": 7.25950586998423e-06, + "loss": 8.5723, + "step": 151910 + }, + { + "epoch": 0.7586706284801119, + "grad_norm": 0.08817630261182785, + "learning_rate": 7.258003955042679e-06, + "loss": 8.5885, + "step": 151920 + }, + { + "epoch": 0.7587205673050513, + "grad_norm": 0.09514699131250381, + "learning_rate": 7.2565020401011295e-06, + "loss": 8.5849, + "step": 151930 + }, + { + "epoch": 0.7587705061299908, + "grad_norm": 0.09281176328659058, + "learning_rate": 7.255000125159579e-06, + "loss": 8.579, + "step": 151940 + }, + { + "epoch": 0.7588204449549302, + "grad_norm": 0.08887535333633423, + "learning_rate": 7.253498210218028e-06, + "loss": 8.6016, + "step": 151950 + }, + { + "epoch": 0.7588703837798697, + "grad_norm": 0.08661656081676483, + "learning_rate": 7.251996295276478e-06, + "loss": 8.5982, + "step": 151960 + }, + { + "epoch": 0.7589203226048091, + "grad_norm": 0.09328266978263855, + "learning_rate": 7.250494380334927e-06, + "loss": 8.5797, + "step": 151970 + }, + { + "epoch": 0.7589702614297485, + "grad_norm": 0.09176629781723022, + "learning_rate": 7.248992465393377e-06, + "loss": 8.5894, + "step": 151980 + }, + { + "epoch": 0.759020200254688, + "grad_norm": 0.08731918781995773, + "learning_rate": 7.247490550451826e-06, + "loss": 8.5794, + "step": 151990 + }, + { + "epoch": 0.7590701390796275, + "grad_norm": 0.088376984000206, + "learning_rate": 7.245988635510276e-06, + "loss": 8.5798, + "step": 152000 + }, + { + "epoch": 0.7591200779045669, + "grad_norm": 0.09818810224533081, + "learning_rate": 7.244486720568726e-06, + "loss": 8.5913, + "step": 152010 + }, + { + "epoch": 0.7591700167295063, + "grad_norm": 0.08995293080806732, + "learning_rate": 7.242984805627175e-06, + "loss": 8.5928, + "step": 152020 + }, + { + "epoch": 0.7592199555544458, + "grad_norm": 0.09495987743139267, + "learning_rate": 7.2414828906856245e-06, + "loss": 8.5887, + "step": 152030 + }, + { + "epoch": 0.7592698943793853, + "grad_norm": 0.09125667065382004, + "learning_rate": 7.239980975744074e-06, + "loss": 8.5905, + "step": 152040 + }, + { + "epoch": 0.7593198332043247, + "grad_norm": 0.09113696217536926, + "learning_rate": 7.238479060802523e-06, + "loss": 8.5895, + "step": 152050 + }, + { + "epoch": 0.7593697720292641, + "grad_norm": 0.09109106659889221, + "learning_rate": 7.236977145860973e-06, + "loss": 8.5895, + "step": 152060 + }, + { + "epoch": 0.7594197108542036, + "grad_norm": 0.09017052501440048, + "learning_rate": 7.235475230919423e-06, + "loss": 8.5818, + "step": 152070 + }, + { + "epoch": 0.7594696496791431, + "grad_norm": 0.08849617093801498, + "learning_rate": 7.233973315977872e-06, + "loss": 8.5957, + "step": 152080 + }, + { + "epoch": 0.7595195885040825, + "grad_norm": 0.09171763062477112, + "learning_rate": 7.232471401036321e-06, + "loss": 8.5808, + "step": 152090 + }, + { + "epoch": 0.7595695273290219, + "grad_norm": 0.10110411792993546, + "learning_rate": 7.2309694860947715e-06, + "loss": 8.5877, + "step": 152100 + }, + { + "epoch": 0.7596194661539614, + "grad_norm": 0.09818938374519348, + "learning_rate": 7.229467571153221e-06, + "loss": 8.6031, + "step": 152110 + }, + { + "epoch": 0.7596694049789009, + "grad_norm": 0.09149213880300522, + "learning_rate": 7.22796565621167e-06, + "loss": 8.5883, + "step": 152120 + }, + { + "epoch": 0.7597193438038403, + "grad_norm": 0.09270060062408447, + "learning_rate": 7.2264637412701195e-06, + "loss": 8.603, + "step": 152130 + }, + { + "epoch": 0.7597692826287797, + "grad_norm": 0.08630634099245071, + "learning_rate": 7.224961826328569e-06, + "loss": 8.5836, + "step": 152140 + }, + { + "epoch": 0.7598192214537192, + "grad_norm": 0.0882948487997055, + "learning_rate": 7.223459911387019e-06, + "loss": 8.5835, + "step": 152150 + }, + { + "epoch": 0.7598691602786587, + "grad_norm": 0.09250038117170334, + "learning_rate": 7.221957996445468e-06, + "loss": 8.5625, + "step": 152160 + }, + { + "epoch": 0.7599190991035981, + "grad_norm": 0.09016095101833344, + "learning_rate": 7.220456081503918e-06, + "loss": 8.5824, + "step": 152170 + }, + { + "epoch": 0.7599690379285375, + "grad_norm": 0.08857563883066177, + "learning_rate": 7.218954166562367e-06, + "loss": 8.5876, + "step": 152180 + }, + { + "epoch": 0.760018976753477, + "grad_norm": 0.09116075932979584, + "learning_rate": 7.217452251620816e-06, + "loss": 8.577, + "step": 152190 + }, + { + "epoch": 0.7600689155784165, + "grad_norm": 0.09463620185852051, + "learning_rate": 7.2159503366792665e-06, + "loss": 8.5771, + "step": 152200 + }, + { + "epoch": 0.7601188544033559, + "grad_norm": 0.09192495793104172, + "learning_rate": 7.214448421737716e-06, + "loss": 8.5971, + "step": 152210 + }, + { + "epoch": 0.7601687932282953, + "grad_norm": 0.09425206482410431, + "learning_rate": 7.212946506796165e-06, + "loss": 8.5846, + "step": 152220 + }, + { + "epoch": 0.7602187320532348, + "grad_norm": 0.0923011526465416, + "learning_rate": 7.211444591854615e-06, + "loss": 8.576, + "step": 152230 + }, + { + "epoch": 0.7602686708781743, + "grad_norm": 0.08811124414205551, + "learning_rate": 7.209942676913064e-06, + "loss": 8.587, + "step": 152240 + }, + { + "epoch": 0.7603186097031137, + "grad_norm": 0.08695264905691147, + "learning_rate": 7.208440761971514e-06, + "loss": 8.59, + "step": 152250 + }, + { + "epoch": 0.7603685485280531, + "grad_norm": 0.0906856581568718, + "learning_rate": 7.206938847029963e-06, + "loss": 8.5963, + "step": 152260 + }, + { + "epoch": 0.7604184873529926, + "grad_norm": 0.09710587561130524, + "learning_rate": 7.205436932088413e-06, + "loss": 8.5624, + "step": 152270 + }, + { + "epoch": 0.7604684261779321, + "grad_norm": 0.09217939525842667, + "learning_rate": 7.203935017146863e-06, + "loss": 8.5973, + "step": 152280 + }, + { + "epoch": 0.7605183650028715, + "grad_norm": 0.09139391779899597, + "learning_rate": 7.202433102205311e-06, + "loss": 8.5674, + "step": 152290 + }, + { + "epoch": 0.7605683038278109, + "grad_norm": 0.08444222062826157, + "learning_rate": 7.2009311872637615e-06, + "loss": 8.5952, + "step": 152300 + }, + { + "epoch": 0.7606182426527504, + "grad_norm": 0.09180841594934464, + "learning_rate": 7.199429272322212e-06, + "loss": 8.596, + "step": 152310 + }, + { + "epoch": 0.7606681814776898, + "grad_norm": 0.09961362183094025, + "learning_rate": 7.19792735738066e-06, + "loss": 8.5865, + "step": 152320 + }, + { + "epoch": 0.7607181203026293, + "grad_norm": 0.09278172999620438, + "learning_rate": 7.19642544243911e-06, + "loss": 8.5995, + "step": 152330 + }, + { + "epoch": 0.7607680591275687, + "grad_norm": 0.08981714397668839, + "learning_rate": 7.194923527497559e-06, + "loss": 8.578, + "step": 152340 + }, + { + "epoch": 0.7608179979525082, + "grad_norm": 0.09385214000940323, + "learning_rate": 7.193421612556009e-06, + "loss": 8.5784, + "step": 152350 + }, + { + "epoch": 0.7608679367774476, + "grad_norm": 0.09259159117937088, + "learning_rate": 7.191919697614459e-06, + "loss": 8.5798, + "step": 152360 + }, + { + "epoch": 0.7609178756023871, + "grad_norm": 0.0899551659822464, + "learning_rate": 7.190417782672908e-06, + "loss": 8.5876, + "step": 152370 + }, + { + "epoch": 0.7609678144273265, + "grad_norm": 0.09498420357704163, + "learning_rate": 7.188915867731358e-06, + "loss": 8.5757, + "step": 152380 + }, + { + "epoch": 0.761017753252266, + "grad_norm": 0.09275230020284653, + "learning_rate": 7.187413952789807e-06, + "loss": 8.5825, + "step": 152390 + }, + { + "epoch": 0.7610676920772054, + "grad_norm": 0.09266764670610428, + "learning_rate": 7.1859120378482565e-06, + "loss": 8.5912, + "step": 152400 + }, + { + "epoch": 0.7611176309021449, + "grad_norm": 0.08858975768089294, + "learning_rate": 7.184410122906707e-06, + "loss": 8.5878, + "step": 152410 + }, + { + "epoch": 0.7611675697270843, + "grad_norm": 0.09748895466327667, + "learning_rate": 7.182908207965155e-06, + "loss": 8.5744, + "step": 152420 + }, + { + "epoch": 0.7612175085520237, + "grad_norm": 0.09607774019241333, + "learning_rate": 7.181406293023605e-06, + "loss": 8.5716, + "step": 152430 + }, + { + "epoch": 0.7612674473769632, + "grad_norm": 0.09278061985969543, + "learning_rate": 7.179904378082055e-06, + "loss": 8.5937, + "step": 152440 + }, + { + "epoch": 0.7613173862019027, + "grad_norm": 0.0908476933836937, + "learning_rate": 7.178402463140504e-06, + "loss": 8.5842, + "step": 152450 + }, + { + "epoch": 0.7613673250268421, + "grad_norm": 0.0958239734172821, + "learning_rate": 7.176900548198954e-06, + "loss": 8.593, + "step": 152460 + }, + { + "epoch": 0.7614172638517815, + "grad_norm": 0.08807183057069778, + "learning_rate": 7.1753986332574035e-06, + "loss": 8.5842, + "step": 152470 + }, + { + "epoch": 0.761467202676721, + "grad_norm": 0.09366779029369354, + "learning_rate": 7.173896718315853e-06, + "loss": 8.5819, + "step": 152480 + }, + { + "epoch": 0.7615171415016605, + "grad_norm": 0.09066038578748703, + "learning_rate": 7.172394803374302e-06, + "loss": 8.5982, + "step": 152490 + }, + { + "epoch": 0.7615670803265999, + "grad_norm": 0.08774998039007187, + "learning_rate": 7.1708928884327515e-06, + "loss": 8.5932, + "step": 152500 + }, + { + "epoch": 0.7616170191515393, + "grad_norm": 0.09205503761768341, + "learning_rate": 7.169390973491202e-06, + "loss": 8.5773, + "step": 152510 + }, + { + "epoch": 0.7616669579764788, + "grad_norm": 0.08959227055311203, + "learning_rate": 7.167889058549651e-06, + "loss": 8.586, + "step": 152520 + }, + { + "epoch": 0.7617168968014183, + "grad_norm": 0.09033005684614182, + "learning_rate": 7.1663871436081e-06, + "loss": 8.5832, + "step": 152530 + }, + { + "epoch": 0.7617668356263577, + "grad_norm": 0.08551044017076492, + "learning_rate": 7.16488522866655e-06, + "loss": 8.5845, + "step": 152540 + }, + { + "epoch": 0.7618167744512971, + "grad_norm": 0.09518376737833023, + "learning_rate": 7.163383313725e-06, + "loss": 8.5857, + "step": 152550 + }, + { + "epoch": 0.7618667132762366, + "grad_norm": 0.0905361995100975, + "learning_rate": 7.161881398783449e-06, + "loss": 8.5745, + "step": 152560 + }, + { + "epoch": 0.7619166521011761, + "grad_norm": 0.09344843029975891, + "learning_rate": 7.1603794838418985e-06, + "loss": 8.5731, + "step": 152570 + }, + { + "epoch": 0.7619665909261155, + "grad_norm": 0.08621740341186523, + "learning_rate": 7.158877568900348e-06, + "loss": 8.582, + "step": 152580 + }, + { + "epoch": 0.7620165297510549, + "grad_norm": 0.09143950790166855, + "learning_rate": 7.157375653958797e-06, + "loss": 8.5648, + "step": 152590 + }, + { + "epoch": 0.7620664685759944, + "grad_norm": 0.09078672528266907, + "learning_rate": 7.155873739017247e-06, + "loss": 8.5895, + "step": 152600 + }, + { + "epoch": 0.7621164074009339, + "grad_norm": 0.09113630652427673, + "learning_rate": 7.154371824075697e-06, + "loss": 8.5785, + "step": 152610 + }, + { + "epoch": 0.7621663462258733, + "grad_norm": 0.09150314331054688, + "learning_rate": 7.152869909134146e-06, + "loss": 8.5799, + "step": 152620 + }, + { + "epoch": 0.7622162850508127, + "grad_norm": 0.08845186978578568, + "learning_rate": 7.151367994192596e-06, + "loss": 8.5894, + "step": 152630 + }, + { + "epoch": 0.7622662238757522, + "grad_norm": 0.09440822899341583, + "learning_rate": 7.149866079251045e-06, + "loss": 8.5837, + "step": 152640 + }, + { + "epoch": 0.7623161627006917, + "grad_norm": 0.08900156617164612, + "learning_rate": 7.148364164309495e-06, + "loss": 8.5828, + "step": 152650 + }, + { + "epoch": 0.7623661015256311, + "grad_norm": 0.09400945901870728, + "learning_rate": 7.146862249367944e-06, + "loss": 8.576, + "step": 152660 + }, + { + "epoch": 0.7624160403505705, + "grad_norm": 0.09294036775827408, + "learning_rate": 7.1453603344263935e-06, + "loss": 8.5688, + "step": 152670 + }, + { + "epoch": 0.76246597917551, + "grad_norm": 0.09247752279043198, + "learning_rate": 7.143858419484844e-06, + "loss": 8.5811, + "step": 152680 + }, + { + "epoch": 0.7625159180004495, + "grad_norm": 0.09127040207386017, + "learning_rate": 7.142356504543292e-06, + "loss": 8.5837, + "step": 152690 + }, + { + "epoch": 0.7625658568253889, + "grad_norm": 0.0945616066455841, + "learning_rate": 7.140854589601742e-06, + "loss": 8.5798, + "step": 152700 + }, + { + "epoch": 0.7626157956503283, + "grad_norm": 0.08728938549757004, + "learning_rate": 7.1393526746601926e-06, + "loss": 8.5811, + "step": 152710 + }, + { + "epoch": 0.7626657344752678, + "grad_norm": 0.09225558489561081, + "learning_rate": 7.137850759718641e-06, + "loss": 8.6107, + "step": 152720 + }, + { + "epoch": 0.7627156733002073, + "grad_norm": 0.09513969719409943, + "learning_rate": 7.136348844777091e-06, + "loss": 8.5764, + "step": 152730 + }, + { + "epoch": 0.7627656121251467, + "grad_norm": 0.09618328511714935, + "learning_rate": 7.13484692983554e-06, + "loss": 8.5874, + "step": 152740 + }, + { + "epoch": 0.7628155509500861, + "grad_norm": 0.09063924103975296, + "learning_rate": 7.13334501489399e-06, + "loss": 8.5823, + "step": 152750 + }, + { + "epoch": 0.7628654897750256, + "grad_norm": 0.09331314265727997, + "learning_rate": 7.13184309995244e-06, + "loss": 8.5897, + "step": 152760 + }, + { + "epoch": 0.7629154285999651, + "grad_norm": 0.09065219014883041, + "learning_rate": 7.1303411850108886e-06, + "loss": 8.5748, + "step": 152770 + }, + { + "epoch": 0.7629653674249045, + "grad_norm": 0.09267207235097885, + "learning_rate": 7.128839270069339e-06, + "loss": 8.5953, + "step": 152780 + }, + { + "epoch": 0.7630153062498439, + "grad_norm": 0.094063401222229, + "learning_rate": 7.127337355127788e-06, + "loss": 8.5751, + "step": 152790 + }, + { + "epoch": 0.7630652450747833, + "grad_norm": 0.09036116302013397, + "learning_rate": 7.125835440186237e-06, + "loss": 8.6051, + "step": 152800 + }, + { + "epoch": 0.7631151838997229, + "grad_norm": 0.08873546123504639, + "learning_rate": 7.1243335252446876e-06, + "loss": 8.5893, + "step": 152810 + }, + { + "epoch": 0.7631651227246623, + "grad_norm": 0.0934579074382782, + "learning_rate": 7.122831610303136e-06, + "loss": 8.5921, + "step": 152820 + }, + { + "epoch": 0.7632150615496017, + "grad_norm": 0.08765194565057755, + "learning_rate": 7.121329695361586e-06, + "loss": 8.5853, + "step": 152830 + }, + { + "epoch": 0.7632650003745411, + "grad_norm": 0.09874917566776276, + "learning_rate": 7.1198277804200356e-06, + "loss": 8.6031, + "step": 152840 + }, + { + "epoch": 0.7633149391994807, + "grad_norm": 0.10156397521495819, + "learning_rate": 7.118325865478485e-06, + "loss": 8.5753, + "step": 152850 + }, + { + "epoch": 0.7633648780244201, + "grad_norm": 0.0943882167339325, + "learning_rate": 7.116823950536935e-06, + "loss": 8.5921, + "step": 152860 + }, + { + "epoch": 0.7634148168493595, + "grad_norm": 0.0905870795249939, + "learning_rate": 7.115322035595384e-06, + "loss": 8.5748, + "step": 152870 + }, + { + "epoch": 0.763464755674299, + "grad_norm": 0.09174995124340057, + "learning_rate": 7.113820120653834e-06, + "loss": 8.5743, + "step": 152880 + }, + { + "epoch": 0.7635146944992385, + "grad_norm": 0.08916423469781876, + "learning_rate": 7.112318205712283e-06, + "loss": 8.5986, + "step": 152890 + }, + { + "epoch": 0.7635646333241779, + "grad_norm": 0.0889330729842186, + "learning_rate": 7.110816290770732e-06, + "loss": 8.5888, + "step": 152900 + }, + { + "epoch": 0.7636145721491173, + "grad_norm": 0.08929458260536194, + "learning_rate": 7.109314375829183e-06, + "loss": 8.5861, + "step": 152910 + }, + { + "epoch": 0.7636645109740567, + "grad_norm": 0.08744913339614868, + "learning_rate": 7.107812460887632e-06, + "loss": 8.5891, + "step": 152920 + }, + { + "epoch": 0.7637144497989963, + "grad_norm": 0.08869093656539917, + "learning_rate": 7.106310545946081e-06, + "loss": 8.5786, + "step": 152930 + }, + { + "epoch": 0.7637643886239357, + "grad_norm": 0.09083859622478485, + "learning_rate": 7.1048086310045306e-06, + "loss": 8.5703, + "step": 152940 + }, + { + "epoch": 0.7638143274488751, + "grad_norm": 0.09250235557556152, + "learning_rate": 7.103306716062981e-06, + "loss": 8.5856, + "step": 152950 + }, + { + "epoch": 0.7638642662738145, + "grad_norm": 0.09155931323766708, + "learning_rate": 7.10180480112143e-06, + "loss": 8.5781, + "step": 152960 + }, + { + "epoch": 0.7639142050987541, + "grad_norm": 0.08976297080516815, + "learning_rate": 7.100302886179879e-06, + "loss": 8.5806, + "step": 152970 + }, + { + "epoch": 0.7639641439236935, + "grad_norm": 0.0865217074751854, + "learning_rate": 7.098800971238329e-06, + "loss": 8.5913, + "step": 152980 + }, + { + "epoch": 0.7640140827486329, + "grad_norm": 0.09239277988672256, + "learning_rate": 7.097299056296778e-06, + "loss": 8.5603, + "step": 152990 + }, + { + "epoch": 0.7640640215735723, + "grad_norm": 0.09613004326820374, + "learning_rate": 7.095797141355228e-06, + "loss": 8.5824, + "step": 153000 + }, + { + "epoch": 0.7641139603985119, + "grad_norm": 0.09154446423053741, + "learning_rate": 7.094295226413678e-06, + "loss": 8.5709, + "step": 153010 + }, + { + "epoch": 0.7641638992234513, + "grad_norm": 0.08664754033088684, + "learning_rate": 7.092793311472127e-06, + "loss": 8.5846, + "step": 153020 + }, + { + "epoch": 0.7642138380483907, + "grad_norm": 0.08845943212509155, + "learning_rate": 7.091291396530577e-06, + "loss": 8.5786, + "step": 153030 + }, + { + "epoch": 0.7642637768733301, + "grad_norm": 0.09169842302799225, + "learning_rate": 7.0897894815890264e-06, + "loss": 8.5761, + "step": 153040 + }, + { + "epoch": 0.7643137156982697, + "grad_norm": 0.09660103172063828, + "learning_rate": 7.088287566647476e-06, + "loss": 8.5811, + "step": 153050 + }, + { + "epoch": 0.7643636545232091, + "grad_norm": 0.09721507132053375, + "learning_rate": 7.086785651705925e-06, + "loss": 8.564, + "step": 153060 + }, + { + "epoch": 0.7644135933481485, + "grad_norm": 0.08967658877372742, + "learning_rate": 7.0852837367643744e-06, + "loss": 8.5768, + "step": 153070 + }, + { + "epoch": 0.7644635321730879, + "grad_norm": 0.08986034244298935, + "learning_rate": 7.083781821822825e-06, + "loss": 8.5778, + "step": 153080 + }, + { + "epoch": 0.7645134709980275, + "grad_norm": 0.09163659065961838, + "learning_rate": 7.082279906881274e-06, + "loss": 8.5838, + "step": 153090 + }, + { + "epoch": 0.7645634098229669, + "grad_norm": 0.0884322002530098, + "learning_rate": 7.080777991939723e-06, + "loss": 8.5956, + "step": 153100 + }, + { + "epoch": 0.7646133486479063, + "grad_norm": 0.08686332404613495, + "learning_rate": 7.0792760769981734e-06, + "loss": 8.5683, + "step": 153110 + }, + { + "epoch": 0.7646632874728457, + "grad_norm": 0.09294196218252182, + "learning_rate": 7.077774162056622e-06, + "loss": 8.5796, + "step": 153120 + }, + { + "epoch": 0.7647132262977853, + "grad_norm": 0.08890484273433685, + "learning_rate": 7.076272247115072e-06, + "loss": 8.5687, + "step": 153130 + }, + { + "epoch": 0.7647631651227247, + "grad_norm": 0.08904290199279785, + "learning_rate": 7.0747703321735214e-06, + "loss": 8.5894, + "step": 153140 + }, + { + "epoch": 0.7648131039476641, + "grad_norm": 0.09268071502447128, + "learning_rate": 7.073268417231971e-06, + "loss": 8.5896, + "step": 153150 + }, + { + "epoch": 0.7648630427726035, + "grad_norm": 0.08970651030540466, + "learning_rate": 7.071766502290421e-06, + "loss": 8.6045, + "step": 153160 + }, + { + "epoch": 0.7649129815975431, + "grad_norm": 0.09089753031730652, + "learning_rate": 7.0702645873488694e-06, + "loss": 8.5732, + "step": 153170 + }, + { + "epoch": 0.7649629204224825, + "grad_norm": 0.0952310562133789, + "learning_rate": 7.06876267240732e-06, + "loss": 8.5883, + "step": 153180 + }, + { + "epoch": 0.7650128592474219, + "grad_norm": 0.09604635834693909, + "learning_rate": 7.06726075746577e-06, + "loss": 8.5674, + "step": 153190 + }, + { + "epoch": 0.7650627980723613, + "grad_norm": 0.0942385122179985, + "learning_rate": 7.065758842524218e-06, + "loss": 8.5867, + "step": 153200 + }, + { + "epoch": 0.7651127368973009, + "grad_norm": 0.08863166719675064, + "learning_rate": 7.0642569275826685e-06, + "loss": 8.5591, + "step": 153210 + }, + { + "epoch": 0.7651626757222403, + "grad_norm": 0.09197341650724411, + "learning_rate": 7.062755012641117e-06, + "loss": 8.5931, + "step": 153220 + }, + { + "epoch": 0.7652126145471797, + "grad_norm": 0.09086757153272629, + "learning_rate": 7.061253097699567e-06, + "loss": 8.578, + "step": 153230 + }, + { + "epoch": 0.7652625533721191, + "grad_norm": 0.08695393055677414, + "learning_rate": 7.059751182758017e-06, + "loss": 8.5728, + "step": 153240 + }, + { + "epoch": 0.7653124921970587, + "grad_norm": 0.09496577829122543, + "learning_rate": 7.058249267816466e-06, + "loss": 8.5701, + "step": 153250 + }, + { + "epoch": 0.7653624310219981, + "grad_norm": 0.09787900000810623, + "learning_rate": 7.056747352874916e-06, + "loss": 8.5682, + "step": 153260 + }, + { + "epoch": 0.7654123698469375, + "grad_norm": 0.0917130559682846, + "learning_rate": 7.055245437933365e-06, + "loss": 8.57, + "step": 153270 + }, + { + "epoch": 0.7654623086718769, + "grad_norm": 0.08664479851722717, + "learning_rate": 7.053743522991815e-06, + "loss": 8.5829, + "step": 153280 + }, + { + "epoch": 0.7655122474968163, + "grad_norm": 0.08745972812175751, + "learning_rate": 7.052241608050265e-06, + "loss": 8.564, + "step": 153290 + }, + { + "epoch": 0.7655621863217559, + "grad_norm": 0.09336210042238235, + "learning_rate": 7.050739693108713e-06, + "loss": 8.5767, + "step": 153300 + }, + { + "epoch": 0.7656121251466953, + "grad_norm": 0.0911019816994667, + "learning_rate": 7.0492377781671635e-06, + "loss": 8.5855, + "step": 153310 + }, + { + "epoch": 0.7656620639716347, + "grad_norm": 0.09164589643478394, + "learning_rate": 7.047735863225613e-06, + "loss": 8.5782, + "step": 153320 + }, + { + "epoch": 0.7657120027965741, + "grad_norm": 0.09078686684370041, + "learning_rate": 7.046233948284062e-06, + "loss": 8.5899, + "step": 153330 + }, + { + "epoch": 0.7657619416215137, + "grad_norm": 0.09011105448007584, + "learning_rate": 7.044732033342512e-06, + "loss": 8.5706, + "step": 153340 + }, + { + "epoch": 0.7658118804464531, + "grad_norm": 0.08931349217891693, + "learning_rate": 7.043230118400962e-06, + "loss": 8.5779, + "step": 153350 + }, + { + "epoch": 0.7658618192713925, + "grad_norm": 0.09002659469842911, + "learning_rate": 7.041728203459411e-06, + "loss": 8.5732, + "step": 153360 + }, + { + "epoch": 0.7659117580963319, + "grad_norm": 0.0962076261639595, + "learning_rate": 7.04022628851786e-06, + "loss": 8.5705, + "step": 153370 + }, + { + "epoch": 0.7659616969212715, + "grad_norm": 0.0866306945681572, + "learning_rate": 7.03872437357631e-06, + "loss": 8.5906, + "step": 153380 + }, + { + "epoch": 0.7660116357462109, + "grad_norm": 0.09287164360284805, + "learning_rate": 7.03722245863476e-06, + "loss": 8.5881, + "step": 153390 + }, + { + "epoch": 0.7660615745711503, + "grad_norm": 0.09079669415950775, + "learning_rate": 7.035720543693209e-06, + "loss": 8.5676, + "step": 153400 + }, + { + "epoch": 0.7661115133960897, + "grad_norm": 0.08933369070291519, + "learning_rate": 7.0342186287516585e-06, + "loss": 8.5854, + "step": 153410 + }, + { + "epoch": 0.7661614522210293, + "grad_norm": 0.09217403084039688, + "learning_rate": 7.032716713810108e-06, + "loss": 8.5743, + "step": 153420 + }, + { + "epoch": 0.7662113910459687, + "grad_norm": 0.08750222623348236, + "learning_rate": 7.031214798868558e-06, + "loss": 8.5838, + "step": 153430 + }, + { + "epoch": 0.7662613298709081, + "grad_norm": 0.09187041968107224, + "learning_rate": 7.029712883927007e-06, + "loss": 8.5779, + "step": 153440 + }, + { + "epoch": 0.7663112686958475, + "grad_norm": 0.09300150722265244, + "learning_rate": 7.028210968985457e-06, + "loss": 8.5867, + "step": 153450 + }, + { + "epoch": 0.7663612075207871, + "grad_norm": 0.08883293718099594, + "learning_rate": 7.026709054043906e-06, + "loss": 8.5779, + "step": 153460 + }, + { + "epoch": 0.7664111463457265, + "grad_norm": 0.09035582095384598, + "learning_rate": 7.025207139102355e-06, + "loss": 8.5666, + "step": 153470 + }, + { + "epoch": 0.7664610851706659, + "grad_norm": 0.09275439381599426, + "learning_rate": 7.0237052241608055e-06, + "loss": 8.5823, + "step": 153480 + }, + { + "epoch": 0.7665110239956053, + "grad_norm": 0.08916142582893372, + "learning_rate": 7.022203309219255e-06, + "loss": 8.5674, + "step": 153490 + }, + { + "epoch": 0.7665609628205449, + "grad_norm": 0.09958826005458832, + "learning_rate": 7.020701394277704e-06, + "loss": 8.5855, + "step": 153500 + }, + { + "epoch": 0.7666109016454843, + "grad_norm": 0.09693287312984467, + "learning_rate": 7.019199479336154e-06, + "loss": 8.6037, + "step": 153510 + }, + { + "epoch": 0.7666608404704237, + "grad_norm": 0.09419762343168259, + "learning_rate": 7.017697564394603e-06, + "loss": 8.6014, + "step": 153520 + }, + { + "epoch": 0.7667107792953631, + "grad_norm": 0.08924181014299393, + "learning_rate": 7.016195649453053e-06, + "loss": 8.5692, + "step": 153530 + }, + { + "epoch": 0.7667607181203027, + "grad_norm": 0.09446694701910019, + "learning_rate": 7.014693734511502e-06, + "loss": 8.5536, + "step": 153540 + }, + { + "epoch": 0.7668106569452421, + "grad_norm": 0.09108909219503403, + "learning_rate": 7.013191819569952e-06, + "loss": 8.572, + "step": 153550 + }, + { + "epoch": 0.7668605957701815, + "grad_norm": 0.09171731770038605, + "learning_rate": 7.011689904628402e-06, + "loss": 8.5945, + "step": 153560 + }, + { + "epoch": 0.7669105345951209, + "grad_norm": 0.08465012907981873, + "learning_rate": 7.01018798968685e-06, + "loss": 8.5643, + "step": 153570 + }, + { + "epoch": 0.7669604734200605, + "grad_norm": 0.09661739319562912, + "learning_rate": 7.0086860747453005e-06, + "loss": 8.5638, + "step": 153580 + }, + { + "epoch": 0.7670104122449999, + "grad_norm": 0.0985170304775238, + "learning_rate": 7.007184159803751e-06, + "loss": 8.5805, + "step": 153590 + }, + { + "epoch": 0.7670603510699393, + "grad_norm": 0.08951466530561447, + "learning_rate": 7.005682244862199e-06, + "loss": 8.5677, + "step": 153600 + }, + { + "epoch": 0.7671102898948787, + "grad_norm": 0.08966083079576492, + "learning_rate": 7.004180329920649e-06, + "loss": 8.5769, + "step": 153610 + }, + { + "epoch": 0.7671602287198183, + "grad_norm": 0.09198177605867386, + "learning_rate": 7.002678414979098e-06, + "loss": 8.5674, + "step": 153620 + }, + { + "epoch": 0.7672101675447577, + "grad_norm": 0.0966639444231987, + "learning_rate": 7.001176500037548e-06, + "loss": 8.5847, + "step": 153630 + }, + { + "epoch": 0.7672601063696971, + "grad_norm": 0.10365894436836243, + "learning_rate": 6.999674585095998e-06, + "loss": 8.5755, + "step": 153640 + }, + { + "epoch": 0.7673100451946365, + "grad_norm": 0.08775070309638977, + "learning_rate": 6.998172670154447e-06, + "loss": 8.5752, + "step": 153650 + }, + { + "epoch": 0.7673599840195761, + "grad_norm": 0.09237588942050934, + "learning_rate": 6.996670755212897e-06, + "loss": 8.5767, + "step": 153660 + }, + { + "epoch": 0.7674099228445155, + "grad_norm": 0.0916270911693573, + "learning_rate": 6.995168840271346e-06, + "loss": 8.5828, + "step": 153670 + }, + { + "epoch": 0.7674598616694549, + "grad_norm": 0.08937940746545792, + "learning_rate": 6.9936669253297955e-06, + "loss": 8.5851, + "step": 153680 + }, + { + "epoch": 0.7675098004943943, + "grad_norm": 0.08998386561870575, + "learning_rate": 6.992165010388246e-06, + "loss": 8.5821, + "step": 153690 + }, + { + "epoch": 0.7675597393193339, + "grad_norm": 0.09344659745693207, + "learning_rate": 6.990663095446694e-06, + "loss": 8.5799, + "step": 153700 + }, + { + "epoch": 0.7676096781442733, + "grad_norm": 0.08797649294137955, + "learning_rate": 6.989161180505144e-06, + "loss": 8.5644, + "step": 153710 + }, + { + "epoch": 0.7676596169692127, + "grad_norm": 0.08696374297142029, + "learning_rate": 6.987659265563594e-06, + "loss": 8.575, + "step": 153720 + }, + { + "epoch": 0.7677095557941521, + "grad_norm": 0.08707814663648605, + "learning_rate": 6.986157350622043e-06, + "loss": 8.5779, + "step": 153730 + }, + { + "epoch": 0.7677594946190917, + "grad_norm": 0.0909426361322403, + "learning_rate": 6.984655435680493e-06, + "loss": 8.581, + "step": 153740 + }, + { + "epoch": 0.7678094334440311, + "grad_norm": 0.09398233890533447, + "learning_rate": 6.9831535207389425e-06, + "loss": 8.5727, + "step": 153750 + }, + { + "epoch": 0.7678593722689705, + "grad_norm": 0.09371063113212585, + "learning_rate": 6.981651605797392e-06, + "loss": 8.5808, + "step": 153760 + }, + { + "epoch": 0.7679093110939099, + "grad_norm": 0.09734338521957397, + "learning_rate": 6.980149690855841e-06, + "loss": 8.5737, + "step": 153770 + }, + { + "epoch": 0.7679592499188495, + "grad_norm": 0.09083466976881027, + "learning_rate": 6.9786477759142905e-06, + "loss": 8.5716, + "step": 153780 + }, + { + "epoch": 0.7680091887437889, + "grad_norm": 0.08842228353023529, + "learning_rate": 6.977145860972741e-06, + "loss": 8.5752, + "step": 153790 + }, + { + "epoch": 0.7680591275687283, + "grad_norm": 0.08697518706321716, + "learning_rate": 6.97564394603119e-06, + "loss": 8.5822, + "step": 153800 + }, + { + "epoch": 0.7681090663936677, + "grad_norm": 0.08936888724565506, + "learning_rate": 6.974142031089639e-06, + "loss": 8.582, + "step": 153810 + }, + { + "epoch": 0.7681590052186072, + "grad_norm": 0.08924106508493423, + "learning_rate": 6.972640116148089e-06, + "loss": 8.5737, + "step": 153820 + }, + { + "epoch": 0.7682089440435467, + "grad_norm": 0.08949612826108932, + "learning_rate": 6.971138201206539e-06, + "loss": 8.5744, + "step": 153830 + }, + { + "epoch": 0.7682588828684861, + "grad_norm": 0.08947469294071198, + "learning_rate": 6.969636286264988e-06, + "loss": 8.5754, + "step": 153840 + }, + { + "epoch": 0.7683088216934255, + "grad_norm": 0.09205352514982224, + "learning_rate": 6.9681343713234375e-06, + "loss": 8.5822, + "step": 153850 + }, + { + "epoch": 0.768358760518365, + "grad_norm": 0.09942862391471863, + "learning_rate": 6.966632456381887e-06, + "loss": 8.582, + "step": 153860 + }, + { + "epoch": 0.7684086993433045, + "grad_norm": 0.090413898229599, + "learning_rate": 6.965130541440336e-06, + "loss": 8.5727, + "step": 153870 + }, + { + "epoch": 0.7684586381682439, + "grad_norm": 0.09462569653987885, + "learning_rate": 6.963628626498786e-06, + "loss": 8.5756, + "step": 153880 + }, + { + "epoch": 0.7685085769931833, + "grad_norm": 0.09226179867982864, + "learning_rate": 6.962126711557236e-06, + "loss": 8.5864, + "step": 153890 + }, + { + "epoch": 0.7685585158181228, + "grad_norm": 0.08182681351900101, + "learning_rate": 6.960624796615685e-06, + "loss": 8.5841, + "step": 153900 + }, + { + "epoch": 0.7686084546430623, + "grad_norm": 0.09245304763317108, + "learning_rate": 6.959122881674135e-06, + "loss": 8.5817, + "step": 153910 + }, + { + "epoch": 0.7686583934680017, + "grad_norm": 0.09626985341310501, + "learning_rate": 6.957620966732584e-06, + "loss": 8.595, + "step": 153920 + }, + { + "epoch": 0.7687083322929411, + "grad_norm": 0.08915554732084274, + "learning_rate": 6.956119051791034e-06, + "loss": 8.5715, + "step": 153930 + }, + { + "epoch": 0.7687582711178806, + "grad_norm": 0.09438993036746979, + "learning_rate": 6.954617136849483e-06, + "loss": 8.5903, + "step": 153940 + }, + { + "epoch": 0.7688082099428201, + "grad_norm": 0.08598021417856216, + "learning_rate": 6.9531152219079325e-06, + "loss": 8.5767, + "step": 153950 + }, + { + "epoch": 0.7688581487677595, + "grad_norm": 0.09764537960290909, + "learning_rate": 6.951613306966383e-06, + "loss": 8.5781, + "step": 153960 + }, + { + "epoch": 0.7689080875926989, + "grad_norm": 0.09817574918270111, + "learning_rate": 6.950111392024831e-06, + "loss": 8.5746, + "step": 153970 + }, + { + "epoch": 0.7689580264176384, + "grad_norm": 0.08399667590856552, + "learning_rate": 6.948609477083281e-06, + "loss": 8.5705, + "step": 153980 + }, + { + "epoch": 0.7690079652425779, + "grad_norm": 0.09401653707027435, + "learning_rate": 6.9471075621417315e-06, + "loss": 8.5782, + "step": 153990 + }, + { + "epoch": 0.7690579040675173, + "grad_norm": 0.0929185077548027, + "learning_rate": 6.94560564720018e-06, + "loss": 8.5698, + "step": 154000 + }, + { + "epoch": 0.7691078428924567, + "grad_norm": 0.08887774497270584, + "learning_rate": 6.94410373225863e-06, + "loss": 8.5957, + "step": 154010 + }, + { + "epoch": 0.7691577817173962, + "grad_norm": 0.08719068765640259, + "learning_rate": 6.942601817317079e-06, + "loss": 8.5963, + "step": 154020 + }, + { + "epoch": 0.7692077205423357, + "grad_norm": 0.09064088016748428, + "learning_rate": 6.941099902375529e-06, + "loss": 8.5823, + "step": 154030 + }, + { + "epoch": 0.7692576593672751, + "grad_norm": 0.09441380947828293, + "learning_rate": 6.939597987433979e-06, + "loss": 8.5988, + "step": 154040 + }, + { + "epoch": 0.7693075981922145, + "grad_norm": 0.08752386271953583, + "learning_rate": 6.9380960724924275e-06, + "loss": 8.5738, + "step": 154050 + }, + { + "epoch": 0.769357537017154, + "grad_norm": 0.09293375164270401, + "learning_rate": 6.936594157550878e-06, + "loss": 8.581, + "step": 154060 + }, + { + "epoch": 0.7694074758420935, + "grad_norm": 0.09073223173618317, + "learning_rate": 6.935092242609327e-06, + "loss": 8.586, + "step": 154070 + }, + { + "epoch": 0.7694574146670329, + "grad_norm": 0.09582027792930603, + "learning_rate": 6.933590327667776e-06, + "loss": 8.5888, + "step": 154080 + }, + { + "epoch": 0.7695073534919723, + "grad_norm": 0.09688831865787506, + "learning_rate": 6.9320884127262265e-06, + "loss": 8.5683, + "step": 154090 + }, + { + "epoch": 0.7695572923169118, + "grad_norm": 0.09381963312625885, + "learning_rate": 6.930586497784675e-06, + "loss": 8.5728, + "step": 154100 + }, + { + "epoch": 0.7696072311418513, + "grad_norm": 0.0936012864112854, + "learning_rate": 6.929084582843125e-06, + "loss": 8.5665, + "step": 154110 + }, + { + "epoch": 0.7696571699667907, + "grad_norm": 0.08793763071298599, + "learning_rate": 6.9275826679015745e-06, + "loss": 8.6069, + "step": 154120 + }, + { + "epoch": 0.7697071087917301, + "grad_norm": 0.08785630762577057, + "learning_rate": 6.926080752960024e-06, + "loss": 8.581, + "step": 154130 + }, + { + "epoch": 0.7697570476166696, + "grad_norm": 0.09242880344390869, + "learning_rate": 6.924578838018474e-06, + "loss": 8.5786, + "step": 154140 + }, + { + "epoch": 0.769806986441609, + "grad_norm": 0.08848867565393448, + "learning_rate": 6.923076923076923e-06, + "loss": 8.578, + "step": 154150 + }, + { + "epoch": 0.7698569252665485, + "grad_norm": 0.09217596054077148, + "learning_rate": 6.921575008135373e-06, + "loss": 8.578, + "step": 154160 + }, + { + "epoch": 0.7699068640914879, + "grad_norm": 0.08778587728738785, + "learning_rate": 6.920073093193823e-06, + "loss": 8.5806, + "step": 154170 + }, + { + "epoch": 0.7699568029164274, + "grad_norm": 0.09108664095401764, + "learning_rate": 6.918571178252271e-06, + "loss": 8.5815, + "step": 154180 + }, + { + "epoch": 0.7700067417413669, + "grad_norm": 0.09245019406080246, + "learning_rate": 6.9170692633107216e-06, + "loss": 8.58, + "step": 154190 + }, + { + "epoch": 0.7700566805663063, + "grad_norm": 0.09027982503175735, + "learning_rate": 6.915567348369171e-06, + "loss": 8.5821, + "step": 154200 + }, + { + "epoch": 0.7701066193912457, + "grad_norm": 0.093569815158844, + "learning_rate": 6.91406543342762e-06, + "loss": 8.562, + "step": 154210 + }, + { + "epoch": 0.7701565582161852, + "grad_norm": 0.09079516679048538, + "learning_rate": 6.91256351848607e-06, + "loss": 8.5822, + "step": 154220 + }, + { + "epoch": 0.7702064970411246, + "grad_norm": 0.09518477320671082, + "learning_rate": 6.91106160354452e-06, + "loss": 8.5833, + "step": 154230 + }, + { + "epoch": 0.7702564358660641, + "grad_norm": 0.08532573282718658, + "learning_rate": 6.909559688602969e-06, + "loss": 8.5973, + "step": 154240 + }, + { + "epoch": 0.7703063746910035, + "grad_norm": 0.09336396306753159, + "learning_rate": 6.908057773661418e-06, + "loss": 8.5737, + "step": 154250 + }, + { + "epoch": 0.770356313515943, + "grad_norm": 0.09063505381345749, + "learning_rate": 6.906555858719868e-06, + "loss": 8.5763, + "step": 154260 + }, + { + "epoch": 0.7704062523408824, + "grad_norm": 0.08470286428928375, + "learning_rate": 6.905053943778318e-06, + "loss": 8.5924, + "step": 154270 + }, + { + "epoch": 0.7704561911658219, + "grad_norm": 0.09841927140951157, + "learning_rate": 6.903552028836767e-06, + "loss": 8.5754, + "step": 154280 + }, + { + "epoch": 0.7705061299907613, + "grad_norm": 0.09492678195238113, + "learning_rate": 6.9020501138952166e-06, + "loss": 8.5874, + "step": 154290 + }, + { + "epoch": 0.7705560688157007, + "grad_norm": 0.09044993668794632, + "learning_rate": 6.900548198953666e-06, + "loss": 8.5628, + "step": 154300 + }, + { + "epoch": 0.7706060076406402, + "grad_norm": 0.09086602926254272, + "learning_rate": 6.899046284012116e-06, + "loss": 8.5546, + "step": 154310 + }, + { + "epoch": 0.7706559464655797, + "grad_norm": 0.08962773531675339, + "learning_rate": 6.897544369070565e-06, + "loss": 8.5762, + "step": 154320 + }, + { + "epoch": 0.7707058852905191, + "grad_norm": 0.09127478301525116, + "learning_rate": 6.896042454129015e-06, + "loss": 8.5902, + "step": 154330 + }, + { + "epoch": 0.7707558241154585, + "grad_norm": 0.09498311579227448, + "learning_rate": 6.894540539187464e-06, + "loss": 8.574, + "step": 154340 + }, + { + "epoch": 0.770805762940398, + "grad_norm": 0.09378572553396225, + "learning_rate": 6.893038624245913e-06, + "loss": 8.5666, + "step": 154350 + }, + { + "epoch": 0.7708557017653375, + "grad_norm": 0.09132121503353119, + "learning_rate": 6.8915367093043636e-06, + "loss": 8.5664, + "step": 154360 + }, + { + "epoch": 0.7709056405902769, + "grad_norm": 0.0891404077410698, + "learning_rate": 6.890034794362813e-06, + "loss": 8.5671, + "step": 154370 + }, + { + "epoch": 0.7709555794152163, + "grad_norm": 0.0928260087966919, + "learning_rate": 6.888532879421262e-06, + "loss": 8.5717, + "step": 154380 + }, + { + "epoch": 0.7710055182401558, + "grad_norm": 0.092970110476017, + "learning_rate": 6.887030964479712e-06, + "loss": 8.5745, + "step": 154390 + }, + { + "epoch": 0.7710554570650953, + "grad_norm": 0.09026780724525452, + "learning_rate": 6.885529049538161e-06, + "loss": 8.5715, + "step": 154400 + }, + { + "epoch": 0.7711053958900347, + "grad_norm": 0.09131123125553131, + "learning_rate": 6.884027134596611e-06, + "loss": 8.5806, + "step": 154410 + }, + { + "epoch": 0.7711553347149741, + "grad_norm": 0.09628612548112869, + "learning_rate": 6.88252521965506e-06, + "loss": 8.5872, + "step": 154420 + }, + { + "epoch": 0.7712052735399136, + "grad_norm": 0.10040068626403809, + "learning_rate": 6.88102330471351e-06, + "loss": 8.5681, + "step": 154430 + }, + { + "epoch": 0.7712552123648531, + "grad_norm": 0.09062188118696213, + "learning_rate": 6.87952138977196e-06, + "loss": 8.5869, + "step": 154440 + }, + { + "epoch": 0.7713051511897925, + "grad_norm": 0.09356309473514557, + "learning_rate": 6.878019474830408e-06, + "loss": 8.58, + "step": 154450 + }, + { + "epoch": 0.7713550900147319, + "grad_norm": 0.09048853069543839, + "learning_rate": 6.876517559888859e-06, + "loss": 8.5787, + "step": 154460 + }, + { + "epoch": 0.7714050288396714, + "grad_norm": 0.08828842639923096, + "learning_rate": 6.875015644947309e-06, + "loss": 8.5799, + "step": 154470 + }, + { + "epoch": 0.7714549676646109, + "grad_norm": 0.0911807268857956, + "learning_rate": 6.873513730005757e-06, + "loss": 8.5726, + "step": 154480 + }, + { + "epoch": 0.7715049064895503, + "grad_norm": 0.08841972798109055, + "learning_rate": 6.8720118150642074e-06, + "loss": 8.5756, + "step": 154490 + }, + { + "epoch": 0.7715548453144897, + "grad_norm": 0.09796998649835587, + "learning_rate": 6.870509900122656e-06, + "loss": 8.559, + "step": 154500 + }, + { + "epoch": 0.7716047841394292, + "grad_norm": 0.09058402478694916, + "learning_rate": 6.869007985181106e-06, + "loss": 8.5675, + "step": 154510 + }, + { + "epoch": 0.7716547229643687, + "grad_norm": 0.08772189915180206, + "learning_rate": 6.867506070239556e-06, + "loss": 8.5848, + "step": 154520 + }, + { + "epoch": 0.7717046617893081, + "grad_norm": 0.08951723575592041, + "learning_rate": 6.866004155298005e-06, + "loss": 8.5791, + "step": 154530 + }, + { + "epoch": 0.7717546006142475, + "grad_norm": 0.08602496981620789, + "learning_rate": 6.864502240356455e-06, + "loss": 8.5794, + "step": 154540 + }, + { + "epoch": 0.771804539439187, + "grad_norm": 0.08945851027965546, + "learning_rate": 6.863000325414904e-06, + "loss": 8.5841, + "step": 154550 + }, + { + "epoch": 0.7718544782641265, + "grad_norm": 0.09233725816011429, + "learning_rate": 6.861498410473354e-06, + "loss": 8.5541, + "step": 154560 + }, + { + "epoch": 0.7719044170890659, + "grad_norm": 0.09467992186546326, + "learning_rate": 6.859996495531804e-06, + "loss": 8.5744, + "step": 154570 + }, + { + "epoch": 0.7719543559140053, + "grad_norm": 0.08942775428295135, + "learning_rate": 6.858494580590252e-06, + "loss": 8.5864, + "step": 154580 + }, + { + "epoch": 0.7720042947389448, + "grad_norm": 0.08920110762119293, + "learning_rate": 6.8569926656487024e-06, + "loss": 8.5959, + "step": 154590 + }, + { + "epoch": 0.7720542335638843, + "grad_norm": 0.08930312097072601, + "learning_rate": 6.855490750707152e-06, + "loss": 8.5837, + "step": 154600 + }, + { + "epoch": 0.7721041723888237, + "grad_norm": 0.09447375684976578, + "learning_rate": 6.853988835765601e-06, + "loss": 8.58, + "step": 154610 + }, + { + "epoch": 0.7721541112137631, + "grad_norm": 0.08980347961187363, + "learning_rate": 6.852486920824051e-06, + "loss": 8.5727, + "step": 154620 + }, + { + "epoch": 0.7722040500387026, + "grad_norm": 0.09173062443733215, + "learning_rate": 6.850985005882501e-06, + "loss": 8.5604, + "step": 154630 + }, + { + "epoch": 0.772253988863642, + "grad_norm": 0.08936478197574615, + "learning_rate": 6.84948309094095e-06, + "loss": 8.5684, + "step": 154640 + }, + { + "epoch": 0.7723039276885815, + "grad_norm": 0.09296960383653641, + "learning_rate": 6.847981175999399e-06, + "loss": 8.5477, + "step": 154650 + }, + { + "epoch": 0.7723538665135209, + "grad_norm": 0.09330233186483383, + "learning_rate": 6.846479261057849e-06, + "loss": 8.5855, + "step": 154660 + }, + { + "epoch": 0.7724038053384604, + "grad_norm": 0.08701977878808975, + "learning_rate": 6.844977346116299e-06, + "loss": 8.5892, + "step": 154670 + }, + { + "epoch": 0.7724537441633998, + "grad_norm": 0.10141323506832123, + "learning_rate": 6.843475431174748e-06, + "loss": 8.5798, + "step": 154680 + }, + { + "epoch": 0.7725036829883393, + "grad_norm": 0.08847815543413162, + "learning_rate": 6.8419735162331974e-06, + "loss": 8.579, + "step": 154690 + }, + { + "epoch": 0.7725536218132787, + "grad_norm": 0.10151761770248413, + "learning_rate": 6.840471601291647e-06, + "loss": 8.5819, + "step": 154700 + }, + { + "epoch": 0.7726035606382182, + "grad_norm": 0.09666801989078522, + "learning_rate": 6.838969686350097e-06, + "loss": 8.5728, + "step": 154710 + }, + { + "epoch": 0.7726534994631576, + "grad_norm": 0.09449943155050278, + "learning_rate": 6.837467771408546e-06, + "loss": 8.5817, + "step": 154720 + }, + { + "epoch": 0.7727034382880971, + "grad_norm": 0.09444770961999893, + "learning_rate": 6.835965856466996e-06, + "loss": 8.5654, + "step": 154730 + }, + { + "epoch": 0.7727533771130365, + "grad_norm": 0.09364951401948929, + "learning_rate": 6.834463941525445e-06, + "loss": 8.5647, + "step": 154740 + }, + { + "epoch": 0.772803315937976, + "grad_norm": 0.09371913969516754, + "learning_rate": 6.832962026583894e-06, + "loss": 8.5827, + "step": 154750 + }, + { + "epoch": 0.7728532547629154, + "grad_norm": 0.08978746831417084, + "learning_rate": 6.8314601116423445e-06, + "loss": 8.5917, + "step": 154760 + }, + { + "epoch": 0.7729031935878549, + "grad_norm": 0.0957767516374588, + "learning_rate": 6.829958196700794e-06, + "loss": 8.566, + "step": 154770 + }, + { + "epoch": 0.7729531324127943, + "grad_norm": 0.09105443209409714, + "learning_rate": 6.828456281759243e-06, + "loss": 8.5709, + "step": 154780 + }, + { + "epoch": 0.7730030712377338, + "grad_norm": 0.09461674094200134, + "learning_rate": 6.826954366817693e-06, + "loss": 8.5735, + "step": 154790 + }, + { + "epoch": 0.7730530100626732, + "grad_norm": 0.08995918929576874, + "learning_rate": 6.825452451876142e-06, + "loss": 8.5817, + "step": 154800 + }, + { + "epoch": 0.7731029488876127, + "grad_norm": 0.08911199867725372, + "learning_rate": 6.823950536934592e-06, + "loss": 8.578, + "step": 154810 + }, + { + "epoch": 0.7731528877125521, + "grad_norm": 0.09105192124843597, + "learning_rate": 6.822448621993041e-06, + "loss": 8.5733, + "step": 154820 + }, + { + "epoch": 0.7732028265374916, + "grad_norm": 0.08885302394628525, + "learning_rate": 6.820946707051491e-06, + "loss": 8.5597, + "step": 154830 + }, + { + "epoch": 0.773252765362431, + "grad_norm": 0.09799888730049133, + "learning_rate": 6.819444792109941e-06, + "loss": 8.5674, + "step": 154840 + }, + { + "epoch": 0.7733027041873705, + "grad_norm": 0.0921357050538063, + "learning_rate": 6.817942877168389e-06, + "loss": 8.5727, + "step": 154850 + }, + { + "epoch": 0.7733526430123099, + "grad_norm": 0.0890340581536293, + "learning_rate": 6.8164409622268395e-06, + "loss": 8.5742, + "step": 154860 + }, + { + "epoch": 0.7734025818372494, + "grad_norm": 0.09299721568822861, + "learning_rate": 6.81493904728529e-06, + "loss": 8.6013, + "step": 154870 + }, + { + "epoch": 0.7734525206621888, + "grad_norm": 0.09847550094127655, + "learning_rate": 6.813437132343738e-06, + "loss": 8.5676, + "step": 154880 + }, + { + "epoch": 0.7735024594871283, + "grad_norm": 0.09080928564071655, + "learning_rate": 6.811935217402188e-06, + "loss": 8.5686, + "step": 154890 + }, + { + "epoch": 0.7735523983120677, + "grad_norm": 0.09545505046844482, + "learning_rate": 6.810433302460637e-06, + "loss": 8.5719, + "step": 154900 + }, + { + "epoch": 0.7736023371370072, + "grad_norm": 0.08688024431467056, + "learning_rate": 6.808931387519087e-06, + "loss": 8.5832, + "step": 154910 + }, + { + "epoch": 0.7736522759619466, + "grad_norm": 0.0897180363535881, + "learning_rate": 6.807429472577537e-06, + "loss": 8.5695, + "step": 154920 + }, + { + "epoch": 0.7737022147868861, + "grad_norm": 0.0968603864312172, + "learning_rate": 6.805927557635986e-06, + "loss": 8.5708, + "step": 154930 + }, + { + "epoch": 0.7737521536118255, + "grad_norm": 0.09534350782632828, + "learning_rate": 6.804425642694436e-06, + "loss": 8.5643, + "step": 154940 + }, + { + "epoch": 0.773802092436765, + "grad_norm": 0.09181123226881027, + "learning_rate": 6.802923727752885e-06, + "loss": 8.5733, + "step": 154950 + }, + { + "epoch": 0.7738520312617044, + "grad_norm": 0.08840048313140869, + "learning_rate": 6.8014218128113345e-06, + "loss": 8.56, + "step": 154960 + }, + { + "epoch": 0.7739019700866439, + "grad_norm": 0.09602469950914383, + "learning_rate": 6.799919897869785e-06, + "loss": 8.582, + "step": 154970 + }, + { + "epoch": 0.7739519089115833, + "grad_norm": 0.08890822529792786, + "learning_rate": 6.798417982928233e-06, + "loss": 8.5752, + "step": 154980 + }, + { + "epoch": 0.7740018477365228, + "grad_norm": 0.0951463058590889, + "learning_rate": 6.796916067986683e-06, + "loss": 8.5782, + "step": 154990 + }, + { + "epoch": 0.7740517865614622, + "grad_norm": 0.08842669427394867, + "learning_rate": 6.795414153045133e-06, + "loss": 8.6005, + "step": 155000 + }, + { + "epoch": 0.7741017253864017, + "grad_norm": 0.0843082070350647, + "learning_rate": 6.793912238103582e-06, + "loss": 8.5825, + "step": 155010 + }, + { + "epoch": 0.7741516642113411, + "grad_norm": 0.09507893025875092, + "learning_rate": 6.792410323162032e-06, + "loss": 8.5753, + "step": 155020 + }, + { + "epoch": 0.7742016030362806, + "grad_norm": 0.09141889959573746, + "learning_rate": 6.7909084082204815e-06, + "loss": 8.5755, + "step": 155030 + }, + { + "epoch": 0.77425154186122, + "grad_norm": 0.09243453294038773, + "learning_rate": 6.789406493278931e-06, + "loss": 8.5904, + "step": 155040 + }, + { + "epoch": 0.7743014806861594, + "grad_norm": 0.09155929833650589, + "learning_rate": 6.78790457833738e-06, + "loss": 8.5654, + "step": 155050 + }, + { + "epoch": 0.7743514195110989, + "grad_norm": 0.09300145506858826, + "learning_rate": 6.7864026633958295e-06, + "loss": 8.5482, + "step": 155060 + }, + { + "epoch": 0.7744013583360384, + "grad_norm": 0.09293139725923538, + "learning_rate": 6.78490074845428e-06, + "loss": 8.5743, + "step": 155070 + }, + { + "epoch": 0.7744512971609778, + "grad_norm": 0.08827711641788483, + "learning_rate": 6.783398833512729e-06, + "loss": 8.5664, + "step": 155080 + }, + { + "epoch": 0.7745012359859172, + "grad_norm": 0.08957238495349884, + "learning_rate": 6.781896918571178e-06, + "loss": 8.586, + "step": 155090 + }, + { + "epoch": 0.7745511748108567, + "grad_norm": 0.08923155814409256, + "learning_rate": 6.780395003629628e-06, + "loss": 8.5718, + "step": 155100 + }, + { + "epoch": 0.7746011136357962, + "grad_norm": 0.0950160101056099, + "learning_rate": 6.778893088688077e-06, + "loss": 8.585, + "step": 155110 + }, + { + "epoch": 0.7746510524607356, + "grad_norm": 0.09716098755598068, + "learning_rate": 6.777391173746527e-06, + "loss": 8.5609, + "step": 155120 + }, + { + "epoch": 0.774700991285675, + "grad_norm": 0.09193146228790283, + "learning_rate": 6.7758892588049765e-06, + "loss": 8.5826, + "step": 155130 + }, + { + "epoch": 0.7747509301106145, + "grad_norm": 0.08806554228067398, + "learning_rate": 6.774387343863426e-06, + "loss": 8.5824, + "step": 155140 + }, + { + "epoch": 0.774800868935554, + "grad_norm": 0.09546437114477158, + "learning_rate": 6.772885428921875e-06, + "loss": 8.5459, + "step": 155150 + }, + { + "epoch": 0.7748508077604934, + "grad_norm": 0.08918742090463638, + "learning_rate": 6.771383513980325e-06, + "loss": 8.5828, + "step": 155160 + }, + { + "epoch": 0.7749007465854328, + "grad_norm": 0.0883205235004425, + "learning_rate": 6.769881599038775e-06, + "loss": 8.5882, + "step": 155170 + }, + { + "epoch": 0.7749506854103723, + "grad_norm": 0.09442654252052307, + "learning_rate": 6.768379684097224e-06, + "loss": 8.5788, + "step": 155180 + }, + { + "epoch": 0.7750006242353118, + "grad_norm": 0.08732815831899643, + "learning_rate": 6.766877769155673e-06, + "loss": 8.5597, + "step": 155190 + }, + { + "epoch": 0.7750505630602512, + "grad_norm": 0.09169010072946548, + "learning_rate": 6.7653758542141235e-06, + "loss": 8.5722, + "step": 155200 + }, + { + "epoch": 0.7751005018851906, + "grad_norm": 0.09443099051713943, + "learning_rate": 6.763873939272573e-06, + "loss": 8.59, + "step": 155210 + }, + { + "epoch": 0.7751504407101301, + "grad_norm": 0.09280900657176971, + "learning_rate": 6.762372024331022e-06, + "loss": 8.5831, + "step": 155220 + }, + { + "epoch": 0.7752003795350696, + "grad_norm": 0.09292498975992203, + "learning_rate": 6.7608701093894715e-06, + "loss": 8.5737, + "step": 155230 + }, + { + "epoch": 0.775250318360009, + "grad_norm": 0.08915520459413528, + "learning_rate": 6.759368194447922e-06, + "loss": 8.5831, + "step": 155240 + }, + { + "epoch": 0.7753002571849484, + "grad_norm": 0.08977535367012024, + "learning_rate": 6.757866279506371e-06, + "loss": 8.5632, + "step": 155250 + }, + { + "epoch": 0.7753501960098879, + "grad_norm": 0.09135992079973221, + "learning_rate": 6.75636436456482e-06, + "loss": 8.5602, + "step": 155260 + }, + { + "epoch": 0.7754001348348273, + "grad_norm": 0.0918012335896492, + "learning_rate": 6.75486244962327e-06, + "loss": 8.5713, + "step": 155270 + }, + { + "epoch": 0.7754500736597668, + "grad_norm": 0.0947796106338501, + "learning_rate": 6.753360534681719e-06, + "loss": 8.5765, + "step": 155280 + }, + { + "epoch": 0.7755000124847062, + "grad_norm": 0.09258890151977539, + "learning_rate": 6.751858619740169e-06, + "loss": 8.5802, + "step": 155290 + }, + { + "epoch": 0.7755499513096457, + "grad_norm": 0.09028926491737366, + "learning_rate": 6.7503567047986185e-06, + "loss": 8.5838, + "step": 155300 + }, + { + "epoch": 0.7755998901345851, + "grad_norm": 0.09300950914621353, + "learning_rate": 6.748854789857068e-06, + "loss": 8.5698, + "step": 155310 + }, + { + "epoch": 0.7756498289595246, + "grad_norm": 0.09121584892272949, + "learning_rate": 6.747352874915518e-06, + "loss": 8.5736, + "step": 155320 + }, + { + "epoch": 0.775699767784464, + "grad_norm": 0.0916307121515274, + "learning_rate": 6.7458509599739665e-06, + "loss": 8.5664, + "step": 155330 + }, + { + "epoch": 0.7757497066094035, + "grad_norm": 0.09273859858512878, + "learning_rate": 6.744349045032417e-06, + "loss": 8.576, + "step": 155340 + }, + { + "epoch": 0.7757996454343429, + "grad_norm": 0.08911219984292984, + "learning_rate": 6.742847130090866e-06, + "loss": 8.5848, + "step": 155350 + }, + { + "epoch": 0.7758495842592824, + "grad_norm": 0.0911405086517334, + "learning_rate": 6.741345215149315e-06, + "loss": 8.5869, + "step": 155360 + }, + { + "epoch": 0.7758995230842218, + "grad_norm": 0.09100606292486191, + "learning_rate": 6.7398433002077655e-06, + "loss": 8.5476, + "step": 155370 + }, + { + "epoch": 0.7759494619091613, + "grad_norm": 0.09772035479545593, + "learning_rate": 6.738341385266214e-06, + "loss": 8.5863, + "step": 155380 + }, + { + "epoch": 0.7759994007341007, + "grad_norm": 0.08929920196533203, + "learning_rate": 6.736839470324664e-06, + "loss": 8.5799, + "step": 155390 + }, + { + "epoch": 0.7760493395590402, + "grad_norm": 0.0926336944103241, + "learning_rate": 6.735337555383114e-06, + "loss": 8.5804, + "step": 155400 + }, + { + "epoch": 0.7760992783839796, + "grad_norm": 0.09426475316286087, + "learning_rate": 6.733835640441563e-06, + "loss": 8.5662, + "step": 155410 + }, + { + "epoch": 0.776149217208919, + "grad_norm": 0.08994784206151962, + "learning_rate": 6.732333725500013e-06, + "loss": 8.568, + "step": 155420 + }, + { + "epoch": 0.7761991560338585, + "grad_norm": 0.093609519302845, + "learning_rate": 6.7308318105584615e-06, + "loss": 8.5902, + "step": 155430 + }, + { + "epoch": 0.776249094858798, + "grad_norm": 0.09885302931070328, + "learning_rate": 6.729329895616912e-06, + "loss": 8.5656, + "step": 155440 + }, + { + "epoch": 0.7762990336837374, + "grad_norm": 0.08882102370262146, + "learning_rate": 6.727827980675362e-06, + "loss": 8.5813, + "step": 155450 + }, + { + "epoch": 0.7763489725086768, + "grad_norm": 0.09155222773551941, + "learning_rate": 6.72632606573381e-06, + "loss": 8.5743, + "step": 155460 + }, + { + "epoch": 0.7763989113336163, + "grad_norm": 0.09071797877550125, + "learning_rate": 6.7248241507922605e-06, + "loss": 8.5807, + "step": 155470 + }, + { + "epoch": 0.7764488501585558, + "grad_norm": 0.09299448877573013, + "learning_rate": 6.72332223585071e-06, + "loss": 8.555, + "step": 155480 + }, + { + "epoch": 0.7764987889834952, + "grad_norm": 0.09560713171958923, + "learning_rate": 6.721820320909159e-06, + "loss": 8.5618, + "step": 155490 + }, + { + "epoch": 0.7765487278084346, + "grad_norm": 0.08749803900718689, + "learning_rate": 6.720318405967609e-06, + "loss": 8.5805, + "step": 155500 + }, + { + "epoch": 0.7765986666333741, + "grad_norm": 0.09022912383079529, + "learning_rate": 6.718816491026058e-06, + "loss": 8.5955, + "step": 155510 + }, + { + "epoch": 0.7766486054583136, + "grad_norm": 0.09331797063350677, + "learning_rate": 6.717314576084508e-06, + "loss": 8.5657, + "step": 155520 + }, + { + "epoch": 0.776698544283253, + "grad_norm": 0.09521540254354477, + "learning_rate": 6.715812661142957e-06, + "loss": 8.5662, + "step": 155530 + }, + { + "epoch": 0.7767484831081924, + "grad_norm": 0.09175549447536469, + "learning_rate": 6.714310746201407e-06, + "loss": 8.5795, + "step": 155540 + }, + { + "epoch": 0.7767984219331319, + "grad_norm": 0.09057381004095078, + "learning_rate": 6.712808831259857e-06, + "loss": 8.5605, + "step": 155550 + }, + { + "epoch": 0.7768483607580714, + "grad_norm": 0.09255360811948776, + "learning_rate": 6.711306916318306e-06, + "loss": 8.5666, + "step": 155560 + }, + { + "epoch": 0.7768982995830108, + "grad_norm": 0.08889876306056976, + "learning_rate": 6.7098050013767555e-06, + "loss": 8.5632, + "step": 155570 + }, + { + "epoch": 0.7769482384079502, + "grad_norm": 0.09821149706840515, + "learning_rate": 6.708303086435205e-06, + "loss": 8.5697, + "step": 155580 + }, + { + "epoch": 0.7769981772328897, + "grad_norm": 0.09199816733598709, + "learning_rate": 6.706801171493654e-06, + "loss": 8.5783, + "step": 155590 + }, + { + "epoch": 0.7770481160578292, + "grad_norm": 0.08835037052631378, + "learning_rate": 6.705299256552104e-06, + "loss": 8.5675, + "step": 155600 + }, + { + "epoch": 0.7770980548827686, + "grad_norm": 0.09240995347499847, + "learning_rate": 6.703797341610554e-06, + "loss": 8.586, + "step": 155610 + }, + { + "epoch": 0.777147993707708, + "grad_norm": 0.08770235627889633, + "learning_rate": 6.702295426669003e-06, + "loss": 8.5867, + "step": 155620 + }, + { + "epoch": 0.7771979325326475, + "grad_norm": 0.0896248146891594, + "learning_rate": 6.700793511727452e-06, + "loss": 8.5803, + "step": 155630 + }, + { + "epoch": 0.777247871357587, + "grad_norm": 0.09114081412553787, + "learning_rate": 6.6992915967859025e-06, + "loss": 8.5682, + "step": 155640 + }, + { + "epoch": 0.7772978101825264, + "grad_norm": 0.08897022902965546, + "learning_rate": 6.697789681844352e-06, + "loss": 8.589, + "step": 155650 + }, + { + "epoch": 0.7773477490074658, + "grad_norm": 0.09216853976249695, + "learning_rate": 6.696287766902801e-06, + "loss": 8.5517, + "step": 155660 + }, + { + "epoch": 0.7773976878324053, + "grad_norm": 0.0946219190955162, + "learning_rate": 6.6947858519612505e-06, + "loss": 8.5782, + "step": 155670 + }, + { + "epoch": 0.7774476266573448, + "grad_norm": 0.09455423057079315, + "learning_rate": 6.6932839370197e-06, + "loss": 8.5608, + "step": 155680 + }, + { + "epoch": 0.7774975654822842, + "grad_norm": 0.0957740917801857, + "learning_rate": 6.69178202207815e-06, + "loss": 8.5761, + "step": 155690 + }, + { + "epoch": 0.7775475043072236, + "grad_norm": 0.09181936085224152, + "learning_rate": 6.690280107136599e-06, + "loss": 8.582, + "step": 155700 + }, + { + "epoch": 0.7775974431321631, + "grad_norm": 0.09328048676252365, + "learning_rate": 6.688778192195049e-06, + "loss": 8.5598, + "step": 155710 + }, + { + "epoch": 0.7776473819571026, + "grad_norm": 0.08828567713499069, + "learning_rate": 6.687276277253499e-06, + "loss": 8.5677, + "step": 155720 + }, + { + "epoch": 0.777697320782042, + "grad_norm": 0.0937914326786995, + "learning_rate": 6.685774362311947e-06, + "loss": 8.5704, + "step": 155730 + }, + { + "epoch": 0.7777472596069814, + "grad_norm": 0.09564419835805893, + "learning_rate": 6.6842724473703976e-06, + "loss": 8.5762, + "step": 155740 + }, + { + "epoch": 0.7777971984319209, + "grad_norm": 0.08568717539310455, + "learning_rate": 6.682770532428847e-06, + "loss": 8.558, + "step": 155750 + }, + { + "epoch": 0.7778471372568604, + "grad_norm": 0.09092507511377335, + "learning_rate": 6.681268617487296e-06, + "loss": 8.5532, + "step": 155760 + }, + { + "epoch": 0.7778970760817998, + "grad_norm": 0.09010069072246552, + "learning_rate": 6.679766702545746e-06, + "loss": 8.5674, + "step": 155770 + }, + { + "epoch": 0.7779470149067392, + "grad_norm": 0.0927618071436882, + "learning_rate": 6.678264787604195e-06, + "loss": 8.5879, + "step": 155780 + }, + { + "epoch": 0.7779969537316787, + "grad_norm": 0.09050939232110977, + "learning_rate": 6.676762872662645e-06, + "loss": 8.5588, + "step": 155790 + }, + { + "epoch": 0.7780468925566182, + "grad_norm": 0.10684484988451004, + "learning_rate": 6.675260957721095e-06, + "loss": 8.5721, + "step": 155800 + }, + { + "epoch": 0.7780968313815576, + "grad_norm": 0.08730565011501312, + "learning_rate": 6.673759042779544e-06, + "loss": 8.5582, + "step": 155810 + }, + { + "epoch": 0.778146770206497, + "grad_norm": 0.089006207883358, + "learning_rate": 6.672257127837994e-06, + "loss": 8.5687, + "step": 155820 + }, + { + "epoch": 0.7781967090314365, + "grad_norm": 0.09139891713857651, + "learning_rate": 6.670755212896442e-06, + "loss": 8.5781, + "step": 155830 + }, + { + "epoch": 0.778246647856376, + "grad_norm": 0.09247538447380066, + "learning_rate": 6.6692532979548926e-06, + "loss": 8.578, + "step": 155840 + }, + { + "epoch": 0.7782965866813154, + "grad_norm": 0.09350184351205826, + "learning_rate": 6.667751383013343e-06, + "loss": 8.5866, + "step": 155850 + }, + { + "epoch": 0.7783465255062548, + "grad_norm": 0.08985615521669388, + "learning_rate": 6.666249468071791e-06, + "loss": 8.5793, + "step": 155860 + }, + { + "epoch": 0.7783964643311942, + "grad_norm": 0.08648446202278137, + "learning_rate": 6.664747553130241e-06, + "loss": 8.5698, + "step": 155870 + }, + { + "epoch": 0.7784464031561338, + "grad_norm": 0.0879853144288063, + "learning_rate": 6.663245638188691e-06, + "loss": 8.5803, + "step": 155880 + }, + { + "epoch": 0.7784963419810732, + "grad_norm": 0.09268853813409805, + "learning_rate": 6.66174372324714e-06, + "loss": 8.5734, + "step": 155890 + }, + { + "epoch": 0.7785462808060126, + "grad_norm": 0.09160283207893372, + "learning_rate": 6.66024180830559e-06, + "loss": 8.5861, + "step": 155900 + }, + { + "epoch": 0.778596219630952, + "grad_norm": 0.09253548830747604, + "learning_rate": 6.658739893364039e-06, + "loss": 8.5787, + "step": 155910 + }, + { + "epoch": 0.7786461584558916, + "grad_norm": 0.09596210718154907, + "learning_rate": 6.657237978422489e-06, + "loss": 8.5661, + "step": 155920 + }, + { + "epoch": 0.778696097280831, + "grad_norm": 0.08908601105213165, + "learning_rate": 6.655736063480938e-06, + "loss": 8.5649, + "step": 155930 + }, + { + "epoch": 0.7787460361057704, + "grad_norm": 0.08889824151992798, + "learning_rate": 6.6542341485393876e-06, + "loss": 8.55, + "step": 155940 + }, + { + "epoch": 0.7787959749307098, + "grad_norm": 0.09235508739948273, + "learning_rate": 6.652732233597838e-06, + "loss": 8.581, + "step": 155950 + }, + { + "epoch": 0.7788459137556494, + "grad_norm": 0.09276876598596573, + "learning_rate": 6.651230318656287e-06, + "loss": 8.5593, + "step": 155960 + }, + { + "epoch": 0.7788958525805888, + "grad_norm": 0.08646731078624725, + "learning_rate": 6.649728403714736e-06, + "loss": 8.5816, + "step": 155970 + }, + { + "epoch": 0.7789457914055282, + "grad_norm": 0.08740647882223129, + "learning_rate": 6.648226488773186e-06, + "loss": 8.5732, + "step": 155980 + }, + { + "epoch": 0.7789957302304676, + "grad_norm": 0.09212644398212433, + "learning_rate": 6.646724573831635e-06, + "loss": 8.5731, + "step": 155990 + }, + { + "epoch": 0.7790456690554072, + "grad_norm": 0.0925680547952652, + "learning_rate": 6.645222658890085e-06, + "loss": 8.5776, + "step": 156000 + }, + { + "epoch": 0.7790956078803466, + "grad_norm": 0.08844732493162155, + "learning_rate": 6.643720743948535e-06, + "loss": 8.5941, + "step": 156010 + }, + { + "epoch": 0.779145546705286, + "grad_norm": 0.08517491817474365, + "learning_rate": 6.642218829006984e-06, + "loss": 8.5737, + "step": 156020 + }, + { + "epoch": 0.7791954855302254, + "grad_norm": 0.09461086988449097, + "learning_rate": 6.640716914065433e-06, + "loss": 8.5723, + "step": 156030 + }, + { + "epoch": 0.779245424355165, + "grad_norm": 0.09351557493209839, + "learning_rate": 6.6392149991238834e-06, + "loss": 8.5885, + "step": 156040 + }, + { + "epoch": 0.7792953631801044, + "grad_norm": 0.09356269985437393, + "learning_rate": 6.637713084182333e-06, + "loss": 8.5521, + "step": 156050 + }, + { + "epoch": 0.7793453020050438, + "grad_norm": 0.09160886704921722, + "learning_rate": 6.636211169240782e-06, + "loss": 8.5711, + "step": 156060 + }, + { + "epoch": 0.7793952408299832, + "grad_norm": 0.09123864024877548, + "learning_rate": 6.634709254299231e-06, + "loss": 8.5658, + "step": 156070 + }, + { + "epoch": 0.7794451796549228, + "grad_norm": 0.08630036562681198, + "learning_rate": 6.633207339357681e-06, + "loss": 8.5817, + "step": 156080 + }, + { + "epoch": 0.7794951184798622, + "grad_norm": 0.08908306062221527, + "learning_rate": 6.631705424416131e-06, + "loss": 8.5747, + "step": 156090 + }, + { + "epoch": 0.7795450573048016, + "grad_norm": 0.0923098474740982, + "learning_rate": 6.63020350947458e-06, + "loss": 8.5524, + "step": 156100 + }, + { + "epoch": 0.779594996129741, + "grad_norm": 0.09396307915449142, + "learning_rate": 6.62870159453303e-06, + "loss": 8.5752, + "step": 156110 + }, + { + "epoch": 0.7796449349546806, + "grad_norm": 0.09391772747039795, + "learning_rate": 6.62719967959148e-06, + "loss": 8.5701, + "step": 156120 + }, + { + "epoch": 0.77969487377962, + "grad_norm": 0.0857236236333847, + "learning_rate": 6.625697764649928e-06, + "loss": 8.5592, + "step": 156130 + }, + { + "epoch": 0.7797448126045594, + "grad_norm": 0.09074844419956207, + "learning_rate": 6.6241958497083784e-06, + "loss": 8.5821, + "step": 156140 + }, + { + "epoch": 0.7797947514294988, + "grad_norm": 0.08897120505571365, + "learning_rate": 6.622693934766828e-06, + "loss": 8.5788, + "step": 156150 + }, + { + "epoch": 0.7798446902544384, + "grad_norm": 0.08881950378417969, + "learning_rate": 6.621192019825277e-06, + "loss": 8.572, + "step": 156160 + }, + { + "epoch": 0.7798946290793778, + "grad_norm": 0.09620756655931473, + "learning_rate": 6.619690104883727e-06, + "loss": 8.5728, + "step": 156170 + }, + { + "epoch": 0.7799445679043172, + "grad_norm": 0.08694388717412949, + "learning_rate": 6.618188189942176e-06, + "loss": 8.5895, + "step": 156180 + }, + { + "epoch": 0.7799945067292566, + "grad_norm": 0.08693565428256989, + "learning_rate": 6.616686275000626e-06, + "loss": 8.5849, + "step": 156190 + }, + { + "epoch": 0.7800444455541962, + "grad_norm": 0.08743081241846085, + "learning_rate": 6.615184360059076e-06, + "loss": 8.579, + "step": 156200 + }, + { + "epoch": 0.7800943843791356, + "grad_norm": 0.09228560328483582, + "learning_rate": 6.613682445117525e-06, + "loss": 8.5556, + "step": 156210 + }, + { + "epoch": 0.780144323204075, + "grad_norm": 0.09084375947713852, + "learning_rate": 6.612180530175975e-06, + "loss": 8.5772, + "step": 156220 + }, + { + "epoch": 0.7801942620290144, + "grad_norm": 0.08933619409799576, + "learning_rate": 6.610678615234423e-06, + "loss": 8.5722, + "step": 156230 + }, + { + "epoch": 0.780244200853954, + "grad_norm": 0.09434707462787628, + "learning_rate": 6.6091767002928734e-06, + "loss": 8.5676, + "step": 156240 + }, + { + "epoch": 0.7802941396788934, + "grad_norm": 0.08935230225324631, + "learning_rate": 6.607674785351324e-06, + "loss": 8.5802, + "step": 156250 + }, + { + "epoch": 0.7803440785038328, + "grad_norm": 0.09848465025424957, + "learning_rate": 6.606172870409772e-06, + "loss": 8.555, + "step": 156260 + }, + { + "epoch": 0.7803940173287722, + "grad_norm": 0.09197591245174408, + "learning_rate": 6.604670955468222e-06, + "loss": 8.5815, + "step": 156270 + }, + { + "epoch": 0.7804439561537116, + "grad_norm": 0.0875692144036293, + "learning_rate": 6.603169040526672e-06, + "loss": 8.5706, + "step": 156280 + }, + { + "epoch": 0.7804938949786512, + "grad_norm": 0.09515390545129776, + "learning_rate": 6.601667125585121e-06, + "loss": 8.5762, + "step": 156290 + }, + { + "epoch": 0.7805438338035906, + "grad_norm": 0.0905672088265419, + "learning_rate": 6.600165210643571e-06, + "loss": 8.5627, + "step": 156300 + }, + { + "epoch": 0.78059377262853, + "grad_norm": 0.08882583677768707, + "learning_rate": 6.59866329570202e-06, + "loss": 8.5586, + "step": 156310 + }, + { + "epoch": 0.7806437114534694, + "grad_norm": 0.08932114392518997, + "learning_rate": 6.59716138076047e-06, + "loss": 8.5591, + "step": 156320 + }, + { + "epoch": 0.780693650278409, + "grad_norm": 0.09061101078987122, + "learning_rate": 6.59565946581892e-06, + "loss": 8.5806, + "step": 156330 + }, + { + "epoch": 0.7807435891033484, + "grad_norm": 0.08474583178758621, + "learning_rate": 6.5941575508773684e-06, + "loss": 8.5841, + "step": 156340 + }, + { + "epoch": 0.7807935279282878, + "grad_norm": 0.09312687814235687, + "learning_rate": 6.592655635935819e-06, + "loss": 8.5691, + "step": 156350 + }, + { + "epoch": 0.7808434667532272, + "grad_norm": 0.09034864604473114, + "learning_rate": 6.591153720994268e-06, + "loss": 8.5726, + "step": 156360 + }, + { + "epoch": 0.7808934055781668, + "grad_norm": 0.09045044332742691, + "learning_rate": 6.589651806052717e-06, + "loss": 8.5599, + "step": 156370 + }, + { + "epoch": 0.7809433444031062, + "grad_norm": 0.09198865294456482, + "learning_rate": 6.5881498911111675e-06, + "loss": 8.5653, + "step": 156380 + }, + { + "epoch": 0.7809932832280456, + "grad_norm": 0.08959676325321198, + "learning_rate": 6.586647976169616e-06, + "loss": 8.5642, + "step": 156390 + }, + { + "epoch": 0.781043222052985, + "grad_norm": 0.09269116818904877, + "learning_rate": 6.585146061228066e-06, + "loss": 8.559, + "step": 156400 + }, + { + "epoch": 0.7810931608779246, + "grad_norm": 0.08859947323799133, + "learning_rate": 6.5836441462865155e-06, + "loss": 8.5707, + "step": 156410 + }, + { + "epoch": 0.781143099702864, + "grad_norm": 0.08951582759618759, + "learning_rate": 6.582142231344965e-06, + "loss": 8.5724, + "step": 156420 + }, + { + "epoch": 0.7811930385278034, + "grad_norm": 0.08770976960659027, + "learning_rate": 6.580640316403415e-06, + "loss": 8.5788, + "step": 156430 + }, + { + "epoch": 0.7812429773527428, + "grad_norm": 0.09240131080150604, + "learning_rate": 6.579138401461864e-06, + "loss": 8.5691, + "step": 156440 + }, + { + "epoch": 0.7812929161776824, + "grad_norm": 0.08868956565856934, + "learning_rate": 6.577636486520314e-06, + "loss": 8.5805, + "step": 156450 + }, + { + "epoch": 0.7813428550026218, + "grad_norm": 0.09283566474914551, + "learning_rate": 6.576134571578763e-06, + "loss": 8.5591, + "step": 156460 + }, + { + "epoch": 0.7813927938275612, + "grad_norm": 0.09575477242469788, + "learning_rate": 6.574632656637212e-06, + "loss": 8.562, + "step": 156470 + }, + { + "epoch": 0.7814427326525006, + "grad_norm": 0.09500584006309509, + "learning_rate": 6.5731307416956625e-06, + "loss": 8.5661, + "step": 156480 + }, + { + "epoch": 0.7814926714774402, + "grad_norm": 0.0926256999373436, + "learning_rate": 6.571628826754112e-06, + "loss": 8.5676, + "step": 156490 + }, + { + "epoch": 0.7815426103023796, + "grad_norm": 0.09180507063865662, + "learning_rate": 6.570126911812561e-06, + "loss": 8.5845, + "step": 156500 + }, + { + "epoch": 0.781592549127319, + "grad_norm": 0.08600914478302002, + "learning_rate": 6.5686249968710105e-06, + "loss": 8.5769, + "step": 156510 + }, + { + "epoch": 0.7816424879522584, + "grad_norm": 0.0883520096540451, + "learning_rate": 6.567123081929461e-06, + "loss": 8.5766, + "step": 156520 + }, + { + "epoch": 0.781692426777198, + "grad_norm": 0.0899481549859047, + "learning_rate": 6.56562116698791e-06, + "loss": 8.5516, + "step": 156530 + }, + { + "epoch": 0.7817423656021374, + "grad_norm": 0.09373979270458221, + "learning_rate": 6.564119252046359e-06, + "loss": 8.5745, + "step": 156540 + }, + { + "epoch": 0.7817923044270768, + "grad_norm": 0.08799232542514801, + "learning_rate": 6.562617337104809e-06, + "loss": 8.5713, + "step": 156550 + }, + { + "epoch": 0.7818422432520162, + "grad_norm": 0.09024485945701599, + "learning_rate": 6.561115422163258e-06, + "loss": 8.5852, + "step": 156560 + }, + { + "epoch": 0.7818921820769558, + "grad_norm": 0.0885784775018692, + "learning_rate": 6.559613507221708e-06, + "loss": 8.5742, + "step": 156570 + }, + { + "epoch": 0.7819421209018952, + "grad_norm": 0.09604980051517487, + "learning_rate": 6.5581115922801575e-06, + "loss": 8.5856, + "step": 156580 + }, + { + "epoch": 0.7819920597268346, + "grad_norm": 0.08925112336874008, + "learning_rate": 6.556609677338607e-06, + "loss": 8.577, + "step": 156590 + }, + { + "epoch": 0.782041998551774, + "grad_norm": 0.08953628689050674, + "learning_rate": 6.555107762397057e-06, + "loss": 8.5945, + "step": 156600 + }, + { + "epoch": 0.7820919373767136, + "grad_norm": 0.08811146020889282, + "learning_rate": 6.5536058474555055e-06, + "loss": 8.5769, + "step": 156610 + }, + { + "epoch": 0.782141876201653, + "grad_norm": 0.08883983641862869, + "learning_rate": 6.552103932513956e-06, + "loss": 8.5647, + "step": 156620 + }, + { + "epoch": 0.7821918150265924, + "grad_norm": 0.09047286212444305, + "learning_rate": 6.550602017572405e-06, + "loss": 8.5689, + "step": 156630 + }, + { + "epoch": 0.7822417538515318, + "grad_norm": 0.091248519718647, + "learning_rate": 6.549100102630854e-06, + "loss": 8.5746, + "step": 156640 + }, + { + "epoch": 0.7822916926764714, + "grad_norm": 0.0949409082531929, + "learning_rate": 6.5475981876893045e-06, + "loss": 8.5657, + "step": 156650 + }, + { + "epoch": 0.7823416315014108, + "grad_norm": 0.09160256385803223, + "learning_rate": 6.546096272747753e-06, + "loss": 8.5602, + "step": 156660 + }, + { + "epoch": 0.7823915703263502, + "grad_norm": 0.09412173181772232, + "learning_rate": 6.544594357806203e-06, + "loss": 8.5698, + "step": 156670 + }, + { + "epoch": 0.7824415091512896, + "grad_norm": 0.08734364062547684, + "learning_rate": 6.543092442864653e-06, + "loss": 8.5699, + "step": 156680 + }, + { + "epoch": 0.7824914479762292, + "grad_norm": 0.08681227266788483, + "learning_rate": 6.541590527923102e-06, + "loss": 8.5734, + "step": 156690 + }, + { + "epoch": 0.7825413868011686, + "grad_norm": 0.09137750416994095, + "learning_rate": 6.540088612981552e-06, + "loss": 8.5507, + "step": 156700 + }, + { + "epoch": 0.782591325626108, + "grad_norm": 0.08570120483636856, + "learning_rate": 6.5385866980400005e-06, + "loss": 8.5657, + "step": 156710 + }, + { + "epoch": 0.7826412644510474, + "grad_norm": 0.0939328595995903, + "learning_rate": 6.537084783098451e-06, + "loss": 8.5711, + "step": 156720 + }, + { + "epoch": 0.782691203275987, + "grad_norm": 0.08831031620502472, + "learning_rate": 6.535582868156901e-06, + "loss": 8.5667, + "step": 156730 + }, + { + "epoch": 0.7827411421009264, + "grad_norm": 0.09072265774011612, + "learning_rate": 6.534080953215349e-06, + "loss": 8.5528, + "step": 156740 + }, + { + "epoch": 0.7827910809258658, + "grad_norm": 0.09154432266950607, + "learning_rate": 6.5325790382737995e-06, + "loss": 8.5787, + "step": 156750 + }, + { + "epoch": 0.7828410197508052, + "grad_norm": 0.0947096049785614, + "learning_rate": 6.531077123332249e-06, + "loss": 8.5638, + "step": 156760 + }, + { + "epoch": 0.7828909585757448, + "grad_norm": 0.09594990313053131, + "learning_rate": 6.529575208390698e-06, + "loss": 8.5761, + "step": 156770 + }, + { + "epoch": 0.7829408974006842, + "grad_norm": 0.0905158668756485, + "learning_rate": 6.528073293449148e-06, + "loss": 8.558, + "step": 156780 + }, + { + "epoch": 0.7829908362256236, + "grad_norm": 0.0911213606595993, + "learning_rate": 6.526571378507597e-06, + "loss": 8.5628, + "step": 156790 + }, + { + "epoch": 0.783040775050563, + "grad_norm": 0.08714745938777924, + "learning_rate": 6.525069463566047e-06, + "loss": 8.5658, + "step": 156800 + }, + { + "epoch": 0.7830907138755026, + "grad_norm": 0.09068936109542847, + "learning_rate": 6.523567548624496e-06, + "loss": 8.5696, + "step": 156810 + }, + { + "epoch": 0.783140652700442, + "grad_norm": 0.09490951150655746, + "learning_rate": 6.522065633682946e-06, + "loss": 8.5601, + "step": 156820 + }, + { + "epoch": 0.7831905915253814, + "grad_norm": 0.08792884647846222, + "learning_rate": 6.520563718741396e-06, + "loss": 8.5703, + "step": 156830 + }, + { + "epoch": 0.7832405303503208, + "grad_norm": 0.08713288605213165, + "learning_rate": 6.519061803799845e-06, + "loss": 8.5737, + "step": 156840 + }, + { + "epoch": 0.7832904691752604, + "grad_norm": 0.0946747288107872, + "learning_rate": 6.5175598888582945e-06, + "loss": 8.5595, + "step": 156850 + }, + { + "epoch": 0.7833404080001998, + "grad_norm": 0.08821689337491989, + "learning_rate": 6.516057973916744e-06, + "loss": 8.5619, + "step": 156860 + }, + { + "epoch": 0.7833903468251392, + "grad_norm": 0.09473678469657898, + "learning_rate": 6.514556058975193e-06, + "loss": 8.5639, + "step": 156870 + }, + { + "epoch": 0.7834402856500786, + "grad_norm": 0.09480735659599304, + "learning_rate": 6.513054144033643e-06, + "loss": 8.5744, + "step": 156880 + }, + { + "epoch": 0.7834902244750181, + "grad_norm": 0.09333161264657974, + "learning_rate": 6.511552229092093e-06, + "loss": 8.5615, + "step": 156890 + }, + { + "epoch": 0.7835401632999576, + "grad_norm": 0.09537094086408615, + "learning_rate": 6.510050314150542e-06, + "loss": 8.5697, + "step": 156900 + }, + { + "epoch": 0.783590102124897, + "grad_norm": 0.09100158512592316, + "learning_rate": 6.508548399208991e-06, + "loss": 8.5796, + "step": 156910 + }, + { + "epoch": 0.7836400409498364, + "grad_norm": 0.0884363129734993, + "learning_rate": 6.5070464842674415e-06, + "loss": 8.5578, + "step": 156920 + }, + { + "epoch": 0.783689979774776, + "grad_norm": 0.09135287255048752, + "learning_rate": 6.505544569325891e-06, + "loss": 8.5509, + "step": 156930 + }, + { + "epoch": 0.7837399185997154, + "grad_norm": 0.09011730551719666, + "learning_rate": 6.50404265438434e-06, + "loss": 8.562, + "step": 156940 + }, + { + "epoch": 0.7837898574246548, + "grad_norm": 0.08869774639606476, + "learning_rate": 6.5025407394427895e-06, + "loss": 8.5689, + "step": 156950 + }, + { + "epoch": 0.7838397962495942, + "grad_norm": 0.09371919184923172, + "learning_rate": 6.501038824501239e-06, + "loss": 8.5672, + "step": 156960 + }, + { + "epoch": 0.7838897350745337, + "grad_norm": 0.0950884222984314, + "learning_rate": 6.499536909559689e-06, + "loss": 8.5585, + "step": 156970 + }, + { + "epoch": 0.7839396738994732, + "grad_norm": 0.09123662114143372, + "learning_rate": 6.498034994618138e-06, + "loss": 8.5601, + "step": 156980 + }, + { + "epoch": 0.7839896127244126, + "grad_norm": 0.0944250077009201, + "learning_rate": 6.496533079676588e-06, + "loss": 8.5766, + "step": 156990 + }, + { + "epoch": 0.784039551549352, + "grad_norm": 0.08941183984279633, + "learning_rate": 6.495031164735038e-06, + "loss": 8.5673, + "step": 157000 + }, + { + "epoch": 0.7840894903742915, + "grad_norm": 0.0950687900185585, + "learning_rate": 6.493529249793486e-06, + "loss": 8.5736, + "step": 157010 + }, + { + "epoch": 0.784139429199231, + "grad_norm": 0.09286126494407654, + "learning_rate": 6.4920273348519365e-06, + "loss": 8.5549, + "step": 157020 + }, + { + "epoch": 0.7841893680241704, + "grad_norm": 0.09739340096712112, + "learning_rate": 6.490525419910386e-06, + "loss": 8.5606, + "step": 157030 + }, + { + "epoch": 0.7842393068491098, + "grad_norm": 0.09119179099798203, + "learning_rate": 6.489023504968835e-06, + "loss": 8.5585, + "step": 157040 + }, + { + "epoch": 0.7842892456740493, + "grad_norm": 0.09187173843383789, + "learning_rate": 6.487521590027285e-06, + "loss": 8.5867, + "step": 157050 + }, + { + "epoch": 0.7843391844989888, + "grad_norm": 0.08924529701471329, + "learning_rate": 6.486019675085734e-06, + "loss": 8.5617, + "step": 157060 + }, + { + "epoch": 0.7843891233239282, + "grad_norm": 0.08665250241756439, + "learning_rate": 6.484517760144184e-06, + "loss": 8.5643, + "step": 157070 + }, + { + "epoch": 0.7844390621488676, + "grad_norm": 0.08721933513879776, + "learning_rate": 6.483015845202634e-06, + "loss": 8.5747, + "step": 157080 + }, + { + "epoch": 0.7844890009738071, + "grad_norm": 0.09293626993894577, + "learning_rate": 6.481513930261083e-06, + "loss": 8.5616, + "step": 157090 + }, + { + "epoch": 0.7845389397987466, + "grad_norm": 0.09339628368616104, + "learning_rate": 6.480012015319533e-06, + "loss": 8.5674, + "step": 157100 + }, + { + "epoch": 0.784588878623686, + "grad_norm": 0.09324491769075394, + "learning_rate": 6.478510100377981e-06, + "loss": 8.561, + "step": 157110 + }, + { + "epoch": 0.7846388174486254, + "grad_norm": 0.08938504010438919, + "learning_rate": 6.4770081854364315e-06, + "loss": 8.5575, + "step": 157120 + }, + { + "epoch": 0.7846887562735649, + "grad_norm": 0.09114962816238403, + "learning_rate": 6.475506270494882e-06, + "loss": 8.5692, + "step": 157130 + }, + { + "epoch": 0.7847386950985044, + "grad_norm": 0.0890054702758789, + "learning_rate": 6.47400435555333e-06, + "loss": 8.5646, + "step": 157140 + }, + { + "epoch": 0.7847886339234438, + "grad_norm": 0.09435494989156723, + "learning_rate": 6.47250244061178e-06, + "loss": 8.5797, + "step": 157150 + }, + { + "epoch": 0.7848385727483832, + "grad_norm": 0.09404738992452621, + "learning_rate": 6.47100052567023e-06, + "loss": 8.5541, + "step": 157160 + }, + { + "epoch": 0.7848885115733227, + "grad_norm": 0.09190955758094788, + "learning_rate": 6.469498610728679e-06, + "loss": 8.5693, + "step": 157170 + }, + { + "epoch": 0.7849384503982622, + "grad_norm": 0.08809108287096024, + "learning_rate": 6.467996695787129e-06, + "loss": 8.5717, + "step": 157180 + }, + { + "epoch": 0.7849883892232016, + "grad_norm": 0.0869806781411171, + "learning_rate": 6.466494780845578e-06, + "loss": 8.5623, + "step": 157190 + }, + { + "epoch": 0.785038328048141, + "grad_norm": 0.093222476541996, + "learning_rate": 6.464992865904028e-06, + "loss": 8.5687, + "step": 157200 + }, + { + "epoch": 0.7850882668730805, + "grad_norm": 0.08760460466146469, + "learning_rate": 6.463490950962477e-06, + "loss": 8.5537, + "step": 157210 + }, + { + "epoch": 0.78513820569802, + "grad_norm": 0.09468575567007065, + "learning_rate": 6.4619890360209265e-06, + "loss": 8.5678, + "step": 157220 + }, + { + "epoch": 0.7851881445229594, + "grad_norm": 0.08602230995893478, + "learning_rate": 6.460487121079377e-06, + "loss": 8.5842, + "step": 157230 + }, + { + "epoch": 0.7852380833478988, + "grad_norm": 0.093845434486866, + "learning_rate": 6.458985206137826e-06, + "loss": 8.5626, + "step": 157240 + }, + { + "epoch": 0.7852880221728382, + "grad_norm": 0.08826538175344467, + "learning_rate": 6.457483291196275e-06, + "loss": 8.5479, + "step": 157250 + }, + { + "epoch": 0.7853379609977778, + "grad_norm": 0.09430468082427979, + "learning_rate": 6.455981376254725e-06, + "loss": 8.5857, + "step": 157260 + }, + { + "epoch": 0.7853878998227172, + "grad_norm": 0.08807472139596939, + "learning_rate": 6.454479461313174e-06, + "loss": 8.5541, + "step": 157270 + }, + { + "epoch": 0.7854378386476566, + "grad_norm": 0.09787096083164215, + "learning_rate": 6.452977546371624e-06, + "loss": 8.5621, + "step": 157280 + }, + { + "epoch": 0.785487777472596, + "grad_norm": 0.09540273249149323, + "learning_rate": 6.4514756314300736e-06, + "loss": 8.5687, + "step": 157290 + }, + { + "epoch": 0.7855377162975355, + "grad_norm": 0.09057477116584778, + "learning_rate": 6.449973716488523e-06, + "loss": 8.5679, + "step": 157300 + }, + { + "epoch": 0.785587655122475, + "grad_norm": 0.08641599118709564, + "learning_rate": 6.448471801546972e-06, + "loss": 8.5723, + "step": 157310 + }, + { + "epoch": 0.7856375939474144, + "grad_norm": 0.0898127630352974, + "learning_rate": 6.446969886605422e-06, + "loss": 8.5729, + "step": 157320 + }, + { + "epoch": 0.7856875327723538, + "grad_norm": 0.09772838652133942, + "learning_rate": 6.445467971663872e-06, + "loss": 8.5604, + "step": 157330 + }, + { + "epoch": 0.7857374715972933, + "grad_norm": 0.09075041115283966, + "learning_rate": 6.443966056722321e-06, + "loss": 8.5621, + "step": 157340 + }, + { + "epoch": 0.7857874104222328, + "grad_norm": 0.09176818281412125, + "learning_rate": 6.44246414178077e-06, + "loss": 8.5765, + "step": 157350 + }, + { + "epoch": 0.7858373492471722, + "grad_norm": 0.09130357950925827, + "learning_rate": 6.44096222683922e-06, + "loss": 8.5659, + "step": 157360 + }, + { + "epoch": 0.7858872880721116, + "grad_norm": 0.08570962399244308, + "learning_rate": 6.43946031189767e-06, + "loss": 8.5623, + "step": 157370 + }, + { + "epoch": 0.7859372268970511, + "grad_norm": 0.08622989058494568, + "learning_rate": 6.437958396956119e-06, + "loss": 8.5748, + "step": 157380 + }, + { + "epoch": 0.7859871657219906, + "grad_norm": 0.09150289744138718, + "learning_rate": 6.4364564820145686e-06, + "loss": 8.5404, + "step": 157390 + }, + { + "epoch": 0.78603710454693, + "grad_norm": 0.09090328216552734, + "learning_rate": 6.434954567073019e-06, + "loss": 8.5627, + "step": 157400 + }, + { + "epoch": 0.7860870433718694, + "grad_norm": 0.09460791200399399, + "learning_rate": 6.433452652131468e-06, + "loss": 8.5751, + "step": 157410 + }, + { + "epoch": 0.7861369821968089, + "grad_norm": 0.09171260893344879, + "learning_rate": 6.431950737189917e-06, + "loss": 8.5494, + "step": 157420 + }, + { + "epoch": 0.7861869210217484, + "grad_norm": 0.0896756649017334, + "learning_rate": 6.430448822248367e-06, + "loss": 8.5658, + "step": 157430 + }, + { + "epoch": 0.7862368598466878, + "grad_norm": 0.08946090936660767, + "learning_rate": 6.428946907306816e-06, + "loss": 8.5715, + "step": 157440 + }, + { + "epoch": 0.7862867986716272, + "grad_norm": 0.08953197300434113, + "learning_rate": 6.427444992365266e-06, + "loss": 8.5723, + "step": 157450 + }, + { + "epoch": 0.7863367374965667, + "grad_norm": 0.08706686645746231, + "learning_rate": 6.425943077423716e-06, + "loss": 8.5687, + "step": 157460 + }, + { + "epoch": 0.7863866763215062, + "grad_norm": 0.09402570128440857, + "learning_rate": 6.424441162482165e-06, + "loss": 8.5646, + "step": 157470 + }, + { + "epoch": 0.7864366151464456, + "grad_norm": 0.09386756271123886, + "learning_rate": 6.422939247540615e-06, + "loss": 8.5686, + "step": 157480 + }, + { + "epoch": 0.786486553971385, + "grad_norm": 0.09243669360876083, + "learning_rate": 6.4214373325990636e-06, + "loss": 8.5747, + "step": 157490 + }, + { + "epoch": 0.7865364927963245, + "grad_norm": 0.08882700651884079, + "learning_rate": 6.419935417657514e-06, + "loss": 8.553, + "step": 157500 + }, + { + "epoch": 0.786586431621264, + "grad_norm": 0.091353639960289, + "learning_rate": 6.418433502715963e-06, + "loss": 8.5603, + "step": 157510 + }, + { + "epoch": 0.7866363704462034, + "grad_norm": 0.09125305712223053, + "learning_rate": 6.416931587774412e-06, + "loss": 8.5857, + "step": 157520 + }, + { + "epoch": 0.7866863092711428, + "grad_norm": 0.09622442722320557, + "learning_rate": 6.415429672832863e-06, + "loss": 8.554, + "step": 157530 + }, + { + "epoch": 0.7867362480960823, + "grad_norm": 0.09017577767372131, + "learning_rate": 6.413927757891311e-06, + "loss": 8.5527, + "step": 157540 + }, + { + "epoch": 0.7867861869210218, + "grad_norm": 0.09028837084770203, + "learning_rate": 6.412425842949761e-06, + "loss": 8.5776, + "step": 157550 + }, + { + "epoch": 0.7868361257459612, + "grad_norm": 0.0956842452287674, + "learning_rate": 6.4109239280082114e-06, + "loss": 8.5637, + "step": 157560 + }, + { + "epoch": 0.7868860645709006, + "grad_norm": 0.09367352724075317, + "learning_rate": 6.40942201306666e-06, + "loss": 8.5664, + "step": 157570 + }, + { + "epoch": 0.7869360033958401, + "grad_norm": 0.0918663740158081, + "learning_rate": 6.40792009812511e-06, + "loss": 8.5806, + "step": 157580 + }, + { + "epoch": 0.7869859422207796, + "grad_norm": 0.08775880932807922, + "learning_rate": 6.406418183183559e-06, + "loss": 8.5442, + "step": 157590 + }, + { + "epoch": 0.787035881045719, + "grad_norm": 0.09422892332077026, + "learning_rate": 6.404916268242009e-06, + "loss": 8.5819, + "step": 157600 + }, + { + "epoch": 0.7870858198706584, + "grad_norm": 0.0917881578207016, + "learning_rate": 6.403414353300459e-06, + "loss": 8.56, + "step": 157610 + }, + { + "epoch": 0.7871357586955979, + "grad_norm": 0.08879712224006653, + "learning_rate": 6.401912438358907e-06, + "loss": 8.5751, + "step": 157620 + }, + { + "epoch": 0.7871856975205374, + "grad_norm": 0.0947839617729187, + "learning_rate": 6.400410523417358e-06, + "loss": 8.5617, + "step": 157630 + }, + { + "epoch": 0.7872356363454768, + "grad_norm": 0.09528858214616776, + "learning_rate": 6.398908608475807e-06, + "loss": 8.5535, + "step": 157640 + }, + { + "epoch": 0.7872855751704162, + "grad_norm": 0.08856210112571716, + "learning_rate": 6.397406693534256e-06, + "loss": 8.5532, + "step": 157650 + }, + { + "epoch": 0.7873355139953557, + "grad_norm": 0.08869683742523193, + "learning_rate": 6.3959047785927064e-06, + "loss": 8.5521, + "step": 157660 + }, + { + "epoch": 0.7873854528202952, + "grad_norm": 0.09273868054151535, + "learning_rate": 6.394402863651155e-06, + "loss": 8.5656, + "step": 157670 + }, + { + "epoch": 0.7874353916452346, + "grad_norm": 0.09226199239492416, + "learning_rate": 6.392900948709605e-06, + "loss": 8.5631, + "step": 157680 + }, + { + "epoch": 0.787485330470174, + "grad_norm": 0.09002158790826797, + "learning_rate": 6.3913990337680544e-06, + "loss": 8.5679, + "step": 157690 + }, + { + "epoch": 0.7875352692951135, + "grad_norm": 0.09723229706287384, + "learning_rate": 6.389897118826504e-06, + "loss": 8.5763, + "step": 157700 + }, + { + "epoch": 0.787585208120053, + "grad_norm": 0.0909029170870781, + "learning_rate": 6.388395203884954e-06, + "loss": 8.5659, + "step": 157710 + }, + { + "epoch": 0.7876351469449924, + "grad_norm": 0.08461030572652817, + "learning_rate": 6.386893288943403e-06, + "loss": 8.5652, + "step": 157720 + }, + { + "epoch": 0.7876850857699318, + "grad_norm": 0.0874033197760582, + "learning_rate": 6.385391374001853e-06, + "loss": 8.553, + "step": 157730 + }, + { + "epoch": 0.7877350245948713, + "grad_norm": 0.09039891511201859, + "learning_rate": 6.383889459060302e-06, + "loss": 8.5708, + "step": 157740 + }, + { + "epoch": 0.7877849634198107, + "grad_norm": 0.09345263242721558, + "learning_rate": 6.382387544118751e-06, + "loss": 8.5643, + "step": 157750 + }, + { + "epoch": 0.7878349022447502, + "grad_norm": 0.08719782531261444, + "learning_rate": 6.3808856291772014e-06, + "loss": 8.5788, + "step": 157760 + }, + { + "epoch": 0.7878848410696896, + "grad_norm": 0.09255023300647736, + "learning_rate": 6.379383714235651e-06, + "loss": 8.5487, + "step": 157770 + }, + { + "epoch": 0.7879347798946291, + "grad_norm": 0.08840210735797882, + "learning_rate": 6.3778817992941e-06, + "loss": 8.5636, + "step": 157780 + }, + { + "epoch": 0.7879847187195685, + "grad_norm": 0.08876688033342361, + "learning_rate": 6.3763798843525494e-06, + "loss": 8.5884, + "step": 157790 + }, + { + "epoch": 0.788034657544508, + "grad_norm": 0.0919869989156723, + "learning_rate": 6.374877969411e-06, + "loss": 8.5666, + "step": 157800 + }, + { + "epoch": 0.7880845963694474, + "grad_norm": 0.08744458109140396, + "learning_rate": 6.373376054469449e-06, + "loss": 8.5712, + "step": 157810 + }, + { + "epoch": 0.7881345351943869, + "grad_norm": 0.0894647017121315, + "learning_rate": 6.371874139527898e-06, + "loss": 8.5758, + "step": 157820 + }, + { + "epoch": 0.7881844740193263, + "grad_norm": 0.08984792977571487, + "learning_rate": 6.370372224586348e-06, + "loss": 8.5658, + "step": 157830 + }, + { + "epoch": 0.7882344128442658, + "grad_norm": 0.08753778785467148, + "learning_rate": 6.368870309644797e-06, + "loss": 8.5606, + "step": 157840 + }, + { + "epoch": 0.7882843516692052, + "grad_norm": 0.09227602183818817, + "learning_rate": 6.367368394703247e-06, + "loss": 8.5656, + "step": 157850 + }, + { + "epoch": 0.7883342904941447, + "grad_norm": 0.09825395792722702, + "learning_rate": 6.3658664797616965e-06, + "loss": 8.5529, + "step": 157860 + }, + { + "epoch": 0.7883842293190841, + "grad_norm": 0.08933484554290771, + "learning_rate": 6.364364564820146e-06, + "loss": 8.5791, + "step": 157870 + }, + { + "epoch": 0.7884341681440236, + "grad_norm": 0.09111601114273071, + "learning_rate": 6.362862649878596e-06, + "loss": 8.5454, + "step": 157880 + }, + { + "epoch": 0.788484106968963, + "grad_norm": 0.09824613481760025, + "learning_rate": 6.3613607349370444e-06, + "loss": 8.5477, + "step": 157890 + }, + { + "epoch": 0.7885340457939025, + "grad_norm": 0.09107904136180878, + "learning_rate": 6.359858819995495e-06, + "loss": 8.5625, + "step": 157900 + }, + { + "epoch": 0.7885839846188419, + "grad_norm": 0.08801267296075821, + "learning_rate": 6.358356905053944e-06, + "loss": 8.5432, + "step": 157910 + }, + { + "epoch": 0.7886339234437814, + "grad_norm": 0.09051348268985748, + "learning_rate": 6.356854990112393e-06, + "loss": 8.5402, + "step": 157920 + }, + { + "epoch": 0.7886838622687208, + "grad_norm": 0.0885823667049408, + "learning_rate": 6.3553530751708435e-06, + "loss": 8.5557, + "step": 157930 + }, + { + "epoch": 0.7887338010936603, + "grad_norm": 0.09226511418819427, + "learning_rate": 6.353851160229292e-06, + "loss": 8.5563, + "step": 157940 + }, + { + "epoch": 0.7887837399185997, + "grad_norm": 0.09105234593153, + "learning_rate": 6.352349245287742e-06, + "loss": 8.572, + "step": 157950 + }, + { + "epoch": 0.7888336787435392, + "grad_norm": 0.08796839416027069, + "learning_rate": 6.350847330346192e-06, + "loss": 8.5714, + "step": 157960 + }, + { + "epoch": 0.7888836175684786, + "grad_norm": 0.08775161951780319, + "learning_rate": 6.349345415404641e-06, + "loss": 8.5559, + "step": 157970 + }, + { + "epoch": 0.7889335563934181, + "grad_norm": 0.09324196726083755, + "learning_rate": 6.347843500463091e-06, + "loss": 8.5625, + "step": 157980 + }, + { + "epoch": 0.7889834952183575, + "grad_norm": 0.08922797441482544, + "learning_rate": 6.3463415855215395e-06, + "loss": 8.5759, + "step": 157990 + }, + { + "epoch": 0.789033434043297, + "grad_norm": 0.09467015415430069, + "learning_rate": 6.34483967057999e-06, + "loss": 8.5574, + "step": 158000 + }, + { + "epoch": 0.7890833728682364, + "grad_norm": 0.09067735075950623, + "learning_rate": 6.34333775563844e-06, + "loss": 8.5545, + "step": 158010 + }, + { + "epoch": 0.7891333116931759, + "grad_norm": 0.09173320233821869, + "learning_rate": 6.341835840696888e-06, + "loss": 8.5527, + "step": 158020 + }, + { + "epoch": 0.7891832505181153, + "grad_norm": 0.08913468569517136, + "learning_rate": 6.3403339257553385e-06, + "loss": 8.567, + "step": 158030 + }, + { + "epoch": 0.7892331893430548, + "grad_norm": 0.088861383497715, + "learning_rate": 6.338832010813787e-06, + "loss": 8.5653, + "step": 158040 + }, + { + "epoch": 0.7892831281679942, + "grad_norm": 0.08768237382173538, + "learning_rate": 6.337330095872237e-06, + "loss": 8.5644, + "step": 158050 + }, + { + "epoch": 0.7893330669929337, + "grad_norm": 0.09068477898836136, + "learning_rate": 6.335828180930687e-06, + "loss": 8.5581, + "step": 158060 + }, + { + "epoch": 0.7893830058178731, + "grad_norm": 0.09171970933675766, + "learning_rate": 6.334326265989136e-06, + "loss": 8.5701, + "step": 158070 + }, + { + "epoch": 0.7894329446428126, + "grad_norm": 0.09282483905553818, + "learning_rate": 6.332824351047586e-06, + "loss": 8.5821, + "step": 158080 + }, + { + "epoch": 0.789482883467752, + "grad_norm": 0.09467281401157379, + "learning_rate": 6.331322436106035e-06, + "loss": 8.5509, + "step": 158090 + }, + { + "epoch": 0.7895328222926915, + "grad_norm": 0.09098406136035919, + "learning_rate": 6.329820521164485e-06, + "loss": 8.5573, + "step": 158100 + }, + { + "epoch": 0.7895827611176309, + "grad_norm": 0.08324146270751953, + "learning_rate": 6.328318606222935e-06, + "loss": 8.557, + "step": 158110 + }, + { + "epoch": 0.7896326999425703, + "grad_norm": 0.09208779036998749, + "learning_rate": 6.326816691281383e-06, + "loss": 8.576, + "step": 158120 + }, + { + "epoch": 0.7896826387675098, + "grad_norm": 0.0910223051905632, + "learning_rate": 6.3253147763398335e-06, + "loss": 8.5419, + "step": 158130 + }, + { + "epoch": 0.7897325775924493, + "grad_norm": 0.0862598568201065, + "learning_rate": 6.323812861398283e-06, + "loss": 8.5799, + "step": 158140 + }, + { + "epoch": 0.7897825164173887, + "grad_norm": 0.0855773463845253, + "learning_rate": 6.322310946456732e-06, + "loss": 8.5602, + "step": 158150 + }, + { + "epoch": 0.7898324552423281, + "grad_norm": 0.09082646667957306, + "learning_rate": 6.320809031515182e-06, + "loss": 8.5751, + "step": 158160 + }, + { + "epoch": 0.7898823940672676, + "grad_norm": 0.09423871338367462, + "learning_rate": 6.319307116573632e-06, + "loss": 8.5738, + "step": 158170 + }, + { + "epoch": 0.7899323328922071, + "grad_norm": 0.08389170467853546, + "learning_rate": 6.317805201632081e-06, + "loss": 8.579, + "step": 158180 + }, + { + "epoch": 0.7899822717171465, + "grad_norm": 0.08935504406690598, + "learning_rate": 6.31630328669053e-06, + "loss": 8.5623, + "step": 158190 + }, + { + "epoch": 0.7900322105420859, + "grad_norm": 0.09032989293336868, + "learning_rate": 6.31480137174898e-06, + "loss": 8.5733, + "step": 158200 + }, + { + "epoch": 0.7900821493670254, + "grad_norm": 0.08665549755096436, + "learning_rate": 6.31329945680743e-06, + "loss": 8.5566, + "step": 158210 + }, + { + "epoch": 0.7901320881919648, + "grad_norm": 0.0938415452837944, + "learning_rate": 6.311797541865879e-06, + "loss": 8.5668, + "step": 158220 + }, + { + "epoch": 0.7901820270169043, + "grad_norm": 0.08663594722747803, + "learning_rate": 6.3102956269243285e-06, + "loss": 8.5642, + "step": 158230 + }, + { + "epoch": 0.7902319658418437, + "grad_norm": 0.08731528371572495, + "learning_rate": 6.308793711982778e-06, + "loss": 8.5803, + "step": 158240 + }, + { + "epoch": 0.7902819046667832, + "grad_norm": 0.08931776881217957, + "learning_rate": 6.307291797041228e-06, + "loss": 8.5432, + "step": 158250 + }, + { + "epoch": 0.7903318434917226, + "grad_norm": 0.09380349516868591, + "learning_rate": 6.305789882099677e-06, + "loss": 8.5696, + "step": 158260 + }, + { + "epoch": 0.7903817823166621, + "grad_norm": 0.09893126040697098, + "learning_rate": 6.304287967158127e-06, + "loss": 8.5641, + "step": 158270 + }, + { + "epoch": 0.7904317211416015, + "grad_norm": 0.0894632488489151, + "learning_rate": 6.302786052216576e-06, + "loss": 8.5541, + "step": 158280 + }, + { + "epoch": 0.790481659966541, + "grad_norm": 0.08917111158370972, + "learning_rate": 6.301284137275025e-06, + "loss": 8.5575, + "step": 158290 + }, + { + "epoch": 0.7905315987914804, + "grad_norm": 0.09342500567436218, + "learning_rate": 6.2997822223334755e-06, + "loss": 8.5596, + "step": 158300 + }, + { + "epoch": 0.7905815376164199, + "grad_norm": 0.0935325175523758, + "learning_rate": 6.298280307391925e-06, + "loss": 8.5477, + "step": 158310 + }, + { + "epoch": 0.7906314764413593, + "grad_norm": 0.09898251295089722, + "learning_rate": 6.296778392450374e-06, + "loss": 8.5758, + "step": 158320 + }, + { + "epoch": 0.7906814152662988, + "grad_norm": 0.09187868982553482, + "learning_rate": 6.295276477508824e-06, + "loss": 8.5561, + "step": 158330 + }, + { + "epoch": 0.7907313540912382, + "grad_norm": 0.08766231685876846, + "learning_rate": 6.293774562567273e-06, + "loss": 8.5648, + "step": 158340 + }, + { + "epoch": 0.7907812929161777, + "grad_norm": 0.09430361539125443, + "learning_rate": 6.292272647625723e-06, + "loss": 8.5563, + "step": 158350 + }, + { + "epoch": 0.7908312317411171, + "grad_norm": 0.08926588296890259, + "learning_rate": 6.290770732684172e-06, + "loss": 8.5654, + "step": 158360 + }, + { + "epoch": 0.7908811705660566, + "grad_norm": 0.08842246234416962, + "learning_rate": 6.289268817742622e-06, + "loss": 8.568, + "step": 158370 + }, + { + "epoch": 0.790931109390996, + "grad_norm": 0.08935972303152084, + "learning_rate": 6.287766902801072e-06, + "loss": 8.556, + "step": 158380 + }, + { + "epoch": 0.7909810482159355, + "grad_norm": 0.09105094522237778, + "learning_rate": 6.28626498785952e-06, + "loss": 8.5633, + "step": 158390 + }, + { + "epoch": 0.7910309870408749, + "grad_norm": 0.09042556583881378, + "learning_rate": 6.2847630729179705e-06, + "loss": 8.5534, + "step": 158400 + }, + { + "epoch": 0.7910809258658144, + "grad_norm": 0.09127331525087357, + "learning_rate": 6.283261157976421e-06, + "loss": 8.5687, + "step": 158410 + }, + { + "epoch": 0.7911308646907538, + "grad_norm": 0.09261275827884674, + "learning_rate": 6.281759243034869e-06, + "loss": 8.5473, + "step": 158420 + }, + { + "epoch": 0.7911808035156933, + "grad_norm": 0.08710244297981262, + "learning_rate": 6.280257328093319e-06, + "loss": 8.578, + "step": 158430 + }, + { + "epoch": 0.7912307423406327, + "grad_norm": 0.10005831718444824, + "learning_rate": 6.278755413151769e-06, + "loss": 8.5261, + "step": 158440 + }, + { + "epoch": 0.7912806811655722, + "grad_norm": 0.09465691447257996, + "learning_rate": 6.277253498210218e-06, + "loss": 8.5627, + "step": 158450 + }, + { + "epoch": 0.7913306199905116, + "grad_norm": 0.09323757886886597, + "learning_rate": 6.275751583268668e-06, + "loss": 8.5629, + "step": 158460 + }, + { + "epoch": 0.7913805588154511, + "grad_norm": 0.09241240471601486, + "learning_rate": 6.274249668327117e-06, + "loss": 8.564, + "step": 158470 + }, + { + "epoch": 0.7914304976403905, + "grad_norm": 0.09037698805332184, + "learning_rate": 6.272747753385567e-06, + "loss": 8.5478, + "step": 158480 + }, + { + "epoch": 0.79148043646533, + "grad_norm": 0.09861491620540619, + "learning_rate": 6.271245838444017e-06, + "loss": 8.5623, + "step": 158490 + }, + { + "epoch": 0.7915303752902694, + "grad_norm": 0.09813782572746277, + "learning_rate": 6.2697439235024655e-06, + "loss": 8.5839, + "step": 158500 + }, + { + "epoch": 0.7915803141152089, + "grad_norm": 0.08585695922374725, + "learning_rate": 6.268242008560916e-06, + "loss": 8.5453, + "step": 158510 + }, + { + "epoch": 0.7916302529401483, + "grad_norm": 0.09625940024852753, + "learning_rate": 6.266740093619364e-06, + "loss": 8.5728, + "step": 158520 + }, + { + "epoch": 0.7916801917650877, + "grad_norm": 0.09219558537006378, + "learning_rate": 6.265238178677814e-06, + "loss": 8.5559, + "step": 158530 + }, + { + "epoch": 0.7917301305900272, + "grad_norm": 0.0883294865489006, + "learning_rate": 6.2637362637362645e-06, + "loss": 8.5629, + "step": 158540 + }, + { + "epoch": 0.7917800694149667, + "grad_norm": 0.09103575348854065, + "learning_rate": 6.262234348794713e-06, + "loss": 8.5675, + "step": 158550 + }, + { + "epoch": 0.7918300082399061, + "grad_norm": 0.09251187741756439, + "learning_rate": 6.260732433853163e-06, + "loss": 8.5794, + "step": 158560 + }, + { + "epoch": 0.7918799470648455, + "grad_norm": 0.09562096744775772, + "learning_rate": 6.2592305189116125e-06, + "loss": 8.5676, + "step": 158570 + }, + { + "epoch": 0.791929885889785, + "grad_norm": 0.08727453649044037, + "learning_rate": 6.257728603970062e-06, + "loss": 8.5519, + "step": 158580 + }, + { + "epoch": 0.7919798247147245, + "grad_norm": 0.08689242601394653, + "learning_rate": 6.256226689028512e-06, + "loss": 8.561, + "step": 158590 + }, + { + "epoch": 0.7920297635396639, + "grad_norm": 0.09087180346250534, + "learning_rate": 6.2547247740869605e-06, + "loss": 8.5644, + "step": 158600 + }, + { + "epoch": 0.7920797023646033, + "grad_norm": 0.09163371473550797, + "learning_rate": 6.253222859145411e-06, + "loss": 8.5527, + "step": 158610 + }, + { + "epoch": 0.7921296411895428, + "grad_norm": 0.0966818630695343, + "learning_rate": 6.25172094420386e-06, + "loss": 8.5677, + "step": 158620 + }, + { + "epoch": 0.7921795800144823, + "grad_norm": 0.0895446240901947, + "learning_rate": 6.250219029262309e-06, + "loss": 8.5491, + "step": 158630 + }, + { + "epoch": 0.7922295188394217, + "grad_norm": 0.08966691046953201, + "learning_rate": 6.2487171143207595e-06, + "loss": 8.5715, + "step": 158640 + }, + { + "epoch": 0.7922794576643611, + "grad_norm": 0.09029648452997208, + "learning_rate": 6.247215199379209e-06, + "loss": 8.5655, + "step": 158650 + }, + { + "epoch": 0.7923293964893006, + "grad_norm": 0.09223111718893051, + "learning_rate": 6.245713284437658e-06, + "loss": 8.5537, + "step": 158660 + }, + { + "epoch": 0.7923793353142401, + "grad_norm": 0.09418982267379761, + "learning_rate": 6.2442113694961075e-06, + "loss": 8.5509, + "step": 158670 + }, + { + "epoch": 0.7924292741391795, + "grad_norm": 0.09183704107999802, + "learning_rate": 6.242709454554557e-06, + "loss": 8.5811, + "step": 158680 + }, + { + "epoch": 0.7924792129641189, + "grad_norm": 0.09103389829397202, + "learning_rate": 6.241207539613007e-06, + "loss": 8.5531, + "step": 158690 + }, + { + "epoch": 0.7925291517890584, + "grad_norm": 0.09130579978227615, + "learning_rate": 6.239705624671456e-06, + "loss": 8.5477, + "step": 158700 + }, + { + "epoch": 0.7925790906139979, + "grad_norm": 0.0948038399219513, + "learning_rate": 6.238203709729906e-06, + "loss": 8.5581, + "step": 158710 + }, + { + "epoch": 0.7926290294389373, + "grad_norm": 0.08801276236772537, + "learning_rate": 6.236701794788355e-06, + "loss": 8.5652, + "step": 158720 + }, + { + "epoch": 0.7926789682638767, + "grad_norm": 0.09371495991945267, + "learning_rate": 6.235199879846805e-06, + "loss": 8.54, + "step": 158730 + }, + { + "epoch": 0.7927289070888162, + "grad_norm": 0.09850714355707169, + "learning_rate": 6.2336979649052546e-06, + "loss": 8.5497, + "step": 158740 + }, + { + "epoch": 0.7927788459137557, + "grad_norm": 0.08900289982557297, + "learning_rate": 6.232196049963704e-06, + "loss": 8.5674, + "step": 158750 + }, + { + "epoch": 0.7928287847386951, + "grad_norm": 0.09315358847379684, + "learning_rate": 6.230694135022153e-06, + "loss": 8.566, + "step": 158760 + }, + { + "epoch": 0.7928787235636345, + "grad_norm": 0.09045889228582382, + "learning_rate": 6.2291922200806025e-06, + "loss": 8.5793, + "step": 158770 + }, + { + "epoch": 0.792928662388574, + "grad_norm": 0.0957542434334755, + "learning_rate": 6.227690305139053e-06, + "loss": 8.5479, + "step": 158780 + }, + { + "epoch": 0.7929786012135135, + "grad_norm": 0.08995579183101654, + "learning_rate": 6.226188390197502e-06, + "loss": 8.5627, + "step": 158790 + }, + { + "epoch": 0.7930285400384529, + "grad_norm": 0.09000235050916672, + "learning_rate": 6.224686475255951e-06, + "loss": 8.5479, + "step": 158800 + }, + { + "epoch": 0.7930784788633923, + "grad_norm": 0.08959494531154633, + "learning_rate": 6.2231845603144016e-06, + "loss": 8.5524, + "step": 158810 + }, + { + "epoch": 0.7931284176883318, + "grad_norm": 0.0925586149096489, + "learning_rate": 6.22168264537285e-06, + "loss": 8.566, + "step": 158820 + }, + { + "epoch": 0.7931783565132713, + "grad_norm": 0.08586516976356506, + "learning_rate": 6.2201807304313e-06, + "loss": 8.5513, + "step": 158830 + }, + { + "epoch": 0.7932282953382107, + "grad_norm": 0.0888560339808464, + "learning_rate": 6.2186788154897496e-06, + "loss": 8.5655, + "step": 158840 + }, + { + "epoch": 0.7932782341631501, + "grad_norm": 0.08735716342926025, + "learning_rate": 6.217176900548199e-06, + "loss": 8.5603, + "step": 158850 + }, + { + "epoch": 0.7933281729880896, + "grad_norm": 0.08948913961648941, + "learning_rate": 6.215674985606649e-06, + "loss": 8.5602, + "step": 158860 + }, + { + "epoch": 0.7933781118130291, + "grad_norm": 0.09545059502124786, + "learning_rate": 6.2141730706650976e-06, + "loss": 8.5611, + "step": 158870 + }, + { + "epoch": 0.7934280506379685, + "grad_norm": 0.08870154619216919, + "learning_rate": 6.212671155723548e-06, + "loss": 8.5613, + "step": 158880 + }, + { + "epoch": 0.7934779894629079, + "grad_norm": 0.08690696954727173, + "learning_rate": 6.211169240781998e-06, + "loss": 8.5648, + "step": 158890 + }, + { + "epoch": 0.7935279282878474, + "grad_norm": 0.09118922054767609, + "learning_rate": 6.209667325840446e-06, + "loss": 8.5562, + "step": 158900 + }, + { + "epoch": 0.7935778671127869, + "grad_norm": 0.09019847214221954, + "learning_rate": 6.2081654108988966e-06, + "loss": 8.5698, + "step": 158910 + }, + { + "epoch": 0.7936278059377263, + "grad_norm": 0.09406841546297073, + "learning_rate": 6.206663495957345e-06, + "loss": 8.5572, + "step": 158920 + }, + { + "epoch": 0.7936777447626657, + "grad_norm": 0.09367325156927109, + "learning_rate": 6.205161581015795e-06, + "loss": 8.5657, + "step": 158930 + }, + { + "epoch": 0.7937276835876051, + "grad_norm": 0.09416260570287704, + "learning_rate": 6.203659666074245e-06, + "loss": 8.5811, + "step": 158940 + }, + { + "epoch": 0.7937776224125447, + "grad_norm": 0.09471859037876129, + "learning_rate": 6.202157751132694e-06, + "loss": 8.5597, + "step": 158950 + }, + { + "epoch": 0.7938275612374841, + "grad_norm": 0.09237445890903473, + "learning_rate": 6.200655836191144e-06, + "loss": 8.5647, + "step": 158960 + }, + { + "epoch": 0.7938775000624235, + "grad_norm": 0.09338158369064331, + "learning_rate": 6.199153921249593e-06, + "loss": 8.5622, + "step": 158970 + }, + { + "epoch": 0.793927438887363, + "grad_norm": 0.08678948134183884, + "learning_rate": 6.197652006308043e-06, + "loss": 8.5693, + "step": 158980 + }, + { + "epoch": 0.7939773777123025, + "grad_norm": 0.09783153980970383, + "learning_rate": 6.196150091366493e-06, + "loss": 8.5626, + "step": 158990 + }, + { + "epoch": 0.7940273165372419, + "grad_norm": 0.09704387933015823, + "learning_rate": 6.194648176424941e-06, + "loss": 8.5705, + "step": 159000 + }, + { + "epoch": 0.7940772553621813, + "grad_norm": 0.09087003022432327, + "learning_rate": 6.193146261483392e-06, + "loss": 8.5679, + "step": 159010 + }, + { + "epoch": 0.7941271941871207, + "grad_norm": 0.09224209189414978, + "learning_rate": 6.191644346541841e-06, + "loss": 8.5447, + "step": 159020 + }, + { + "epoch": 0.7941771330120603, + "grad_norm": 0.08932413905858994, + "learning_rate": 6.19014243160029e-06, + "loss": 8.5509, + "step": 159030 + }, + { + "epoch": 0.7942270718369997, + "grad_norm": 0.09286679327487946, + "learning_rate": 6.18864051665874e-06, + "loss": 8.5673, + "step": 159040 + }, + { + "epoch": 0.7942770106619391, + "grad_norm": 0.09589332342147827, + "learning_rate": 6.18713860171719e-06, + "loss": 8.5603, + "step": 159050 + }, + { + "epoch": 0.7943269494868785, + "grad_norm": 0.10077871382236481, + "learning_rate": 6.185636686775639e-06, + "loss": 8.5626, + "step": 159060 + }, + { + "epoch": 0.7943768883118181, + "grad_norm": 0.0931212455034256, + "learning_rate": 6.184134771834088e-06, + "loss": 8.5467, + "step": 159070 + }, + { + "epoch": 0.7944268271367575, + "grad_norm": 0.09118203818798065, + "learning_rate": 6.182632856892538e-06, + "loss": 8.5558, + "step": 159080 + }, + { + "epoch": 0.7944767659616969, + "grad_norm": 0.08937732130289078, + "learning_rate": 6.181130941950988e-06, + "loss": 8.56, + "step": 159090 + }, + { + "epoch": 0.7945267047866363, + "grad_norm": 0.09185148030519485, + "learning_rate": 6.179629027009437e-06, + "loss": 8.5669, + "step": 159100 + }, + { + "epoch": 0.7945766436115759, + "grad_norm": 0.08831166476011276, + "learning_rate": 6.178127112067887e-06, + "loss": 8.5614, + "step": 159110 + }, + { + "epoch": 0.7946265824365153, + "grad_norm": 0.08398475497961044, + "learning_rate": 6.176625197126336e-06, + "loss": 8.5808, + "step": 159120 + }, + { + "epoch": 0.7946765212614547, + "grad_norm": 0.08763767033815384, + "learning_rate": 6.175123282184786e-06, + "loss": 8.567, + "step": 159130 + }, + { + "epoch": 0.7947264600863941, + "grad_norm": 0.10390178859233856, + "learning_rate": 6.1736213672432354e-06, + "loss": 8.542, + "step": 159140 + }, + { + "epoch": 0.7947763989113337, + "grad_norm": 0.08661069720983505, + "learning_rate": 6.172119452301685e-06, + "loss": 8.5623, + "step": 159150 + }, + { + "epoch": 0.7948263377362731, + "grad_norm": 0.0880892425775528, + "learning_rate": 6.170617537360134e-06, + "loss": 8.5611, + "step": 159160 + }, + { + "epoch": 0.7948762765612125, + "grad_norm": 0.0939859077334404, + "learning_rate": 6.169115622418583e-06, + "loss": 8.5483, + "step": 159170 + }, + { + "epoch": 0.7949262153861519, + "grad_norm": 0.09007950127124786, + "learning_rate": 6.167613707477034e-06, + "loss": 8.5587, + "step": 159180 + }, + { + "epoch": 0.7949761542110915, + "grad_norm": 0.09516777098178864, + "learning_rate": 6.166111792535483e-06, + "loss": 8.574, + "step": 159190 + }, + { + "epoch": 0.7950260930360309, + "grad_norm": 0.08972634375095367, + "learning_rate": 6.164609877593932e-06, + "loss": 8.5578, + "step": 159200 + }, + { + "epoch": 0.7950760318609703, + "grad_norm": 0.09445880353450775, + "learning_rate": 6.1631079626523824e-06, + "loss": 8.5576, + "step": 159210 + }, + { + "epoch": 0.7951259706859097, + "grad_norm": 0.08887441456317902, + "learning_rate": 6.161606047710831e-06, + "loss": 8.5647, + "step": 159220 + }, + { + "epoch": 0.7951759095108492, + "grad_norm": 0.09141810983419418, + "learning_rate": 6.160104132769281e-06, + "loss": 8.5579, + "step": 159230 + }, + { + "epoch": 0.7952258483357887, + "grad_norm": 0.0876648873090744, + "learning_rate": 6.1586022178277304e-06, + "loss": 8.5538, + "step": 159240 + }, + { + "epoch": 0.7952757871607281, + "grad_norm": 0.08809340745210648, + "learning_rate": 6.15710030288618e-06, + "loss": 8.5455, + "step": 159250 + }, + { + "epoch": 0.7953257259856675, + "grad_norm": 0.09779562056064606, + "learning_rate": 6.15559838794463e-06, + "loss": 8.5439, + "step": 159260 + }, + { + "epoch": 0.795375664810607, + "grad_norm": 0.08831503242254257, + "learning_rate": 6.1540964730030784e-06, + "loss": 8.5677, + "step": 159270 + }, + { + "epoch": 0.7954256036355465, + "grad_norm": 0.09317151457071304, + "learning_rate": 6.152594558061529e-06, + "loss": 8.551, + "step": 159280 + }, + { + "epoch": 0.7954755424604859, + "grad_norm": 0.08679518103599548, + "learning_rate": 6.151092643119979e-06, + "loss": 8.577, + "step": 159290 + }, + { + "epoch": 0.7955254812854253, + "grad_norm": 0.09124913066625595, + "learning_rate": 6.149590728178427e-06, + "loss": 8.5719, + "step": 159300 + }, + { + "epoch": 0.7955754201103648, + "grad_norm": 0.08849890530109406, + "learning_rate": 6.1480888132368774e-06, + "loss": 8.5562, + "step": 159310 + }, + { + "epoch": 0.7956253589353043, + "grad_norm": 0.09584712982177734, + "learning_rate": 6.146586898295326e-06, + "loss": 8.5612, + "step": 159320 + }, + { + "epoch": 0.7956752977602437, + "grad_norm": 0.09756402671337128, + "learning_rate": 6.145084983353776e-06, + "loss": 8.5604, + "step": 159330 + }, + { + "epoch": 0.7957252365851831, + "grad_norm": 0.09176575392484665, + "learning_rate": 6.143583068412226e-06, + "loss": 8.5578, + "step": 159340 + }, + { + "epoch": 0.7957751754101225, + "grad_norm": 0.09086544811725616, + "learning_rate": 6.142081153470675e-06, + "loss": 8.5625, + "step": 159350 + }, + { + "epoch": 0.7958251142350621, + "grad_norm": 0.09206107258796692, + "learning_rate": 6.140579238529125e-06, + "loss": 8.5577, + "step": 159360 + }, + { + "epoch": 0.7958750530600015, + "grad_norm": 0.09471509605646133, + "learning_rate": 6.139077323587574e-06, + "loss": 8.5724, + "step": 159370 + }, + { + "epoch": 0.7959249918849409, + "grad_norm": 0.09322207421064377, + "learning_rate": 6.137575408646024e-06, + "loss": 8.5488, + "step": 159380 + }, + { + "epoch": 0.7959749307098803, + "grad_norm": 0.09167961776256561, + "learning_rate": 6.136073493704474e-06, + "loss": 8.5676, + "step": 159390 + }, + { + "epoch": 0.7960248695348199, + "grad_norm": 0.09485690295696259, + "learning_rate": 6.134571578762922e-06, + "loss": 8.5436, + "step": 159400 + }, + { + "epoch": 0.7960748083597593, + "grad_norm": 0.08992945402860641, + "learning_rate": 6.1330696638213725e-06, + "loss": 8.5585, + "step": 159410 + }, + { + "epoch": 0.7961247471846987, + "grad_norm": 0.08964666724205017, + "learning_rate": 6.131567748879822e-06, + "loss": 8.5553, + "step": 159420 + }, + { + "epoch": 0.7961746860096381, + "grad_norm": 0.09216075390577316, + "learning_rate": 6.130065833938271e-06, + "loss": 8.5633, + "step": 159430 + }, + { + "epoch": 0.7962246248345777, + "grad_norm": 0.08898287266492844, + "learning_rate": 6.128563918996721e-06, + "loss": 8.5607, + "step": 159440 + }, + { + "epoch": 0.7962745636595171, + "grad_norm": 0.08977809548377991, + "learning_rate": 6.127062004055171e-06, + "loss": 8.5637, + "step": 159450 + }, + { + "epoch": 0.7963245024844565, + "grad_norm": 0.08928190171718597, + "learning_rate": 6.12556008911362e-06, + "loss": 8.5715, + "step": 159460 + }, + { + "epoch": 0.7963744413093959, + "grad_norm": 0.09225127846002579, + "learning_rate": 6.124058174172069e-06, + "loss": 8.5857, + "step": 159470 + }, + { + "epoch": 0.7964243801343355, + "grad_norm": 0.09755774587392807, + "learning_rate": 6.122556259230519e-06, + "loss": 8.5431, + "step": 159480 + }, + { + "epoch": 0.7964743189592749, + "grad_norm": 0.09840092062950134, + "learning_rate": 6.121054344288969e-06, + "loss": 8.5594, + "step": 159490 + }, + { + "epoch": 0.7965242577842143, + "grad_norm": 0.09154287725687027, + "learning_rate": 6.119552429347418e-06, + "loss": 8.5609, + "step": 159500 + }, + { + "epoch": 0.7965741966091537, + "grad_norm": 0.08567217737436295, + "learning_rate": 6.1180505144058675e-06, + "loss": 8.5555, + "step": 159510 + }, + { + "epoch": 0.7966241354340933, + "grad_norm": 0.08857017010450363, + "learning_rate": 6.116548599464317e-06, + "loss": 8.5614, + "step": 159520 + }, + { + "epoch": 0.7966740742590327, + "grad_norm": 0.08761531114578247, + "learning_rate": 6.115046684522767e-06, + "loss": 8.5644, + "step": 159530 + }, + { + "epoch": 0.7967240130839721, + "grad_norm": 0.0912371501326561, + "learning_rate": 6.113544769581216e-06, + "loss": 8.5736, + "step": 159540 + }, + { + "epoch": 0.7967739519089115, + "grad_norm": 0.09295840561389923, + "learning_rate": 6.112042854639666e-06, + "loss": 8.5424, + "step": 159550 + }, + { + "epoch": 0.7968238907338511, + "grad_norm": 0.09031003713607788, + "learning_rate": 6.110540939698115e-06, + "loss": 8.554, + "step": 159560 + }, + { + "epoch": 0.7968738295587905, + "grad_norm": 0.0904209241271019, + "learning_rate": 6.109039024756565e-06, + "loss": 8.5746, + "step": 159570 + }, + { + "epoch": 0.7969237683837299, + "grad_norm": 0.08807667344808578, + "learning_rate": 6.1075371098150145e-06, + "loss": 8.5526, + "step": 159580 + }, + { + "epoch": 0.7969737072086693, + "grad_norm": 0.09461508691310883, + "learning_rate": 6.106035194873464e-06, + "loss": 8.557, + "step": 159590 + }, + { + "epoch": 0.7970236460336089, + "grad_norm": 0.08692062646150589, + "learning_rate": 6.104533279931913e-06, + "loss": 8.5529, + "step": 159600 + }, + { + "epoch": 0.7970735848585483, + "grad_norm": 0.09569226950407028, + "learning_rate": 6.103031364990363e-06, + "loss": 8.5602, + "step": 159610 + }, + { + "epoch": 0.7971235236834877, + "grad_norm": 0.09210385382175446, + "learning_rate": 6.101529450048813e-06, + "loss": 8.5649, + "step": 159620 + }, + { + "epoch": 0.7971734625084271, + "grad_norm": 0.09162899106740952, + "learning_rate": 6.100027535107262e-06, + "loss": 8.5509, + "step": 159630 + }, + { + "epoch": 0.7972234013333667, + "grad_norm": 0.09765185415744781, + "learning_rate": 6.098525620165711e-06, + "loss": 8.5527, + "step": 159640 + }, + { + "epoch": 0.7972733401583061, + "grad_norm": 0.09412235766649246, + "learning_rate": 6.097023705224161e-06, + "loss": 8.5699, + "step": 159650 + }, + { + "epoch": 0.7973232789832455, + "grad_norm": 0.09401925653219223, + "learning_rate": 6.095521790282611e-06, + "loss": 8.5554, + "step": 159660 + }, + { + "epoch": 0.7973732178081849, + "grad_norm": 0.09170075505971909, + "learning_rate": 6.09401987534106e-06, + "loss": 8.5666, + "step": 159670 + }, + { + "epoch": 0.7974231566331245, + "grad_norm": 0.0880158469080925, + "learning_rate": 6.0925179603995095e-06, + "loss": 8.554, + "step": 159680 + }, + { + "epoch": 0.7974730954580639, + "grad_norm": 0.09159617871046066, + "learning_rate": 6.09101604545796e-06, + "loss": 8.5581, + "step": 159690 + }, + { + "epoch": 0.7975230342830033, + "grad_norm": 0.09276486188173294, + "learning_rate": 6.089514130516408e-06, + "loss": 8.5436, + "step": 159700 + }, + { + "epoch": 0.7975729731079427, + "grad_norm": 0.08525187522172928, + "learning_rate": 6.088012215574858e-06, + "loss": 8.5508, + "step": 159710 + }, + { + "epoch": 0.7976229119328823, + "grad_norm": 0.08781725913286209, + "learning_rate": 6.086510300633308e-06, + "loss": 8.5792, + "step": 159720 + }, + { + "epoch": 0.7976728507578217, + "grad_norm": 0.09643100947141647, + "learning_rate": 6.085008385691757e-06, + "loss": 8.5551, + "step": 159730 + }, + { + "epoch": 0.7977227895827611, + "grad_norm": 0.08715363591909409, + "learning_rate": 6.083506470750207e-06, + "loss": 8.5656, + "step": 159740 + }, + { + "epoch": 0.7977727284077005, + "grad_norm": 0.09139413386583328, + "learning_rate": 6.082004555808656e-06, + "loss": 8.5401, + "step": 159750 + }, + { + "epoch": 0.7978226672326401, + "grad_norm": 0.09375610202550888, + "learning_rate": 6.080502640867106e-06, + "loss": 8.5596, + "step": 159760 + }, + { + "epoch": 0.7978726060575795, + "grad_norm": 0.09116285294294357, + "learning_rate": 6.079000725925556e-06, + "loss": 8.5359, + "step": 159770 + }, + { + "epoch": 0.7979225448825189, + "grad_norm": 0.08968761563301086, + "learning_rate": 6.0774988109840045e-06, + "loss": 8.5511, + "step": 159780 + }, + { + "epoch": 0.7979724837074583, + "grad_norm": 0.09019223600625992, + "learning_rate": 6.075996896042455e-06, + "loss": 8.5646, + "step": 159790 + }, + { + "epoch": 0.7980224225323979, + "grad_norm": 0.09245903789997101, + "learning_rate": 6.074494981100903e-06, + "loss": 8.5769, + "step": 159800 + }, + { + "epoch": 0.7980723613573373, + "grad_norm": 0.08857943117618561, + "learning_rate": 6.072993066159353e-06, + "loss": 8.5494, + "step": 159810 + }, + { + "epoch": 0.7981223001822767, + "grad_norm": 0.09926991164684296, + "learning_rate": 6.0714911512178035e-06, + "loss": 8.569, + "step": 159820 + }, + { + "epoch": 0.7981722390072161, + "grad_norm": 0.09205451607704163, + "learning_rate": 6.069989236276252e-06, + "loss": 8.5501, + "step": 159830 + }, + { + "epoch": 0.7982221778321557, + "grad_norm": 0.09034121781587601, + "learning_rate": 6.068487321334702e-06, + "loss": 8.5465, + "step": 159840 + }, + { + "epoch": 0.7982721166570951, + "grad_norm": 0.08652927726507187, + "learning_rate": 6.0669854063931515e-06, + "loss": 8.5484, + "step": 159850 + }, + { + "epoch": 0.7983220554820345, + "grad_norm": 0.09696269780397415, + "learning_rate": 6.065483491451601e-06, + "loss": 8.5509, + "step": 159860 + }, + { + "epoch": 0.7983719943069739, + "grad_norm": 0.09118789434432983, + "learning_rate": 6.063981576510051e-06, + "loss": 8.5379, + "step": 159870 + }, + { + "epoch": 0.7984219331319135, + "grad_norm": 0.08937663584947586, + "learning_rate": 6.0624796615684995e-06, + "loss": 8.5392, + "step": 159880 + }, + { + "epoch": 0.7984718719568529, + "grad_norm": 0.08873353898525238, + "learning_rate": 6.06097774662695e-06, + "loss": 8.5635, + "step": 159890 + }, + { + "epoch": 0.7985218107817923, + "grad_norm": 0.0940752923488617, + "learning_rate": 6.059475831685399e-06, + "loss": 8.554, + "step": 159900 + }, + { + "epoch": 0.7985717496067317, + "grad_norm": 0.09297428280115128, + "learning_rate": 6.057973916743848e-06, + "loss": 8.5504, + "step": 159910 + }, + { + "epoch": 0.7986216884316713, + "grad_norm": 0.09415644407272339, + "learning_rate": 6.0564720018022985e-06, + "loss": 8.5671, + "step": 159920 + }, + { + "epoch": 0.7986716272566107, + "grad_norm": 0.08840908855199814, + "learning_rate": 6.054970086860748e-06, + "loss": 8.55, + "step": 159930 + }, + { + "epoch": 0.7987215660815501, + "grad_norm": 0.0910872295498848, + "learning_rate": 6.053468171919197e-06, + "loss": 8.5604, + "step": 159940 + }, + { + "epoch": 0.7987715049064895, + "grad_norm": 0.09535114467144012, + "learning_rate": 6.0519662569776465e-06, + "loss": 8.5668, + "step": 159950 + }, + { + "epoch": 0.798821443731429, + "grad_norm": 0.0967821255326271, + "learning_rate": 6.050464342036096e-06, + "loss": 8.5682, + "step": 159960 + }, + { + "epoch": 0.7988713825563685, + "grad_norm": 0.0884976014494896, + "learning_rate": 6.048962427094546e-06, + "loss": 8.5515, + "step": 159970 + }, + { + "epoch": 0.7989213213813079, + "grad_norm": 0.09149619191884995, + "learning_rate": 6.047460512152995e-06, + "loss": 8.5623, + "step": 159980 + }, + { + "epoch": 0.7989712602062473, + "grad_norm": 0.08907512575387955, + "learning_rate": 6.045958597211445e-06, + "loss": 8.558, + "step": 159990 + }, + { + "epoch": 0.7990211990311868, + "grad_norm": 0.0927252545952797, + "learning_rate": 6.044456682269894e-06, + "loss": 8.5367, + "step": 160000 + }, + { + "epoch": 0.7990711378561263, + "grad_norm": 0.09032353013753891, + "learning_rate": 6.042954767328344e-06, + "loss": 8.5554, + "step": 160010 + }, + { + "epoch": 0.7991210766810657, + "grad_norm": 0.09220920503139496, + "learning_rate": 6.0414528523867935e-06, + "loss": 8.5655, + "step": 160020 + }, + { + "epoch": 0.7991710155060051, + "grad_norm": 0.09536270052194595, + "learning_rate": 6.039950937445243e-06, + "loss": 8.5547, + "step": 160030 + }, + { + "epoch": 0.7992209543309446, + "grad_norm": 0.08931788057088852, + "learning_rate": 6.038449022503692e-06, + "loss": 8.5624, + "step": 160040 + }, + { + "epoch": 0.7992708931558841, + "grad_norm": 0.09477946907281876, + "learning_rate": 6.0369471075621415e-06, + "loss": 8.5688, + "step": 160050 + }, + { + "epoch": 0.7993208319808235, + "grad_norm": 0.09368328005075455, + "learning_rate": 6.035445192620592e-06, + "loss": 8.5624, + "step": 160060 + }, + { + "epoch": 0.7993707708057629, + "grad_norm": 0.09561215341091156, + "learning_rate": 6.033943277679041e-06, + "loss": 8.557, + "step": 160070 + }, + { + "epoch": 0.7994207096307024, + "grad_norm": 0.08733969926834106, + "learning_rate": 6.03244136273749e-06, + "loss": 8.5325, + "step": 160080 + }, + { + "epoch": 0.7994706484556419, + "grad_norm": 0.09188752621412277, + "learning_rate": 6.0309394477959405e-06, + "loss": 8.5459, + "step": 160090 + }, + { + "epoch": 0.7995205872805813, + "grad_norm": 0.09306297451257706, + "learning_rate": 6.029437532854389e-06, + "loss": 8.5527, + "step": 160100 + }, + { + "epoch": 0.7995705261055207, + "grad_norm": 0.09068245440721512, + "learning_rate": 6.027935617912839e-06, + "loss": 8.5579, + "step": 160110 + }, + { + "epoch": 0.7996204649304602, + "grad_norm": 0.09240369498729706, + "learning_rate": 6.0264337029712885e-06, + "loss": 8.5459, + "step": 160120 + }, + { + "epoch": 0.7996704037553997, + "grad_norm": 0.08835700154304504, + "learning_rate": 6.024931788029738e-06, + "loss": 8.552, + "step": 160130 + }, + { + "epoch": 0.7997203425803391, + "grad_norm": 0.08920862525701523, + "learning_rate": 6.023429873088188e-06, + "loss": 8.5368, + "step": 160140 + }, + { + "epoch": 0.7997702814052785, + "grad_norm": 0.09194939583539963, + "learning_rate": 6.0219279581466365e-06, + "loss": 8.5641, + "step": 160150 + }, + { + "epoch": 0.799820220230218, + "grad_norm": 0.09264218807220459, + "learning_rate": 6.020426043205087e-06, + "loss": 8.5531, + "step": 160160 + }, + { + "epoch": 0.7998701590551575, + "grad_norm": 0.08757354319095612, + "learning_rate": 6.018924128263537e-06, + "loss": 8.5538, + "step": 160170 + }, + { + "epoch": 0.7999200978800969, + "grad_norm": 0.09101276844739914, + "learning_rate": 6.017422213321985e-06, + "loss": 8.5525, + "step": 160180 + }, + { + "epoch": 0.7999700367050363, + "grad_norm": 0.09623536467552185, + "learning_rate": 6.0159202983804355e-06, + "loss": 8.5407, + "step": 160190 + }, + { + "epoch": 0.8000199755299757, + "grad_norm": 0.09045930206775665, + "learning_rate": 6.014418383438884e-06, + "loss": 8.5333, + "step": 160200 + }, + { + "epoch": 0.8000699143549153, + "grad_norm": 0.08761759847402573, + "learning_rate": 6.012916468497334e-06, + "loss": 8.552, + "step": 160210 + }, + { + "epoch": 0.8001198531798547, + "grad_norm": 0.09209952503442764, + "learning_rate": 6.011414553555784e-06, + "loss": 8.5461, + "step": 160220 + }, + { + "epoch": 0.8001697920047941, + "grad_norm": 0.08897832036018372, + "learning_rate": 6.009912638614233e-06, + "loss": 8.5743, + "step": 160230 + }, + { + "epoch": 0.8002197308297335, + "grad_norm": 0.09238340705633163, + "learning_rate": 6.008410723672683e-06, + "loss": 8.5463, + "step": 160240 + }, + { + "epoch": 0.800269669654673, + "grad_norm": 0.09207829087972641, + "learning_rate": 6.006908808731132e-06, + "loss": 8.5396, + "step": 160250 + }, + { + "epoch": 0.8003196084796125, + "grad_norm": 0.0933920294046402, + "learning_rate": 6.005406893789582e-06, + "loss": 8.5405, + "step": 160260 + }, + { + "epoch": 0.8003695473045519, + "grad_norm": 0.09098160266876221, + "learning_rate": 6.003904978848032e-06, + "loss": 8.548, + "step": 160270 + }, + { + "epoch": 0.8004194861294913, + "grad_norm": 0.0946149155497551, + "learning_rate": 6.00240306390648e-06, + "loss": 8.5488, + "step": 160280 + }, + { + "epoch": 0.8004694249544309, + "grad_norm": 0.09164818376302719, + "learning_rate": 6.0009011489649306e-06, + "loss": 8.5535, + "step": 160290 + }, + { + "epoch": 0.8005193637793703, + "grad_norm": 0.09267935901880264, + "learning_rate": 5.99939923402338e-06, + "loss": 8.571, + "step": 160300 + }, + { + "epoch": 0.8005693026043097, + "grad_norm": 0.0906684473156929, + "learning_rate": 5.997897319081829e-06, + "loss": 8.5541, + "step": 160310 + }, + { + "epoch": 0.8006192414292491, + "grad_norm": 0.09602951258420944, + "learning_rate": 5.996395404140279e-06, + "loss": 8.5577, + "step": 160320 + }, + { + "epoch": 0.8006691802541887, + "grad_norm": 0.09331289678812027, + "learning_rate": 5.994893489198729e-06, + "loss": 8.5534, + "step": 160330 + }, + { + "epoch": 0.8007191190791281, + "grad_norm": 0.0883229672908783, + "learning_rate": 5.993391574257178e-06, + "loss": 8.576, + "step": 160340 + }, + { + "epoch": 0.8007690579040675, + "grad_norm": 0.09256965667009354, + "learning_rate": 5.991889659315627e-06, + "loss": 8.5416, + "step": 160350 + }, + { + "epoch": 0.8008189967290069, + "grad_norm": 0.09165097028017044, + "learning_rate": 5.990387744374077e-06, + "loss": 8.5723, + "step": 160360 + }, + { + "epoch": 0.8008689355539464, + "grad_norm": 0.09180103242397308, + "learning_rate": 5.988885829432527e-06, + "loss": 8.5441, + "step": 160370 + }, + { + "epoch": 0.8009188743788859, + "grad_norm": 0.09208500385284424, + "learning_rate": 5.987383914490976e-06, + "loss": 8.5447, + "step": 160380 + }, + { + "epoch": 0.8009688132038253, + "grad_norm": 0.09202751517295837, + "learning_rate": 5.9858819995494256e-06, + "loss": 8.5482, + "step": 160390 + }, + { + "epoch": 0.8010187520287647, + "grad_norm": 0.09229252487421036, + "learning_rate": 5.984380084607875e-06, + "loss": 8.5692, + "step": 160400 + }, + { + "epoch": 0.8010686908537042, + "grad_norm": 0.09639665484428406, + "learning_rate": 5.982878169666325e-06, + "loss": 8.5544, + "step": 160410 + }, + { + "epoch": 0.8011186296786437, + "grad_norm": 0.08971371501684189, + "learning_rate": 5.981376254724774e-06, + "loss": 8.5706, + "step": 160420 + }, + { + "epoch": 0.8011685685035831, + "grad_norm": 0.08654212206602097, + "learning_rate": 5.979874339783224e-06, + "loss": 8.5672, + "step": 160430 + }, + { + "epoch": 0.8012185073285225, + "grad_norm": 0.09568259119987488, + "learning_rate": 5.978372424841673e-06, + "loss": 8.5543, + "step": 160440 + }, + { + "epoch": 0.801268446153462, + "grad_norm": 0.08771758526563644, + "learning_rate": 5.976870509900122e-06, + "loss": 8.5576, + "step": 160450 + }, + { + "epoch": 0.8013183849784015, + "grad_norm": 0.0924898236989975, + "learning_rate": 5.9753685949585726e-06, + "loss": 8.5352, + "step": 160460 + }, + { + "epoch": 0.8013683238033409, + "grad_norm": 0.08928176760673523, + "learning_rate": 5.973866680017022e-06, + "loss": 8.5789, + "step": 160470 + }, + { + "epoch": 0.8014182626282803, + "grad_norm": 0.08957140892744064, + "learning_rate": 5.972364765075471e-06, + "loss": 8.5642, + "step": 160480 + }, + { + "epoch": 0.8014682014532198, + "grad_norm": 0.0887003242969513, + "learning_rate": 5.970862850133921e-06, + "loss": 8.5642, + "step": 160490 + }, + { + "epoch": 0.8015181402781593, + "grad_norm": 0.09041450172662735, + "learning_rate": 5.96936093519237e-06, + "loss": 8.5659, + "step": 160500 + }, + { + "epoch": 0.8015680791030987, + "grad_norm": 0.08909320086240768, + "learning_rate": 5.96785902025082e-06, + "loss": 8.5378, + "step": 160510 + }, + { + "epoch": 0.8016180179280381, + "grad_norm": 0.09449964016675949, + "learning_rate": 5.966357105309269e-06, + "loss": 8.5644, + "step": 160520 + }, + { + "epoch": 0.8016679567529776, + "grad_norm": 0.0875280573964119, + "learning_rate": 5.964855190367719e-06, + "loss": 8.5552, + "step": 160530 + }, + { + "epoch": 0.8017178955779171, + "grad_norm": 0.09800990670919418, + "learning_rate": 5.963353275426169e-06, + "loss": 8.5692, + "step": 160540 + }, + { + "epoch": 0.8017678344028565, + "grad_norm": 0.10497017949819565, + "learning_rate": 5.961851360484617e-06, + "loss": 8.5514, + "step": 160550 + }, + { + "epoch": 0.8018177732277959, + "grad_norm": 0.09578979760408401, + "learning_rate": 5.960349445543068e-06, + "loss": 8.5585, + "step": 160560 + }, + { + "epoch": 0.8018677120527354, + "grad_norm": 0.0902516171336174, + "learning_rate": 5.958847530601518e-06, + "loss": 8.5426, + "step": 160570 + }, + { + "epoch": 0.8019176508776749, + "grad_norm": 0.087619349360466, + "learning_rate": 5.957345615659966e-06, + "loss": 8.5467, + "step": 160580 + }, + { + "epoch": 0.8019675897026143, + "grad_norm": 0.08661577105522156, + "learning_rate": 5.955843700718416e-06, + "loss": 8.5649, + "step": 160590 + }, + { + "epoch": 0.8020175285275537, + "grad_norm": 0.0935804471373558, + "learning_rate": 5.954341785776866e-06, + "loss": 8.558, + "step": 160600 + }, + { + "epoch": 0.8020674673524932, + "grad_norm": 0.08383012562990189, + "learning_rate": 5.952839870835315e-06, + "loss": 8.5579, + "step": 160610 + }, + { + "epoch": 0.8021174061774327, + "grad_norm": 0.09680390357971191, + "learning_rate": 5.951337955893765e-06, + "loss": 8.5733, + "step": 160620 + }, + { + "epoch": 0.8021673450023721, + "grad_norm": 0.0891953781247139, + "learning_rate": 5.949836040952214e-06, + "loss": 8.5736, + "step": 160630 + }, + { + "epoch": 0.8022172838273115, + "grad_norm": 0.0887405052781105, + "learning_rate": 5.948334126010664e-06, + "loss": 8.5685, + "step": 160640 + }, + { + "epoch": 0.802267222652251, + "grad_norm": 0.092610664665699, + "learning_rate": 5.946832211069113e-06, + "loss": 8.5667, + "step": 160650 + }, + { + "epoch": 0.8023171614771905, + "grad_norm": 0.08894860744476318, + "learning_rate": 5.945330296127563e-06, + "loss": 8.5653, + "step": 160660 + }, + { + "epoch": 0.8023671003021299, + "grad_norm": 0.09235876053571701, + "learning_rate": 5.943828381186013e-06, + "loss": 8.5525, + "step": 160670 + }, + { + "epoch": 0.8024170391270693, + "grad_norm": 0.0946565493941307, + "learning_rate": 5.942326466244461e-06, + "loss": 8.5589, + "step": 160680 + }, + { + "epoch": 0.8024669779520088, + "grad_norm": 0.08634993433952332, + "learning_rate": 5.9408245513029114e-06, + "loss": 8.5665, + "step": 160690 + }, + { + "epoch": 0.8025169167769483, + "grad_norm": 0.08891087025403976, + "learning_rate": 5.939322636361362e-06, + "loss": 8.5576, + "step": 160700 + }, + { + "epoch": 0.8025668556018877, + "grad_norm": 0.09388010948896408, + "learning_rate": 5.93782072141981e-06, + "loss": 8.5506, + "step": 160710 + }, + { + "epoch": 0.8026167944268271, + "grad_norm": 0.09189348667860031, + "learning_rate": 5.93631880647826e-06, + "loss": 8.554, + "step": 160720 + }, + { + "epoch": 0.8026667332517666, + "grad_norm": 0.08890991657972336, + "learning_rate": 5.93481689153671e-06, + "loss": 8.5491, + "step": 160730 + }, + { + "epoch": 0.802716672076706, + "grad_norm": 0.0901801586151123, + "learning_rate": 5.933314976595159e-06, + "loss": 8.5757, + "step": 160740 + }, + { + "epoch": 0.8027666109016455, + "grad_norm": 0.09369862824678421, + "learning_rate": 5.931813061653609e-06, + "loss": 8.5404, + "step": 160750 + }, + { + "epoch": 0.8028165497265849, + "grad_norm": 0.09540555626153946, + "learning_rate": 5.930311146712058e-06, + "loss": 8.552, + "step": 160760 + }, + { + "epoch": 0.8028664885515244, + "grad_norm": 0.09585050493478775, + "learning_rate": 5.928809231770508e-06, + "loss": 8.5557, + "step": 160770 + }, + { + "epoch": 0.8029164273764638, + "grad_norm": 0.088337741792202, + "learning_rate": 5.927307316828957e-06, + "loss": 8.5585, + "step": 160780 + }, + { + "epoch": 0.8029663662014033, + "grad_norm": 0.08791206032037735, + "learning_rate": 5.9258054018874064e-06, + "loss": 8.5687, + "step": 160790 + }, + { + "epoch": 0.8030163050263427, + "grad_norm": 0.09240417182445526, + "learning_rate": 5.924303486945857e-06, + "loss": 8.5421, + "step": 160800 + }, + { + "epoch": 0.8030662438512822, + "grad_norm": 0.08927211910486221, + "learning_rate": 5.922801572004306e-06, + "loss": 8.5463, + "step": 160810 + }, + { + "epoch": 0.8031161826762216, + "grad_norm": 0.09254935383796692, + "learning_rate": 5.921299657062755e-06, + "loss": 8.5417, + "step": 160820 + }, + { + "epoch": 0.8031661215011611, + "grad_norm": 0.09360701590776443, + "learning_rate": 5.919797742121205e-06, + "loss": 8.5466, + "step": 160830 + }, + { + "epoch": 0.8032160603261005, + "grad_norm": 0.09263437986373901, + "learning_rate": 5.918295827179654e-06, + "loss": 8.5576, + "step": 160840 + }, + { + "epoch": 0.80326599915104, + "grad_norm": 0.08811653405427933, + "learning_rate": 5.916793912238104e-06, + "loss": 8.5447, + "step": 160850 + }, + { + "epoch": 0.8033159379759794, + "grad_norm": 0.09161476045846939, + "learning_rate": 5.9152919972965535e-06, + "loss": 8.5664, + "step": 160860 + }, + { + "epoch": 0.8033658768009189, + "grad_norm": 0.08815915882587433, + "learning_rate": 5.913790082355003e-06, + "loss": 8.5393, + "step": 160870 + }, + { + "epoch": 0.8034158156258583, + "grad_norm": 0.08650946617126465, + "learning_rate": 5.912288167413452e-06, + "loss": 8.561, + "step": 160880 + }, + { + "epoch": 0.8034657544507978, + "grad_norm": 0.09026594460010529, + "learning_rate": 5.9107862524719014e-06, + "loss": 8.5529, + "step": 160890 + }, + { + "epoch": 0.8035156932757372, + "grad_norm": 0.08889591693878174, + "learning_rate": 5.909284337530352e-06, + "loss": 8.5591, + "step": 160900 + }, + { + "epoch": 0.8035656321006767, + "grad_norm": 0.09031382948160172, + "learning_rate": 5.907782422588801e-06, + "loss": 8.5549, + "step": 160910 + }, + { + "epoch": 0.8036155709256161, + "grad_norm": 0.08855296671390533, + "learning_rate": 5.90628050764725e-06, + "loss": 8.5692, + "step": 160920 + }, + { + "epoch": 0.8036655097505556, + "grad_norm": 0.09416501969099045, + "learning_rate": 5.9047785927057e-06, + "loss": 8.562, + "step": 160930 + }, + { + "epoch": 0.803715448575495, + "grad_norm": 0.09245479851961136, + "learning_rate": 5.90327667776415e-06, + "loss": 8.5468, + "step": 160940 + }, + { + "epoch": 0.8037653874004345, + "grad_norm": 0.09061010181903839, + "learning_rate": 5.901774762822599e-06, + "loss": 8.5644, + "step": 160950 + }, + { + "epoch": 0.8038153262253739, + "grad_norm": 0.08976545184850693, + "learning_rate": 5.9002728478810485e-06, + "loss": 8.5529, + "step": 160960 + }, + { + "epoch": 0.8038652650503134, + "grad_norm": 0.09560992568731308, + "learning_rate": 5.898770932939498e-06, + "loss": 8.5582, + "step": 160970 + }, + { + "epoch": 0.8039152038752528, + "grad_norm": 0.09577993303537369, + "learning_rate": 5.897269017997947e-06, + "loss": 8.558, + "step": 160980 + }, + { + "epoch": 0.8039651427001923, + "grad_norm": 0.08982730656862259, + "learning_rate": 5.895767103056397e-06, + "loss": 8.5651, + "step": 160990 + }, + { + "epoch": 0.8040150815251317, + "grad_norm": 0.08825762569904327, + "learning_rate": 5.894265188114847e-06, + "loss": 8.5539, + "step": 161000 + }, + { + "epoch": 0.8040650203500712, + "grad_norm": 0.0992155522108078, + "learning_rate": 5.892763273173296e-06, + "loss": 8.572, + "step": 161010 + }, + { + "epoch": 0.8041149591750106, + "grad_norm": 0.09141044318675995, + "learning_rate": 5.891261358231746e-06, + "loss": 8.5596, + "step": 161020 + }, + { + "epoch": 0.8041648979999501, + "grad_norm": 0.09531890600919724, + "learning_rate": 5.889759443290195e-06, + "loss": 8.5501, + "step": 161030 + }, + { + "epoch": 0.8042148368248895, + "grad_norm": 0.08679885417222977, + "learning_rate": 5.888257528348645e-06, + "loss": 8.5506, + "step": 161040 + }, + { + "epoch": 0.804264775649829, + "grad_norm": 0.08994447439908981, + "learning_rate": 5.886755613407094e-06, + "loss": 8.5481, + "step": 161050 + }, + { + "epoch": 0.8043147144747684, + "grad_norm": 0.09019139409065247, + "learning_rate": 5.8852536984655435e-06, + "loss": 8.5627, + "step": 161060 + }, + { + "epoch": 0.8043646532997079, + "grad_norm": 0.08854031562805176, + "learning_rate": 5.883751783523994e-06, + "loss": 8.5427, + "step": 161070 + }, + { + "epoch": 0.8044145921246473, + "grad_norm": 0.0935094803571701, + "learning_rate": 5.882249868582442e-06, + "loss": 8.5412, + "step": 161080 + }, + { + "epoch": 0.8044645309495868, + "grad_norm": 0.08779904246330261, + "learning_rate": 5.880747953640892e-06, + "loss": 8.5639, + "step": 161090 + }, + { + "epoch": 0.8045144697745262, + "grad_norm": 0.0949099212884903, + "learning_rate": 5.8792460386993425e-06, + "loss": 8.5561, + "step": 161100 + }, + { + "epoch": 0.8045644085994657, + "grad_norm": 0.0882745087146759, + "learning_rate": 5.877744123757791e-06, + "loss": 8.5621, + "step": 161110 + }, + { + "epoch": 0.8046143474244051, + "grad_norm": 0.09196063131093979, + "learning_rate": 5.876242208816241e-06, + "loss": 8.5656, + "step": 161120 + }, + { + "epoch": 0.8046642862493446, + "grad_norm": 0.08640565723180771, + "learning_rate": 5.87474029387469e-06, + "loss": 8.5621, + "step": 161130 + }, + { + "epoch": 0.804714225074284, + "grad_norm": 0.09424121677875519, + "learning_rate": 5.87323837893314e-06, + "loss": 8.5532, + "step": 161140 + }, + { + "epoch": 0.8047641638992235, + "grad_norm": 0.08842575550079346, + "learning_rate": 5.87173646399159e-06, + "loss": 8.561, + "step": 161150 + }, + { + "epoch": 0.8048141027241629, + "grad_norm": 0.09014535695314407, + "learning_rate": 5.8702345490500385e-06, + "loss": 8.5514, + "step": 161160 + }, + { + "epoch": 0.8048640415491024, + "grad_norm": 0.09411894530057907, + "learning_rate": 5.868732634108489e-06, + "loss": 8.544, + "step": 161170 + }, + { + "epoch": 0.8049139803740418, + "grad_norm": 0.09182292968034744, + "learning_rate": 5.867230719166938e-06, + "loss": 8.5573, + "step": 161180 + }, + { + "epoch": 0.8049639191989812, + "grad_norm": 0.08946768194437027, + "learning_rate": 5.865728804225387e-06, + "loss": 8.5418, + "step": 161190 + }, + { + "epoch": 0.8050138580239207, + "grad_norm": 0.08927719295024872, + "learning_rate": 5.8642268892838375e-06, + "loss": 8.5637, + "step": 161200 + }, + { + "epoch": 0.8050637968488601, + "grad_norm": 0.09145810455083847, + "learning_rate": 5.862724974342286e-06, + "loss": 8.5511, + "step": 161210 + }, + { + "epoch": 0.8051137356737996, + "grad_norm": 0.09392247349023819, + "learning_rate": 5.861223059400736e-06, + "loss": 8.5375, + "step": 161220 + }, + { + "epoch": 0.805163674498739, + "grad_norm": 0.09256572276353836, + "learning_rate": 5.8597211444591855e-06, + "loss": 8.5547, + "step": 161230 + }, + { + "epoch": 0.8052136133236785, + "grad_norm": 0.08906706422567368, + "learning_rate": 5.858219229517635e-06, + "loss": 8.5664, + "step": 161240 + }, + { + "epoch": 0.8052635521486179, + "grad_norm": 0.08576681464910507, + "learning_rate": 5.856717314576085e-06, + "loss": 8.5525, + "step": 161250 + }, + { + "epoch": 0.8053134909735574, + "grad_norm": 0.09053860604763031, + "learning_rate": 5.855215399634534e-06, + "loss": 8.5489, + "step": 161260 + }, + { + "epoch": 0.8053634297984968, + "grad_norm": 0.08544787019491196, + "learning_rate": 5.853713484692984e-06, + "loss": 8.5576, + "step": 161270 + }, + { + "epoch": 0.8054133686234363, + "grad_norm": 0.0925343781709671, + "learning_rate": 5.852211569751433e-06, + "loss": 8.5416, + "step": 161280 + }, + { + "epoch": 0.8054633074483757, + "grad_norm": 0.09434544295072556, + "learning_rate": 5.850709654809882e-06, + "loss": 8.5593, + "step": 161290 + }, + { + "epoch": 0.8055132462733152, + "grad_norm": 0.09324178099632263, + "learning_rate": 5.8492077398683325e-06, + "loss": 8.5498, + "step": 161300 + }, + { + "epoch": 0.8055631850982546, + "grad_norm": 0.09053826332092285, + "learning_rate": 5.847705824926782e-06, + "loss": 8.5522, + "step": 161310 + }, + { + "epoch": 0.8056131239231941, + "grad_norm": 0.09149667620658875, + "learning_rate": 5.846203909985231e-06, + "loss": 8.536, + "step": 161320 + }, + { + "epoch": 0.8056630627481335, + "grad_norm": 0.08661210536956787, + "learning_rate": 5.8447019950436805e-06, + "loss": 8.5475, + "step": 161330 + }, + { + "epoch": 0.805713001573073, + "grad_norm": 0.09454959630966187, + "learning_rate": 5.843200080102131e-06, + "loss": 8.5584, + "step": 161340 + }, + { + "epoch": 0.8057629403980124, + "grad_norm": 0.09166700392961502, + "learning_rate": 5.84169816516058e-06, + "loss": 8.5538, + "step": 161350 + }, + { + "epoch": 0.8058128792229519, + "grad_norm": 0.08817677199840546, + "learning_rate": 5.840196250219029e-06, + "loss": 8.5504, + "step": 161360 + }, + { + "epoch": 0.8058628180478913, + "grad_norm": 0.0908307358622551, + "learning_rate": 5.838694335277479e-06, + "loss": 8.5528, + "step": 161370 + }, + { + "epoch": 0.8059127568728308, + "grad_norm": 0.10027515888214111, + "learning_rate": 5.837192420335928e-06, + "loss": 8.5598, + "step": 161380 + }, + { + "epoch": 0.8059626956977702, + "grad_norm": 0.09583675861358643, + "learning_rate": 5.835690505394378e-06, + "loss": 8.5463, + "step": 161390 + }, + { + "epoch": 0.8060126345227097, + "grad_norm": 0.0983138233423233, + "learning_rate": 5.8341885904528275e-06, + "loss": 8.5588, + "step": 161400 + }, + { + "epoch": 0.8060625733476491, + "grad_norm": 0.08900081366300583, + "learning_rate": 5.832686675511277e-06, + "loss": 8.5361, + "step": 161410 + }, + { + "epoch": 0.8061125121725886, + "grad_norm": 0.093843013048172, + "learning_rate": 5.831184760569727e-06, + "loss": 8.5498, + "step": 161420 + }, + { + "epoch": 0.806162450997528, + "grad_norm": 0.09397601336240768, + "learning_rate": 5.8296828456281755e-06, + "loss": 8.5497, + "step": 161430 + }, + { + "epoch": 0.8062123898224675, + "grad_norm": 0.09373549371957779, + "learning_rate": 5.828180930686626e-06, + "loss": 8.563, + "step": 161440 + }, + { + "epoch": 0.8062623286474069, + "grad_norm": 0.09314602613449097, + "learning_rate": 5.826679015745075e-06, + "loss": 8.5504, + "step": 161450 + }, + { + "epoch": 0.8063122674723464, + "grad_norm": 0.09471584111452103, + "learning_rate": 5.825177100803524e-06, + "loss": 8.5783, + "step": 161460 + }, + { + "epoch": 0.8063622062972858, + "grad_norm": 0.08868781477212906, + "learning_rate": 5.8236751858619745e-06, + "loss": 8.5653, + "step": 161470 + }, + { + "epoch": 0.8064121451222253, + "grad_norm": 0.09533412009477615, + "learning_rate": 5.822173270920423e-06, + "loss": 8.5538, + "step": 161480 + }, + { + "epoch": 0.8064620839471647, + "grad_norm": 0.09250637888908386, + "learning_rate": 5.820671355978873e-06, + "loss": 8.5697, + "step": 161490 + }, + { + "epoch": 0.8065120227721042, + "grad_norm": 0.08923783898353577, + "learning_rate": 5.819169441037323e-06, + "loss": 8.5524, + "step": 161500 + }, + { + "epoch": 0.8065619615970436, + "grad_norm": 0.09027642011642456, + "learning_rate": 5.817667526095772e-06, + "loss": 8.5462, + "step": 161510 + }, + { + "epoch": 0.806611900421983, + "grad_norm": 0.08848720788955688, + "learning_rate": 5.816165611154222e-06, + "loss": 8.547, + "step": 161520 + }, + { + "epoch": 0.8066618392469225, + "grad_norm": 0.09159193187952042, + "learning_rate": 5.8146636962126705e-06, + "loss": 8.5562, + "step": 161530 + }, + { + "epoch": 0.806711778071862, + "grad_norm": 0.09201356023550034, + "learning_rate": 5.813161781271121e-06, + "loss": 8.5494, + "step": 161540 + }, + { + "epoch": 0.8067617168968014, + "grad_norm": 0.090115986764431, + "learning_rate": 5.811659866329571e-06, + "loss": 8.543, + "step": 161550 + }, + { + "epoch": 0.8068116557217409, + "grad_norm": 0.08883675187826157, + "learning_rate": 5.810157951388019e-06, + "loss": 8.566, + "step": 161560 + }, + { + "epoch": 0.8068615945466803, + "grad_norm": 0.09100556373596191, + "learning_rate": 5.8086560364464695e-06, + "loss": 8.5675, + "step": 161570 + }, + { + "epoch": 0.8069115333716198, + "grad_norm": 0.09314378350973129, + "learning_rate": 5.807154121504919e-06, + "loss": 8.5437, + "step": 161580 + }, + { + "epoch": 0.8069614721965592, + "grad_norm": 0.09172666072845459, + "learning_rate": 5.805652206563368e-06, + "loss": 8.5589, + "step": 161590 + }, + { + "epoch": 0.8070114110214986, + "grad_norm": 0.09398376941680908, + "learning_rate": 5.804150291621818e-06, + "loss": 8.5466, + "step": 161600 + }, + { + "epoch": 0.8070613498464381, + "grad_norm": 0.08542805910110474, + "learning_rate": 5.802648376680267e-06, + "loss": 8.5673, + "step": 161610 + }, + { + "epoch": 0.8071112886713776, + "grad_norm": 0.08974550664424896, + "learning_rate": 5.801146461738717e-06, + "loss": 8.5524, + "step": 161620 + }, + { + "epoch": 0.807161227496317, + "grad_norm": 0.09220361709594727, + "learning_rate": 5.799644546797166e-06, + "loss": 8.5404, + "step": 161630 + }, + { + "epoch": 0.8072111663212564, + "grad_norm": 0.08950573951005936, + "learning_rate": 5.798142631855616e-06, + "loss": 8.5477, + "step": 161640 + }, + { + "epoch": 0.8072611051461959, + "grad_norm": 0.0872855931520462, + "learning_rate": 5.796640716914066e-06, + "loss": 8.5645, + "step": 161650 + }, + { + "epoch": 0.8073110439711354, + "grad_norm": 0.09016703069210052, + "learning_rate": 5.795138801972515e-06, + "loss": 8.5501, + "step": 161660 + }, + { + "epoch": 0.8073609827960748, + "grad_norm": 0.09285937994718552, + "learning_rate": 5.7936368870309645e-06, + "loss": 8.5527, + "step": 161670 + }, + { + "epoch": 0.8074109216210142, + "grad_norm": 0.09346285462379456, + "learning_rate": 5.792134972089414e-06, + "loss": 8.5559, + "step": 161680 + }, + { + "epoch": 0.8074608604459537, + "grad_norm": 0.08668027818202972, + "learning_rate": 5.790633057147863e-06, + "loss": 8.5425, + "step": 161690 + }, + { + "epoch": 0.8075107992708932, + "grad_norm": 0.0898892879486084, + "learning_rate": 5.789131142206313e-06, + "loss": 8.5491, + "step": 161700 + }, + { + "epoch": 0.8075607380958326, + "grad_norm": 0.09450709819793701, + "learning_rate": 5.787629227264763e-06, + "loss": 8.5568, + "step": 161710 + }, + { + "epoch": 0.807610676920772, + "grad_norm": 0.08670226484537125, + "learning_rate": 5.786127312323212e-06, + "loss": 8.5596, + "step": 161720 + }, + { + "epoch": 0.8076606157457115, + "grad_norm": 0.08837024122476578, + "learning_rate": 5.784625397381662e-06, + "loss": 8.5445, + "step": 161730 + }, + { + "epoch": 0.807710554570651, + "grad_norm": 0.093661367893219, + "learning_rate": 5.7831234824401115e-06, + "loss": 8.5325, + "step": 161740 + }, + { + "epoch": 0.8077604933955904, + "grad_norm": 0.0894351676106453, + "learning_rate": 5.781621567498561e-06, + "loss": 8.551, + "step": 161750 + }, + { + "epoch": 0.8078104322205298, + "grad_norm": 0.08936372399330139, + "learning_rate": 5.78011965255701e-06, + "loss": 8.5591, + "step": 161760 + }, + { + "epoch": 0.8078603710454693, + "grad_norm": 0.09095845371484756, + "learning_rate": 5.7786177376154595e-06, + "loss": 8.5491, + "step": 161770 + }, + { + "epoch": 0.8079103098704088, + "grad_norm": 0.09978078305721283, + "learning_rate": 5.77711582267391e-06, + "loss": 8.5385, + "step": 161780 + }, + { + "epoch": 0.8079602486953482, + "grad_norm": 0.0931134968996048, + "learning_rate": 5.775613907732359e-06, + "loss": 8.554, + "step": 161790 + }, + { + "epoch": 0.8080101875202876, + "grad_norm": 0.09244008362293243, + "learning_rate": 5.774111992790808e-06, + "loss": 8.5561, + "step": 161800 + }, + { + "epoch": 0.8080601263452271, + "grad_norm": 0.09390776604413986, + "learning_rate": 5.772610077849258e-06, + "loss": 8.5361, + "step": 161810 + }, + { + "epoch": 0.8081100651701666, + "grad_norm": 0.09701921045780182, + "learning_rate": 5.771108162907708e-06, + "loss": 8.5554, + "step": 161820 + }, + { + "epoch": 0.808160003995106, + "grad_norm": 0.09131251275539398, + "learning_rate": 5.769606247966157e-06, + "loss": 8.5438, + "step": 161830 + }, + { + "epoch": 0.8082099428200454, + "grad_norm": 0.09114990383386612, + "learning_rate": 5.7681043330246066e-06, + "loss": 8.5591, + "step": 161840 + }, + { + "epoch": 0.8082598816449849, + "grad_norm": 0.08846677094697952, + "learning_rate": 5.766602418083056e-06, + "loss": 8.5434, + "step": 161850 + }, + { + "epoch": 0.8083098204699244, + "grad_norm": 0.08849672228097916, + "learning_rate": 5.765100503141505e-06, + "loss": 8.5666, + "step": 161860 + }, + { + "epoch": 0.8083597592948638, + "grad_norm": 0.0881495401263237, + "learning_rate": 5.763598588199955e-06, + "loss": 8.5384, + "step": 161870 + }, + { + "epoch": 0.8084096981198032, + "grad_norm": 0.09564726799726486, + "learning_rate": 5.762096673258405e-06, + "loss": 8.5536, + "step": 161880 + }, + { + "epoch": 0.8084596369447427, + "grad_norm": 0.08935084193944931, + "learning_rate": 5.760594758316854e-06, + "loss": 8.5496, + "step": 161890 + }, + { + "epoch": 0.8085095757696822, + "grad_norm": 0.09384889900684357, + "learning_rate": 5.759092843375304e-06, + "loss": 8.5448, + "step": 161900 + }, + { + "epoch": 0.8085595145946216, + "grad_norm": 0.08726388961076736, + "learning_rate": 5.757590928433753e-06, + "loss": 8.5489, + "step": 161910 + }, + { + "epoch": 0.808609453419561, + "grad_norm": 0.09072273224592209, + "learning_rate": 5.756089013492203e-06, + "loss": 8.5589, + "step": 161920 + }, + { + "epoch": 0.8086593922445005, + "grad_norm": 0.09182450920343399, + "learning_rate": 5.754587098550652e-06, + "loss": 8.5484, + "step": 161930 + }, + { + "epoch": 0.80870933106944, + "grad_norm": 0.08764579892158508, + "learning_rate": 5.7530851836091016e-06, + "loss": 8.5622, + "step": 161940 + }, + { + "epoch": 0.8087592698943794, + "grad_norm": 0.0898037999868393, + "learning_rate": 5.751583268667552e-06, + "loss": 8.5555, + "step": 161950 + }, + { + "epoch": 0.8088092087193188, + "grad_norm": 0.09432199597358704, + "learning_rate": 5.750081353726e-06, + "loss": 8.5663, + "step": 161960 + }, + { + "epoch": 0.8088591475442583, + "grad_norm": 0.09024576097726822, + "learning_rate": 5.74857943878445e-06, + "loss": 8.5594, + "step": 161970 + }, + { + "epoch": 0.8089090863691978, + "grad_norm": 0.08990621566772461, + "learning_rate": 5.747077523842901e-06, + "loss": 8.5471, + "step": 161980 + }, + { + "epoch": 0.8089590251941372, + "grad_norm": 0.09168119728565216, + "learning_rate": 5.745575608901349e-06, + "loss": 8.5391, + "step": 161990 + }, + { + "epoch": 0.8090089640190766, + "grad_norm": 0.09170830249786377, + "learning_rate": 5.744073693959799e-06, + "loss": 8.565, + "step": 162000 + }, + { + "epoch": 0.809058902844016, + "grad_norm": 0.08952143043279648, + "learning_rate": 5.742571779018248e-06, + "loss": 8.5513, + "step": 162010 + }, + { + "epoch": 0.8091088416689556, + "grad_norm": 0.08867137879133224, + "learning_rate": 5.741069864076698e-06, + "loss": 8.5306, + "step": 162020 + }, + { + "epoch": 0.809158780493895, + "grad_norm": 0.09371023625135422, + "learning_rate": 5.739567949135148e-06, + "loss": 8.542, + "step": 162030 + }, + { + "epoch": 0.8092087193188344, + "grad_norm": 0.09115874022245407, + "learning_rate": 5.7380660341935966e-06, + "loss": 8.5642, + "step": 162040 + }, + { + "epoch": 0.8092586581437738, + "grad_norm": 0.09029915928840637, + "learning_rate": 5.736564119252047e-06, + "loss": 8.5507, + "step": 162050 + }, + { + "epoch": 0.8093085969687134, + "grad_norm": 0.09160950779914856, + "learning_rate": 5.735062204310496e-06, + "loss": 8.5746, + "step": 162060 + }, + { + "epoch": 0.8093585357936528, + "grad_norm": 0.09389065206050873, + "learning_rate": 5.733560289368945e-06, + "loss": 8.5682, + "step": 162070 + }, + { + "epoch": 0.8094084746185922, + "grad_norm": 0.08822131901979446, + "learning_rate": 5.732058374427396e-06, + "loss": 8.5532, + "step": 162080 + }, + { + "epoch": 0.8094584134435316, + "grad_norm": 0.09017851203680038, + "learning_rate": 5.730556459485844e-06, + "loss": 8.559, + "step": 162090 + }, + { + "epoch": 0.8095083522684712, + "grad_norm": 0.09044285118579865, + "learning_rate": 5.729054544544294e-06, + "loss": 8.5549, + "step": 162100 + }, + { + "epoch": 0.8095582910934106, + "grad_norm": 0.09080391377210617, + "learning_rate": 5.727552629602744e-06, + "loss": 8.5327, + "step": 162110 + }, + { + "epoch": 0.80960822991835, + "grad_norm": 0.08978559076786041, + "learning_rate": 5.726050714661193e-06, + "loss": 8.5409, + "step": 162120 + }, + { + "epoch": 0.8096581687432894, + "grad_norm": 0.09273146092891693, + "learning_rate": 5.724548799719643e-06, + "loss": 8.5516, + "step": 162130 + }, + { + "epoch": 0.809708107568229, + "grad_norm": 0.09049143642187119, + "learning_rate": 5.7230468847780924e-06, + "loss": 8.5536, + "step": 162140 + }, + { + "epoch": 0.8097580463931684, + "grad_norm": 0.09314949065446854, + "learning_rate": 5.721544969836542e-06, + "loss": 8.5574, + "step": 162150 + }, + { + "epoch": 0.8098079852181078, + "grad_norm": 0.09063813835382462, + "learning_rate": 5.720043054894991e-06, + "loss": 8.5568, + "step": 162160 + }, + { + "epoch": 0.8098579240430472, + "grad_norm": 0.09309311211109161, + "learning_rate": 5.71854113995344e-06, + "loss": 8.5452, + "step": 162170 + }, + { + "epoch": 0.8099078628679867, + "grad_norm": 0.09010675549507141, + "learning_rate": 5.717039225011891e-06, + "loss": 8.5491, + "step": 162180 + }, + { + "epoch": 0.8099578016929262, + "grad_norm": 0.09501739591360092, + "learning_rate": 5.71553731007034e-06, + "loss": 8.5697, + "step": 162190 + }, + { + "epoch": 0.8100077405178656, + "grad_norm": 0.09098471701145172, + "learning_rate": 5.714035395128789e-06, + "loss": 8.5599, + "step": 162200 + }, + { + "epoch": 0.810057679342805, + "grad_norm": 0.0888034850358963, + "learning_rate": 5.712533480187239e-06, + "loss": 8.5362, + "step": 162210 + }, + { + "epoch": 0.8101076181677445, + "grad_norm": 0.09654735028743744, + "learning_rate": 5.711031565245689e-06, + "loss": 8.5474, + "step": 162220 + }, + { + "epoch": 0.810157556992684, + "grad_norm": 0.09235084056854248, + "learning_rate": 5.709529650304138e-06, + "loss": 8.5447, + "step": 162230 + }, + { + "epoch": 0.8102074958176234, + "grad_norm": 0.09002131223678589, + "learning_rate": 5.7080277353625874e-06, + "loss": 8.5462, + "step": 162240 + }, + { + "epoch": 0.8102574346425628, + "grad_norm": 0.08661506325006485, + "learning_rate": 5.706525820421037e-06, + "loss": 8.5672, + "step": 162250 + }, + { + "epoch": 0.8103073734675023, + "grad_norm": 0.08980439603328705, + "learning_rate": 5.705023905479486e-06, + "loss": 8.5427, + "step": 162260 + }, + { + "epoch": 0.8103573122924418, + "grad_norm": 0.08393685519695282, + "learning_rate": 5.703521990537936e-06, + "loss": 8.5484, + "step": 162270 + }, + { + "epoch": 0.8104072511173812, + "grad_norm": 0.09369827061891556, + "learning_rate": 5.702020075596386e-06, + "loss": 8.5434, + "step": 162280 + }, + { + "epoch": 0.8104571899423206, + "grad_norm": 0.09700123220682144, + "learning_rate": 5.700518160654835e-06, + "loss": 8.5523, + "step": 162290 + }, + { + "epoch": 0.81050712876726, + "grad_norm": 0.08638820052146912, + "learning_rate": 5.699016245713285e-06, + "loss": 8.5611, + "step": 162300 + }, + { + "epoch": 0.8105570675921996, + "grad_norm": 0.09503623843193054, + "learning_rate": 5.697514330771734e-06, + "loss": 8.5216, + "step": 162310 + }, + { + "epoch": 0.810607006417139, + "grad_norm": 0.09698504954576492, + "learning_rate": 5.696012415830184e-06, + "loss": 8.5362, + "step": 162320 + }, + { + "epoch": 0.8106569452420784, + "grad_norm": 0.09360737353563309, + "learning_rate": 5.694510500888633e-06, + "loss": 8.5411, + "step": 162330 + }, + { + "epoch": 0.8107068840670179, + "grad_norm": 0.08911336213350296, + "learning_rate": 5.6930085859470824e-06, + "loss": 8.5541, + "step": 162340 + }, + { + "epoch": 0.8107568228919574, + "grad_norm": 0.09235373139381409, + "learning_rate": 5.691506671005533e-06, + "loss": 8.5529, + "step": 162350 + }, + { + "epoch": 0.8108067617168968, + "grad_norm": 0.09205646812915802, + "learning_rate": 5.690004756063981e-06, + "loss": 8.5628, + "step": 162360 + }, + { + "epoch": 0.8108567005418362, + "grad_norm": 0.08802807331085205, + "learning_rate": 5.688502841122431e-06, + "loss": 8.5639, + "step": 162370 + }, + { + "epoch": 0.8109066393667757, + "grad_norm": 0.09021410346031189, + "learning_rate": 5.6870009261808815e-06, + "loss": 8.5609, + "step": 162380 + }, + { + "epoch": 0.8109565781917152, + "grad_norm": 0.09240546077489853, + "learning_rate": 5.68549901123933e-06, + "loss": 8.5595, + "step": 162390 + }, + { + "epoch": 0.8110065170166546, + "grad_norm": 0.08953437209129333, + "learning_rate": 5.68399709629778e-06, + "loss": 8.5495, + "step": 162400 + }, + { + "epoch": 0.811056455841594, + "grad_norm": 0.09370911866426468, + "learning_rate": 5.682495181356229e-06, + "loss": 8.541, + "step": 162410 + }, + { + "epoch": 0.8111063946665334, + "grad_norm": 0.09173066914081573, + "learning_rate": 5.680993266414679e-06, + "loss": 8.5352, + "step": 162420 + }, + { + "epoch": 0.811156333491473, + "grad_norm": 0.08917384594678879, + "learning_rate": 5.679491351473129e-06, + "loss": 8.56, + "step": 162430 + }, + { + "epoch": 0.8112062723164124, + "grad_norm": 0.09332337230443954, + "learning_rate": 5.6779894365315774e-06, + "loss": 8.5452, + "step": 162440 + }, + { + "epoch": 0.8112562111413518, + "grad_norm": 0.08929426968097687, + "learning_rate": 5.676487521590028e-06, + "loss": 8.5625, + "step": 162450 + }, + { + "epoch": 0.8113061499662912, + "grad_norm": 0.08893352746963501, + "learning_rate": 5.674985606648477e-06, + "loss": 8.5519, + "step": 162460 + }, + { + "epoch": 0.8113560887912308, + "grad_norm": 0.0881025567650795, + "learning_rate": 5.673483691706926e-06, + "loss": 8.5514, + "step": 162470 + }, + { + "epoch": 0.8114060276161702, + "grad_norm": 0.09232950210571289, + "learning_rate": 5.6719817767653765e-06, + "loss": 8.563, + "step": 162480 + }, + { + "epoch": 0.8114559664411096, + "grad_norm": 0.09270848333835602, + "learning_rate": 5.670479861823825e-06, + "loss": 8.5434, + "step": 162490 + }, + { + "epoch": 0.811505905266049, + "grad_norm": 0.0903157889842987, + "learning_rate": 5.668977946882275e-06, + "loss": 8.5461, + "step": 162500 + }, + { + "epoch": 0.8115558440909886, + "grad_norm": 0.09540119767189026, + "learning_rate": 5.6674760319407245e-06, + "loss": 8.5403, + "step": 162510 + }, + { + "epoch": 0.811605782915928, + "grad_norm": 0.09533382952213287, + "learning_rate": 5.665974116999174e-06, + "loss": 8.5465, + "step": 162520 + }, + { + "epoch": 0.8116557217408674, + "grad_norm": 0.09012457728385925, + "learning_rate": 5.664472202057624e-06, + "loss": 8.5571, + "step": 162530 + }, + { + "epoch": 0.8117056605658068, + "grad_norm": 0.09900517761707306, + "learning_rate": 5.662970287116073e-06, + "loss": 8.5448, + "step": 162540 + }, + { + "epoch": 0.8117555993907464, + "grad_norm": 0.08922384679317474, + "learning_rate": 5.661468372174523e-06, + "loss": 8.5561, + "step": 162550 + }, + { + "epoch": 0.8118055382156858, + "grad_norm": 0.09595987200737, + "learning_rate": 5.659966457232972e-06, + "loss": 8.5536, + "step": 162560 + }, + { + "epoch": 0.8118554770406252, + "grad_norm": 0.08500176668167114, + "learning_rate": 5.658464542291421e-06, + "loss": 8.552, + "step": 162570 + }, + { + "epoch": 0.8119054158655646, + "grad_norm": 0.0854329764842987, + "learning_rate": 5.6569626273498715e-06, + "loss": 8.5606, + "step": 162580 + }, + { + "epoch": 0.8119553546905042, + "grad_norm": 0.08596555888652802, + "learning_rate": 5.655460712408321e-06, + "loss": 8.5476, + "step": 162590 + }, + { + "epoch": 0.8120052935154436, + "grad_norm": 0.09005005657672882, + "learning_rate": 5.65395879746677e-06, + "loss": 8.5487, + "step": 162600 + }, + { + "epoch": 0.812055232340383, + "grad_norm": 0.08684473484754562, + "learning_rate": 5.6524568825252195e-06, + "loss": 8.5494, + "step": 162610 + }, + { + "epoch": 0.8121051711653224, + "grad_norm": 0.08870302140712738, + "learning_rate": 5.65095496758367e-06, + "loss": 8.5443, + "step": 162620 + }, + { + "epoch": 0.812155109990262, + "grad_norm": 0.0927698165178299, + "learning_rate": 5.649453052642119e-06, + "loss": 8.5475, + "step": 162630 + }, + { + "epoch": 0.8122050488152014, + "grad_norm": 0.09097401797771454, + "learning_rate": 5.647951137700568e-06, + "loss": 8.5476, + "step": 162640 + }, + { + "epoch": 0.8122549876401408, + "grad_norm": 0.0920330137014389, + "learning_rate": 5.646449222759018e-06, + "loss": 8.5655, + "step": 162650 + }, + { + "epoch": 0.8123049264650802, + "grad_norm": 0.09149347245693207, + "learning_rate": 5.644947307817467e-06, + "loss": 8.543, + "step": 162660 + }, + { + "epoch": 0.8123548652900198, + "grad_norm": 0.09597709774971008, + "learning_rate": 5.643445392875917e-06, + "loss": 8.5521, + "step": 162670 + }, + { + "epoch": 0.8124048041149592, + "grad_norm": 0.08816757798194885, + "learning_rate": 5.6419434779343665e-06, + "loss": 8.5451, + "step": 162680 + }, + { + "epoch": 0.8124547429398986, + "grad_norm": 0.0923578068614006, + "learning_rate": 5.640441562992816e-06, + "loss": 8.5497, + "step": 162690 + }, + { + "epoch": 0.812504681764838, + "grad_norm": 0.08633648604154587, + "learning_rate": 5.638939648051266e-06, + "loss": 8.5605, + "step": 162700 + }, + { + "epoch": 0.8125546205897776, + "grad_norm": 0.09606485068798065, + "learning_rate": 5.6374377331097145e-06, + "loss": 8.5607, + "step": 162710 + }, + { + "epoch": 0.812604559414717, + "grad_norm": 0.09084507077932358, + "learning_rate": 5.635935818168165e-06, + "loss": 8.559, + "step": 162720 + }, + { + "epoch": 0.8126544982396564, + "grad_norm": 0.0889735296368599, + "learning_rate": 5.634433903226614e-06, + "loss": 8.5486, + "step": 162730 + }, + { + "epoch": 0.8127044370645958, + "grad_norm": 0.09297562390565872, + "learning_rate": 5.632931988285063e-06, + "loss": 8.5603, + "step": 162740 + }, + { + "epoch": 0.8127543758895354, + "grad_norm": 0.08924896270036697, + "learning_rate": 5.6314300733435135e-06, + "loss": 8.5499, + "step": 162750 + }, + { + "epoch": 0.8128043147144748, + "grad_norm": 0.08938944339752197, + "learning_rate": 5.629928158401962e-06, + "loss": 8.5714, + "step": 162760 + }, + { + "epoch": 0.8128542535394142, + "grad_norm": 0.09662458300590515, + "learning_rate": 5.628426243460412e-06, + "loss": 8.5555, + "step": 162770 + }, + { + "epoch": 0.8129041923643536, + "grad_norm": 0.08405207097530365, + "learning_rate": 5.626924328518862e-06, + "loss": 8.5385, + "step": 162780 + }, + { + "epoch": 0.8129541311892932, + "grad_norm": 0.09311365336179733, + "learning_rate": 5.625422413577311e-06, + "loss": 8.5482, + "step": 162790 + }, + { + "epoch": 0.8130040700142326, + "grad_norm": 0.09118988364934921, + "learning_rate": 5.623920498635761e-06, + "loss": 8.5717, + "step": 162800 + }, + { + "epoch": 0.813054008839172, + "grad_norm": 0.09193669259548187, + "learning_rate": 5.62241858369421e-06, + "loss": 8.5485, + "step": 162810 + }, + { + "epoch": 0.8131039476641114, + "grad_norm": 0.09104682505130768, + "learning_rate": 5.62091666875266e-06, + "loss": 8.5567, + "step": 162820 + }, + { + "epoch": 0.813153886489051, + "grad_norm": 0.0893746018409729, + "learning_rate": 5.61941475381111e-06, + "loss": 8.5666, + "step": 162830 + }, + { + "epoch": 0.8132038253139904, + "grad_norm": 0.09030063450336456, + "learning_rate": 5.617912838869558e-06, + "loss": 8.5506, + "step": 162840 + }, + { + "epoch": 0.8132537641389298, + "grad_norm": 0.0945194736123085, + "learning_rate": 5.6164109239280085e-06, + "loss": 8.5281, + "step": 162850 + }, + { + "epoch": 0.8133037029638692, + "grad_norm": 0.09485594183206558, + "learning_rate": 5.614909008986459e-06, + "loss": 8.5553, + "step": 162860 + }, + { + "epoch": 0.8133536417888088, + "grad_norm": 0.08975322544574738, + "learning_rate": 5.613407094044907e-06, + "loss": 8.5503, + "step": 162870 + }, + { + "epoch": 0.8134035806137482, + "grad_norm": 0.09506627917289734, + "learning_rate": 5.611905179103357e-06, + "loss": 8.5368, + "step": 162880 + }, + { + "epoch": 0.8134535194386876, + "grad_norm": 0.09811557084321976, + "learning_rate": 5.610403264161806e-06, + "loss": 8.5708, + "step": 162890 + }, + { + "epoch": 0.813503458263627, + "grad_norm": 0.08716001361608505, + "learning_rate": 5.608901349220256e-06, + "loss": 8.5295, + "step": 162900 + }, + { + "epoch": 0.8135533970885666, + "grad_norm": 0.08750799298286438, + "learning_rate": 5.607399434278706e-06, + "loss": 8.5557, + "step": 162910 + }, + { + "epoch": 0.813603335913506, + "grad_norm": 0.09434546530246735, + "learning_rate": 5.605897519337155e-06, + "loss": 8.5626, + "step": 162920 + }, + { + "epoch": 0.8136532747384454, + "grad_norm": 0.08905541896820068, + "learning_rate": 5.604395604395605e-06, + "loss": 8.543, + "step": 162930 + }, + { + "epoch": 0.8137032135633848, + "grad_norm": 0.09109733253717422, + "learning_rate": 5.602893689454054e-06, + "loss": 8.5606, + "step": 162940 + }, + { + "epoch": 0.8137531523883244, + "grad_norm": 0.09967444837093353, + "learning_rate": 5.6013917745125035e-06, + "loss": 8.534, + "step": 162950 + }, + { + "epoch": 0.8138030912132638, + "grad_norm": 0.08702152967453003, + "learning_rate": 5.599889859570954e-06, + "loss": 8.5679, + "step": 162960 + }, + { + "epoch": 0.8138530300382032, + "grad_norm": 0.09200234711170197, + "learning_rate": 5.598387944629402e-06, + "loss": 8.5477, + "step": 162970 + }, + { + "epoch": 0.8139029688631426, + "grad_norm": 0.08344555646181107, + "learning_rate": 5.596886029687852e-06, + "loss": 8.5648, + "step": 162980 + }, + { + "epoch": 0.8139529076880822, + "grad_norm": 0.09271971136331558, + "learning_rate": 5.595384114746302e-06, + "loss": 8.5407, + "step": 162990 + }, + { + "epoch": 0.8140028465130216, + "grad_norm": 0.09208449721336365, + "learning_rate": 5.593882199804751e-06, + "loss": 8.5329, + "step": 163000 + }, + { + "epoch": 0.814052785337961, + "grad_norm": 0.09522075206041336, + "learning_rate": 5.592380284863201e-06, + "loss": 8.5586, + "step": 163010 + }, + { + "epoch": 0.8141027241629004, + "grad_norm": 0.09683191031217575, + "learning_rate": 5.5908783699216505e-06, + "loss": 8.5441, + "step": 163020 + }, + { + "epoch": 0.81415266298784, + "grad_norm": 0.09313095360994339, + "learning_rate": 5.5893764549801e-06, + "loss": 8.5572, + "step": 163030 + }, + { + "epoch": 0.8142026018127794, + "grad_norm": 0.09198789298534393, + "learning_rate": 5.587874540038549e-06, + "loss": 8.5576, + "step": 163040 + }, + { + "epoch": 0.8142525406377188, + "grad_norm": 0.0924067571759224, + "learning_rate": 5.5863726250969985e-06, + "loss": 8.5483, + "step": 163050 + }, + { + "epoch": 0.8143024794626582, + "grad_norm": 0.09334490448236465, + "learning_rate": 5.584870710155449e-06, + "loss": 8.5449, + "step": 163060 + }, + { + "epoch": 0.8143524182875977, + "grad_norm": 0.08748748898506165, + "learning_rate": 5.583368795213898e-06, + "loss": 8.5381, + "step": 163070 + }, + { + "epoch": 0.8144023571125372, + "grad_norm": 0.08966860920190811, + "learning_rate": 5.581866880272347e-06, + "loss": 8.5411, + "step": 163080 + }, + { + "epoch": 0.8144522959374766, + "grad_norm": 0.09077271819114685, + "learning_rate": 5.580364965330797e-06, + "loss": 8.545, + "step": 163090 + }, + { + "epoch": 0.814502234762416, + "grad_norm": 0.0929906815290451, + "learning_rate": 5.578863050389247e-06, + "loss": 8.5392, + "step": 163100 + }, + { + "epoch": 0.8145521735873555, + "grad_norm": 0.09005864709615707, + "learning_rate": 5.577361135447696e-06, + "loss": 8.5423, + "step": 163110 + }, + { + "epoch": 0.814602112412295, + "grad_norm": 0.09385271370410919, + "learning_rate": 5.5758592205061455e-06, + "loss": 8.5367, + "step": 163120 + }, + { + "epoch": 0.8146520512372344, + "grad_norm": 0.09610309451818466, + "learning_rate": 5.574357305564595e-06, + "loss": 8.5475, + "step": 163130 + }, + { + "epoch": 0.8147019900621738, + "grad_norm": 0.09091010689735413, + "learning_rate": 5.572855390623044e-06, + "loss": 8.5471, + "step": 163140 + }, + { + "epoch": 0.8147519288871133, + "grad_norm": 0.09262845665216446, + "learning_rate": 5.571353475681494e-06, + "loss": 8.5337, + "step": 163150 + }, + { + "epoch": 0.8148018677120528, + "grad_norm": 0.08759453892707825, + "learning_rate": 5.569851560739944e-06, + "loss": 8.5621, + "step": 163160 + }, + { + "epoch": 0.8148518065369922, + "grad_norm": 0.08776848018169403, + "learning_rate": 5.568349645798393e-06, + "loss": 8.5529, + "step": 163170 + }, + { + "epoch": 0.8149017453619316, + "grad_norm": 0.08764344453811646, + "learning_rate": 5.566847730856843e-06, + "loss": 8.5606, + "step": 163180 + }, + { + "epoch": 0.814951684186871, + "grad_norm": 0.0934050902724266, + "learning_rate": 5.565345815915292e-06, + "loss": 8.5542, + "step": 163190 + }, + { + "epoch": 0.8150016230118106, + "grad_norm": 0.08384846895933151, + "learning_rate": 5.563843900973742e-06, + "loss": 8.5444, + "step": 163200 + }, + { + "epoch": 0.81505156183675, + "grad_norm": 0.0902908518910408, + "learning_rate": 5.562341986032191e-06, + "loss": 8.5412, + "step": 163210 + }, + { + "epoch": 0.8151015006616894, + "grad_norm": 0.08977564424276352, + "learning_rate": 5.5608400710906405e-06, + "loss": 8.5599, + "step": 163220 + }, + { + "epoch": 0.8151514394866288, + "grad_norm": 0.09766307473182678, + "learning_rate": 5.559338156149091e-06, + "loss": 8.5354, + "step": 163230 + }, + { + "epoch": 0.8152013783115684, + "grad_norm": 0.08712057769298553, + "learning_rate": 5.557836241207539e-06, + "loss": 8.5372, + "step": 163240 + }, + { + "epoch": 0.8152513171365078, + "grad_norm": 0.0951530709862709, + "learning_rate": 5.556334326265989e-06, + "loss": 8.5354, + "step": 163250 + }, + { + "epoch": 0.8153012559614472, + "grad_norm": 0.09080353379249573, + "learning_rate": 5.5548324113244396e-06, + "loss": 8.5443, + "step": 163260 + }, + { + "epoch": 0.8153511947863866, + "grad_norm": 0.09223667532205582, + "learning_rate": 5.553330496382888e-06, + "loss": 8.5453, + "step": 163270 + }, + { + "epoch": 0.8154011336113262, + "grad_norm": 0.08771520853042603, + "learning_rate": 5.551828581441338e-06, + "loss": 8.5374, + "step": 163280 + }, + { + "epoch": 0.8154510724362656, + "grad_norm": 0.08749589323997498, + "learning_rate": 5.550326666499787e-06, + "loss": 8.5422, + "step": 163290 + }, + { + "epoch": 0.815501011261205, + "grad_norm": 0.08917036652565002, + "learning_rate": 5.548824751558237e-06, + "loss": 8.5481, + "step": 163300 + }, + { + "epoch": 0.8155509500861444, + "grad_norm": 0.0868101641535759, + "learning_rate": 5.547322836616687e-06, + "loss": 8.5619, + "step": 163310 + }, + { + "epoch": 0.815600888911084, + "grad_norm": 0.091803178191185, + "learning_rate": 5.5458209216751355e-06, + "loss": 8.5356, + "step": 163320 + }, + { + "epoch": 0.8156508277360234, + "grad_norm": 0.09425832331180573, + "learning_rate": 5.544319006733586e-06, + "loss": 8.5293, + "step": 163330 + }, + { + "epoch": 0.8157007665609628, + "grad_norm": 0.1101720854640007, + "learning_rate": 5.542817091792035e-06, + "loss": 8.544, + "step": 163340 + }, + { + "epoch": 0.8157507053859022, + "grad_norm": 0.0971333235502243, + "learning_rate": 5.541315176850484e-06, + "loss": 8.5534, + "step": 163350 + }, + { + "epoch": 0.8158006442108418, + "grad_norm": 0.09201129525899887, + "learning_rate": 5.5398132619089346e-06, + "loss": 8.5444, + "step": 163360 + }, + { + "epoch": 0.8158505830357812, + "grad_norm": 0.08867859095335007, + "learning_rate": 5.538311346967383e-06, + "loss": 8.5282, + "step": 163370 + }, + { + "epoch": 0.8159005218607206, + "grad_norm": 0.09262479841709137, + "learning_rate": 5.536809432025833e-06, + "loss": 8.5319, + "step": 163380 + }, + { + "epoch": 0.81595046068566, + "grad_norm": 0.08705681562423706, + "learning_rate": 5.5353075170842826e-06, + "loss": 8.5469, + "step": 163390 + }, + { + "epoch": 0.8160003995105996, + "grad_norm": 0.09213688969612122, + "learning_rate": 5.533805602142732e-06, + "loss": 8.5515, + "step": 163400 + }, + { + "epoch": 0.816050338335539, + "grad_norm": 0.0938788652420044, + "learning_rate": 5.532303687201182e-06, + "loss": 8.538, + "step": 163410 + }, + { + "epoch": 0.8161002771604784, + "grad_norm": 0.09791158139705658, + "learning_rate": 5.530801772259631e-06, + "loss": 8.5372, + "step": 163420 + }, + { + "epoch": 0.8161502159854178, + "grad_norm": 0.09159670770168304, + "learning_rate": 5.529299857318081e-06, + "loss": 8.551, + "step": 163430 + }, + { + "epoch": 0.8162001548103573, + "grad_norm": 0.08808984607458115, + "learning_rate": 5.52779794237653e-06, + "loss": 8.5498, + "step": 163440 + }, + { + "epoch": 0.8162500936352968, + "grad_norm": 0.09138135612010956, + "learning_rate": 5.526296027434979e-06, + "loss": 8.5526, + "step": 163450 + }, + { + "epoch": 0.8163000324602362, + "grad_norm": 0.08727425336837769, + "learning_rate": 5.5247941124934296e-06, + "loss": 8.5605, + "step": 163460 + }, + { + "epoch": 0.8163499712851756, + "grad_norm": 0.0928439050912857, + "learning_rate": 5.523292197551879e-06, + "loss": 8.5574, + "step": 163470 + }, + { + "epoch": 0.8163999101101151, + "grad_norm": 0.09845706075429916, + "learning_rate": 5.521790282610328e-06, + "loss": 8.5555, + "step": 163480 + }, + { + "epoch": 0.8164498489350546, + "grad_norm": 0.09574224054813385, + "learning_rate": 5.5202883676687776e-06, + "loss": 8.5523, + "step": 163490 + }, + { + "epoch": 0.816499787759994, + "grad_norm": 0.09411446005105972, + "learning_rate": 5.518786452727228e-06, + "loss": 8.5482, + "step": 163500 + }, + { + "epoch": 0.8165497265849334, + "grad_norm": 0.09284447878599167, + "learning_rate": 5.517284537785677e-06, + "loss": 8.5463, + "step": 163510 + }, + { + "epoch": 0.8165996654098729, + "grad_norm": 0.08727595210075378, + "learning_rate": 5.515782622844126e-06, + "loss": 8.5425, + "step": 163520 + }, + { + "epoch": 0.8166496042348124, + "grad_norm": 0.09593432396650314, + "learning_rate": 5.514280707902576e-06, + "loss": 8.5428, + "step": 163530 + }, + { + "epoch": 0.8166995430597518, + "grad_norm": 0.09068622440099716, + "learning_rate": 5.512778792961025e-06, + "loss": 8.5558, + "step": 163540 + }, + { + "epoch": 0.8167494818846912, + "grad_norm": 0.09026969224214554, + "learning_rate": 5.511276878019475e-06, + "loss": 8.5397, + "step": 163550 + }, + { + "epoch": 0.8167994207096307, + "grad_norm": 0.08629763126373291, + "learning_rate": 5.509774963077925e-06, + "loss": 8.5547, + "step": 163560 + }, + { + "epoch": 0.8168493595345702, + "grad_norm": 0.09150740504264832, + "learning_rate": 5.508273048136374e-06, + "loss": 8.5438, + "step": 163570 + }, + { + "epoch": 0.8168992983595096, + "grad_norm": 0.0950947180390358, + "learning_rate": 5.506771133194824e-06, + "loss": 8.556, + "step": 163580 + }, + { + "epoch": 0.816949237184449, + "grad_norm": 0.0921635702252388, + "learning_rate": 5.5052692182532726e-06, + "loss": 8.5484, + "step": 163590 + }, + { + "epoch": 0.8169991760093885, + "grad_norm": 0.08401766419410706, + "learning_rate": 5.503767303311723e-06, + "loss": 8.5567, + "step": 163600 + }, + { + "epoch": 0.817049114834328, + "grad_norm": 0.09006239473819733, + "learning_rate": 5.502265388370172e-06, + "loss": 8.55, + "step": 163610 + }, + { + "epoch": 0.8170990536592674, + "grad_norm": 0.08677154034376144, + "learning_rate": 5.500763473428621e-06, + "loss": 8.5348, + "step": 163620 + }, + { + "epoch": 0.8171489924842068, + "grad_norm": 0.08495960384607315, + "learning_rate": 5.499261558487072e-06, + "loss": 8.5472, + "step": 163630 + }, + { + "epoch": 0.8171989313091463, + "grad_norm": 0.08838516473770142, + "learning_rate": 5.49775964354552e-06, + "loss": 8.5546, + "step": 163640 + }, + { + "epoch": 0.8172488701340858, + "grad_norm": 0.09089840948581696, + "learning_rate": 5.49625772860397e-06, + "loss": 8.5567, + "step": 163650 + }, + { + "epoch": 0.8172988089590252, + "grad_norm": 0.08638277649879456, + "learning_rate": 5.4947558136624204e-06, + "loss": 8.5409, + "step": 163660 + }, + { + "epoch": 0.8173487477839646, + "grad_norm": 0.09370175004005432, + "learning_rate": 5.493253898720869e-06, + "loss": 8.5309, + "step": 163670 + }, + { + "epoch": 0.8173986866089041, + "grad_norm": 0.08680180460214615, + "learning_rate": 5.491751983779319e-06, + "loss": 8.5588, + "step": 163680 + }, + { + "epoch": 0.8174486254338436, + "grad_norm": 0.09824364632368088, + "learning_rate": 5.490250068837768e-06, + "loss": 8.5466, + "step": 163690 + }, + { + "epoch": 0.817498564258783, + "grad_norm": 0.09609740972518921, + "learning_rate": 5.488748153896218e-06, + "loss": 8.5486, + "step": 163700 + }, + { + "epoch": 0.8175485030837224, + "grad_norm": 0.09737160056829453, + "learning_rate": 5.487246238954668e-06, + "loss": 8.5543, + "step": 163710 + }, + { + "epoch": 0.8175984419086619, + "grad_norm": 0.09023190289735794, + "learning_rate": 5.485744324013116e-06, + "loss": 8.5539, + "step": 163720 + }, + { + "epoch": 0.8176483807336014, + "grad_norm": 0.08571267127990723, + "learning_rate": 5.484242409071567e-06, + "loss": 8.5504, + "step": 163730 + }, + { + "epoch": 0.8176983195585408, + "grad_norm": 0.08893782645463943, + "learning_rate": 5.482740494130016e-06, + "loss": 8.5526, + "step": 163740 + }, + { + "epoch": 0.8177482583834802, + "grad_norm": 0.08946535736322403, + "learning_rate": 5.481238579188465e-06, + "loss": 8.5538, + "step": 163750 + }, + { + "epoch": 0.8177981972084197, + "grad_norm": 0.09130077809095383, + "learning_rate": 5.4797366642469154e-06, + "loss": 8.5434, + "step": 163760 + }, + { + "epoch": 0.8178481360333592, + "grad_norm": 0.08708508312702179, + "learning_rate": 5.478234749305364e-06, + "loss": 8.5406, + "step": 163770 + }, + { + "epoch": 0.8178980748582986, + "grad_norm": 0.09231363236904144, + "learning_rate": 5.476732834363814e-06, + "loss": 8.5286, + "step": 163780 + }, + { + "epoch": 0.817948013683238, + "grad_norm": 0.09548377245664597, + "learning_rate": 5.4752309194222634e-06, + "loss": 8.5447, + "step": 163790 + }, + { + "epoch": 0.8179979525081775, + "grad_norm": 0.08776544034481049, + "learning_rate": 5.473729004480713e-06, + "loss": 8.5347, + "step": 163800 + }, + { + "epoch": 0.818047891333117, + "grad_norm": 0.09309916198253632, + "learning_rate": 5.472227089539163e-06, + "loss": 8.5546, + "step": 163810 + }, + { + "epoch": 0.8180978301580564, + "grad_norm": 0.09029585868120193, + "learning_rate": 5.4707251745976114e-06, + "loss": 8.531, + "step": 163820 + }, + { + "epoch": 0.8181477689829958, + "grad_norm": 0.08917348831892014, + "learning_rate": 5.469223259656062e-06, + "loss": 8.5553, + "step": 163830 + }, + { + "epoch": 0.8181977078079353, + "grad_norm": 0.09531484544277191, + "learning_rate": 5.467721344714511e-06, + "loss": 8.5415, + "step": 163840 + }, + { + "epoch": 0.8182476466328747, + "grad_norm": 0.08932767063379288, + "learning_rate": 5.46621942977296e-06, + "loss": 8.5473, + "step": 163850 + }, + { + "epoch": 0.8182975854578142, + "grad_norm": 0.08596362173557281, + "learning_rate": 5.4647175148314104e-06, + "loss": 8.552, + "step": 163860 + }, + { + "epoch": 0.8183475242827536, + "grad_norm": 0.09185688197612762, + "learning_rate": 5.46321559988986e-06, + "loss": 8.5515, + "step": 163870 + }, + { + "epoch": 0.8183974631076931, + "grad_norm": 0.09344456344842911, + "learning_rate": 5.461713684948309e-06, + "loss": 8.5385, + "step": 163880 + }, + { + "epoch": 0.8184474019326325, + "grad_norm": 0.08724787086248398, + "learning_rate": 5.4602117700067584e-06, + "loss": 8.5502, + "step": 163890 + }, + { + "epoch": 0.818497340757572, + "grad_norm": 0.09026943147182465, + "learning_rate": 5.458709855065208e-06, + "loss": 8.5269, + "step": 163900 + }, + { + "epoch": 0.8185472795825114, + "grad_norm": 0.0960424542427063, + "learning_rate": 5.457207940123658e-06, + "loss": 8.5535, + "step": 163910 + }, + { + "epoch": 0.8185972184074509, + "grad_norm": 0.09598157554864883, + "learning_rate": 5.455706025182107e-06, + "loss": 8.5696, + "step": 163920 + }, + { + "epoch": 0.8186471572323903, + "grad_norm": 0.10426607728004456, + "learning_rate": 5.454204110240557e-06, + "loss": 8.5406, + "step": 163930 + }, + { + "epoch": 0.8186970960573298, + "grad_norm": 0.09276427328586578, + "learning_rate": 5.452702195299007e-06, + "loss": 8.5369, + "step": 163940 + }, + { + "epoch": 0.8187470348822692, + "grad_norm": 0.09880458563566208, + "learning_rate": 5.451200280357456e-06, + "loss": 8.5536, + "step": 163950 + }, + { + "epoch": 0.8187969737072087, + "grad_norm": 0.09303481876850128, + "learning_rate": 5.4496983654159055e-06, + "loss": 8.5303, + "step": 163960 + }, + { + "epoch": 0.8188469125321481, + "grad_norm": 0.09363751113414764, + "learning_rate": 5.448196450474355e-06, + "loss": 8.5507, + "step": 163970 + }, + { + "epoch": 0.8188968513570876, + "grad_norm": 0.09529575705528259, + "learning_rate": 5.446694535532804e-06, + "loss": 8.5431, + "step": 163980 + }, + { + "epoch": 0.818946790182027, + "grad_norm": 0.09039454907178879, + "learning_rate": 5.445192620591254e-06, + "loss": 8.5674, + "step": 163990 + }, + { + "epoch": 0.8189967290069665, + "grad_norm": 0.08885947614908218, + "learning_rate": 5.443690705649704e-06, + "loss": 8.5497, + "step": 164000 + }, + { + "epoch": 0.8190466678319059, + "grad_norm": 0.09177469462156296, + "learning_rate": 5.442188790708153e-06, + "loss": 8.5541, + "step": 164010 + }, + { + "epoch": 0.8190966066568454, + "grad_norm": 0.08675147593021393, + "learning_rate": 5.440686875766602e-06, + "loss": 8.5576, + "step": 164020 + }, + { + "epoch": 0.8191465454817848, + "grad_norm": 0.09036063402891159, + "learning_rate": 5.4391849608250525e-06, + "loss": 8.5723, + "step": 164030 + }, + { + "epoch": 0.8191964843067243, + "grad_norm": 0.08989081531763077, + "learning_rate": 5.437683045883502e-06, + "loss": 8.5417, + "step": 164040 + }, + { + "epoch": 0.8192464231316637, + "grad_norm": 0.0942939966917038, + "learning_rate": 5.436181130941951e-06, + "loss": 8.5311, + "step": 164050 + }, + { + "epoch": 0.8192963619566032, + "grad_norm": 0.0847172811627388, + "learning_rate": 5.4346792160004005e-06, + "loss": 8.5449, + "step": 164060 + }, + { + "epoch": 0.8193463007815426, + "grad_norm": 0.09628713876008987, + "learning_rate": 5.43317730105885e-06, + "loss": 8.5535, + "step": 164070 + }, + { + "epoch": 0.8193962396064821, + "grad_norm": 0.08984513580799103, + "learning_rate": 5.4316753861173e-06, + "loss": 8.5574, + "step": 164080 + }, + { + "epoch": 0.8194461784314215, + "grad_norm": 0.0882403776049614, + "learning_rate": 5.430173471175749e-06, + "loss": 8.5421, + "step": 164090 + }, + { + "epoch": 0.819496117256361, + "grad_norm": 0.09667932987213135, + "learning_rate": 5.428671556234199e-06, + "loss": 8.5527, + "step": 164100 + }, + { + "epoch": 0.8195460560813004, + "grad_norm": 0.08921948820352554, + "learning_rate": 5.427169641292649e-06, + "loss": 8.5558, + "step": 164110 + }, + { + "epoch": 0.8195959949062399, + "grad_norm": 0.08947332203388214, + "learning_rate": 5.425667726351097e-06, + "loss": 8.5428, + "step": 164120 + }, + { + "epoch": 0.8196459337311793, + "grad_norm": 0.09034401178359985, + "learning_rate": 5.4241658114095475e-06, + "loss": 8.5296, + "step": 164130 + }, + { + "epoch": 0.8196958725561188, + "grad_norm": 0.0902322456240654, + "learning_rate": 5.422663896467997e-06, + "loss": 8.5575, + "step": 164140 + }, + { + "epoch": 0.8197458113810582, + "grad_norm": 0.09341645240783691, + "learning_rate": 5.421161981526446e-06, + "loss": 8.5492, + "step": 164150 + }, + { + "epoch": 0.8197957502059976, + "grad_norm": 0.09417828172445297, + "learning_rate": 5.419660066584896e-06, + "loss": 8.5322, + "step": 164160 + }, + { + "epoch": 0.8198456890309371, + "grad_norm": 0.09243685007095337, + "learning_rate": 5.418158151643345e-06, + "loss": 8.5467, + "step": 164170 + }, + { + "epoch": 0.8198956278558766, + "grad_norm": 0.09273440390825272, + "learning_rate": 5.416656236701795e-06, + "loss": 8.5316, + "step": 164180 + }, + { + "epoch": 0.819945566680816, + "grad_norm": 0.09528848528862, + "learning_rate": 5.415154321760245e-06, + "loss": 8.5459, + "step": 164190 + }, + { + "epoch": 0.8199955055057554, + "grad_norm": 0.08874344080686569, + "learning_rate": 5.413652406818694e-06, + "loss": 8.5414, + "step": 164200 + }, + { + "epoch": 0.8200454443306949, + "grad_norm": 0.09056615829467773, + "learning_rate": 5.412150491877144e-06, + "loss": 8.5484, + "step": 164210 + }, + { + "epoch": 0.8200953831556343, + "grad_norm": 0.09031280875205994, + "learning_rate": 5.410648576935592e-06, + "loss": 8.5455, + "step": 164220 + }, + { + "epoch": 0.8201453219805738, + "grad_norm": 0.09306702762842178, + "learning_rate": 5.4091466619940425e-06, + "loss": 8.547, + "step": 164230 + }, + { + "epoch": 0.8201952608055132, + "grad_norm": 0.09164511412382126, + "learning_rate": 5.407644747052493e-06, + "loss": 8.556, + "step": 164240 + }, + { + "epoch": 0.8202451996304527, + "grad_norm": 0.10014204680919647, + "learning_rate": 5.406142832110941e-06, + "loss": 8.546, + "step": 164250 + }, + { + "epoch": 0.8202951384553921, + "grad_norm": 0.09262999892234802, + "learning_rate": 5.404640917169391e-06, + "loss": 8.5525, + "step": 164260 + }, + { + "epoch": 0.8203450772803316, + "grad_norm": 0.09438300132751465, + "learning_rate": 5.403139002227841e-06, + "loss": 8.5283, + "step": 164270 + }, + { + "epoch": 0.820395016105271, + "grad_norm": 0.09390977770090103, + "learning_rate": 5.40163708728629e-06, + "loss": 8.5388, + "step": 164280 + }, + { + "epoch": 0.8204449549302105, + "grad_norm": 0.09154833853244781, + "learning_rate": 5.40013517234474e-06, + "loss": 8.5558, + "step": 164290 + }, + { + "epoch": 0.82049489375515, + "grad_norm": 0.09532206505537033, + "learning_rate": 5.398633257403189e-06, + "loss": 8.5325, + "step": 164300 + }, + { + "epoch": 0.8205448325800894, + "grad_norm": 0.08532291650772095, + "learning_rate": 5.397131342461639e-06, + "loss": 8.5563, + "step": 164310 + }, + { + "epoch": 0.8205947714050288, + "grad_norm": 0.10053151100873947, + "learning_rate": 5.395629427520088e-06, + "loss": 8.5493, + "step": 164320 + }, + { + "epoch": 0.8206447102299683, + "grad_norm": 0.08957508206367493, + "learning_rate": 5.3941275125785375e-06, + "loss": 8.5511, + "step": 164330 + }, + { + "epoch": 0.8206946490549077, + "grad_norm": 0.08716392517089844, + "learning_rate": 5.392625597636988e-06, + "loss": 8.5514, + "step": 164340 + }, + { + "epoch": 0.8207445878798472, + "grad_norm": 0.09494642913341522, + "learning_rate": 5.391123682695437e-06, + "loss": 8.5466, + "step": 164350 + }, + { + "epoch": 0.8207945267047866, + "grad_norm": 0.09567490220069885, + "learning_rate": 5.389621767753886e-06, + "loss": 8.5463, + "step": 164360 + }, + { + "epoch": 0.8208444655297261, + "grad_norm": 0.09385741502046585, + "learning_rate": 5.388119852812336e-06, + "loss": 8.5292, + "step": 164370 + }, + { + "epoch": 0.8208944043546655, + "grad_norm": 0.09710198640823364, + "learning_rate": 5.386617937870785e-06, + "loss": 8.5374, + "step": 164380 + }, + { + "epoch": 0.820944343179605, + "grad_norm": 0.09070218354463577, + "learning_rate": 5.385116022929235e-06, + "loss": 8.5362, + "step": 164390 + }, + { + "epoch": 0.8209942820045444, + "grad_norm": 0.09835637360811234, + "learning_rate": 5.3836141079876845e-06, + "loss": 8.5439, + "step": 164400 + }, + { + "epoch": 0.8210442208294839, + "grad_norm": 0.09094212204217911, + "learning_rate": 5.382112193046134e-06, + "loss": 8.5619, + "step": 164410 + }, + { + "epoch": 0.8210941596544233, + "grad_norm": 0.09400885552167892, + "learning_rate": 5.380610278104583e-06, + "loss": 8.5419, + "step": 164420 + }, + { + "epoch": 0.8211440984793628, + "grad_norm": 0.09460953623056412, + "learning_rate": 5.379108363163033e-06, + "loss": 8.5401, + "step": 164430 + }, + { + "epoch": 0.8211940373043022, + "grad_norm": 0.08946454524993896, + "learning_rate": 5.377606448221483e-06, + "loss": 8.5356, + "step": 164440 + }, + { + "epoch": 0.8212439761292417, + "grad_norm": 0.08983763307332993, + "learning_rate": 5.376104533279932e-06, + "loss": 8.5455, + "step": 164450 + }, + { + "epoch": 0.8212939149541811, + "grad_norm": 0.08810096234083176, + "learning_rate": 5.374602618338381e-06, + "loss": 8.5647, + "step": 164460 + }, + { + "epoch": 0.8213438537791206, + "grad_norm": 0.08846630901098251, + "learning_rate": 5.373100703396831e-06, + "loss": 8.5521, + "step": 164470 + }, + { + "epoch": 0.82139379260406, + "grad_norm": 0.08851570636034012, + "learning_rate": 5.371598788455281e-06, + "loss": 8.5412, + "step": 164480 + }, + { + "epoch": 0.8214437314289995, + "grad_norm": 0.09497125446796417, + "learning_rate": 5.37009687351373e-06, + "loss": 8.5321, + "step": 164490 + }, + { + "epoch": 0.8214936702539389, + "grad_norm": 0.09288336336612701, + "learning_rate": 5.3685949585721795e-06, + "loss": 8.5472, + "step": 164500 + }, + { + "epoch": 0.8215436090788784, + "grad_norm": 0.0919954776763916, + "learning_rate": 5.36709304363063e-06, + "loss": 8.5613, + "step": 164510 + }, + { + "epoch": 0.8215935479038178, + "grad_norm": 0.09269890189170837, + "learning_rate": 5.365591128689078e-06, + "loss": 8.546, + "step": 164520 + }, + { + "epoch": 0.8216434867287573, + "grad_norm": 0.08948935568332672, + "learning_rate": 5.364089213747528e-06, + "loss": 8.5627, + "step": 164530 + }, + { + "epoch": 0.8216934255536967, + "grad_norm": 0.09390202164649963, + "learning_rate": 5.362587298805978e-06, + "loss": 8.5363, + "step": 164540 + }, + { + "epoch": 0.8217433643786362, + "grad_norm": 0.09372469782829285, + "learning_rate": 5.361085383864427e-06, + "loss": 8.5518, + "step": 164550 + }, + { + "epoch": 0.8217933032035756, + "grad_norm": 0.0866970643401146, + "learning_rate": 5.359583468922877e-06, + "loss": 8.5598, + "step": 164560 + }, + { + "epoch": 0.8218432420285151, + "grad_norm": 0.08886361867189407, + "learning_rate": 5.358081553981326e-06, + "loss": 8.5504, + "step": 164570 + }, + { + "epoch": 0.8218931808534545, + "grad_norm": 0.09430024772882462, + "learning_rate": 5.356579639039776e-06, + "loss": 8.5434, + "step": 164580 + }, + { + "epoch": 0.821943119678394, + "grad_norm": 0.09142236411571503, + "learning_rate": 5.355077724098226e-06, + "loss": 8.5417, + "step": 164590 + }, + { + "epoch": 0.8219930585033334, + "grad_norm": 0.09642058610916138, + "learning_rate": 5.3535758091566745e-06, + "loss": 8.5808, + "step": 164600 + }, + { + "epoch": 0.8220429973282729, + "grad_norm": 0.08818865567445755, + "learning_rate": 5.352073894215125e-06, + "loss": 8.5353, + "step": 164610 + }, + { + "epoch": 0.8220929361532123, + "grad_norm": 0.09273871034383774, + "learning_rate": 5.350571979273573e-06, + "loss": 8.5444, + "step": 164620 + }, + { + "epoch": 0.8221428749781517, + "grad_norm": 0.09132785350084305, + "learning_rate": 5.349070064332023e-06, + "loss": 8.5611, + "step": 164630 + }, + { + "epoch": 0.8221928138030912, + "grad_norm": 0.09053760766983032, + "learning_rate": 5.3475681493904735e-06, + "loss": 8.5465, + "step": 164640 + }, + { + "epoch": 0.8222427526280307, + "grad_norm": 0.1000586673617363, + "learning_rate": 5.346066234448922e-06, + "loss": 8.5449, + "step": 164650 + }, + { + "epoch": 0.8222926914529701, + "grad_norm": 0.09241247922182083, + "learning_rate": 5.344564319507372e-06, + "loss": 8.5303, + "step": 164660 + }, + { + "epoch": 0.8223426302779095, + "grad_norm": 0.09306442737579346, + "learning_rate": 5.3430624045658215e-06, + "loss": 8.5248, + "step": 164670 + }, + { + "epoch": 0.822392569102849, + "grad_norm": 0.09631567448377609, + "learning_rate": 5.341560489624271e-06, + "loss": 8.5448, + "step": 164680 + }, + { + "epoch": 0.8224425079277885, + "grad_norm": 0.09375005215406418, + "learning_rate": 5.340058574682721e-06, + "loss": 8.5468, + "step": 164690 + }, + { + "epoch": 0.8224924467527279, + "grad_norm": 0.08916940540075302, + "learning_rate": 5.3385566597411695e-06, + "loss": 8.5525, + "step": 164700 + }, + { + "epoch": 0.8225423855776673, + "grad_norm": 0.09325413405895233, + "learning_rate": 5.33705474479962e-06, + "loss": 8.5474, + "step": 164710 + }, + { + "epoch": 0.8225923244026068, + "grad_norm": 0.08860702812671661, + "learning_rate": 5.335552829858069e-06, + "loss": 8.5582, + "step": 164720 + }, + { + "epoch": 0.8226422632275463, + "grad_norm": 0.09240319579839706, + "learning_rate": 5.334050914916518e-06, + "loss": 8.5351, + "step": 164730 + }, + { + "epoch": 0.8226922020524857, + "grad_norm": 0.09109298884868622, + "learning_rate": 5.3325489999749685e-06, + "loss": 8.5478, + "step": 164740 + }, + { + "epoch": 0.8227421408774251, + "grad_norm": 0.09379500150680542, + "learning_rate": 5.331047085033418e-06, + "loss": 8.5532, + "step": 164750 + }, + { + "epoch": 0.8227920797023646, + "grad_norm": 0.08987203985452652, + "learning_rate": 5.329545170091867e-06, + "loss": 8.554, + "step": 164760 + }, + { + "epoch": 0.8228420185273041, + "grad_norm": 0.08933199197053909, + "learning_rate": 5.3280432551503165e-06, + "loss": 8.5402, + "step": 164770 + }, + { + "epoch": 0.8228919573522435, + "grad_norm": 0.08755306154489517, + "learning_rate": 5.326541340208766e-06, + "loss": 8.5478, + "step": 164780 + }, + { + "epoch": 0.8229418961771829, + "grad_norm": 0.0940420925617218, + "learning_rate": 5.325039425267216e-06, + "loss": 8.5383, + "step": 164790 + }, + { + "epoch": 0.8229918350021224, + "grad_norm": 0.08608761429786682, + "learning_rate": 5.323537510325665e-06, + "loss": 8.5539, + "step": 164800 + }, + { + "epoch": 0.8230417738270619, + "grad_norm": 0.0908665880560875, + "learning_rate": 5.322035595384115e-06, + "loss": 8.5249, + "step": 164810 + }, + { + "epoch": 0.8230917126520013, + "grad_norm": 0.08952327817678452, + "learning_rate": 5.320533680442564e-06, + "loss": 8.5568, + "step": 164820 + }, + { + "epoch": 0.8231416514769407, + "grad_norm": 0.09053060412406921, + "learning_rate": 5.319031765501014e-06, + "loss": 8.5554, + "step": 164830 + }, + { + "epoch": 0.8231915903018802, + "grad_norm": 0.08716212213039398, + "learning_rate": 5.3175298505594636e-06, + "loss": 8.5512, + "step": 164840 + }, + { + "epoch": 0.8232415291268197, + "grad_norm": 0.08893406391143799, + "learning_rate": 5.316027935617913e-06, + "loss": 8.557, + "step": 164850 + }, + { + "epoch": 0.8232914679517591, + "grad_norm": 0.09567388892173767, + "learning_rate": 5.314526020676362e-06, + "loss": 8.5658, + "step": 164860 + }, + { + "epoch": 0.8233414067766985, + "grad_norm": 0.09032758325338364, + "learning_rate": 5.3130241057348115e-06, + "loss": 8.5635, + "step": 164870 + }, + { + "epoch": 0.823391345601638, + "grad_norm": 0.0944572165608406, + "learning_rate": 5.311522190793262e-06, + "loss": 8.5377, + "step": 164880 + }, + { + "epoch": 0.8234412844265775, + "grad_norm": 0.08695536106824875, + "learning_rate": 5.310020275851711e-06, + "loss": 8.5491, + "step": 164890 + }, + { + "epoch": 0.8234912232515169, + "grad_norm": 0.0900052860379219, + "learning_rate": 5.30851836091016e-06, + "loss": 8.5341, + "step": 164900 + }, + { + "epoch": 0.8235411620764563, + "grad_norm": 0.09355796128511429, + "learning_rate": 5.3070164459686106e-06, + "loss": 8.5427, + "step": 164910 + }, + { + "epoch": 0.8235911009013958, + "grad_norm": 0.09041264653205872, + "learning_rate": 5.305514531027059e-06, + "loss": 8.5535, + "step": 164920 + }, + { + "epoch": 0.8236410397263353, + "grad_norm": 0.09315498918294907, + "learning_rate": 5.304012616085509e-06, + "loss": 8.5472, + "step": 164930 + }, + { + "epoch": 0.8236909785512747, + "grad_norm": 0.08934438228607178, + "learning_rate": 5.3025107011439586e-06, + "loss": 8.5446, + "step": 164940 + }, + { + "epoch": 0.8237409173762141, + "grad_norm": 0.09067961573600769, + "learning_rate": 5.301008786202408e-06, + "loss": 8.5461, + "step": 164950 + }, + { + "epoch": 0.8237908562011536, + "grad_norm": 0.08916930854320526, + "learning_rate": 5.299506871260858e-06, + "loss": 8.5513, + "step": 164960 + }, + { + "epoch": 0.8238407950260931, + "grad_norm": 0.09387677907943726, + "learning_rate": 5.298004956319307e-06, + "loss": 8.5447, + "step": 164970 + }, + { + "epoch": 0.8238907338510325, + "grad_norm": 0.08811572939157486, + "learning_rate": 5.296503041377757e-06, + "loss": 8.5657, + "step": 164980 + }, + { + "epoch": 0.8239406726759719, + "grad_norm": 0.09204726666212082, + "learning_rate": 5.295001126436207e-06, + "loss": 8.5476, + "step": 164990 + }, + { + "epoch": 0.8239906115009114, + "grad_norm": 0.0907144770026207, + "learning_rate": 5.293499211494655e-06, + "loss": 8.5335, + "step": 165000 + }, + { + "epoch": 0.8240405503258509, + "grad_norm": 0.09380396455526352, + "learning_rate": 5.2919972965531056e-06, + "loss": 8.5313, + "step": 165010 + }, + { + "epoch": 0.8240904891507903, + "grad_norm": 0.09004945307970047, + "learning_rate": 5.290495381611555e-06, + "loss": 8.5442, + "step": 165020 + }, + { + "epoch": 0.8241404279757297, + "grad_norm": 0.08934416621923447, + "learning_rate": 5.288993466670004e-06, + "loss": 8.5422, + "step": 165030 + }, + { + "epoch": 0.8241903668006691, + "grad_norm": 0.08731923997402191, + "learning_rate": 5.287491551728454e-06, + "loss": 8.5354, + "step": 165040 + }, + { + "epoch": 0.8242403056256087, + "grad_norm": 0.09137827903032303, + "learning_rate": 5.285989636786903e-06, + "loss": 8.5595, + "step": 165050 + }, + { + "epoch": 0.8242902444505481, + "grad_norm": 0.09268289804458618, + "learning_rate": 5.284487721845353e-06, + "loss": 8.559, + "step": 165060 + }, + { + "epoch": 0.8243401832754875, + "grad_norm": 0.0933336690068245, + "learning_rate": 5.282985806903803e-06, + "loss": 8.535, + "step": 165070 + }, + { + "epoch": 0.824390122100427, + "grad_norm": 0.09035741537809372, + "learning_rate": 5.281483891962252e-06, + "loss": 8.546, + "step": 165080 + }, + { + "epoch": 0.8244400609253665, + "grad_norm": 0.08535844087600708, + "learning_rate": 5.279981977020702e-06, + "loss": 8.5388, + "step": 165090 + }, + { + "epoch": 0.8244899997503059, + "grad_norm": 0.09241851419210434, + "learning_rate": 5.27848006207915e-06, + "loss": 8.5666, + "step": 165100 + }, + { + "epoch": 0.8245399385752453, + "grad_norm": 0.09012720733880997, + "learning_rate": 5.276978147137601e-06, + "loss": 8.5522, + "step": 165110 + }, + { + "epoch": 0.8245898774001847, + "grad_norm": 0.09050553292036057, + "learning_rate": 5.275476232196051e-06, + "loss": 8.5381, + "step": 165120 + }, + { + "epoch": 0.8246398162251243, + "grad_norm": 0.09144661575555801, + "learning_rate": 5.273974317254499e-06, + "loss": 8.5375, + "step": 165130 + }, + { + "epoch": 0.8246897550500637, + "grad_norm": 0.08838358521461487, + "learning_rate": 5.272472402312949e-06, + "loss": 8.5498, + "step": 165140 + }, + { + "epoch": 0.8247396938750031, + "grad_norm": 0.09238158166408539, + "learning_rate": 5.270970487371399e-06, + "loss": 8.538, + "step": 165150 + }, + { + "epoch": 0.8247896326999425, + "grad_norm": 0.09907042235136032, + "learning_rate": 5.269468572429848e-06, + "loss": 8.5508, + "step": 165160 + }, + { + "epoch": 0.824839571524882, + "grad_norm": 0.08940906822681427, + "learning_rate": 5.267966657488298e-06, + "loss": 8.5438, + "step": 165170 + }, + { + "epoch": 0.8248895103498215, + "grad_norm": 0.09530402719974518, + "learning_rate": 5.266464742546747e-06, + "loss": 8.5375, + "step": 165180 + }, + { + "epoch": 0.8249394491747609, + "grad_norm": 0.0869201272726059, + "learning_rate": 5.264962827605197e-06, + "loss": 8.5371, + "step": 165190 + }, + { + "epoch": 0.8249893879997003, + "grad_norm": 0.08808686584234238, + "learning_rate": 5.263460912663646e-06, + "loss": 8.5476, + "step": 165200 + }, + { + "epoch": 0.8250393268246398, + "grad_norm": 0.09098785370588303, + "learning_rate": 5.261958997722096e-06, + "loss": 8.5324, + "step": 165210 + }, + { + "epoch": 0.8250892656495793, + "grad_norm": 0.09204449504613876, + "learning_rate": 5.260457082780546e-06, + "loss": 8.5425, + "step": 165220 + }, + { + "epoch": 0.8251392044745187, + "grad_norm": 0.09334173798561096, + "learning_rate": 5.258955167838995e-06, + "loss": 8.5315, + "step": 165230 + }, + { + "epoch": 0.8251891432994581, + "grad_norm": 0.08749334514141083, + "learning_rate": 5.2574532528974444e-06, + "loss": 8.5364, + "step": 165240 + }, + { + "epoch": 0.8252390821243976, + "grad_norm": 0.09419584274291992, + "learning_rate": 5.255951337955894e-06, + "loss": 8.5426, + "step": 165250 + }, + { + "epoch": 0.8252890209493371, + "grad_norm": 0.09130842983722687, + "learning_rate": 5.254449423014343e-06, + "loss": 8.5287, + "step": 165260 + }, + { + "epoch": 0.8253389597742765, + "grad_norm": 0.0886424109339714, + "learning_rate": 5.252947508072793e-06, + "loss": 8.5396, + "step": 165270 + }, + { + "epoch": 0.8253888985992159, + "grad_norm": 0.09385290741920471, + "learning_rate": 5.251445593131243e-06, + "loss": 8.5361, + "step": 165280 + }, + { + "epoch": 0.8254388374241554, + "grad_norm": 0.0890863835811615, + "learning_rate": 5.249943678189692e-06, + "loss": 8.5467, + "step": 165290 + }, + { + "epoch": 0.8254887762490949, + "grad_norm": 0.08851329982280731, + "learning_rate": 5.248441763248141e-06, + "loss": 8.5532, + "step": 165300 + }, + { + "epoch": 0.8255387150740343, + "grad_norm": 0.09374735504388809, + "learning_rate": 5.2469398483065914e-06, + "loss": 8.5461, + "step": 165310 + }, + { + "epoch": 0.8255886538989737, + "grad_norm": 0.08806613087654114, + "learning_rate": 5.245437933365041e-06, + "loss": 8.5575, + "step": 165320 + }, + { + "epoch": 0.8256385927239132, + "grad_norm": 0.09200981259346008, + "learning_rate": 5.24393601842349e-06, + "loss": 8.5345, + "step": 165330 + }, + { + "epoch": 0.8256885315488527, + "grad_norm": 0.08689451217651367, + "learning_rate": 5.2424341034819394e-06, + "loss": 8.5552, + "step": 165340 + }, + { + "epoch": 0.8257384703737921, + "grad_norm": 0.08550180494785309, + "learning_rate": 5.240932188540389e-06, + "loss": 8.5494, + "step": 165350 + }, + { + "epoch": 0.8257884091987315, + "grad_norm": 0.09254343062639236, + "learning_rate": 5.239430273598839e-06, + "loss": 8.5475, + "step": 165360 + }, + { + "epoch": 0.825838348023671, + "grad_norm": 0.08904233574867249, + "learning_rate": 5.237928358657288e-06, + "loss": 8.5499, + "step": 165370 + }, + { + "epoch": 0.8258882868486105, + "grad_norm": 0.09471232444047928, + "learning_rate": 5.236426443715738e-06, + "loss": 8.5489, + "step": 165380 + }, + { + "epoch": 0.8259382256735499, + "grad_norm": 0.09248729050159454, + "learning_rate": 5.234924528774188e-06, + "loss": 8.5425, + "step": 165390 + }, + { + "epoch": 0.8259881644984893, + "grad_norm": 0.09157528728246689, + "learning_rate": 5.233422613832636e-06, + "loss": 8.5438, + "step": 165400 + }, + { + "epoch": 0.8260381033234288, + "grad_norm": 0.09400694817304611, + "learning_rate": 5.2319206988910864e-06, + "loss": 8.5369, + "step": 165410 + }, + { + "epoch": 0.8260880421483683, + "grad_norm": 0.10043618083000183, + "learning_rate": 5.230418783949536e-06, + "loss": 8.5332, + "step": 165420 + }, + { + "epoch": 0.8261379809733077, + "grad_norm": 0.09534844756126404, + "learning_rate": 5.228916869007985e-06, + "loss": 8.5501, + "step": 165430 + }, + { + "epoch": 0.8261879197982471, + "grad_norm": 0.0883263498544693, + "learning_rate": 5.227414954066435e-06, + "loss": 8.5382, + "step": 165440 + }, + { + "epoch": 0.8262378586231865, + "grad_norm": 0.08956603705883026, + "learning_rate": 5.225913039124884e-06, + "loss": 8.5308, + "step": 165450 + }, + { + "epoch": 0.8262877974481261, + "grad_norm": 0.09134762734174728, + "learning_rate": 5.224411124183334e-06, + "loss": 8.5469, + "step": 165460 + }, + { + "epoch": 0.8263377362730655, + "grad_norm": 0.09247227758169174, + "learning_rate": 5.222909209241784e-06, + "loss": 8.5423, + "step": 165470 + }, + { + "epoch": 0.8263876750980049, + "grad_norm": 0.09392131119966507, + "learning_rate": 5.221407294300233e-06, + "loss": 8.5436, + "step": 165480 + }, + { + "epoch": 0.8264376139229443, + "grad_norm": 0.0913129672408104, + "learning_rate": 5.219905379358683e-06, + "loss": 8.5342, + "step": 165490 + }, + { + "epoch": 0.8264875527478839, + "grad_norm": 0.09089593589305878, + "learning_rate": 5.218403464417131e-06, + "loss": 8.5391, + "step": 165500 + }, + { + "epoch": 0.8265374915728233, + "grad_norm": 0.08943469077348709, + "learning_rate": 5.2169015494755815e-06, + "loss": 8.537, + "step": 165510 + }, + { + "epoch": 0.8265874303977627, + "grad_norm": 0.09096402674913406, + "learning_rate": 5.215399634534032e-06, + "loss": 8.5331, + "step": 165520 + }, + { + "epoch": 0.8266373692227021, + "grad_norm": 0.09284608066082001, + "learning_rate": 5.21389771959248e-06, + "loss": 8.5359, + "step": 165530 + }, + { + "epoch": 0.8266873080476417, + "grad_norm": 0.08914735913276672, + "learning_rate": 5.21239580465093e-06, + "loss": 8.5522, + "step": 165540 + }, + { + "epoch": 0.8267372468725811, + "grad_norm": 0.10136851668357849, + "learning_rate": 5.21089388970938e-06, + "loss": 8.5232, + "step": 165550 + }, + { + "epoch": 0.8267871856975205, + "grad_norm": 0.09322341531515121, + "learning_rate": 5.209391974767829e-06, + "loss": 8.5354, + "step": 165560 + }, + { + "epoch": 0.8268371245224599, + "grad_norm": 0.09235122054815292, + "learning_rate": 5.207890059826279e-06, + "loss": 8.5405, + "step": 165570 + }, + { + "epoch": 0.8268870633473995, + "grad_norm": 0.09125807136297226, + "learning_rate": 5.206388144884728e-06, + "loss": 8.5406, + "step": 165580 + }, + { + "epoch": 0.8269370021723389, + "grad_norm": 0.09730856120586395, + "learning_rate": 5.204886229943178e-06, + "loss": 8.5739, + "step": 165590 + }, + { + "epoch": 0.8269869409972783, + "grad_norm": 0.09331224858760834, + "learning_rate": 5.203384315001627e-06, + "loss": 8.5544, + "step": 165600 + }, + { + "epoch": 0.8270368798222177, + "grad_norm": 0.09145566821098328, + "learning_rate": 5.2018824000600765e-06, + "loss": 8.5305, + "step": 165610 + }, + { + "epoch": 0.8270868186471573, + "grad_norm": 0.09408487379550934, + "learning_rate": 5.200380485118527e-06, + "loss": 8.536, + "step": 165620 + }, + { + "epoch": 0.8271367574720967, + "grad_norm": 0.09319987148046494, + "learning_rate": 5.198878570176976e-06, + "loss": 8.5615, + "step": 165630 + }, + { + "epoch": 0.8271866962970361, + "grad_norm": 0.08938295394182205, + "learning_rate": 5.197376655235425e-06, + "loss": 8.5486, + "step": 165640 + }, + { + "epoch": 0.8272366351219755, + "grad_norm": 0.09183337539434433, + "learning_rate": 5.195874740293875e-06, + "loss": 8.5406, + "step": 165650 + }, + { + "epoch": 0.8272865739469151, + "grad_norm": 0.09237658977508545, + "learning_rate": 5.194372825352324e-06, + "loss": 8.5519, + "step": 165660 + }, + { + "epoch": 0.8273365127718545, + "grad_norm": 0.08938465267419815, + "learning_rate": 5.192870910410774e-06, + "loss": 8.5421, + "step": 165670 + }, + { + "epoch": 0.8273864515967939, + "grad_norm": 0.093377985060215, + "learning_rate": 5.1913689954692235e-06, + "loss": 8.5533, + "step": 165680 + }, + { + "epoch": 0.8274363904217333, + "grad_norm": 0.09096071869134903, + "learning_rate": 5.189867080527673e-06, + "loss": 8.5406, + "step": 165690 + }, + { + "epoch": 0.8274863292466729, + "grad_norm": 0.08967030048370361, + "learning_rate": 5.188365165586122e-06, + "loss": 8.5387, + "step": 165700 + }, + { + "epoch": 0.8275362680716123, + "grad_norm": 0.09120657294988632, + "learning_rate": 5.186863250644572e-06, + "loss": 8.5298, + "step": 165710 + }, + { + "epoch": 0.8275862068965517, + "grad_norm": 0.08661961555480957, + "learning_rate": 5.185361335703022e-06, + "loss": 8.538, + "step": 165720 + }, + { + "epoch": 0.8276361457214911, + "grad_norm": 0.0883423313498497, + "learning_rate": 5.183859420761471e-06, + "loss": 8.5444, + "step": 165730 + }, + { + "epoch": 0.8276860845464307, + "grad_norm": 0.09612096101045609, + "learning_rate": 5.18235750581992e-06, + "loss": 8.5432, + "step": 165740 + }, + { + "epoch": 0.8277360233713701, + "grad_norm": 0.08954691141843796, + "learning_rate": 5.18085559087837e-06, + "loss": 8.5485, + "step": 165750 + }, + { + "epoch": 0.8277859621963095, + "grad_norm": 0.09344494342803955, + "learning_rate": 5.17935367593682e-06, + "loss": 8.5501, + "step": 165760 + }, + { + "epoch": 0.8278359010212489, + "grad_norm": 0.09104925394058228, + "learning_rate": 5.177851760995269e-06, + "loss": 8.5515, + "step": 165770 + }, + { + "epoch": 0.8278858398461885, + "grad_norm": 0.09306161850690842, + "learning_rate": 5.1763498460537185e-06, + "loss": 8.5442, + "step": 165780 + }, + { + "epoch": 0.8279357786711279, + "grad_norm": 0.08997157216072083, + "learning_rate": 5.174847931112169e-06, + "loss": 8.5439, + "step": 165790 + }, + { + "epoch": 0.8279857174960673, + "grad_norm": 0.08354370296001434, + "learning_rate": 5.173346016170617e-06, + "loss": 8.5438, + "step": 165800 + }, + { + "epoch": 0.8280356563210067, + "grad_norm": 0.09156732261180878, + "learning_rate": 5.171844101229067e-06, + "loss": 8.5204, + "step": 165810 + }, + { + "epoch": 0.8280855951459463, + "grad_norm": 0.09536905586719513, + "learning_rate": 5.170342186287517e-06, + "loss": 8.5429, + "step": 165820 + }, + { + "epoch": 0.8281355339708857, + "grad_norm": 0.09520316123962402, + "learning_rate": 5.168840271345966e-06, + "loss": 8.5403, + "step": 165830 + }, + { + "epoch": 0.8281854727958251, + "grad_norm": 0.08938983827829361, + "learning_rate": 5.167338356404416e-06, + "loss": 8.5504, + "step": 165840 + }, + { + "epoch": 0.8282354116207645, + "grad_norm": 0.09314066916704178, + "learning_rate": 5.165836441462865e-06, + "loss": 8.5305, + "step": 165850 + }, + { + "epoch": 0.8282853504457041, + "grad_norm": 0.09131036698818207, + "learning_rate": 5.164334526521315e-06, + "loss": 8.5455, + "step": 165860 + }, + { + "epoch": 0.8283352892706435, + "grad_norm": 0.09065386652946472, + "learning_rate": 5.162832611579765e-06, + "loss": 8.5364, + "step": 165870 + }, + { + "epoch": 0.8283852280955829, + "grad_norm": 0.08912426233291626, + "learning_rate": 5.1613306966382135e-06, + "loss": 8.5208, + "step": 165880 + }, + { + "epoch": 0.8284351669205223, + "grad_norm": 0.0999988466501236, + "learning_rate": 5.159828781696664e-06, + "loss": 8.5334, + "step": 165890 + }, + { + "epoch": 0.8284851057454619, + "grad_norm": 0.09201761335134506, + "learning_rate": 5.158326866755112e-06, + "loss": 8.5401, + "step": 165900 + }, + { + "epoch": 0.8285350445704013, + "grad_norm": 0.08930778503417969, + "learning_rate": 5.156824951813562e-06, + "loss": 8.5424, + "step": 165910 + }, + { + "epoch": 0.8285849833953407, + "grad_norm": 0.08692941069602966, + "learning_rate": 5.1553230368720125e-06, + "loss": 8.554, + "step": 165920 + }, + { + "epoch": 0.8286349222202801, + "grad_norm": 0.09199637919664383, + "learning_rate": 5.153821121930461e-06, + "loss": 8.5407, + "step": 165930 + }, + { + "epoch": 0.8286848610452197, + "grad_norm": 0.09677516669034958, + "learning_rate": 5.152319206988911e-06, + "loss": 8.5306, + "step": 165940 + }, + { + "epoch": 0.8287347998701591, + "grad_norm": 0.0840715691447258, + "learning_rate": 5.1508172920473605e-06, + "loss": 8.5462, + "step": 165950 + }, + { + "epoch": 0.8287847386950985, + "grad_norm": 0.0945180281996727, + "learning_rate": 5.14931537710581e-06, + "loss": 8.5233, + "step": 165960 + }, + { + "epoch": 0.8288346775200379, + "grad_norm": 0.09411770850419998, + "learning_rate": 5.14781346216426e-06, + "loss": 8.5413, + "step": 165970 + }, + { + "epoch": 0.8288846163449775, + "grad_norm": 0.09039691835641861, + "learning_rate": 5.1463115472227085e-06, + "loss": 8.5273, + "step": 165980 + }, + { + "epoch": 0.8289345551699169, + "grad_norm": 0.09304576367139816, + "learning_rate": 5.144809632281159e-06, + "loss": 8.5407, + "step": 165990 + }, + { + "epoch": 0.8289844939948563, + "grad_norm": 0.09121018648147583, + "learning_rate": 5.143307717339608e-06, + "loss": 8.5493, + "step": 166000 + }, + { + "epoch": 0.8290344328197957, + "grad_norm": 0.0907362774014473, + "learning_rate": 5.141805802398057e-06, + "loss": 8.5375, + "step": 166010 + }, + { + "epoch": 0.8290843716447353, + "grad_norm": 0.0894848182797432, + "learning_rate": 5.1403038874565075e-06, + "loss": 8.5168, + "step": 166020 + }, + { + "epoch": 0.8291343104696747, + "grad_norm": 0.09260275959968567, + "learning_rate": 5.138801972514957e-06, + "loss": 8.5411, + "step": 166030 + }, + { + "epoch": 0.8291842492946141, + "grad_norm": 0.08942524343729019, + "learning_rate": 5.137300057573406e-06, + "loss": 8.5425, + "step": 166040 + }, + { + "epoch": 0.8292341881195535, + "grad_norm": 0.08970021456480026, + "learning_rate": 5.1357981426318555e-06, + "loss": 8.5597, + "step": 166050 + }, + { + "epoch": 0.829284126944493, + "grad_norm": 0.09671708196401596, + "learning_rate": 5.134296227690305e-06, + "loss": 8.5416, + "step": 166060 + }, + { + "epoch": 0.8293340657694325, + "grad_norm": 0.09718456864356995, + "learning_rate": 5.132794312748755e-06, + "loss": 8.5404, + "step": 166070 + }, + { + "epoch": 0.8293840045943719, + "grad_norm": 0.09095334261655807, + "learning_rate": 5.131292397807204e-06, + "loss": 8.5516, + "step": 166080 + }, + { + "epoch": 0.8294339434193113, + "grad_norm": 0.09243844449520111, + "learning_rate": 5.129790482865654e-06, + "loss": 8.5487, + "step": 166090 + }, + { + "epoch": 0.8294838822442508, + "grad_norm": 0.09933464974164963, + "learning_rate": 5.128288567924104e-06, + "loss": 8.5089, + "step": 166100 + }, + { + "epoch": 0.8295338210691903, + "grad_norm": 0.09080641716718674, + "learning_rate": 5.126786652982553e-06, + "loss": 8.5346, + "step": 166110 + }, + { + "epoch": 0.8295837598941297, + "grad_norm": 0.09354326874017715, + "learning_rate": 5.1252847380410025e-06, + "loss": 8.5647, + "step": 166120 + }, + { + "epoch": 0.8296336987190691, + "grad_norm": 0.08734377473592758, + "learning_rate": 5.123782823099452e-06, + "loss": 8.544, + "step": 166130 + }, + { + "epoch": 0.8296836375440085, + "grad_norm": 0.09000393003225327, + "learning_rate": 5.122280908157901e-06, + "loss": 8.532, + "step": 166140 + }, + { + "epoch": 0.8297335763689481, + "grad_norm": 0.08724624663591385, + "learning_rate": 5.120778993216351e-06, + "loss": 8.5537, + "step": 166150 + }, + { + "epoch": 0.8297835151938875, + "grad_norm": 0.09009451419115067, + "learning_rate": 5.119277078274801e-06, + "loss": 8.5666, + "step": 166160 + }, + { + "epoch": 0.8298334540188269, + "grad_norm": 0.08647624403238297, + "learning_rate": 5.11777516333325e-06, + "loss": 8.5261, + "step": 166170 + }, + { + "epoch": 0.8298833928437663, + "grad_norm": 0.09104521572589874, + "learning_rate": 5.116273248391699e-06, + "loss": 8.5332, + "step": 166180 + }, + { + "epoch": 0.8299333316687059, + "grad_norm": 0.0890510231256485, + "learning_rate": 5.1147713334501495e-06, + "loss": 8.5533, + "step": 166190 + }, + { + "epoch": 0.8299832704936453, + "grad_norm": 0.10203186422586441, + "learning_rate": 5.113269418508599e-06, + "loss": 8.5449, + "step": 166200 + }, + { + "epoch": 0.8300332093185847, + "grad_norm": 0.08849181234836578, + "learning_rate": 5.111767503567048e-06, + "loss": 8.5472, + "step": 166210 + }, + { + "epoch": 0.8300831481435241, + "grad_norm": 0.08866935223340988, + "learning_rate": 5.1102655886254975e-06, + "loss": 8.5432, + "step": 166220 + }, + { + "epoch": 0.8301330869684637, + "grad_norm": 0.08821461349725723, + "learning_rate": 5.108763673683947e-06, + "loss": 8.5514, + "step": 166230 + }, + { + "epoch": 0.8301830257934031, + "grad_norm": 0.09653317928314209, + "learning_rate": 5.107261758742397e-06, + "loss": 8.5564, + "step": 166240 + }, + { + "epoch": 0.8302329646183425, + "grad_norm": 0.09071271866559982, + "learning_rate": 5.105759843800846e-06, + "loss": 8.5254, + "step": 166250 + }, + { + "epoch": 0.8302829034432819, + "grad_norm": 0.09026788175106049, + "learning_rate": 5.104257928859296e-06, + "loss": 8.5428, + "step": 166260 + }, + { + "epoch": 0.8303328422682215, + "grad_norm": 0.08606173098087311, + "learning_rate": 5.102756013917746e-06, + "loss": 8.5425, + "step": 166270 + }, + { + "epoch": 0.8303827810931609, + "grad_norm": 0.09267769753932953, + "learning_rate": 5.101254098976194e-06, + "loss": 8.5487, + "step": 166280 + }, + { + "epoch": 0.8304327199181003, + "grad_norm": 0.08565813302993774, + "learning_rate": 5.0997521840346445e-06, + "loss": 8.5572, + "step": 166290 + }, + { + "epoch": 0.8304826587430397, + "grad_norm": 0.09123985469341278, + "learning_rate": 5.098250269093094e-06, + "loss": 8.5305, + "step": 166300 + }, + { + "epoch": 0.8305325975679793, + "grad_norm": 0.08855421096086502, + "learning_rate": 5.096748354151543e-06, + "loss": 8.5374, + "step": 166310 + }, + { + "epoch": 0.8305825363929187, + "grad_norm": 0.09398633986711502, + "learning_rate": 5.095246439209993e-06, + "loss": 8.5517, + "step": 166320 + }, + { + "epoch": 0.8306324752178581, + "grad_norm": 0.08958692103624344, + "learning_rate": 5.093744524268442e-06, + "loss": 8.5346, + "step": 166330 + }, + { + "epoch": 0.8306824140427975, + "grad_norm": 0.09231898933649063, + "learning_rate": 5.092242609326892e-06, + "loss": 8.5458, + "step": 166340 + }, + { + "epoch": 0.8307323528677371, + "grad_norm": 0.09124819934368134, + "learning_rate": 5.090740694385342e-06, + "loss": 8.5355, + "step": 166350 + }, + { + "epoch": 0.8307822916926765, + "grad_norm": 0.09533385187387466, + "learning_rate": 5.089238779443791e-06, + "loss": 8.5478, + "step": 166360 + }, + { + "epoch": 0.8308322305176159, + "grad_norm": 0.0879926010966301, + "learning_rate": 5.087736864502241e-06, + "loss": 8.5447, + "step": 166370 + }, + { + "epoch": 0.8308821693425553, + "grad_norm": 0.09048743546009064, + "learning_rate": 5.086234949560689e-06, + "loss": 8.5385, + "step": 166380 + }, + { + "epoch": 0.8309321081674949, + "grad_norm": 0.09626738727092743, + "learning_rate": 5.0847330346191396e-06, + "loss": 8.5422, + "step": 166390 + }, + { + "epoch": 0.8309820469924343, + "grad_norm": 0.09046705067157745, + "learning_rate": 5.08323111967759e-06, + "loss": 8.5361, + "step": 166400 + }, + { + "epoch": 0.8310319858173737, + "grad_norm": 0.0912972241640091, + "learning_rate": 5.081729204736038e-06, + "loss": 8.5421, + "step": 166410 + }, + { + "epoch": 0.8310819246423131, + "grad_norm": 0.08912695944309235, + "learning_rate": 5.080227289794488e-06, + "loss": 8.5457, + "step": 166420 + }, + { + "epoch": 0.8311318634672527, + "grad_norm": 0.09452259540557861, + "learning_rate": 5.078725374852938e-06, + "loss": 8.5539, + "step": 166430 + }, + { + "epoch": 0.8311818022921921, + "grad_norm": 0.0892590880393982, + "learning_rate": 5.077223459911387e-06, + "loss": 8.5343, + "step": 166440 + }, + { + "epoch": 0.8312317411171315, + "grad_norm": 0.09252618998289108, + "learning_rate": 5.075721544969837e-06, + "loss": 8.5364, + "step": 166450 + }, + { + "epoch": 0.8312816799420709, + "grad_norm": 0.0892346054315567, + "learning_rate": 5.074219630028286e-06, + "loss": 8.5372, + "step": 166460 + }, + { + "epoch": 0.8313316187670104, + "grad_norm": 0.08983513712882996, + "learning_rate": 5.072717715086736e-06, + "loss": 8.5547, + "step": 166470 + }, + { + "epoch": 0.8313815575919499, + "grad_norm": 0.09154525399208069, + "learning_rate": 5.071215800145185e-06, + "loss": 8.5316, + "step": 166480 + }, + { + "epoch": 0.8314314964168893, + "grad_norm": 0.09328502416610718, + "learning_rate": 5.0697138852036346e-06, + "loss": 8.5355, + "step": 166490 + }, + { + "epoch": 0.8314814352418287, + "grad_norm": 0.09083747118711472, + "learning_rate": 5.068211970262085e-06, + "loss": 8.5346, + "step": 166500 + }, + { + "epoch": 0.8315313740667682, + "grad_norm": 0.09179224073886871, + "learning_rate": 5.066710055320534e-06, + "loss": 8.5448, + "step": 166510 + }, + { + "epoch": 0.8315813128917077, + "grad_norm": 0.08915836364030838, + "learning_rate": 5.065208140378983e-06, + "loss": 8.5404, + "step": 166520 + }, + { + "epoch": 0.8316312517166471, + "grad_norm": 0.08934465050697327, + "learning_rate": 5.063706225437433e-06, + "loss": 8.5365, + "step": 166530 + }, + { + "epoch": 0.8316811905415865, + "grad_norm": 0.09658721089363098, + "learning_rate": 5.062204310495882e-06, + "loss": 8.5354, + "step": 166540 + }, + { + "epoch": 0.831731129366526, + "grad_norm": 0.09819358587265015, + "learning_rate": 5.060702395554332e-06, + "loss": 8.5305, + "step": 166550 + }, + { + "epoch": 0.8317810681914655, + "grad_norm": 0.0890253409743309, + "learning_rate": 5.0592004806127816e-06, + "loss": 8.5532, + "step": 166560 + }, + { + "epoch": 0.8318310070164049, + "grad_norm": 0.09140022099018097, + "learning_rate": 5.057698565671231e-06, + "loss": 8.5178, + "step": 166570 + }, + { + "epoch": 0.8318809458413443, + "grad_norm": 0.0952766090631485, + "learning_rate": 5.05619665072968e-06, + "loss": 8.5309, + "step": 166580 + }, + { + "epoch": 0.8319308846662838, + "grad_norm": 0.08961894363164902, + "learning_rate": 5.05469473578813e-06, + "loss": 8.5458, + "step": 166590 + }, + { + "epoch": 0.8319808234912233, + "grad_norm": 0.08949257433414459, + "learning_rate": 5.05319282084658e-06, + "loss": 8.5408, + "step": 166600 + }, + { + "epoch": 0.8320307623161627, + "grad_norm": 0.09538450837135315, + "learning_rate": 5.051690905905029e-06, + "loss": 8.5492, + "step": 166610 + }, + { + "epoch": 0.8320807011411021, + "grad_norm": 0.0917927548289299, + "learning_rate": 5.050188990963478e-06, + "loss": 8.5572, + "step": 166620 + }, + { + "epoch": 0.8321306399660416, + "grad_norm": 0.09348586946725845, + "learning_rate": 5.048687076021928e-06, + "loss": 8.5529, + "step": 166630 + }, + { + "epoch": 0.8321805787909811, + "grad_norm": 0.09373587369918823, + "learning_rate": 5.047185161080378e-06, + "loss": 8.527, + "step": 166640 + }, + { + "epoch": 0.8322305176159205, + "grad_norm": 0.08720920234918594, + "learning_rate": 5.045683246138827e-06, + "loss": 8.5178, + "step": 166650 + }, + { + "epoch": 0.8322804564408599, + "grad_norm": 0.08963396400213242, + "learning_rate": 5.044181331197277e-06, + "loss": 8.533, + "step": 166660 + }, + { + "epoch": 0.8323303952657994, + "grad_norm": 0.0868087187409401, + "learning_rate": 5.042679416255727e-06, + "loss": 8.5402, + "step": 166670 + }, + { + "epoch": 0.8323803340907389, + "grad_norm": 0.09393750131130219, + "learning_rate": 5.041177501314175e-06, + "loss": 8.5407, + "step": 166680 + }, + { + "epoch": 0.8324302729156783, + "grad_norm": 0.09192768484354019, + "learning_rate": 5.039675586372625e-06, + "loss": 8.5346, + "step": 166690 + }, + { + "epoch": 0.8324802117406177, + "grad_norm": 0.10022985190153122, + "learning_rate": 5.038173671431075e-06, + "loss": 8.5368, + "step": 166700 + }, + { + "epoch": 0.8325301505655572, + "grad_norm": 0.0937315970659256, + "learning_rate": 5.036671756489524e-06, + "loss": 8.5186, + "step": 166710 + }, + { + "epoch": 0.8325800893904967, + "grad_norm": 0.09312406182289124, + "learning_rate": 5.035169841547974e-06, + "loss": 8.5229, + "step": 166720 + }, + { + "epoch": 0.8326300282154361, + "grad_norm": 0.08995606750249863, + "learning_rate": 5.033667926606423e-06, + "loss": 8.5457, + "step": 166730 + }, + { + "epoch": 0.8326799670403755, + "grad_norm": 0.09093338251113892, + "learning_rate": 5.032166011664873e-06, + "loss": 8.5067, + "step": 166740 + }, + { + "epoch": 0.832729905865315, + "grad_norm": 0.0915585607290268, + "learning_rate": 5.030664096723322e-06, + "loss": 8.5639, + "step": 166750 + }, + { + "epoch": 0.8327798446902545, + "grad_norm": 0.09234713017940521, + "learning_rate": 5.029162181781772e-06, + "loss": 8.5346, + "step": 166760 + }, + { + "epoch": 0.8328297835151939, + "grad_norm": 0.08565306663513184, + "learning_rate": 5.027660266840222e-06, + "loss": 8.541, + "step": 166770 + }, + { + "epoch": 0.8328797223401333, + "grad_norm": 0.08641324937343597, + "learning_rate": 5.02615835189867e-06, + "loss": 8.5491, + "step": 166780 + }, + { + "epoch": 0.8329296611650728, + "grad_norm": 0.09579795598983765, + "learning_rate": 5.0246564369571204e-06, + "loss": 8.549, + "step": 166790 + }, + { + "epoch": 0.8329795999900123, + "grad_norm": 0.09320466220378876, + "learning_rate": 5.023154522015571e-06, + "loss": 8.5598, + "step": 166800 + }, + { + "epoch": 0.8330295388149517, + "grad_norm": 0.09680447727441788, + "learning_rate": 5.021652607074019e-06, + "loss": 8.5379, + "step": 166810 + }, + { + "epoch": 0.8330794776398911, + "grad_norm": 0.08433427661657333, + "learning_rate": 5.020150692132469e-06, + "loss": 8.5581, + "step": 166820 + }, + { + "epoch": 0.8331294164648306, + "grad_norm": 0.09262140095233917, + "learning_rate": 5.018648777190918e-06, + "loss": 8.524, + "step": 166830 + }, + { + "epoch": 0.83317935528977, + "grad_norm": 0.09593355655670166, + "learning_rate": 5.017146862249368e-06, + "loss": 8.5378, + "step": 166840 + }, + { + "epoch": 0.8332292941147095, + "grad_norm": 0.09165123105049133, + "learning_rate": 5.015644947307818e-06, + "loss": 8.54, + "step": 166850 + }, + { + "epoch": 0.8332792329396489, + "grad_norm": 0.09254296123981476, + "learning_rate": 5.014143032366267e-06, + "loss": 8.5392, + "step": 166860 + }, + { + "epoch": 0.8333291717645884, + "grad_norm": 0.0935739278793335, + "learning_rate": 5.012641117424717e-06, + "loss": 8.5476, + "step": 166870 + }, + { + "epoch": 0.8333791105895278, + "grad_norm": 0.09110825508832932, + "learning_rate": 5.011139202483166e-06, + "loss": 8.5356, + "step": 166880 + }, + { + "epoch": 0.8334290494144673, + "grad_norm": 0.09004233777523041, + "learning_rate": 5.0096372875416154e-06, + "loss": 8.5363, + "step": 166890 + }, + { + "epoch": 0.8334789882394067, + "grad_norm": 0.08766531944274902, + "learning_rate": 5.008135372600066e-06, + "loss": 8.5418, + "step": 166900 + }, + { + "epoch": 0.8335289270643462, + "grad_norm": 0.09655581414699554, + "learning_rate": 5.006633457658514e-06, + "loss": 8.5334, + "step": 166910 + }, + { + "epoch": 0.8335788658892856, + "grad_norm": 0.08657383173704147, + "learning_rate": 5.005131542716964e-06, + "loss": 8.5564, + "step": 166920 + }, + { + "epoch": 0.8336288047142251, + "grad_norm": 0.09356348216533661, + "learning_rate": 5.003629627775414e-06, + "loss": 8.5438, + "step": 166930 + }, + { + "epoch": 0.8336787435391645, + "grad_norm": 0.0967157855629921, + "learning_rate": 5.002127712833863e-06, + "loss": 8.5556, + "step": 166940 + }, + { + "epoch": 0.833728682364104, + "grad_norm": 0.09090452641248703, + "learning_rate": 5.000625797892313e-06, + "loss": 8.5707, + "step": 166950 + }, + { + "epoch": 0.8337786211890434, + "grad_norm": 0.09209217876195908, + "learning_rate": 4.9991238829507624e-06, + "loss": 8.5592, + "step": 166960 + }, + { + "epoch": 0.8338285600139829, + "grad_norm": 0.09082140028476715, + "learning_rate": 4.997621968009212e-06, + "loss": 8.5343, + "step": 166970 + }, + { + "epoch": 0.8338784988389223, + "grad_norm": 0.09138956665992737, + "learning_rate": 4.996120053067661e-06, + "loss": 8.5338, + "step": 166980 + }, + { + "epoch": 0.8339284376638618, + "grad_norm": 0.09424415230751038, + "learning_rate": 4.9946181381261104e-06, + "loss": 8.5322, + "step": 166990 + }, + { + "epoch": 0.8339783764888012, + "grad_norm": 0.09288877248764038, + "learning_rate": 4.993116223184561e-06, + "loss": 8.5364, + "step": 167000 + }, + { + "epoch": 0.8340283153137407, + "grad_norm": 0.08676401525735855, + "learning_rate": 4.99161430824301e-06, + "loss": 8.5354, + "step": 167010 + }, + { + "epoch": 0.8340782541386801, + "grad_norm": 0.08432342857122421, + "learning_rate": 4.990112393301459e-06, + "loss": 8.5373, + "step": 167020 + }, + { + "epoch": 0.8341281929636196, + "grad_norm": 0.09226579964160919, + "learning_rate": 4.988610478359909e-06, + "loss": 8.5151, + "step": 167030 + }, + { + "epoch": 0.834178131788559, + "grad_norm": 0.0887642651796341, + "learning_rate": 4.987108563418359e-06, + "loss": 8.5471, + "step": 167040 + }, + { + "epoch": 0.8342280706134985, + "grad_norm": 0.0907331258058548, + "learning_rate": 4.985606648476808e-06, + "loss": 8.5403, + "step": 167050 + }, + { + "epoch": 0.8342780094384379, + "grad_norm": 0.08834332972764969, + "learning_rate": 4.9841047335352575e-06, + "loss": 8.5228, + "step": 167060 + }, + { + "epoch": 0.8343279482633774, + "grad_norm": 0.08750323951244354, + "learning_rate": 4.982602818593707e-06, + "loss": 8.5424, + "step": 167070 + }, + { + "epoch": 0.8343778870883168, + "grad_norm": 0.08858982473611832, + "learning_rate": 4.981100903652156e-06, + "loss": 8.5551, + "step": 167080 + }, + { + "epoch": 0.8344278259132563, + "grad_norm": 0.09022153168916702, + "learning_rate": 4.979598988710606e-06, + "loss": 8.5344, + "step": 167090 + }, + { + "epoch": 0.8344777647381957, + "grad_norm": 0.0896066427230835, + "learning_rate": 4.978097073769056e-06, + "loss": 8.5426, + "step": 167100 + }, + { + "epoch": 0.8345277035631352, + "grad_norm": 0.09046115726232529, + "learning_rate": 4.976595158827505e-06, + "loss": 8.545, + "step": 167110 + }, + { + "epoch": 0.8345776423880746, + "grad_norm": 0.08858959376811981, + "learning_rate": 4.975093243885955e-06, + "loss": 8.5482, + "step": 167120 + }, + { + "epoch": 0.8346275812130141, + "grad_norm": 0.08675581961870193, + "learning_rate": 4.9735913289444045e-06, + "loss": 8.5382, + "step": 167130 + }, + { + "epoch": 0.8346775200379535, + "grad_norm": 0.08715889602899551, + "learning_rate": 4.972089414002854e-06, + "loss": 8.5394, + "step": 167140 + }, + { + "epoch": 0.8347274588628929, + "grad_norm": 0.0885719358921051, + "learning_rate": 4.970587499061303e-06, + "loss": 8.5276, + "step": 167150 + }, + { + "epoch": 0.8347773976878324, + "grad_norm": 0.09617254137992859, + "learning_rate": 4.9690855841197525e-06, + "loss": 8.5369, + "step": 167160 + }, + { + "epoch": 0.8348273365127719, + "grad_norm": 0.09246990084648132, + "learning_rate": 4.967583669178203e-06, + "loss": 8.5449, + "step": 167170 + }, + { + "epoch": 0.8348772753377113, + "grad_norm": 0.08657874912023544, + "learning_rate": 4.966081754236652e-06, + "loss": 8.5495, + "step": 167180 + }, + { + "epoch": 0.8349272141626507, + "grad_norm": 0.08776073902845383, + "learning_rate": 4.964579839295101e-06, + "loss": 8.5435, + "step": 167190 + }, + { + "epoch": 0.8349771529875902, + "grad_norm": 0.08979029953479767, + "learning_rate": 4.9630779243535515e-06, + "loss": 8.5452, + "step": 167200 + }, + { + "epoch": 0.8350270918125297, + "grad_norm": 0.09311855584383011, + "learning_rate": 4.961576009412e-06, + "loss": 8.5497, + "step": 167210 + }, + { + "epoch": 0.8350770306374691, + "grad_norm": 0.09252220392227173, + "learning_rate": 4.96007409447045e-06, + "loss": 8.5304, + "step": 167220 + }, + { + "epoch": 0.8351269694624085, + "grad_norm": 0.08825825899839401, + "learning_rate": 4.9585721795288995e-06, + "loss": 8.5425, + "step": 167230 + }, + { + "epoch": 0.835176908287348, + "grad_norm": 0.09545952081680298, + "learning_rate": 4.957070264587349e-06, + "loss": 8.5281, + "step": 167240 + }, + { + "epoch": 0.8352268471122875, + "grad_norm": 0.09352894127368927, + "learning_rate": 4.955568349645799e-06, + "loss": 8.5386, + "step": 167250 + }, + { + "epoch": 0.8352767859372269, + "grad_norm": 0.09909328073263168, + "learning_rate": 4.9540664347042475e-06, + "loss": 8.537, + "step": 167260 + }, + { + "epoch": 0.8353267247621663, + "grad_norm": 0.09386103600263596, + "learning_rate": 4.952564519762698e-06, + "loss": 8.545, + "step": 167270 + }, + { + "epoch": 0.8353766635871058, + "grad_norm": 0.08503425866365433, + "learning_rate": 4.951062604821148e-06, + "loss": 8.5802, + "step": 167280 + }, + { + "epoch": 0.8354266024120452, + "grad_norm": 0.09073678404092789, + "learning_rate": 4.949560689879596e-06, + "loss": 8.5313, + "step": 167290 + }, + { + "epoch": 0.8354765412369847, + "grad_norm": 0.09098771214485168, + "learning_rate": 4.9480587749380465e-06, + "loss": 8.5403, + "step": 167300 + }, + { + "epoch": 0.8355264800619241, + "grad_norm": 0.09336119145154953, + "learning_rate": 4.946556859996495e-06, + "loss": 8.5428, + "step": 167310 + }, + { + "epoch": 0.8355764188868636, + "grad_norm": 0.08903469145298004, + "learning_rate": 4.945054945054945e-06, + "loss": 8.5508, + "step": 167320 + }, + { + "epoch": 0.835626357711803, + "grad_norm": 0.08720388263463974, + "learning_rate": 4.943553030113395e-06, + "loss": 8.54, + "step": 167330 + }, + { + "epoch": 0.8356762965367425, + "grad_norm": 0.09315963834524155, + "learning_rate": 4.942051115171844e-06, + "loss": 8.549, + "step": 167340 + }, + { + "epoch": 0.8357262353616819, + "grad_norm": 0.08953353762626648, + "learning_rate": 4.940549200230294e-06, + "loss": 8.5557, + "step": 167350 + }, + { + "epoch": 0.8357761741866214, + "grad_norm": 0.0915287509560585, + "learning_rate": 4.939047285288743e-06, + "loss": 8.5594, + "step": 167360 + }, + { + "epoch": 0.8358261130115608, + "grad_norm": 0.09571286290884018, + "learning_rate": 4.937545370347193e-06, + "loss": 8.541, + "step": 167370 + }, + { + "epoch": 0.8358760518365003, + "grad_norm": 0.08816110342741013, + "learning_rate": 4.936043455405643e-06, + "loss": 8.5397, + "step": 167380 + }, + { + "epoch": 0.8359259906614397, + "grad_norm": 0.08931413292884827, + "learning_rate": 4.934541540464091e-06, + "loss": 8.5379, + "step": 167390 + }, + { + "epoch": 0.8359759294863792, + "grad_norm": 0.09298036247491837, + "learning_rate": 4.9330396255225415e-06, + "loss": 8.532, + "step": 167400 + }, + { + "epoch": 0.8360258683113186, + "grad_norm": 0.08843599259853363, + "learning_rate": 4.931537710580991e-06, + "loss": 8.539, + "step": 167410 + }, + { + "epoch": 0.8360758071362581, + "grad_norm": 0.0943143293261528, + "learning_rate": 4.93003579563944e-06, + "loss": 8.5463, + "step": 167420 + }, + { + "epoch": 0.8361257459611975, + "grad_norm": 0.08989337831735611, + "learning_rate": 4.92853388069789e-06, + "loss": 8.5562, + "step": 167430 + }, + { + "epoch": 0.836175684786137, + "grad_norm": 0.09261681884527206, + "learning_rate": 4.92703196575634e-06, + "loss": 8.5383, + "step": 167440 + }, + { + "epoch": 0.8362256236110764, + "grad_norm": 0.088409423828125, + "learning_rate": 4.925530050814789e-06, + "loss": 8.5393, + "step": 167450 + }, + { + "epoch": 0.8362755624360159, + "grad_norm": 0.0936957448720932, + "learning_rate": 4.924028135873238e-06, + "loss": 8.5375, + "step": 167460 + }, + { + "epoch": 0.8363255012609553, + "grad_norm": 0.08874178677797318, + "learning_rate": 4.922526220931688e-06, + "loss": 8.5396, + "step": 167470 + }, + { + "epoch": 0.8363754400858948, + "grad_norm": 0.09026958048343658, + "learning_rate": 4.921024305990138e-06, + "loss": 8.5348, + "step": 167480 + }, + { + "epoch": 0.8364253789108342, + "grad_norm": 0.09545276314020157, + "learning_rate": 4.919522391048587e-06, + "loss": 8.5391, + "step": 167490 + }, + { + "epoch": 0.8364753177357737, + "grad_norm": 0.08839011937379837, + "learning_rate": 4.9180204761070365e-06, + "loss": 8.5282, + "step": 167500 + }, + { + "epoch": 0.8365252565607131, + "grad_norm": 0.08772112429141998, + "learning_rate": 4.916518561165486e-06, + "loss": 8.5369, + "step": 167510 + }, + { + "epoch": 0.8365751953856526, + "grad_norm": 0.09278785437345505, + "learning_rate": 4.915016646223936e-06, + "loss": 8.5288, + "step": 167520 + }, + { + "epoch": 0.836625134210592, + "grad_norm": 0.09600167721509933, + "learning_rate": 4.913514731282385e-06, + "loss": 8.5258, + "step": 167530 + }, + { + "epoch": 0.8366750730355315, + "grad_norm": 0.08788032084703445, + "learning_rate": 4.912012816340835e-06, + "loss": 8.5342, + "step": 167540 + }, + { + "epoch": 0.8367250118604709, + "grad_norm": 0.08811583369970322, + "learning_rate": 4.910510901399284e-06, + "loss": 8.5487, + "step": 167550 + }, + { + "epoch": 0.8367749506854104, + "grad_norm": 0.08983556926250458, + "learning_rate": 4.909008986457733e-06, + "loss": 8.5451, + "step": 167560 + }, + { + "epoch": 0.8368248895103498, + "grad_norm": 0.08975329995155334, + "learning_rate": 4.9075070715161835e-06, + "loss": 8.5408, + "step": 167570 + }, + { + "epoch": 0.8368748283352893, + "grad_norm": 0.09413887560367584, + "learning_rate": 4.906005156574633e-06, + "loss": 8.5396, + "step": 167580 + }, + { + "epoch": 0.8369247671602287, + "grad_norm": 0.09020008891820908, + "learning_rate": 4.904503241633082e-06, + "loss": 8.5279, + "step": 167590 + }, + { + "epoch": 0.8369747059851682, + "grad_norm": 0.09177325665950775, + "learning_rate": 4.903001326691532e-06, + "loss": 8.5333, + "step": 167600 + }, + { + "epoch": 0.8370246448101076, + "grad_norm": 0.09214575588703156, + "learning_rate": 4.901499411749981e-06, + "loss": 8.5326, + "step": 167610 + }, + { + "epoch": 0.837074583635047, + "grad_norm": 0.09108281135559082, + "learning_rate": 4.899997496808431e-06, + "loss": 8.5556, + "step": 167620 + }, + { + "epoch": 0.8371245224599865, + "grad_norm": 0.09346097707748413, + "learning_rate": 4.89849558186688e-06, + "loss": 8.5301, + "step": 167630 + }, + { + "epoch": 0.837174461284926, + "grad_norm": 0.09483732283115387, + "learning_rate": 4.89699366692533e-06, + "loss": 8.5402, + "step": 167640 + }, + { + "epoch": 0.8372244001098654, + "grad_norm": 0.09763989597558975, + "learning_rate": 4.89549175198378e-06, + "loss": 8.5482, + "step": 167650 + }, + { + "epoch": 0.8372743389348049, + "grad_norm": 0.09391061961650848, + "learning_rate": 4.893989837042228e-06, + "loss": 8.5302, + "step": 167660 + }, + { + "epoch": 0.8373242777597443, + "grad_norm": 0.09133297950029373, + "learning_rate": 4.8924879221006785e-06, + "loss": 8.5292, + "step": 167670 + }, + { + "epoch": 0.8373742165846838, + "grad_norm": 0.09636905044317245, + "learning_rate": 4.890986007159129e-06, + "loss": 8.5271, + "step": 167680 + }, + { + "epoch": 0.8374241554096232, + "grad_norm": 0.09621647000312805, + "learning_rate": 4.889484092217577e-06, + "loss": 8.5309, + "step": 167690 + }, + { + "epoch": 0.8374740942345626, + "grad_norm": 0.09955976903438568, + "learning_rate": 4.887982177276027e-06, + "loss": 8.5377, + "step": 167700 + }, + { + "epoch": 0.8375240330595021, + "grad_norm": 0.08797106146812439, + "learning_rate": 4.886480262334476e-06, + "loss": 8.5314, + "step": 167710 + }, + { + "epoch": 0.8375739718844416, + "grad_norm": 0.08975420147180557, + "learning_rate": 4.884978347392926e-06, + "loss": 8.5436, + "step": 167720 + }, + { + "epoch": 0.837623910709381, + "grad_norm": 0.09256165474653244, + "learning_rate": 4.883476432451376e-06, + "loss": 8.5286, + "step": 167730 + }, + { + "epoch": 0.8376738495343204, + "grad_norm": 0.09067859500646591, + "learning_rate": 4.881974517509825e-06, + "loss": 8.5291, + "step": 167740 + }, + { + "epoch": 0.8377237883592599, + "grad_norm": 0.08854568004608154, + "learning_rate": 4.880472602568275e-06, + "loss": 8.5603, + "step": 167750 + }, + { + "epoch": 0.8377737271841994, + "grad_norm": 0.09424934536218643, + "learning_rate": 4.878970687626724e-06, + "loss": 8.544, + "step": 167760 + }, + { + "epoch": 0.8378236660091388, + "grad_norm": 0.08619468659162521, + "learning_rate": 4.8774687726851735e-06, + "loss": 8.5323, + "step": 167770 + }, + { + "epoch": 0.8378736048340782, + "grad_norm": 0.09160079061985016, + "learning_rate": 4.875966857743624e-06, + "loss": 8.5439, + "step": 167780 + }, + { + "epoch": 0.8379235436590177, + "grad_norm": 0.09085732698440552, + "learning_rate": 4.874464942802072e-06, + "loss": 8.54, + "step": 167790 + }, + { + "epoch": 0.8379734824839572, + "grad_norm": 0.08762364834547043, + "learning_rate": 4.872963027860522e-06, + "loss": 8.531, + "step": 167800 + }, + { + "epoch": 0.8380234213088966, + "grad_norm": 0.09410136938095093, + "learning_rate": 4.871461112918972e-06, + "loss": 8.5436, + "step": 167810 + }, + { + "epoch": 0.838073360133836, + "grad_norm": 0.0921444445848465, + "learning_rate": 4.869959197977421e-06, + "loss": 8.5332, + "step": 167820 + }, + { + "epoch": 0.8381232989587755, + "grad_norm": 0.09351623058319092, + "learning_rate": 4.868457283035871e-06, + "loss": 8.5444, + "step": 167830 + }, + { + "epoch": 0.838173237783715, + "grad_norm": 0.08999743312597275, + "learning_rate": 4.8669553680943205e-06, + "loss": 8.5551, + "step": 167840 + }, + { + "epoch": 0.8382231766086544, + "grad_norm": 0.09446761757135391, + "learning_rate": 4.86545345315277e-06, + "loss": 8.5314, + "step": 167850 + }, + { + "epoch": 0.8382731154335938, + "grad_norm": 0.09473822265863419, + "learning_rate": 4.863951538211219e-06, + "loss": 8.5381, + "step": 167860 + }, + { + "epoch": 0.8383230542585333, + "grad_norm": 0.09794218093156815, + "learning_rate": 4.8624496232696685e-06, + "loss": 8.5321, + "step": 167870 + }, + { + "epoch": 0.8383729930834728, + "grad_norm": 0.09353634715080261, + "learning_rate": 4.860947708328119e-06, + "loss": 8.5426, + "step": 167880 + }, + { + "epoch": 0.8384229319084122, + "grad_norm": 0.0915239006280899, + "learning_rate": 4.859445793386568e-06, + "loss": 8.5361, + "step": 167890 + }, + { + "epoch": 0.8384728707333516, + "grad_norm": 0.09556429833173752, + "learning_rate": 4.857943878445017e-06, + "loss": 8.5238, + "step": 167900 + }, + { + "epoch": 0.8385228095582911, + "grad_norm": 0.09241461008787155, + "learning_rate": 4.856441963503467e-06, + "loss": 8.5465, + "step": 167910 + }, + { + "epoch": 0.8385727483832306, + "grad_norm": 0.09638688713312149, + "learning_rate": 4.854940048561917e-06, + "loss": 8.5311, + "step": 167920 + }, + { + "epoch": 0.83862268720817, + "grad_norm": 0.09017990529537201, + "learning_rate": 4.853438133620366e-06, + "loss": 8.5524, + "step": 167930 + }, + { + "epoch": 0.8386726260331094, + "grad_norm": 0.0920003205537796, + "learning_rate": 4.8519362186788156e-06, + "loss": 8.5257, + "step": 167940 + }, + { + "epoch": 0.8387225648580489, + "grad_norm": 0.09186447411775589, + "learning_rate": 4.850434303737265e-06, + "loss": 8.5343, + "step": 167950 + }, + { + "epoch": 0.8387725036829884, + "grad_norm": 0.09250812232494354, + "learning_rate": 4.848932388795714e-06, + "loss": 8.5416, + "step": 167960 + }, + { + "epoch": 0.8388224425079278, + "grad_norm": 0.096126988530159, + "learning_rate": 4.847430473854164e-06, + "loss": 8.5307, + "step": 167970 + }, + { + "epoch": 0.8388723813328672, + "grad_norm": 0.09085419774055481, + "learning_rate": 4.845928558912614e-06, + "loss": 8.5487, + "step": 167980 + }, + { + "epoch": 0.8389223201578067, + "grad_norm": 0.09151645004749298, + "learning_rate": 4.844426643971063e-06, + "loss": 8.5358, + "step": 167990 + }, + { + "epoch": 0.8389722589827462, + "grad_norm": 0.09600605070590973, + "learning_rate": 4.842924729029513e-06, + "loss": 8.5415, + "step": 168000 + }, + { + "epoch": 0.8390221978076856, + "grad_norm": 0.08343446999788284, + "learning_rate": 4.841422814087962e-06, + "loss": 8.5486, + "step": 168010 + }, + { + "epoch": 0.839072136632625, + "grad_norm": 0.0970931351184845, + "learning_rate": 4.839920899146412e-06, + "loss": 8.5497, + "step": 168020 + }, + { + "epoch": 0.8391220754575645, + "grad_norm": 0.09128961712121964, + "learning_rate": 4.838418984204861e-06, + "loss": 8.5345, + "step": 168030 + }, + { + "epoch": 0.839172014282504, + "grad_norm": 0.08671637624502182, + "learning_rate": 4.8369170692633106e-06, + "loss": 8.5515, + "step": 168040 + }, + { + "epoch": 0.8392219531074434, + "grad_norm": 0.09237731248140335, + "learning_rate": 4.835415154321761e-06, + "loss": 8.4984, + "step": 168050 + }, + { + "epoch": 0.8392718919323828, + "grad_norm": 0.08762998133897781, + "learning_rate": 4.833913239380209e-06, + "loss": 8.5373, + "step": 168060 + }, + { + "epoch": 0.8393218307573223, + "grad_norm": 0.08894840627908707, + "learning_rate": 4.832411324438659e-06, + "loss": 8.5283, + "step": 168070 + }, + { + "epoch": 0.8393717695822618, + "grad_norm": 0.0915420651435852, + "learning_rate": 4.83090940949711e-06, + "loss": 8.5282, + "step": 168080 + }, + { + "epoch": 0.8394217084072012, + "grad_norm": 0.09219279140233994, + "learning_rate": 4.829407494555558e-06, + "loss": 8.5298, + "step": 168090 + }, + { + "epoch": 0.8394716472321406, + "grad_norm": 0.09041428565979004, + "learning_rate": 4.827905579614008e-06, + "loss": 8.5258, + "step": 168100 + }, + { + "epoch": 0.83952158605708, + "grad_norm": 0.08865369856357574, + "learning_rate": 4.826403664672457e-06, + "loss": 8.5421, + "step": 168110 + }, + { + "epoch": 0.8395715248820195, + "grad_norm": 0.09181597828865051, + "learning_rate": 4.824901749730907e-06, + "loss": 8.5309, + "step": 168120 + }, + { + "epoch": 0.839621463706959, + "grad_norm": 0.0943143367767334, + "learning_rate": 4.823399834789357e-06, + "loss": 8.5376, + "step": 168130 + }, + { + "epoch": 0.8396714025318984, + "grad_norm": 0.0917147845029831, + "learning_rate": 4.8218979198478056e-06, + "loss": 8.5423, + "step": 168140 + }, + { + "epoch": 0.8397213413568378, + "grad_norm": 0.09135729819536209, + "learning_rate": 4.820396004906256e-06, + "loss": 8.5337, + "step": 168150 + }, + { + "epoch": 0.8397712801817773, + "grad_norm": 0.09105221182107925, + "learning_rate": 4.818894089964705e-06, + "loss": 8.5277, + "step": 168160 + }, + { + "epoch": 0.8398212190067168, + "grad_norm": 0.09912960231304169, + "learning_rate": 4.817392175023154e-06, + "loss": 8.5492, + "step": 168170 + }, + { + "epoch": 0.8398711578316562, + "grad_norm": 0.08730529993772507, + "learning_rate": 4.815890260081605e-06, + "loss": 8.5422, + "step": 168180 + }, + { + "epoch": 0.8399210966565956, + "grad_norm": 0.09402299672365189, + "learning_rate": 4.814388345140053e-06, + "loss": 8.5343, + "step": 168190 + }, + { + "epoch": 0.8399710354815351, + "grad_norm": 0.09148437529802322, + "learning_rate": 4.812886430198503e-06, + "loss": 8.5231, + "step": 168200 + }, + { + "epoch": 0.8400209743064746, + "grad_norm": 0.08780474960803986, + "learning_rate": 4.811384515256953e-06, + "loss": 8.5322, + "step": 168210 + }, + { + "epoch": 0.840070913131414, + "grad_norm": 0.08754914999008179, + "learning_rate": 4.809882600315402e-06, + "loss": 8.5507, + "step": 168220 + }, + { + "epoch": 0.8401208519563534, + "grad_norm": 0.08717711269855499, + "learning_rate": 4.808380685373852e-06, + "loss": 8.5388, + "step": 168230 + }, + { + "epoch": 0.8401707907812929, + "grad_norm": 0.09865214675664902, + "learning_rate": 4.806878770432301e-06, + "loss": 8.5198, + "step": 168240 + }, + { + "epoch": 0.8402207296062324, + "grad_norm": 0.09098966419696808, + "learning_rate": 4.805376855490751e-06, + "loss": 8.5319, + "step": 168250 + }, + { + "epoch": 0.8402706684311718, + "grad_norm": 0.08804722875356674, + "learning_rate": 4.803874940549201e-06, + "loss": 8.5533, + "step": 168260 + }, + { + "epoch": 0.8403206072561112, + "grad_norm": 0.09023669362068176, + "learning_rate": 4.802373025607649e-06, + "loss": 8.5487, + "step": 168270 + }, + { + "epoch": 0.8403705460810507, + "grad_norm": 0.09287792444229126, + "learning_rate": 4.8008711106661e-06, + "loss": 8.5345, + "step": 168280 + }, + { + "epoch": 0.8404204849059902, + "grad_norm": 0.09334767609834671, + "learning_rate": 4.799369195724549e-06, + "loss": 8.5383, + "step": 168290 + }, + { + "epoch": 0.8404704237309296, + "grad_norm": 0.09085261076688766, + "learning_rate": 4.797867280782998e-06, + "loss": 8.5452, + "step": 168300 + }, + { + "epoch": 0.840520362555869, + "grad_norm": 0.08985559642314911, + "learning_rate": 4.7963653658414484e-06, + "loss": 8.5287, + "step": 168310 + }, + { + "epoch": 0.8405703013808085, + "grad_norm": 0.09192880243062973, + "learning_rate": 4.794863450899898e-06, + "loss": 8.5477, + "step": 168320 + }, + { + "epoch": 0.840620240205748, + "grad_norm": 0.08942985534667969, + "learning_rate": 4.793361535958347e-06, + "loss": 8.5286, + "step": 168330 + }, + { + "epoch": 0.8406701790306874, + "grad_norm": 0.09409672021865845, + "learning_rate": 4.7918596210167964e-06, + "loss": 8.5213, + "step": 168340 + }, + { + "epoch": 0.8407201178556268, + "grad_norm": 0.0878412127494812, + "learning_rate": 4.790357706075246e-06, + "loss": 8.5445, + "step": 168350 + }, + { + "epoch": 0.8407700566805663, + "grad_norm": 0.09042507410049438, + "learning_rate": 4.788855791133696e-06, + "loss": 8.5327, + "step": 168360 + }, + { + "epoch": 0.8408199955055058, + "grad_norm": 0.09061606973409653, + "learning_rate": 4.787353876192145e-06, + "loss": 8.5255, + "step": 168370 + }, + { + "epoch": 0.8408699343304452, + "grad_norm": 0.08747304230928421, + "learning_rate": 4.785851961250595e-06, + "loss": 8.5308, + "step": 168380 + }, + { + "epoch": 0.8409198731553846, + "grad_norm": 0.08752010762691498, + "learning_rate": 4.784350046309044e-06, + "loss": 8.5585, + "step": 168390 + }, + { + "epoch": 0.8409698119803241, + "grad_norm": 0.09220509976148605, + "learning_rate": 4.782848131367494e-06, + "loss": 8.5299, + "step": 168400 + }, + { + "epoch": 0.8410197508052636, + "grad_norm": 0.09601190686225891, + "learning_rate": 4.7813462164259434e-06, + "loss": 8.5213, + "step": 168410 + }, + { + "epoch": 0.841069689630203, + "grad_norm": 0.09144755452871323, + "learning_rate": 4.779844301484393e-06, + "loss": 8.5377, + "step": 168420 + }, + { + "epoch": 0.8411196284551424, + "grad_norm": 0.09117061644792557, + "learning_rate": 4.778342386542842e-06, + "loss": 8.5383, + "step": 168430 + }, + { + "epoch": 0.8411695672800819, + "grad_norm": 0.0880817323923111, + "learning_rate": 4.7768404716012914e-06, + "loss": 8.548, + "step": 168440 + }, + { + "epoch": 0.8412195061050214, + "grad_norm": 0.09438186883926392, + "learning_rate": 4.775338556659742e-06, + "loss": 8.5538, + "step": 168450 + }, + { + "epoch": 0.8412694449299608, + "grad_norm": 0.09241968393325806, + "learning_rate": 4.773836641718191e-06, + "loss": 8.5313, + "step": 168460 + }, + { + "epoch": 0.8413193837549002, + "grad_norm": 0.09603623300790787, + "learning_rate": 4.77233472677664e-06, + "loss": 8.5278, + "step": 168470 + }, + { + "epoch": 0.8413693225798397, + "grad_norm": 0.094639852643013, + "learning_rate": 4.7708328118350905e-06, + "loss": 8.5353, + "step": 168480 + }, + { + "epoch": 0.8414192614047792, + "grad_norm": 0.09024223685264587, + "learning_rate": 4.769330896893539e-06, + "loss": 8.5444, + "step": 168490 + }, + { + "epoch": 0.8414692002297186, + "grad_norm": 0.08873730897903442, + "learning_rate": 4.767828981951989e-06, + "loss": 8.523, + "step": 168500 + }, + { + "epoch": 0.841519139054658, + "grad_norm": 0.0888361856341362, + "learning_rate": 4.7663270670104384e-06, + "loss": 8.5363, + "step": 168510 + }, + { + "epoch": 0.8415690778795974, + "grad_norm": 0.0935666486620903, + "learning_rate": 4.764825152068888e-06, + "loss": 8.5435, + "step": 168520 + }, + { + "epoch": 0.841619016704537, + "grad_norm": 0.0903899073600769, + "learning_rate": 4.763323237127338e-06, + "loss": 8.5419, + "step": 168530 + }, + { + "epoch": 0.8416689555294764, + "grad_norm": 0.09109959751367569, + "learning_rate": 4.7618213221857864e-06, + "loss": 8.5273, + "step": 168540 + }, + { + "epoch": 0.8417188943544158, + "grad_norm": 0.08860781043767929, + "learning_rate": 4.760319407244237e-06, + "loss": 8.5301, + "step": 168550 + }, + { + "epoch": 0.8417688331793552, + "grad_norm": 0.09561283141374588, + "learning_rate": 4.758817492302687e-06, + "loss": 8.5381, + "step": 168560 + }, + { + "epoch": 0.8418187720042948, + "grad_norm": 0.09598873555660248, + "learning_rate": 4.757315577361135e-06, + "loss": 8.5349, + "step": 168570 + }, + { + "epoch": 0.8418687108292342, + "grad_norm": 0.09074491262435913, + "learning_rate": 4.7558136624195855e-06, + "loss": 8.5473, + "step": 168580 + }, + { + "epoch": 0.8419186496541736, + "grad_norm": 0.08866456151008606, + "learning_rate": 4.754311747478034e-06, + "loss": 8.5383, + "step": 168590 + }, + { + "epoch": 0.841968588479113, + "grad_norm": 0.09333822876214981, + "learning_rate": 4.752809832536484e-06, + "loss": 8.5318, + "step": 168600 + }, + { + "epoch": 0.8420185273040526, + "grad_norm": 0.09061551094055176, + "learning_rate": 4.751307917594934e-06, + "loss": 8.5349, + "step": 168610 + }, + { + "epoch": 0.842068466128992, + "grad_norm": 0.09228689223527908, + "learning_rate": 4.749806002653383e-06, + "loss": 8.5363, + "step": 168620 + }, + { + "epoch": 0.8421184049539314, + "grad_norm": 0.09009858220815659, + "learning_rate": 4.748304087711833e-06, + "loss": 8.5303, + "step": 168630 + }, + { + "epoch": 0.8421683437788708, + "grad_norm": 0.09529682248830795, + "learning_rate": 4.746802172770282e-06, + "loss": 8.5392, + "step": 168640 + }, + { + "epoch": 0.8422182826038104, + "grad_norm": 0.09646670520305634, + "learning_rate": 4.745300257828732e-06, + "loss": 8.5259, + "step": 168650 + }, + { + "epoch": 0.8422682214287498, + "grad_norm": 0.09157206863164902, + "learning_rate": 4.743798342887182e-06, + "loss": 8.5305, + "step": 168660 + }, + { + "epoch": 0.8423181602536892, + "grad_norm": 0.09323584288358688, + "learning_rate": 4.74229642794563e-06, + "loss": 8.5238, + "step": 168670 + }, + { + "epoch": 0.8423680990786286, + "grad_norm": 0.08792025595903397, + "learning_rate": 4.7407945130040805e-06, + "loss": 8.5433, + "step": 168680 + }, + { + "epoch": 0.8424180379035682, + "grad_norm": 0.08940023183822632, + "learning_rate": 4.73929259806253e-06, + "loss": 8.5252, + "step": 168690 + }, + { + "epoch": 0.8424679767285076, + "grad_norm": 0.09656330198049545, + "learning_rate": 4.737790683120979e-06, + "loss": 8.5101, + "step": 168700 + }, + { + "epoch": 0.842517915553447, + "grad_norm": 0.09180876612663269, + "learning_rate": 4.736288768179429e-06, + "loss": 8.5169, + "step": 168710 + }, + { + "epoch": 0.8425678543783864, + "grad_norm": 0.08640285581350327, + "learning_rate": 4.734786853237879e-06, + "loss": 8.5429, + "step": 168720 + }, + { + "epoch": 0.842617793203326, + "grad_norm": 0.0890103355050087, + "learning_rate": 4.733284938296328e-06, + "loss": 8.5434, + "step": 168730 + }, + { + "epoch": 0.8426677320282654, + "grad_norm": 0.09099707752466202, + "learning_rate": 4.731783023354777e-06, + "loss": 8.5422, + "step": 168740 + }, + { + "epoch": 0.8427176708532048, + "grad_norm": 0.09098941087722778, + "learning_rate": 4.730281108413227e-06, + "loss": 8.5546, + "step": 168750 + }, + { + "epoch": 0.8427676096781442, + "grad_norm": 0.09195592254400253, + "learning_rate": 4.728779193471677e-06, + "loss": 8.5293, + "step": 168760 + }, + { + "epoch": 0.8428175485030838, + "grad_norm": 0.09096460044384003, + "learning_rate": 4.727277278530126e-06, + "loss": 8.5446, + "step": 168770 + }, + { + "epoch": 0.8428674873280232, + "grad_norm": 0.08720100671052933, + "learning_rate": 4.7257753635885755e-06, + "loss": 8.5341, + "step": 168780 + }, + { + "epoch": 0.8429174261529626, + "grad_norm": 0.09274513274431229, + "learning_rate": 4.724273448647025e-06, + "loss": 8.5603, + "step": 168790 + }, + { + "epoch": 0.842967364977902, + "grad_norm": 0.09329745918512344, + "learning_rate": 4.722771533705475e-06, + "loss": 8.561, + "step": 168800 + }, + { + "epoch": 0.8430173038028416, + "grad_norm": 0.08319691568613052, + "learning_rate": 4.721269618763924e-06, + "loss": 8.531, + "step": 168810 + }, + { + "epoch": 0.843067242627781, + "grad_norm": 0.0921143889427185, + "learning_rate": 4.719767703822374e-06, + "loss": 8.539, + "step": 168820 + }, + { + "epoch": 0.8431171814527204, + "grad_norm": 0.09409576654434204, + "learning_rate": 4.718265788880823e-06, + "loss": 8.5339, + "step": 168830 + }, + { + "epoch": 0.8431671202776598, + "grad_norm": 0.08891736716032028, + "learning_rate": 4.716763873939272e-06, + "loss": 8.541, + "step": 168840 + }, + { + "epoch": 0.8432170591025994, + "grad_norm": 0.08662791550159454, + "learning_rate": 4.7152619589977225e-06, + "loss": 8.5403, + "step": 168850 + }, + { + "epoch": 0.8432669979275388, + "grad_norm": 0.09259507060050964, + "learning_rate": 4.713760044056172e-06, + "loss": 8.5186, + "step": 168860 + }, + { + "epoch": 0.8433169367524782, + "grad_norm": 0.09273169934749603, + "learning_rate": 4.712258129114621e-06, + "loss": 8.533, + "step": 168870 + }, + { + "epoch": 0.8433668755774176, + "grad_norm": 0.0853755995631218, + "learning_rate": 4.710756214173071e-06, + "loss": 8.5447, + "step": 168880 + }, + { + "epoch": 0.8434168144023572, + "grad_norm": 0.09415735304355621, + "learning_rate": 4.70925429923152e-06, + "loss": 8.5433, + "step": 168890 + }, + { + "epoch": 0.8434667532272966, + "grad_norm": 0.0893118679523468, + "learning_rate": 4.70775238428997e-06, + "loss": 8.5281, + "step": 168900 + }, + { + "epoch": 0.843516692052236, + "grad_norm": 0.09335058182477951, + "learning_rate": 4.706250469348419e-06, + "loss": 8.5326, + "step": 168910 + }, + { + "epoch": 0.8435666308771754, + "grad_norm": 0.09207750111818314, + "learning_rate": 4.704748554406869e-06, + "loss": 8.5416, + "step": 168920 + }, + { + "epoch": 0.843616569702115, + "grad_norm": 0.08903926610946655, + "learning_rate": 4.703246639465319e-06, + "loss": 8.5295, + "step": 168930 + }, + { + "epoch": 0.8436665085270544, + "grad_norm": 0.09720440953969955, + "learning_rate": 4.701744724523767e-06, + "loss": 8.5293, + "step": 168940 + }, + { + "epoch": 0.8437164473519938, + "grad_norm": 0.09019520878791809, + "learning_rate": 4.7002428095822175e-06, + "loss": 8.5264, + "step": 168950 + }, + { + "epoch": 0.8437663861769332, + "grad_norm": 0.08853203058242798, + "learning_rate": 4.698740894640668e-06, + "loss": 8.5345, + "step": 168960 + }, + { + "epoch": 0.8438163250018728, + "grad_norm": 0.09496580064296722, + "learning_rate": 4.697238979699116e-06, + "loss": 8.5143, + "step": 168970 + }, + { + "epoch": 0.8438662638268122, + "grad_norm": 0.09561099112033844, + "learning_rate": 4.695737064757566e-06, + "loss": 8.5355, + "step": 168980 + }, + { + "epoch": 0.8439162026517516, + "grad_norm": 0.08805878460407257, + "learning_rate": 4.694235149816015e-06, + "loss": 8.5346, + "step": 168990 + }, + { + "epoch": 0.843966141476691, + "grad_norm": 0.08811572194099426, + "learning_rate": 4.692733234874465e-06, + "loss": 8.5266, + "step": 169000 + }, + { + "epoch": 0.8440160803016306, + "grad_norm": 0.08896370977163315, + "learning_rate": 4.691231319932915e-06, + "loss": 8.5476, + "step": 169010 + }, + { + "epoch": 0.84406601912657, + "grad_norm": 0.09269924461841583, + "learning_rate": 4.689729404991364e-06, + "loss": 8.5217, + "step": 169020 + }, + { + "epoch": 0.8441159579515094, + "grad_norm": 0.0937482938170433, + "learning_rate": 4.688227490049814e-06, + "loss": 8.5131, + "step": 169030 + }, + { + "epoch": 0.8441658967764488, + "grad_norm": 0.09169386327266693, + "learning_rate": 4.686725575108263e-06, + "loss": 8.5409, + "step": 169040 + }, + { + "epoch": 0.8442158356013884, + "grad_norm": 0.09201053529977798, + "learning_rate": 4.6852236601667125e-06, + "loss": 8.5279, + "step": 169050 + }, + { + "epoch": 0.8442657744263278, + "grad_norm": 0.08865007758140564, + "learning_rate": 4.683721745225163e-06, + "loss": 8.5374, + "step": 169060 + }, + { + "epoch": 0.8443157132512672, + "grad_norm": 0.09043735265731812, + "learning_rate": 4.682219830283611e-06, + "loss": 8.5423, + "step": 169070 + }, + { + "epoch": 0.8443656520762066, + "grad_norm": 0.08876598626375198, + "learning_rate": 4.680717915342061e-06, + "loss": 8.5303, + "step": 169080 + }, + { + "epoch": 0.8444155909011462, + "grad_norm": 0.09009053558111191, + "learning_rate": 4.679216000400511e-06, + "loss": 8.5288, + "step": 169090 + }, + { + "epoch": 0.8444655297260856, + "grad_norm": 0.09224365651607513, + "learning_rate": 4.67771408545896e-06, + "loss": 8.5421, + "step": 169100 + }, + { + "epoch": 0.844515468551025, + "grad_norm": 0.09646381437778473, + "learning_rate": 4.67621217051741e-06, + "loss": 8.5252, + "step": 169110 + }, + { + "epoch": 0.8445654073759644, + "grad_norm": 0.09529738873243332, + "learning_rate": 4.6747102555758595e-06, + "loss": 8.5247, + "step": 169120 + }, + { + "epoch": 0.8446153462009038, + "grad_norm": 0.09440014511346817, + "learning_rate": 4.673208340634309e-06, + "loss": 8.5509, + "step": 169130 + }, + { + "epoch": 0.8446652850258434, + "grad_norm": 0.09085705131292343, + "learning_rate": 4.671706425692758e-06, + "loss": 8.5411, + "step": 169140 + }, + { + "epoch": 0.8447152238507828, + "grad_norm": 0.08810048550367355, + "learning_rate": 4.6702045107512075e-06, + "loss": 8.5213, + "step": 169150 + }, + { + "epoch": 0.8447651626757222, + "grad_norm": 0.0883641242980957, + "learning_rate": 4.668702595809658e-06, + "loss": 8.5451, + "step": 169160 + }, + { + "epoch": 0.8448151015006616, + "grad_norm": 0.09298266470432281, + "learning_rate": 4.667200680868107e-06, + "loss": 8.5362, + "step": 169170 + }, + { + "epoch": 0.8448650403256012, + "grad_norm": 0.08597780764102936, + "learning_rate": 4.665698765926556e-06, + "loss": 8.5315, + "step": 169180 + }, + { + "epoch": 0.8449149791505406, + "grad_norm": 0.09305087476968765, + "learning_rate": 4.664196850985006e-06, + "loss": 8.5321, + "step": 169190 + }, + { + "epoch": 0.84496491797548, + "grad_norm": 0.09523943066596985, + "learning_rate": 4.662694936043456e-06, + "loss": 8.5278, + "step": 169200 + }, + { + "epoch": 0.8450148568004194, + "grad_norm": 0.08965837210416794, + "learning_rate": 4.661193021101905e-06, + "loss": 8.5233, + "step": 169210 + }, + { + "epoch": 0.845064795625359, + "grad_norm": 0.08698379248380661, + "learning_rate": 4.6596911061603545e-06, + "loss": 8.5409, + "step": 169220 + }, + { + "epoch": 0.8451147344502984, + "grad_norm": 0.0960690900683403, + "learning_rate": 4.658189191218804e-06, + "loss": 8.5348, + "step": 169230 + }, + { + "epoch": 0.8451646732752378, + "grad_norm": 0.09241048991680145, + "learning_rate": 4.656687276277253e-06, + "loss": 8.5386, + "step": 169240 + }, + { + "epoch": 0.8452146121001772, + "grad_norm": 0.09009848535060883, + "learning_rate": 4.655185361335703e-06, + "loss": 8.5341, + "step": 169250 + }, + { + "epoch": 0.8452645509251168, + "grad_norm": 0.0927545353770256, + "learning_rate": 4.653683446394153e-06, + "loss": 8.5328, + "step": 169260 + }, + { + "epoch": 0.8453144897500562, + "grad_norm": 0.09297183901071548, + "learning_rate": 4.652181531452602e-06, + "loss": 8.5347, + "step": 169270 + }, + { + "epoch": 0.8453644285749956, + "grad_norm": 0.0915551409125328, + "learning_rate": 4.650679616511052e-06, + "loss": 8.5317, + "step": 169280 + }, + { + "epoch": 0.845414367399935, + "grad_norm": 0.0902283638715744, + "learning_rate": 4.649177701569501e-06, + "loss": 8.5346, + "step": 169290 + }, + { + "epoch": 0.8454643062248746, + "grad_norm": 0.0934210941195488, + "learning_rate": 4.647675786627951e-06, + "loss": 8.5406, + "step": 169300 + }, + { + "epoch": 0.845514245049814, + "grad_norm": 0.09512994438409805, + "learning_rate": 4.6461738716864e-06, + "loss": 8.5249, + "step": 169310 + }, + { + "epoch": 0.8455641838747534, + "grad_norm": 0.08952774852514267, + "learning_rate": 4.6446719567448495e-06, + "loss": 8.5413, + "step": 169320 + }, + { + "epoch": 0.8456141226996928, + "grad_norm": 0.09026441723108292, + "learning_rate": 4.6431700418033e-06, + "loss": 8.5283, + "step": 169330 + }, + { + "epoch": 0.8456640615246324, + "grad_norm": 0.09348812699317932, + "learning_rate": 4.641668126861749e-06, + "loss": 8.5296, + "step": 169340 + }, + { + "epoch": 0.8457140003495718, + "grad_norm": 0.09530653804540634, + "learning_rate": 4.640166211920198e-06, + "loss": 8.5413, + "step": 169350 + }, + { + "epoch": 0.8457639391745112, + "grad_norm": 0.0886545479297638, + "learning_rate": 4.6386642969786486e-06, + "loss": 8.5314, + "step": 169360 + }, + { + "epoch": 0.8458138779994506, + "grad_norm": 0.08922586590051651, + "learning_rate": 4.637162382037097e-06, + "loss": 8.5231, + "step": 169370 + }, + { + "epoch": 0.8458638168243902, + "grad_norm": 0.09260469675064087, + "learning_rate": 4.635660467095547e-06, + "loss": 8.5274, + "step": 169380 + }, + { + "epoch": 0.8459137556493296, + "grad_norm": 0.0915549173951149, + "learning_rate": 4.6341585521539965e-06, + "loss": 8.5405, + "step": 169390 + }, + { + "epoch": 0.845963694474269, + "grad_norm": 0.09589400887489319, + "learning_rate": 4.632656637212446e-06, + "loss": 8.5489, + "step": 169400 + }, + { + "epoch": 0.8460136332992084, + "grad_norm": 0.09545934200286865, + "learning_rate": 4.631154722270896e-06, + "loss": 8.5183, + "step": 169410 + }, + { + "epoch": 0.846063572124148, + "grad_norm": 0.09627781063318253, + "learning_rate": 4.6296528073293445e-06, + "loss": 8.5336, + "step": 169420 + }, + { + "epoch": 0.8461135109490874, + "grad_norm": 0.08923333883285522, + "learning_rate": 4.628150892387795e-06, + "loss": 8.5482, + "step": 169430 + }, + { + "epoch": 0.8461634497740268, + "grad_norm": 0.0943409726023674, + "learning_rate": 4.626648977446245e-06, + "loss": 8.5379, + "step": 169440 + }, + { + "epoch": 0.8462133885989662, + "grad_norm": 0.09771402925252914, + "learning_rate": 4.625147062504693e-06, + "loss": 8.5228, + "step": 169450 + }, + { + "epoch": 0.8462633274239058, + "grad_norm": 0.09028983116149902, + "learning_rate": 4.6236451475631436e-06, + "loss": 8.5318, + "step": 169460 + }, + { + "epoch": 0.8463132662488452, + "grad_norm": 0.09272675216197968, + "learning_rate": 4.622143232621592e-06, + "loss": 8.5224, + "step": 169470 + }, + { + "epoch": 0.8463632050737846, + "grad_norm": 0.09558822959661484, + "learning_rate": 4.620641317680042e-06, + "loss": 8.5264, + "step": 169480 + }, + { + "epoch": 0.846413143898724, + "grad_norm": 0.09761396795511246, + "learning_rate": 4.619139402738492e-06, + "loss": 8.5319, + "step": 169490 + }, + { + "epoch": 0.8464630827236636, + "grad_norm": 0.08748037368059158, + "learning_rate": 4.617637487796941e-06, + "loss": 8.543, + "step": 169500 + }, + { + "epoch": 0.846513021548603, + "grad_norm": 0.0940655842423439, + "learning_rate": 4.616135572855391e-06, + "loss": 8.5201, + "step": 169510 + }, + { + "epoch": 0.8465629603735424, + "grad_norm": 0.09007947146892548, + "learning_rate": 4.61463365791384e-06, + "loss": 8.5319, + "step": 169520 + }, + { + "epoch": 0.8466128991984818, + "grad_norm": 0.09856663644313812, + "learning_rate": 4.61313174297229e-06, + "loss": 8.5355, + "step": 169530 + }, + { + "epoch": 0.8466628380234213, + "grad_norm": 0.09389778226613998, + "learning_rate": 4.61162982803074e-06, + "loss": 8.5349, + "step": 169540 + }, + { + "epoch": 0.8467127768483608, + "grad_norm": 0.08669053763151169, + "learning_rate": 4.610127913089188e-06, + "loss": 8.5311, + "step": 169550 + }, + { + "epoch": 0.8467627156733002, + "grad_norm": 0.08856794238090515, + "learning_rate": 4.6086259981476386e-06, + "loss": 8.5275, + "step": 169560 + }, + { + "epoch": 0.8468126544982396, + "grad_norm": 0.09139453619718552, + "learning_rate": 4.607124083206088e-06, + "loss": 8.526, + "step": 169570 + }, + { + "epoch": 0.8468625933231791, + "grad_norm": 0.09540039300918579, + "learning_rate": 4.605622168264537e-06, + "loss": 8.5266, + "step": 169580 + }, + { + "epoch": 0.8469125321481186, + "grad_norm": 0.09145918488502502, + "learning_rate": 4.604120253322987e-06, + "loss": 8.5374, + "step": 169590 + }, + { + "epoch": 0.846962470973058, + "grad_norm": 0.08852418512105942, + "learning_rate": 4.602618338381437e-06, + "loss": 8.5433, + "step": 169600 + }, + { + "epoch": 0.8470124097979974, + "grad_norm": 0.09328535944223404, + "learning_rate": 4.601116423439886e-06, + "loss": 8.5504, + "step": 169610 + }, + { + "epoch": 0.847062348622937, + "grad_norm": 0.09695547074079514, + "learning_rate": 4.599614508498335e-06, + "loss": 8.5343, + "step": 169620 + }, + { + "epoch": 0.8471122874478764, + "grad_norm": 0.09098362177610397, + "learning_rate": 4.598112593556785e-06, + "loss": 8.5452, + "step": 169630 + }, + { + "epoch": 0.8471622262728158, + "grad_norm": 0.0890357717871666, + "learning_rate": 4.596610678615235e-06, + "loss": 8.5353, + "step": 169640 + }, + { + "epoch": 0.8472121650977552, + "grad_norm": 0.09094347059726715, + "learning_rate": 4.595108763673684e-06, + "loss": 8.5324, + "step": 169650 + }, + { + "epoch": 0.8472621039226947, + "grad_norm": 0.09218890964984894, + "learning_rate": 4.5936068487321336e-06, + "loss": 8.5394, + "step": 169660 + }, + { + "epoch": 0.8473120427476342, + "grad_norm": 0.09381541609764099, + "learning_rate": 4.592104933790583e-06, + "loss": 8.5285, + "step": 169670 + }, + { + "epoch": 0.8473619815725736, + "grad_norm": 0.09734323620796204, + "learning_rate": 4.590603018849032e-06, + "loss": 8.5248, + "step": 169680 + }, + { + "epoch": 0.847411920397513, + "grad_norm": 0.09610073268413544, + "learning_rate": 4.589101103907482e-06, + "loss": 8.526, + "step": 169690 + }, + { + "epoch": 0.8474618592224525, + "grad_norm": 0.08428271114826202, + "learning_rate": 4.587599188965932e-06, + "loss": 8.5368, + "step": 169700 + }, + { + "epoch": 0.847511798047392, + "grad_norm": 0.09294697642326355, + "learning_rate": 4.586097274024381e-06, + "loss": 8.537, + "step": 169710 + }, + { + "epoch": 0.8475617368723314, + "grad_norm": 0.08724416792392731, + "learning_rate": 4.58459535908283e-06, + "loss": 8.5352, + "step": 169720 + }, + { + "epoch": 0.8476116756972708, + "grad_norm": 0.08911699056625366, + "learning_rate": 4.583093444141281e-06, + "loss": 8.5249, + "step": 169730 + }, + { + "epoch": 0.8476616145222103, + "grad_norm": 0.08514885604381561, + "learning_rate": 4.58159152919973e-06, + "loss": 8.5576, + "step": 169740 + }, + { + "epoch": 0.8477115533471498, + "grad_norm": 0.08839089423418045, + "learning_rate": 4.580089614258179e-06, + "loss": 8.5511, + "step": 169750 + }, + { + "epoch": 0.8477614921720892, + "grad_norm": 0.0928574949502945, + "learning_rate": 4.578587699316629e-06, + "loss": 8.5221, + "step": 169760 + }, + { + "epoch": 0.8478114309970286, + "grad_norm": 0.09458362311124802, + "learning_rate": 4.577085784375078e-06, + "loss": 8.5231, + "step": 169770 + }, + { + "epoch": 0.8478613698219681, + "grad_norm": 0.09411700069904327, + "learning_rate": 4.575583869433528e-06, + "loss": 8.5104, + "step": 169780 + }, + { + "epoch": 0.8479113086469076, + "grad_norm": 0.09059763699769974, + "learning_rate": 4.574081954491977e-06, + "loss": 8.5246, + "step": 169790 + }, + { + "epoch": 0.847961247471847, + "grad_norm": 0.09194177389144897, + "learning_rate": 4.572580039550427e-06, + "loss": 8.5261, + "step": 169800 + }, + { + "epoch": 0.8480111862967864, + "grad_norm": 0.08624966442584991, + "learning_rate": 4.571078124608877e-06, + "loss": 8.517, + "step": 169810 + }, + { + "epoch": 0.8480611251217259, + "grad_norm": 0.09074903279542923, + "learning_rate": 4.569576209667325e-06, + "loss": 8.5229, + "step": 169820 + }, + { + "epoch": 0.8481110639466654, + "grad_norm": 0.09367188811302185, + "learning_rate": 4.568074294725776e-06, + "loss": 8.5213, + "step": 169830 + }, + { + "epoch": 0.8481610027716048, + "grad_norm": 0.09176365286111832, + "learning_rate": 4.566572379784225e-06, + "loss": 8.5176, + "step": 169840 + }, + { + "epoch": 0.8482109415965442, + "grad_norm": 0.09426657855510712, + "learning_rate": 4.565070464842674e-06, + "loss": 8.5222, + "step": 169850 + }, + { + "epoch": 0.8482608804214837, + "grad_norm": 0.09585726261138916, + "learning_rate": 4.5635685499011244e-06, + "loss": 8.5333, + "step": 169860 + }, + { + "epoch": 0.8483108192464232, + "grad_norm": 0.08798620849847794, + "learning_rate": 4.562066634959573e-06, + "loss": 8.5385, + "step": 169870 + }, + { + "epoch": 0.8483607580713626, + "grad_norm": 0.09025800973176956, + "learning_rate": 4.560564720018023e-06, + "loss": 8.5288, + "step": 169880 + }, + { + "epoch": 0.848410696896302, + "grad_norm": 0.09170225262641907, + "learning_rate": 4.559062805076473e-06, + "loss": 8.5304, + "step": 169890 + }, + { + "epoch": 0.8484606357212415, + "grad_norm": 0.08960150182247162, + "learning_rate": 4.557560890134922e-06, + "loss": 8.5373, + "step": 169900 + }, + { + "epoch": 0.848510574546181, + "grad_norm": 0.08712244778871536, + "learning_rate": 4.556058975193372e-06, + "loss": 8.5318, + "step": 169910 + }, + { + "epoch": 0.8485605133711204, + "grad_norm": 0.09316559135913849, + "learning_rate": 4.5545570602518204e-06, + "loss": 8.5409, + "step": 169920 + }, + { + "epoch": 0.8486104521960598, + "grad_norm": 0.0908828005194664, + "learning_rate": 4.553055145310271e-06, + "loss": 8.5518, + "step": 169930 + }, + { + "epoch": 0.8486603910209993, + "grad_norm": 0.09130831062793732, + "learning_rate": 4.551553230368721e-06, + "loss": 8.5231, + "step": 169940 + }, + { + "epoch": 0.8487103298459387, + "grad_norm": 0.0959784984588623, + "learning_rate": 4.550051315427169e-06, + "loss": 8.542, + "step": 169950 + }, + { + "epoch": 0.8487602686708782, + "grad_norm": 0.09128502756357193, + "learning_rate": 4.5485494004856194e-06, + "loss": 8.5307, + "step": 169960 + }, + { + "epoch": 0.8488102074958176, + "grad_norm": 0.10153272747993469, + "learning_rate": 4.547047485544069e-06, + "loss": 8.5301, + "step": 169970 + }, + { + "epoch": 0.8488601463207571, + "grad_norm": 0.09342774748802185, + "learning_rate": 4.545545570602518e-06, + "loss": 8.5356, + "step": 169980 + }, + { + "epoch": 0.8489100851456965, + "grad_norm": 0.0933808907866478, + "learning_rate": 4.544043655660968e-06, + "loss": 8.5405, + "step": 169990 + }, + { + "epoch": 0.848960023970636, + "grad_norm": 0.09019854664802551, + "learning_rate": 4.542541740719417e-06, + "loss": 8.5424, + "step": 170000 + }, + { + "epoch": 0.8490099627955754, + "grad_norm": 0.08802162855863571, + "learning_rate": 4.541039825777867e-06, + "loss": 8.532, + "step": 170010 + }, + { + "epoch": 0.8490599016205149, + "grad_norm": 0.09536787867546082, + "learning_rate": 4.539537910836316e-06, + "loss": 8.5354, + "step": 170020 + }, + { + "epoch": 0.8491098404454543, + "grad_norm": 0.0905720591545105, + "learning_rate": 4.538035995894766e-06, + "loss": 8.5336, + "step": 170030 + }, + { + "epoch": 0.8491597792703938, + "grad_norm": 0.09646139293909073, + "learning_rate": 4.536534080953216e-06, + "loss": 8.5237, + "step": 170040 + }, + { + "epoch": 0.8492097180953332, + "grad_norm": 0.09050284326076508, + "learning_rate": 4.535032166011665e-06, + "loss": 8.5426, + "step": 170050 + }, + { + "epoch": 0.8492596569202727, + "grad_norm": 0.09438873827457428, + "learning_rate": 4.5335302510701145e-06, + "loss": 8.5094, + "step": 170060 + }, + { + "epoch": 0.8493095957452121, + "grad_norm": 0.09040695428848267, + "learning_rate": 4.532028336128564e-06, + "loss": 8.5364, + "step": 170070 + }, + { + "epoch": 0.8493595345701516, + "grad_norm": 0.09196127206087112, + "learning_rate": 4.530526421187013e-06, + "loss": 8.5276, + "step": 170080 + }, + { + "epoch": 0.849409473395091, + "grad_norm": 0.09099120646715164, + "learning_rate": 4.529024506245463e-06, + "loss": 8.5473, + "step": 170090 + }, + { + "epoch": 0.8494594122200304, + "grad_norm": 0.08868347853422165, + "learning_rate": 4.527522591303913e-06, + "loss": 8.5644, + "step": 170100 + }, + { + "epoch": 0.8495093510449699, + "grad_norm": 0.09560364484786987, + "learning_rate": 4.526020676362362e-06, + "loss": 8.5208, + "step": 170110 + }, + { + "epoch": 0.8495592898699094, + "grad_norm": 0.09278056770563126, + "learning_rate": 4.524518761420811e-06, + "loss": 8.5264, + "step": 170120 + }, + { + "epoch": 0.8496092286948488, + "grad_norm": 0.09304804354906082, + "learning_rate": 4.5230168464792615e-06, + "loss": 8.5363, + "step": 170130 + }, + { + "epoch": 0.8496591675197882, + "grad_norm": 0.09038883447647095, + "learning_rate": 4.521514931537711e-06, + "loss": 8.521, + "step": 170140 + }, + { + "epoch": 0.8497091063447277, + "grad_norm": 0.09431783854961395, + "learning_rate": 4.52001301659616e-06, + "loss": 8.5134, + "step": 170150 + }, + { + "epoch": 0.8497590451696672, + "grad_norm": 0.09250378608703613, + "learning_rate": 4.5185111016546095e-06, + "loss": 8.5085, + "step": 170160 + }, + { + "epoch": 0.8498089839946066, + "grad_norm": 0.09243625402450562, + "learning_rate": 4.517009186713059e-06, + "loss": 8.5148, + "step": 170170 + }, + { + "epoch": 0.849858922819546, + "grad_norm": 0.09091030061244965, + "learning_rate": 4.515507271771509e-06, + "loss": 8.5367, + "step": 170180 + }, + { + "epoch": 0.8499088616444855, + "grad_norm": 0.09004100412130356, + "learning_rate": 4.514005356829958e-06, + "loss": 8.5418, + "step": 170190 + }, + { + "epoch": 0.849958800469425, + "grad_norm": 0.08924288302659988, + "learning_rate": 4.512503441888408e-06, + "loss": 8.5241, + "step": 170200 + }, + { + "epoch": 0.8500087392943644, + "grad_norm": 0.0939495712518692, + "learning_rate": 4.511001526946858e-06, + "loss": 8.5362, + "step": 170210 + }, + { + "epoch": 0.8500586781193038, + "grad_norm": 0.09469542652368546, + "learning_rate": 4.509499612005306e-06, + "loss": 8.5327, + "step": 170220 + }, + { + "epoch": 0.8501086169442433, + "grad_norm": 0.08831597119569778, + "learning_rate": 4.5079976970637565e-06, + "loss": 8.5328, + "step": 170230 + }, + { + "epoch": 0.8501585557691828, + "grad_norm": 0.0914112776517868, + "learning_rate": 4.506495782122206e-06, + "loss": 8.5414, + "step": 170240 + }, + { + "epoch": 0.8502084945941222, + "grad_norm": 0.0885150134563446, + "learning_rate": 4.504993867180655e-06, + "loss": 8.5305, + "step": 170250 + }, + { + "epoch": 0.8502584334190616, + "grad_norm": 0.08355226367712021, + "learning_rate": 4.503491952239105e-06, + "loss": 8.5322, + "step": 170260 + }, + { + "epoch": 0.8503083722440011, + "grad_norm": 0.08838168531656265, + "learning_rate": 4.501990037297554e-06, + "loss": 8.5281, + "step": 170270 + }, + { + "epoch": 0.8503583110689406, + "grad_norm": 0.0902080163359642, + "learning_rate": 4.500488122356004e-06, + "loss": 8.5394, + "step": 170280 + }, + { + "epoch": 0.85040824989388, + "grad_norm": 0.09007387608289719, + "learning_rate": 4.498986207414454e-06, + "loss": 8.5112, + "step": 170290 + }, + { + "epoch": 0.8504581887188194, + "grad_norm": 0.08401839435100555, + "learning_rate": 4.497484292472903e-06, + "loss": 8.533, + "step": 170300 + }, + { + "epoch": 0.8505081275437589, + "grad_norm": 0.0899522602558136, + "learning_rate": 4.495982377531353e-06, + "loss": 8.5388, + "step": 170310 + }, + { + "epoch": 0.8505580663686984, + "grad_norm": 0.09382461756467819, + "learning_rate": 4.494480462589801e-06, + "loss": 8.5133, + "step": 170320 + }, + { + "epoch": 0.8506080051936378, + "grad_norm": 0.09093036502599716, + "learning_rate": 4.4929785476482515e-06, + "loss": 8.5497, + "step": 170330 + }, + { + "epoch": 0.8506579440185772, + "grad_norm": 0.08482268452644348, + "learning_rate": 4.491476632706702e-06, + "loss": 8.5193, + "step": 170340 + }, + { + "epoch": 0.8507078828435167, + "grad_norm": 0.09074404090642929, + "learning_rate": 4.48997471776515e-06, + "loss": 8.5359, + "step": 170350 + }, + { + "epoch": 0.8507578216684561, + "grad_norm": 0.09321633726358414, + "learning_rate": 4.4884728028236e-06, + "loss": 8.5268, + "step": 170360 + }, + { + "epoch": 0.8508077604933956, + "grad_norm": 0.09202466160058975, + "learning_rate": 4.48697088788205e-06, + "loss": 8.5414, + "step": 170370 + }, + { + "epoch": 0.850857699318335, + "grad_norm": 0.08913063257932663, + "learning_rate": 4.485468972940499e-06, + "loss": 8.5411, + "step": 170380 + }, + { + "epoch": 0.8509076381432745, + "grad_norm": 0.09542038291692734, + "learning_rate": 4.483967057998949e-06, + "loss": 8.5335, + "step": 170390 + }, + { + "epoch": 0.850957576968214, + "grad_norm": 0.09215617924928665, + "learning_rate": 4.482465143057398e-06, + "loss": 8.5483, + "step": 170400 + }, + { + "epoch": 0.8510075157931534, + "grad_norm": 0.09391964226961136, + "learning_rate": 4.480963228115848e-06, + "loss": 8.5355, + "step": 170410 + }, + { + "epoch": 0.8510574546180928, + "grad_norm": 0.09085561335086823, + "learning_rate": 4.479461313174298e-06, + "loss": 8.5412, + "step": 170420 + }, + { + "epoch": 0.8511073934430323, + "grad_norm": 0.08789962530136108, + "learning_rate": 4.4779593982327465e-06, + "loss": 8.5273, + "step": 170430 + }, + { + "epoch": 0.8511573322679717, + "grad_norm": 0.08881513774394989, + "learning_rate": 4.476457483291197e-06, + "loss": 8.5312, + "step": 170440 + }, + { + "epoch": 0.8512072710929112, + "grad_norm": 0.0915045440196991, + "learning_rate": 4.474955568349646e-06, + "loss": 8.5231, + "step": 170450 + }, + { + "epoch": 0.8512572099178506, + "grad_norm": 0.09604188799858093, + "learning_rate": 4.473453653408095e-06, + "loss": 8.5366, + "step": 170460 + }, + { + "epoch": 0.8513071487427901, + "grad_norm": 0.09160809963941574, + "learning_rate": 4.4719517384665455e-06, + "loss": 8.5187, + "step": 170470 + }, + { + "epoch": 0.8513570875677295, + "grad_norm": 0.08846515417098999, + "learning_rate": 4.470449823524994e-06, + "loss": 8.5163, + "step": 170480 + }, + { + "epoch": 0.851407026392669, + "grad_norm": 0.09089051187038422, + "learning_rate": 4.468947908583444e-06, + "loss": 8.5191, + "step": 170490 + }, + { + "epoch": 0.8514569652176084, + "grad_norm": 0.09371612966060638, + "learning_rate": 4.4674459936418935e-06, + "loss": 8.5244, + "step": 170500 + }, + { + "epoch": 0.8515069040425479, + "grad_norm": 0.09188021719455719, + "learning_rate": 4.465944078700343e-06, + "loss": 8.5292, + "step": 170510 + }, + { + "epoch": 0.8515568428674873, + "grad_norm": 0.08881582319736481, + "learning_rate": 4.464442163758793e-06, + "loss": 8.5183, + "step": 170520 + }, + { + "epoch": 0.8516067816924268, + "grad_norm": 0.09312296658754349, + "learning_rate": 4.462940248817242e-06, + "loss": 8.5437, + "step": 170530 + }, + { + "epoch": 0.8516567205173662, + "grad_norm": 0.08998645842075348, + "learning_rate": 4.461438333875692e-06, + "loss": 8.5336, + "step": 170540 + }, + { + "epoch": 0.8517066593423057, + "grad_norm": 0.09388935565948486, + "learning_rate": 4.459936418934141e-06, + "loss": 8.5427, + "step": 170550 + }, + { + "epoch": 0.8517565981672451, + "grad_norm": 0.08806626498699188, + "learning_rate": 4.45843450399259e-06, + "loss": 8.5317, + "step": 170560 + }, + { + "epoch": 0.8518065369921846, + "grad_norm": 0.09021688997745514, + "learning_rate": 4.4569325890510405e-06, + "loss": 8.5494, + "step": 170570 + }, + { + "epoch": 0.851856475817124, + "grad_norm": 0.08856786042451859, + "learning_rate": 4.45543067410949e-06, + "loss": 8.5258, + "step": 170580 + }, + { + "epoch": 0.8519064146420635, + "grad_norm": 0.09338122606277466, + "learning_rate": 4.453928759167939e-06, + "loss": 8.5315, + "step": 170590 + }, + { + "epoch": 0.8519563534670029, + "grad_norm": 0.09389621019363403, + "learning_rate": 4.4524268442263885e-06, + "loss": 8.5314, + "step": 170600 + }, + { + "epoch": 0.8520062922919424, + "grad_norm": 0.08809863030910492, + "learning_rate": 4.450924929284839e-06, + "loss": 8.5296, + "step": 170610 + }, + { + "epoch": 0.8520562311168818, + "grad_norm": 0.08649566769599915, + "learning_rate": 4.449423014343288e-06, + "loss": 8.5521, + "step": 170620 + }, + { + "epoch": 0.8521061699418213, + "grad_norm": 0.09101077169179916, + "learning_rate": 4.447921099401737e-06, + "loss": 8.5458, + "step": 170630 + }, + { + "epoch": 0.8521561087667607, + "grad_norm": 0.0848645567893982, + "learning_rate": 4.446419184460187e-06, + "loss": 8.5266, + "step": 170640 + }, + { + "epoch": 0.8522060475917002, + "grad_norm": 0.0918324664235115, + "learning_rate": 4.444917269518636e-06, + "loss": 8.517, + "step": 170650 + }, + { + "epoch": 0.8522559864166396, + "grad_norm": 0.08957000821828842, + "learning_rate": 4.443415354577086e-06, + "loss": 8.5481, + "step": 170660 + }, + { + "epoch": 0.8523059252415791, + "grad_norm": 0.09019914269447327, + "learning_rate": 4.4419134396355355e-06, + "loss": 8.5189, + "step": 170670 + }, + { + "epoch": 0.8523558640665185, + "grad_norm": 0.09360361099243164, + "learning_rate": 4.440411524693985e-06, + "loss": 8.5331, + "step": 170680 + }, + { + "epoch": 0.852405802891458, + "grad_norm": 0.09016086161136627, + "learning_rate": 4.438909609752435e-06, + "loss": 8.5151, + "step": 170690 + }, + { + "epoch": 0.8524557417163974, + "grad_norm": 0.09195458889007568, + "learning_rate": 4.4374076948108835e-06, + "loss": 8.5457, + "step": 170700 + }, + { + "epoch": 0.8525056805413369, + "grad_norm": 0.0889841690659523, + "learning_rate": 4.435905779869334e-06, + "loss": 8.5435, + "step": 170710 + }, + { + "epoch": 0.8525556193662763, + "grad_norm": 0.09094245731830597, + "learning_rate": 4.434403864927783e-06, + "loss": 8.5134, + "step": 170720 + }, + { + "epoch": 0.8526055581912158, + "grad_norm": 0.09037609398365021, + "learning_rate": 4.432901949986232e-06, + "loss": 8.5313, + "step": 170730 + }, + { + "epoch": 0.8526554970161552, + "grad_norm": 0.0911756157875061, + "learning_rate": 4.4314000350446825e-06, + "loss": 8.5236, + "step": 170740 + }, + { + "epoch": 0.8527054358410947, + "grad_norm": 0.09256346523761749, + "learning_rate": 4.429898120103131e-06, + "loss": 8.5366, + "step": 170750 + }, + { + "epoch": 0.8527553746660341, + "grad_norm": 0.09345842152833939, + "learning_rate": 4.428396205161581e-06, + "loss": 8.5308, + "step": 170760 + }, + { + "epoch": 0.8528053134909735, + "grad_norm": 0.10060130059719086, + "learning_rate": 4.426894290220031e-06, + "loss": 8.5234, + "step": 170770 + }, + { + "epoch": 0.852855252315913, + "grad_norm": 0.09197411686182022, + "learning_rate": 4.42539237527848e-06, + "loss": 8.558, + "step": 170780 + }, + { + "epoch": 0.8529051911408525, + "grad_norm": 0.09203599393367767, + "learning_rate": 4.42389046033693e-06, + "loss": 8.5315, + "step": 170790 + }, + { + "epoch": 0.8529551299657919, + "grad_norm": 0.09319550544023514, + "learning_rate": 4.4223885453953785e-06, + "loss": 8.5225, + "step": 170800 + }, + { + "epoch": 0.8530050687907313, + "grad_norm": 0.08547905087471008, + "learning_rate": 4.420886630453829e-06, + "loss": 8.5404, + "step": 170810 + }, + { + "epoch": 0.8530550076156708, + "grad_norm": 0.08869150280952454, + "learning_rate": 4.419384715512279e-06, + "loss": 8.5281, + "step": 170820 + }, + { + "epoch": 0.8531049464406103, + "grad_norm": 0.08847400546073914, + "learning_rate": 4.417882800570727e-06, + "loss": 8.544, + "step": 170830 + }, + { + "epoch": 0.8531548852655497, + "grad_norm": 0.09268223494291306, + "learning_rate": 4.4163808856291775e-06, + "loss": 8.5184, + "step": 170840 + }, + { + "epoch": 0.8532048240904891, + "grad_norm": 0.09480337798595428, + "learning_rate": 4.414878970687627e-06, + "loss": 8.5208, + "step": 170850 + }, + { + "epoch": 0.8532547629154286, + "grad_norm": 0.09097526222467422, + "learning_rate": 4.413377055746076e-06, + "loss": 8.5258, + "step": 170860 + }, + { + "epoch": 0.8533047017403681, + "grad_norm": 0.0875224694609642, + "learning_rate": 4.411875140804526e-06, + "loss": 8.5318, + "step": 170870 + }, + { + "epoch": 0.8533546405653075, + "grad_norm": 0.08786869794130325, + "learning_rate": 4.410373225862975e-06, + "loss": 8.5334, + "step": 170880 + }, + { + "epoch": 0.8534045793902469, + "grad_norm": 0.08923707902431488, + "learning_rate": 4.408871310921425e-06, + "loss": 8.535, + "step": 170890 + }, + { + "epoch": 0.8534545182151864, + "grad_norm": 0.0923088863492012, + "learning_rate": 4.407369395979874e-06, + "loss": 8.5256, + "step": 170900 + }, + { + "epoch": 0.8535044570401259, + "grad_norm": 0.0889919325709343, + "learning_rate": 4.405867481038324e-06, + "loss": 8.5382, + "step": 170910 + }, + { + "epoch": 0.8535543958650653, + "grad_norm": 0.09233865886926651, + "learning_rate": 4.404365566096774e-06, + "loss": 8.5243, + "step": 170920 + }, + { + "epoch": 0.8536043346900047, + "grad_norm": 0.09140230715274811, + "learning_rate": 4.402863651155223e-06, + "loss": 8.5295, + "step": 170930 + }, + { + "epoch": 0.8536542735149442, + "grad_norm": 0.08698879182338715, + "learning_rate": 4.4013617362136725e-06, + "loss": 8.5412, + "step": 170940 + }, + { + "epoch": 0.8537042123398837, + "grad_norm": 0.09249941259622574, + "learning_rate": 4.399859821272122e-06, + "loss": 8.533, + "step": 170950 + }, + { + "epoch": 0.8537541511648231, + "grad_norm": 0.0933549553155899, + "learning_rate": 4.398357906330571e-06, + "loss": 8.5375, + "step": 170960 + }, + { + "epoch": 0.8538040899897625, + "grad_norm": 0.09042268991470337, + "learning_rate": 4.396855991389021e-06, + "loss": 8.5378, + "step": 170970 + }, + { + "epoch": 0.853854028814702, + "grad_norm": 0.09002401679754257, + "learning_rate": 4.395354076447471e-06, + "loss": 8.5339, + "step": 170980 + }, + { + "epoch": 0.8539039676396415, + "grad_norm": 0.09069207310676575, + "learning_rate": 4.39385216150592e-06, + "loss": 8.5289, + "step": 170990 + }, + { + "epoch": 0.8539539064645809, + "grad_norm": 0.09139145910739899, + "learning_rate": 4.392350246564369e-06, + "loss": 8.5352, + "step": 171000 + }, + { + "epoch": 0.8540038452895203, + "grad_norm": 0.0929713174700737, + "learning_rate": 4.3908483316228196e-06, + "loss": 8.5344, + "step": 171010 + }, + { + "epoch": 0.8540537841144598, + "grad_norm": 0.09418080747127533, + "learning_rate": 4.389346416681269e-06, + "loss": 8.5188, + "step": 171020 + }, + { + "epoch": 0.8541037229393993, + "grad_norm": 0.08637245744466782, + "learning_rate": 4.387844501739718e-06, + "loss": 8.5291, + "step": 171030 + }, + { + "epoch": 0.8541536617643387, + "grad_norm": 0.08483462035655975, + "learning_rate": 4.3863425867981676e-06, + "loss": 8.5162, + "step": 171040 + }, + { + "epoch": 0.8542036005892781, + "grad_norm": 0.08939448744058609, + "learning_rate": 4.384840671856617e-06, + "loss": 8.5235, + "step": 171050 + }, + { + "epoch": 0.8542535394142176, + "grad_norm": 0.09517613798379898, + "learning_rate": 4.383338756915067e-06, + "loss": 8.5371, + "step": 171060 + }, + { + "epoch": 0.8543034782391571, + "grad_norm": 0.09513095766305923, + "learning_rate": 4.381836841973516e-06, + "loss": 8.5406, + "step": 171070 + }, + { + "epoch": 0.8543534170640965, + "grad_norm": 0.08950556814670563, + "learning_rate": 4.380334927031966e-06, + "loss": 8.5353, + "step": 171080 + }, + { + "epoch": 0.8544033558890359, + "grad_norm": 0.09560289233922958, + "learning_rate": 4.378833012090416e-06, + "loss": 8.5242, + "step": 171090 + }, + { + "epoch": 0.8544532947139754, + "grad_norm": 0.0961078554391861, + "learning_rate": 4.377331097148864e-06, + "loss": 8.5337, + "step": 171100 + }, + { + "epoch": 0.8545032335389148, + "grad_norm": 0.0894903764128685, + "learning_rate": 4.3758291822073146e-06, + "loss": 8.5246, + "step": 171110 + }, + { + "epoch": 0.8545531723638543, + "grad_norm": 0.08703772723674774, + "learning_rate": 4.374327267265764e-06, + "loss": 8.5198, + "step": 171120 + }, + { + "epoch": 0.8546031111887937, + "grad_norm": 0.08781271427869797, + "learning_rate": 4.372825352324213e-06, + "loss": 8.5344, + "step": 171130 + }, + { + "epoch": 0.8546530500137332, + "grad_norm": 0.09302118420600891, + "learning_rate": 4.371323437382663e-06, + "loss": 8.5205, + "step": 171140 + }, + { + "epoch": 0.8547029888386726, + "grad_norm": 0.09112757444381714, + "learning_rate": 4.369821522441112e-06, + "loss": 8.5174, + "step": 171150 + }, + { + "epoch": 0.8547529276636121, + "grad_norm": 0.0904446691274643, + "learning_rate": 4.368319607499562e-06, + "loss": 8.5156, + "step": 171160 + }, + { + "epoch": 0.8548028664885515, + "grad_norm": 0.09799622744321823, + "learning_rate": 4.366817692558012e-06, + "loss": 8.538, + "step": 171170 + }, + { + "epoch": 0.854852805313491, + "grad_norm": 0.08375813066959381, + "learning_rate": 4.365315777616461e-06, + "loss": 8.5342, + "step": 171180 + }, + { + "epoch": 0.8549027441384304, + "grad_norm": 0.08957131952047348, + "learning_rate": 4.363813862674911e-06, + "loss": 8.5287, + "step": 171190 + }, + { + "epoch": 0.8549526829633699, + "grad_norm": 0.0968262180685997, + "learning_rate": 4.362311947733359e-06, + "loss": 8.5356, + "step": 171200 + }, + { + "epoch": 0.8550026217883093, + "grad_norm": 0.09194337576627731, + "learning_rate": 4.3608100327918096e-06, + "loss": 8.5205, + "step": 171210 + }, + { + "epoch": 0.8550525606132487, + "grad_norm": 0.09763994812965393, + "learning_rate": 4.35930811785026e-06, + "loss": 8.5341, + "step": 171220 + }, + { + "epoch": 0.8551024994381882, + "grad_norm": 0.09552284330129623, + "learning_rate": 4.357806202908708e-06, + "loss": 8.5164, + "step": 171230 + }, + { + "epoch": 0.8551524382631277, + "grad_norm": 0.09413187205791473, + "learning_rate": 4.356304287967158e-06, + "loss": 8.5397, + "step": 171240 + }, + { + "epoch": 0.8552023770880671, + "grad_norm": 0.09369870275259018, + "learning_rate": 4.354802373025608e-06, + "loss": 8.5478, + "step": 171250 + }, + { + "epoch": 0.8552523159130065, + "grad_norm": 0.08899905532598495, + "learning_rate": 4.353300458084057e-06, + "loss": 8.5269, + "step": 171260 + }, + { + "epoch": 0.855302254737946, + "grad_norm": 0.09390175342559814, + "learning_rate": 4.351798543142507e-06, + "loss": 8.5124, + "step": 171270 + }, + { + "epoch": 0.8553521935628855, + "grad_norm": 0.09079564362764359, + "learning_rate": 4.350296628200956e-06, + "loss": 8.5169, + "step": 171280 + }, + { + "epoch": 0.8554021323878249, + "grad_norm": 0.09164270758628845, + "learning_rate": 4.348794713259406e-06, + "loss": 8.5276, + "step": 171290 + }, + { + "epoch": 0.8554520712127643, + "grad_norm": 0.0873217061161995, + "learning_rate": 4.347292798317855e-06, + "loss": 8.5337, + "step": 171300 + }, + { + "epoch": 0.8555020100377038, + "grad_norm": 0.09027426689863205, + "learning_rate": 4.345790883376305e-06, + "loss": 8.5357, + "step": 171310 + }, + { + "epoch": 0.8555519488626433, + "grad_norm": 0.09546752274036407, + "learning_rate": 4.344288968434755e-06, + "loss": 8.5264, + "step": 171320 + }, + { + "epoch": 0.8556018876875827, + "grad_norm": 0.0898887887597084, + "learning_rate": 4.342787053493204e-06, + "loss": 8.5286, + "step": 171330 + }, + { + "epoch": 0.8556518265125221, + "grad_norm": 0.0918826311826706, + "learning_rate": 4.3412851385516534e-06, + "loss": 8.5244, + "step": 171340 + }, + { + "epoch": 0.8557017653374616, + "grad_norm": 0.09295768290758133, + "learning_rate": 4.339783223610103e-06, + "loss": 8.5359, + "step": 171350 + }, + { + "epoch": 0.8557517041624011, + "grad_norm": 0.08988602459430695, + "learning_rate": 4.338281308668552e-06, + "loss": 8.5364, + "step": 171360 + }, + { + "epoch": 0.8558016429873405, + "grad_norm": 0.0889463871717453, + "learning_rate": 4.336779393727002e-06, + "loss": 8.5293, + "step": 171370 + }, + { + "epoch": 0.8558515818122799, + "grad_norm": 0.09765569865703583, + "learning_rate": 4.335277478785452e-06, + "loss": 8.5084, + "step": 171380 + }, + { + "epoch": 0.8559015206372194, + "grad_norm": 0.08945247530937195, + "learning_rate": 4.333775563843901e-06, + "loss": 8.5243, + "step": 171390 + }, + { + "epoch": 0.8559514594621589, + "grad_norm": 0.08958765864372253, + "learning_rate": 4.33227364890235e-06, + "loss": 8.5287, + "step": 171400 + }, + { + "epoch": 0.8560013982870983, + "grad_norm": 0.09407828748226166, + "learning_rate": 4.3307717339608004e-06, + "loss": 8.521, + "step": 171410 + }, + { + "epoch": 0.8560513371120377, + "grad_norm": 0.09075424075126648, + "learning_rate": 4.32926981901925e-06, + "loss": 8.5308, + "step": 171420 + }, + { + "epoch": 0.8561012759369772, + "grad_norm": 0.09202433377504349, + "learning_rate": 4.327767904077699e-06, + "loss": 8.5345, + "step": 171430 + }, + { + "epoch": 0.8561512147619167, + "grad_norm": 0.09231635928153992, + "learning_rate": 4.3262659891361484e-06, + "loss": 8.5238, + "step": 171440 + }, + { + "epoch": 0.8562011535868561, + "grad_norm": 0.09123031049966812, + "learning_rate": 4.324764074194598e-06, + "loss": 8.5197, + "step": 171450 + }, + { + "epoch": 0.8562510924117955, + "grad_norm": 0.08414209634065628, + "learning_rate": 4.323262159253048e-06, + "loss": 8.5187, + "step": 171460 + }, + { + "epoch": 0.856301031236735, + "grad_norm": 0.09748626500368118, + "learning_rate": 4.321760244311497e-06, + "loss": 8.5257, + "step": 171470 + }, + { + "epoch": 0.8563509700616745, + "grad_norm": 0.08869002014398575, + "learning_rate": 4.320258329369947e-06, + "loss": 8.5425, + "step": 171480 + }, + { + "epoch": 0.8564009088866139, + "grad_norm": 0.09146328270435333, + "learning_rate": 4.318756414428397e-06, + "loss": 8.5481, + "step": 171490 + }, + { + "epoch": 0.8564508477115533, + "grad_norm": 0.0903269499540329, + "learning_rate": 4.317254499486846e-06, + "loss": 8.5161, + "step": 171500 + }, + { + "epoch": 0.8565007865364928, + "grad_norm": 0.09317917376756668, + "learning_rate": 4.3157525845452954e-06, + "loss": 8.5282, + "step": 171510 + }, + { + "epoch": 0.8565507253614323, + "grad_norm": 0.09211008250713348, + "learning_rate": 4.314250669603745e-06, + "loss": 8.5379, + "step": 171520 + }, + { + "epoch": 0.8566006641863717, + "grad_norm": 0.08755582571029663, + "learning_rate": 4.312748754662194e-06, + "loss": 8.5338, + "step": 171530 + }, + { + "epoch": 0.8566506030113111, + "grad_norm": 0.09266114234924316, + "learning_rate": 4.311246839720644e-06, + "loss": 8.5368, + "step": 171540 + }, + { + "epoch": 0.8567005418362506, + "grad_norm": 0.09207313507795334, + "learning_rate": 4.309744924779094e-06, + "loss": 8.5345, + "step": 171550 + }, + { + "epoch": 0.8567504806611901, + "grad_norm": 0.09798050671815872, + "learning_rate": 4.308243009837543e-06, + "loss": 8.5375, + "step": 171560 + }, + { + "epoch": 0.8568004194861295, + "grad_norm": 0.08942960947751999, + "learning_rate": 4.306741094895993e-06, + "loss": 8.542, + "step": 171570 + }, + { + "epoch": 0.8568503583110689, + "grad_norm": 0.0846295952796936, + "learning_rate": 4.305239179954442e-06, + "loss": 8.5393, + "step": 171580 + }, + { + "epoch": 0.8569002971360083, + "grad_norm": 0.09053222835063934, + "learning_rate": 4.303737265012892e-06, + "loss": 8.5335, + "step": 171590 + }, + { + "epoch": 0.8569502359609479, + "grad_norm": 0.09028905630111694, + "learning_rate": 4.302235350071341e-06, + "loss": 8.5206, + "step": 171600 + }, + { + "epoch": 0.8570001747858873, + "grad_norm": 0.09010299295186996, + "learning_rate": 4.3007334351297905e-06, + "loss": 8.5239, + "step": 171610 + }, + { + "epoch": 0.8570501136108267, + "grad_norm": 0.08910666406154633, + "learning_rate": 4.299231520188241e-06, + "loss": 8.5292, + "step": 171620 + }, + { + "epoch": 0.8571000524357661, + "grad_norm": 0.0874292179942131, + "learning_rate": 4.297729605246689e-06, + "loss": 8.5219, + "step": 171630 + }, + { + "epoch": 0.8571499912607057, + "grad_norm": 0.08846402913331985, + "learning_rate": 4.296227690305139e-06, + "loss": 8.5341, + "step": 171640 + }, + { + "epoch": 0.8571999300856451, + "grad_norm": 0.09597437083721161, + "learning_rate": 4.2947257753635895e-06, + "loss": 8.52, + "step": 171650 + }, + { + "epoch": 0.8572498689105845, + "grad_norm": 0.08995895087718964, + "learning_rate": 4.293223860422038e-06, + "loss": 8.5257, + "step": 171660 + }, + { + "epoch": 0.8572998077355239, + "grad_norm": 0.08784112334251404, + "learning_rate": 4.291721945480488e-06, + "loss": 8.5186, + "step": 171670 + }, + { + "epoch": 0.8573497465604635, + "grad_norm": 0.09828951209783554, + "learning_rate": 4.290220030538937e-06, + "loss": 8.5305, + "step": 171680 + }, + { + "epoch": 0.8573996853854029, + "grad_norm": 0.09064572304487228, + "learning_rate": 4.288718115597387e-06, + "loss": 8.5354, + "step": 171690 + }, + { + "epoch": 0.8574496242103423, + "grad_norm": 0.09304898232221603, + "learning_rate": 4.287216200655837e-06, + "loss": 8.5223, + "step": 171700 + }, + { + "epoch": 0.8574995630352817, + "grad_norm": 0.09134497493505478, + "learning_rate": 4.2857142857142855e-06, + "loss": 8.5243, + "step": 171710 + }, + { + "epoch": 0.8575495018602213, + "grad_norm": 0.0942414253950119, + "learning_rate": 4.284212370772736e-06, + "loss": 8.5371, + "step": 171720 + }, + { + "epoch": 0.8575994406851607, + "grad_norm": 0.10213427990674973, + "learning_rate": 4.282710455831185e-06, + "loss": 8.5147, + "step": 171730 + }, + { + "epoch": 0.8576493795101001, + "grad_norm": 0.09323253482580185, + "learning_rate": 4.281208540889634e-06, + "loss": 8.5178, + "step": 171740 + }, + { + "epoch": 0.8576993183350395, + "grad_norm": 0.09151145815849304, + "learning_rate": 4.2797066259480845e-06, + "loss": 8.5257, + "step": 171750 + }, + { + "epoch": 0.8577492571599791, + "grad_norm": 0.0880388393998146, + "learning_rate": 4.278204711006533e-06, + "loss": 8.5441, + "step": 171760 + }, + { + "epoch": 0.8577991959849185, + "grad_norm": 0.0961805209517479, + "learning_rate": 4.276702796064983e-06, + "loss": 8.5351, + "step": 171770 + }, + { + "epoch": 0.8578491348098579, + "grad_norm": 0.09380482137203217, + "learning_rate": 4.2752008811234325e-06, + "loss": 8.5219, + "step": 171780 + }, + { + "epoch": 0.8578990736347973, + "grad_norm": 0.08797604590654373, + "learning_rate": 4.273698966181882e-06, + "loss": 8.524, + "step": 171790 + }, + { + "epoch": 0.8579490124597369, + "grad_norm": 0.08902735263109207, + "learning_rate": 4.272197051240332e-06, + "loss": 8.5404, + "step": 171800 + }, + { + "epoch": 0.8579989512846763, + "grad_norm": 0.09067311882972717, + "learning_rate": 4.270695136298781e-06, + "loss": 8.5317, + "step": 171810 + }, + { + "epoch": 0.8580488901096157, + "grad_norm": 0.09175264835357666, + "learning_rate": 4.269193221357231e-06, + "loss": 8.5181, + "step": 171820 + }, + { + "epoch": 0.8580988289345551, + "grad_norm": 0.0938398614525795, + "learning_rate": 4.26769130641568e-06, + "loss": 8.5388, + "step": 171830 + }, + { + "epoch": 0.8581487677594947, + "grad_norm": 0.09269008040428162, + "learning_rate": 4.266189391474129e-06, + "loss": 8.5437, + "step": 171840 + }, + { + "epoch": 0.8581987065844341, + "grad_norm": 0.09139380604028702, + "learning_rate": 4.2646874765325795e-06, + "loss": 8.5397, + "step": 171850 + }, + { + "epoch": 0.8582486454093735, + "grad_norm": 0.09328168630599976, + "learning_rate": 4.263185561591029e-06, + "loss": 8.5047, + "step": 171860 + }, + { + "epoch": 0.8582985842343129, + "grad_norm": 0.0878739282488823, + "learning_rate": 4.261683646649478e-06, + "loss": 8.5314, + "step": 171870 + }, + { + "epoch": 0.8583485230592525, + "grad_norm": 0.0920386090874672, + "learning_rate": 4.2601817317079275e-06, + "loss": 8.5369, + "step": 171880 + }, + { + "epoch": 0.8583984618841919, + "grad_norm": 0.09493181109428406, + "learning_rate": 4.258679816766378e-06, + "loss": 8.5218, + "step": 171890 + }, + { + "epoch": 0.8584484007091313, + "grad_norm": 0.09276656806468964, + "learning_rate": 4.257177901824827e-06, + "loss": 8.5195, + "step": 171900 + }, + { + "epoch": 0.8584983395340707, + "grad_norm": 0.09002747386693954, + "learning_rate": 4.255675986883276e-06, + "loss": 8.5195, + "step": 171910 + }, + { + "epoch": 0.8585482783590103, + "grad_norm": 0.09539522230625153, + "learning_rate": 4.254174071941726e-06, + "loss": 8.5185, + "step": 171920 + }, + { + "epoch": 0.8585982171839497, + "grad_norm": 0.09918307512998581, + "learning_rate": 4.252672157000175e-06, + "loss": 8.5317, + "step": 171930 + }, + { + "epoch": 0.8586481560088891, + "grad_norm": 0.09323044866323471, + "learning_rate": 4.251170242058625e-06, + "loss": 8.5361, + "step": 171940 + }, + { + "epoch": 0.8586980948338285, + "grad_norm": 0.09227460622787476, + "learning_rate": 4.2496683271170745e-06, + "loss": 8.5207, + "step": 171950 + }, + { + "epoch": 0.8587480336587681, + "grad_norm": 0.09323973953723907, + "learning_rate": 4.248166412175524e-06, + "loss": 8.5152, + "step": 171960 + }, + { + "epoch": 0.8587979724837075, + "grad_norm": 0.0909019485116005, + "learning_rate": 4.246664497233974e-06, + "loss": 8.5242, + "step": 171970 + }, + { + "epoch": 0.8588479113086469, + "grad_norm": 0.08941137790679932, + "learning_rate": 4.2451625822924225e-06, + "loss": 8.5398, + "step": 171980 + }, + { + "epoch": 0.8588978501335863, + "grad_norm": 0.08684715628623962, + "learning_rate": 4.243660667350873e-06, + "loss": 8.5233, + "step": 171990 + }, + { + "epoch": 0.8589477889585259, + "grad_norm": 0.0915994718670845, + "learning_rate": 4.242158752409322e-06, + "loss": 8.5258, + "step": 172000 + }, + { + "epoch": 0.8589977277834653, + "grad_norm": 0.09104558825492859, + "learning_rate": 4.240656837467771e-06, + "loss": 8.534, + "step": 172010 + }, + { + "epoch": 0.8590476666084047, + "grad_norm": 0.09221166372299194, + "learning_rate": 4.2391549225262215e-06, + "loss": 8.5333, + "step": 172020 + }, + { + "epoch": 0.8590976054333441, + "grad_norm": 0.09822104871273041, + "learning_rate": 4.23765300758467e-06, + "loss": 8.52, + "step": 172030 + }, + { + "epoch": 0.8591475442582837, + "grad_norm": 0.08423840254545212, + "learning_rate": 4.23615109264312e-06, + "loss": 8.5458, + "step": 172040 + }, + { + "epoch": 0.8591974830832231, + "grad_norm": 0.0882524624466896, + "learning_rate": 4.23464917770157e-06, + "loss": 8.5479, + "step": 172050 + }, + { + "epoch": 0.8592474219081625, + "grad_norm": 0.09635046869516373, + "learning_rate": 4.233147262760019e-06, + "loss": 8.5266, + "step": 172060 + }, + { + "epoch": 0.8592973607331019, + "grad_norm": 0.09102864563465118, + "learning_rate": 4.231645347818469e-06, + "loss": 8.5349, + "step": 172070 + }, + { + "epoch": 0.8593472995580413, + "grad_norm": 0.09232207387685776, + "learning_rate": 4.2301434328769175e-06, + "loss": 8.5276, + "step": 172080 + }, + { + "epoch": 0.8593972383829809, + "grad_norm": 0.09616389870643616, + "learning_rate": 4.228641517935368e-06, + "loss": 8.5269, + "step": 172090 + }, + { + "epoch": 0.8594471772079203, + "grad_norm": 0.09412415325641632, + "learning_rate": 4.227139602993818e-06, + "loss": 8.5328, + "step": 172100 + }, + { + "epoch": 0.8594971160328597, + "grad_norm": 0.09291058033704758, + "learning_rate": 4.225637688052266e-06, + "loss": 8.5311, + "step": 172110 + }, + { + "epoch": 0.8595470548577991, + "grad_norm": 0.09364159405231476, + "learning_rate": 4.2241357731107165e-06, + "loss": 8.5437, + "step": 172120 + }, + { + "epoch": 0.8595969936827387, + "grad_norm": 0.09280949085950851, + "learning_rate": 4.222633858169166e-06, + "loss": 8.5254, + "step": 172130 + }, + { + "epoch": 0.8596469325076781, + "grad_norm": 0.09044373035430908, + "learning_rate": 4.221131943227615e-06, + "loss": 8.5283, + "step": 172140 + }, + { + "epoch": 0.8596968713326175, + "grad_norm": 0.0903530940413475, + "learning_rate": 4.219630028286065e-06, + "loss": 8.5234, + "step": 172150 + }, + { + "epoch": 0.8597468101575569, + "grad_norm": 0.09625843167304993, + "learning_rate": 4.218128113344514e-06, + "loss": 8.5441, + "step": 172160 + }, + { + "epoch": 0.8597967489824965, + "grad_norm": 0.08800741285085678, + "learning_rate": 4.216626198402964e-06, + "loss": 8.5485, + "step": 172170 + }, + { + "epoch": 0.8598466878074359, + "grad_norm": 0.0877734124660492, + "learning_rate": 4.215124283461413e-06, + "loss": 8.5253, + "step": 172180 + }, + { + "epoch": 0.8598966266323753, + "grad_norm": 0.09386802464723587, + "learning_rate": 4.213622368519863e-06, + "loss": 8.522, + "step": 172190 + }, + { + "epoch": 0.8599465654573147, + "grad_norm": 0.09531255811452866, + "learning_rate": 4.212120453578313e-06, + "loss": 8.5214, + "step": 172200 + }, + { + "epoch": 0.8599965042822543, + "grad_norm": 0.09443308413028717, + "learning_rate": 4.210618538636762e-06, + "loss": 8.5338, + "step": 172210 + }, + { + "epoch": 0.8600464431071937, + "grad_norm": 0.08627857267856598, + "learning_rate": 4.2091166236952115e-06, + "loss": 8.5226, + "step": 172220 + }, + { + "epoch": 0.8600963819321331, + "grad_norm": 0.09067834168672562, + "learning_rate": 4.207614708753661e-06, + "loss": 8.5358, + "step": 172230 + }, + { + "epoch": 0.8601463207570725, + "grad_norm": 0.08249117434024811, + "learning_rate": 4.20611279381211e-06, + "loss": 8.5418, + "step": 172240 + }, + { + "epoch": 0.8601962595820121, + "grad_norm": 0.08913594484329224, + "learning_rate": 4.20461087887056e-06, + "loss": 8.5061, + "step": 172250 + }, + { + "epoch": 0.8602461984069515, + "grad_norm": 0.09263671189546585, + "learning_rate": 4.20310896392901e-06, + "loss": 8.5203, + "step": 172260 + }, + { + "epoch": 0.8602961372318909, + "grad_norm": 0.09188566356897354, + "learning_rate": 4.201607048987459e-06, + "loss": 8.5296, + "step": 172270 + }, + { + "epoch": 0.8603460760568303, + "grad_norm": 0.0945729911327362, + "learning_rate": 4.200105134045908e-06, + "loss": 8.5218, + "step": 172280 + }, + { + "epoch": 0.8603960148817699, + "grad_norm": 0.09193189442157745, + "learning_rate": 4.1986032191043585e-06, + "loss": 8.5329, + "step": 172290 + }, + { + "epoch": 0.8604459537067093, + "grad_norm": 0.08755156397819519, + "learning_rate": 4.197101304162808e-06, + "loss": 8.5227, + "step": 172300 + }, + { + "epoch": 0.8604958925316487, + "grad_norm": 0.08843012899160385, + "learning_rate": 4.195599389221257e-06, + "loss": 8.5337, + "step": 172310 + }, + { + "epoch": 0.8605458313565881, + "grad_norm": 0.09322084486484528, + "learning_rate": 4.1940974742797065e-06, + "loss": 8.5342, + "step": 172320 + }, + { + "epoch": 0.8605957701815277, + "grad_norm": 0.09291605651378632, + "learning_rate": 4.192595559338156e-06, + "loss": 8.516, + "step": 172330 + }, + { + "epoch": 0.8606457090064671, + "grad_norm": 0.09476222097873688, + "learning_rate": 4.191093644396606e-06, + "loss": 8.5342, + "step": 172340 + }, + { + "epoch": 0.8606956478314065, + "grad_norm": 0.09349306672811508, + "learning_rate": 4.189591729455055e-06, + "loss": 8.5237, + "step": 172350 + }, + { + "epoch": 0.8607455866563459, + "grad_norm": 0.09003052115440369, + "learning_rate": 4.188089814513505e-06, + "loss": 8.5341, + "step": 172360 + }, + { + "epoch": 0.8607955254812855, + "grad_norm": 0.08857399970293045, + "learning_rate": 4.186587899571955e-06, + "loss": 8.5271, + "step": 172370 + }, + { + "epoch": 0.8608454643062249, + "grad_norm": 0.09072058647871017, + "learning_rate": 4.185085984630403e-06, + "loss": 8.5211, + "step": 172380 + }, + { + "epoch": 0.8608954031311643, + "grad_norm": 0.09271258860826492, + "learning_rate": 4.1835840696888535e-06, + "loss": 8.5312, + "step": 172390 + }, + { + "epoch": 0.8609453419561037, + "grad_norm": 0.09086389094591141, + "learning_rate": 4.182082154747303e-06, + "loss": 8.5093, + "step": 172400 + }, + { + "epoch": 0.8609952807810433, + "grad_norm": 0.09174318611621857, + "learning_rate": 4.180580239805752e-06, + "loss": 8.5343, + "step": 172410 + }, + { + "epoch": 0.8610452196059827, + "grad_norm": 0.1004238873720169, + "learning_rate": 4.179078324864202e-06, + "loss": 8.5097, + "step": 172420 + }, + { + "epoch": 0.8610951584309221, + "grad_norm": 0.09396187216043472, + "learning_rate": 4.177576409922651e-06, + "loss": 8.5219, + "step": 172430 + }, + { + "epoch": 0.8611450972558615, + "grad_norm": 0.09725307673215866, + "learning_rate": 4.176074494981101e-06, + "loss": 8.5057, + "step": 172440 + }, + { + "epoch": 0.8611950360808011, + "grad_norm": 0.0942634865641594, + "learning_rate": 4.174572580039551e-06, + "loss": 8.5479, + "step": 172450 + }, + { + "epoch": 0.8612449749057405, + "grad_norm": 0.09349733591079712, + "learning_rate": 4.173070665098e-06, + "loss": 8.5116, + "step": 172460 + }, + { + "epoch": 0.8612949137306799, + "grad_norm": 0.09070730209350586, + "learning_rate": 4.17156875015645e-06, + "loss": 8.5297, + "step": 172470 + }, + { + "epoch": 0.8613448525556193, + "grad_norm": 0.09090225398540497, + "learning_rate": 4.170066835214898e-06, + "loss": 8.5385, + "step": 172480 + }, + { + "epoch": 0.8613947913805589, + "grad_norm": 0.08755583316087723, + "learning_rate": 4.1685649202733485e-06, + "loss": 8.511, + "step": 172490 + }, + { + "epoch": 0.8614447302054983, + "grad_norm": 0.08842223882675171, + "learning_rate": 4.167063005331799e-06, + "loss": 8.53, + "step": 172500 + }, + { + "epoch": 0.8614946690304377, + "grad_norm": 0.08680158108472824, + "learning_rate": 4.165561090390247e-06, + "loss": 8.541, + "step": 172510 + }, + { + "epoch": 0.8615446078553771, + "grad_norm": 0.08641988784074783, + "learning_rate": 4.164059175448697e-06, + "loss": 8.5344, + "step": 172520 + }, + { + "epoch": 0.8615945466803167, + "grad_norm": 0.08488810807466507, + "learning_rate": 4.162557260507147e-06, + "loss": 8.5253, + "step": 172530 + }, + { + "epoch": 0.8616444855052561, + "grad_norm": 0.08614518493413925, + "learning_rate": 4.161055345565596e-06, + "loss": 8.5317, + "step": 172540 + }, + { + "epoch": 0.8616944243301955, + "grad_norm": 0.09032870084047318, + "learning_rate": 4.159553430624046e-06, + "loss": 8.5269, + "step": 172550 + }, + { + "epoch": 0.8617443631551349, + "grad_norm": 0.09359986335039139, + "learning_rate": 4.158051515682495e-06, + "loss": 8.5229, + "step": 172560 + }, + { + "epoch": 0.8617943019800745, + "grad_norm": 0.09470424801111221, + "learning_rate": 4.156549600740945e-06, + "loss": 8.5238, + "step": 172570 + }, + { + "epoch": 0.8618442408050139, + "grad_norm": 0.09623333811759949, + "learning_rate": 4.155047685799394e-06, + "loss": 8.5354, + "step": 172580 + }, + { + "epoch": 0.8618941796299533, + "grad_norm": 0.08704448491334915, + "learning_rate": 4.1535457708578436e-06, + "loss": 8.5351, + "step": 172590 + }, + { + "epoch": 0.8619441184548927, + "grad_norm": 0.09469842165708542, + "learning_rate": 4.152043855916294e-06, + "loss": 8.5372, + "step": 172600 + }, + { + "epoch": 0.8619940572798322, + "grad_norm": 0.09458217769861221, + "learning_rate": 4.150541940974742e-06, + "loss": 8.543, + "step": 172610 + }, + { + "epoch": 0.8620439961047717, + "grad_norm": 0.09154288470745087, + "learning_rate": 4.149040026033192e-06, + "loss": 8.5297, + "step": 172620 + }, + { + "epoch": 0.8620939349297111, + "grad_norm": 0.09419039636850357, + "learning_rate": 4.1475381110916426e-06, + "loss": 8.5533, + "step": 172630 + }, + { + "epoch": 0.8621438737546505, + "grad_norm": 0.09300480782985687, + "learning_rate": 4.146036196150091e-06, + "loss": 8.5419, + "step": 172640 + }, + { + "epoch": 0.86219381257959, + "grad_norm": 0.09122762829065323, + "learning_rate": 4.144534281208541e-06, + "loss": 8.5167, + "step": 172650 + }, + { + "epoch": 0.8622437514045295, + "grad_norm": 0.09213479608297348, + "learning_rate": 4.1430323662669906e-06, + "loss": 8.5241, + "step": 172660 + }, + { + "epoch": 0.8622936902294689, + "grad_norm": 0.09271895885467529, + "learning_rate": 4.14153045132544e-06, + "loss": 8.5352, + "step": 172670 + }, + { + "epoch": 0.8623436290544083, + "grad_norm": 0.08686958998441696, + "learning_rate": 4.14002853638389e-06, + "loss": 8.5167, + "step": 172680 + }, + { + "epoch": 0.8623935678793478, + "grad_norm": 0.08590806275606155, + "learning_rate": 4.1385266214423386e-06, + "loss": 8.5366, + "step": 172690 + }, + { + "epoch": 0.8624435067042873, + "grad_norm": 0.0904327780008316, + "learning_rate": 4.137024706500789e-06, + "loss": 8.5325, + "step": 172700 + }, + { + "epoch": 0.8624934455292267, + "grad_norm": 0.09133438020944595, + "learning_rate": 4.135522791559238e-06, + "loss": 8.5142, + "step": 172710 + }, + { + "epoch": 0.8625433843541661, + "grad_norm": 0.08940550684928894, + "learning_rate": 4.134020876617687e-06, + "loss": 8.51, + "step": 172720 + }, + { + "epoch": 0.8625933231791056, + "grad_norm": 0.09443370252847672, + "learning_rate": 4.132518961676138e-06, + "loss": 8.5289, + "step": 172730 + }, + { + "epoch": 0.8626432620040451, + "grad_norm": 0.09132427722215652, + "learning_rate": 4.131017046734587e-06, + "loss": 8.5351, + "step": 172740 + }, + { + "epoch": 0.8626932008289845, + "grad_norm": 0.08972794562578201, + "learning_rate": 4.129515131793036e-06, + "loss": 8.5338, + "step": 172750 + }, + { + "epoch": 0.8627431396539239, + "grad_norm": 0.09007082879543304, + "learning_rate": 4.128013216851486e-06, + "loss": 8.514, + "step": 172760 + }, + { + "epoch": 0.8627930784788634, + "grad_norm": 0.09930828213691711, + "learning_rate": 4.126511301909935e-06, + "loss": 8.5355, + "step": 172770 + }, + { + "epoch": 0.8628430173038029, + "grad_norm": 0.09435442090034485, + "learning_rate": 4.125009386968385e-06, + "loss": 8.539, + "step": 172780 + }, + { + "epoch": 0.8628929561287423, + "grad_norm": 0.09018246084451675, + "learning_rate": 4.123507472026834e-06, + "loss": 8.5403, + "step": 172790 + }, + { + "epoch": 0.8629428949536817, + "grad_norm": 0.09555966407060623, + "learning_rate": 4.122005557085284e-06, + "loss": 8.5138, + "step": 172800 + }, + { + "epoch": 0.8629928337786212, + "grad_norm": 0.09227462112903595, + "learning_rate": 4.120503642143733e-06, + "loss": 8.5498, + "step": 172810 + }, + { + "epoch": 0.8630427726035607, + "grad_norm": 0.08898046612739563, + "learning_rate": 4.119001727202183e-06, + "loss": 8.5182, + "step": 172820 + }, + { + "epoch": 0.8630927114285001, + "grad_norm": 0.08735623210668564, + "learning_rate": 4.117499812260633e-06, + "loss": 8.5232, + "step": 172830 + }, + { + "epoch": 0.8631426502534395, + "grad_norm": 0.09244896471500397, + "learning_rate": 4.115997897319082e-06, + "loss": 8.5405, + "step": 172840 + }, + { + "epoch": 0.863192589078379, + "grad_norm": 0.08761844038963318, + "learning_rate": 4.114495982377531e-06, + "loss": 8.5422, + "step": 172850 + }, + { + "epoch": 0.8632425279033185, + "grad_norm": 0.0887509286403656, + "learning_rate": 4.112994067435981e-06, + "loss": 8.5045, + "step": 172860 + }, + { + "epoch": 0.8632924667282579, + "grad_norm": 0.09360551834106445, + "learning_rate": 4.111492152494431e-06, + "loss": 8.5411, + "step": 172870 + }, + { + "epoch": 0.8633424055531973, + "grad_norm": 0.0890243873000145, + "learning_rate": 4.10999023755288e-06, + "loss": 8.5195, + "step": 172880 + }, + { + "epoch": 0.8633923443781368, + "grad_norm": 0.09118281304836273, + "learning_rate": 4.1084883226113294e-06, + "loss": 8.5162, + "step": 172890 + }, + { + "epoch": 0.8634422832030763, + "grad_norm": 0.08943572640419006, + "learning_rate": 4.10698640766978e-06, + "loss": 8.5457, + "step": 172900 + }, + { + "epoch": 0.8634922220280157, + "grad_norm": 0.09161094576120377, + "learning_rate": 4.105484492728228e-06, + "loss": 8.5199, + "step": 172910 + }, + { + "epoch": 0.8635421608529551, + "grad_norm": 0.09737364202737808, + "learning_rate": 4.103982577786678e-06, + "loss": 8.528, + "step": 172920 + }, + { + "epoch": 0.8635920996778946, + "grad_norm": 0.08938228338956833, + "learning_rate": 4.102480662845128e-06, + "loss": 8.5449, + "step": 172930 + }, + { + "epoch": 0.863642038502834, + "grad_norm": 0.09068731218576431, + "learning_rate": 4.100978747903577e-06, + "loss": 8.5252, + "step": 172940 + }, + { + "epoch": 0.8636919773277735, + "grad_norm": 0.09687364101409912, + "learning_rate": 4.099476832962027e-06, + "loss": 8.5316, + "step": 172950 + }, + { + "epoch": 0.8637419161527129, + "grad_norm": 0.09029592573642731, + "learning_rate": 4.097974918020476e-06, + "loss": 8.5268, + "step": 172960 + }, + { + "epoch": 0.8637918549776524, + "grad_norm": 0.0884825587272644, + "learning_rate": 4.096473003078926e-06, + "loss": 8.5217, + "step": 172970 + }, + { + "epoch": 0.8638417938025919, + "grad_norm": 0.09405983984470367, + "learning_rate": 4.094971088137376e-06, + "loss": 8.5221, + "step": 172980 + }, + { + "epoch": 0.8638917326275313, + "grad_norm": 0.0932493582367897, + "learning_rate": 4.0934691731958244e-06, + "loss": 8.5124, + "step": 172990 + }, + { + "epoch": 0.8639416714524707, + "grad_norm": 0.09520955383777618, + "learning_rate": 4.091967258254275e-06, + "loss": 8.5114, + "step": 173000 + }, + { + "epoch": 0.8639916102774102, + "grad_norm": 0.08801741153001785, + "learning_rate": 4.090465343312723e-06, + "loss": 8.5435, + "step": 173010 + }, + { + "epoch": 0.8640415491023496, + "grad_norm": 0.09374932199716568, + "learning_rate": 4.088963428371173e-06, + "loss": 8.513, + "step": 173020 + }, + { + "epoch": 0.8640914879272891, + "grad_norm": 0.09420095384120941, + "learning_rate": 4.0874615134296235e-06, + "loss": 8.5286, + "step": 173030 + }, + { + "epoch": 0.8641414267522285, + "grad_norm": 0.09835788607597351, + "learning_rate": 4.085959598488072e-06, + "loss": 8.514, + "step": 173040 + }, + { + "epoch": 0.864191365577168, + "grad_norm": 0.0848371684551239, + "learning_rate": 4.084457683546522e-06, + "loss": 8.5214, + "step": 173050 + }, + { + "epoch": 0.8642413044021074, + "grad_norm": 0.09027153998613358, + "learning_rate": 4.0829557686049714e-06, + "loss": 8.5318, + "step": 173060 + }, + { + "epoch": 0.8642912432270469, + "grad_norm": 0.09047489613294601, + "learning_rate": 4.081453853663421e-06, + "loss": 8.538, + "step": 173070 + }, + { + "epoch": 0.8643411820519863, + "grad_norm": 0.09193349629640579, + "learning_rate": 4.079951938721871e-06, + "loss": 8.517, + "step": 173080 + }, + { + "epoch": 0.8643911208769257, + "grad_norm": 0.09359715133905411, + "learning_rate": 4.0784500237803194e-06, + "loss": 8.5216, + "step": 173090 + }, + { + "epoch": 0.8644410597018652, + "grad_norm": 0.09064842015504837, + "learning_rate": 4.07694810883877e-06, + "loss": 8.5191, + "step": 173100 + }, + { + "epoch": 0.8644909985268047, + "grad_norm": 0.09371822327375412, + "learning_rate": 4.075446193897219e-06, + "loss": 8.5121, + "step": 173110 + }, + { + "epoch": 0.8645409373517441, + "grad_norm": 0.09190354496240616, + "learning_rate": 4.073944278955668e-06, + "loss": 8.4994, + "step": 173120 + }, + { + "epoch": 0.8645908761766835, + "grad_norm": 0.09204579889774323, + "learning_rate": 4.0724423640141185e-06, + "loss": 8.5339, + "step": 173130 + }, + { + "epoch": 0.864640815001623, + "grad_norm": 0.09318622201681137, + "learning_rate": 4.070940449072568e-06, + "loss": 8.5116, + "step": 173140 + }, + { + "epoch": 0.8646907538265625, + "grad_norm": 0.09237584471702576, + "learning_rate": 4.069438534131017e-06, + "loss": 8.5358, + "step": 173150 + }, + { + "epoch": 0.8647406926515019, + "grad_norm": 0.09040313959121704, + "learning_rate": 4.0679366191894665e-06, + "loss": 8.5232, + "step": 173160 + }, + { + "epoch": 0.8647906314764413, + "grad_norm": 0.09376216679811478, + "learning_rate": 4.066434704247916e-06, + "loss": 8.5028, + "step": 173170 + }, + { + "epoch": 0.8648405703013808, + "grad_norm": 0.08734447509050369, + "learning_rate": 4.064932789306366e-06, + "loss": 8.5056, + "step": 173180 + }, + { + "epoch": 0.8648905091263203, + "grad_norm": 0.0926559641957283, + "learning_rate": 4.063430874364815e-06, + "loss": 8.5099, + "step": 173190 + }, + { + "epoch": 0.8649404479512597, + "grad_norm": 0.08731448650360107, + "learning_rate": 4.061928959423265e-06, + "loss": 8.5165, + "step": 173200 + }, + { + "epoch": 0.8649903867761991, + "grad_norm": 0.08865402638912201, + "learning_rate": 4.060427044481714e-06, + "loss": 8.5291, + "step": 173210 + }, + { + "epoch": 0.8650403256011386, + "grad_norm": 0.08650528639554977, + "learning_rate": 4.058925129540164e-06, + "loss": 8.5137, + "step": 173220 + }, + { + "epoch": 0.8650902644260781, + "grad_norm": 0.08930091559886932, + "learning_rate": 4.0574232145986135e-06, + "loss": 8.5208, + "step": 173230 + }, + { + "epoch": 0.8651402032510175, + "grad_norm": 0.09235706925392151, + "learning_rate": 4.055921299657063e-06, + "loss": 8.5156, + "step": 173240 + }, + { + "epoch": 0.8651901420759569, + "grad_norm": 0.09423262625932693, + "learning_rate": 4.054419384715512e-06, + "loss": 8.5162, + "step": 173250 + }, + { + "epoch": 0.8652400809008964, + "grad_norm": 0.0886596292257309, + "learning_rate": 4.0529174697739615e-06, + "loss": 8.5152, + "step": 173260 + }, + { + "epoch": 0.8652900197258359, + "grad_norm": 0.08678395301103592, + "learning_rate": 4.051415554832412e-06, + "loss": 8.53, + "step": 173270 + }, + { + "epoch": 0.8653399585507753, + "grad_norm": 0.08788527548313141, + "learning_rate": 4.049913639890861e-06, + "loss": 8.5281, + "step": 173280 + }, + { + "epoch": 0.8653898973757147, + "grad_norm": 0.08688144385814667, + "learning_rate": 4.04841172494931e-06, + "loss": 8.5394, + "step": 173290 + }, + { + "epoch": 0.8654398362006542, + "grad_norm": 0.09183796495199203, + "learning_rate": 4.0469098100077605e-06, + "loss": 8.5355, + "step": 173300 + }, + { + "epoch": 0.8654897750255937, + "grad_norm": 0.09272154420614243, + "learning_rate": 4.045407895066209e-06, + "loss": 8.5449, + "step": 173310 + }, + { + "epoch": 0.8655397138505331, + "grad_norm": 0.0878337025642395, + "learning_rate": 4.043905980124659e-06, + "loss": 8.543, + "step": 173320 + }, + { + "epoch": 0.8655896526754725, + "grad_norm": 0.0915573239326477, + "learning_rate": 4.0424040651831085e-06, + "loss": 8.5153, + "step": 173330 + }, + { + "epoch": 0.865639591500412, + "grad_norm": 0.09731071442365646, + "learning_rate": 4.040902150241558e-06, + "loss": 8.5377, + "step": 173340 + }, + { + "epoch": 0.8656895303253515, + "grad_norm": 0.09491939097642899, + "learning_rate": 4.039400235300008e-06, + "loss": 8.5258, + "step": 173350 + }, + { + "epoch": 0.8657394691502909, + "grad_norm": 0.09417067468166351, + "learning_rate": 4.0378983203584565e-06, + "loss": 8.5305, + "step": 173360 + }, + { + "epoch": 0.8657894079752303, + "grad_norm": 0.09818973392248154, + "learning_rate": 4.036396405416907e-06, + "loss": 8.5226, + "step": 173370 + }, + { + "epoch": 0.8658393468001698, + "grad_norm": 0.10228383541107178, + "learning_rate": 4.034894490475357e-06, + "loss": 8.5162, + "step": 173380 + }, + { + "epoch": 0.8658892856251093, + "grad_norm": 0.09025364369153976, + "learning_rate": 4.033392575533805e-06, + "loss": 8.5282, + "step": 173390 + }, + { + "epoch": 0.8659392244500487, + "grad_norm": 0.09199435263872147, + "learning_rate": 4.0318906605922555e-06, + "loss": 8.5222, + "step": 173400 + }, + { + "epoch": 0.8659891632749881, + "grad_norm": 0.08623290807008743, + "learning_rate": 4.030388745650704e-06, + "loss": 8.5133, + "step": 173410 + }, + { + "epoch": 0.8660391020999276, + "grad_norm": 0.09399810433387756, + "learning_rate": 4.028886830709154e-06, + "loss": 8.5254, + "step": 173420 + }, + { + "epoch": 0.866089040924867, + "grad_norm": 0.08971837162971497, + "learning_rate": 4.027384915767604e-06, + "loss": 8.5333, + "step": 173430 + }, + { + "epoch": 0.8661389797498065, + "grad_norm": 0.08642110228538513, + "learning_rate": 4.025883000826053e-06, + "loss": 8.5237, + "step": 173440 + }, + { + "epoch": 0.8661889185747459, + "grad_norm": 0.09314007312059402, + "learning_rate": 4.024381085884503e-06, + "loss": 8.5056, + "step": 173450 + }, + { + "epoch": 0.8662388573996854, + "grad_norm": 0.09386899322271347, + "learning_rate": 4.022879170942952e-06, + "loss": 8.524, + "step": 173460 + }, + { + "epoch": 0.8662887962246248, + "grad_norm": 0.09295320510864258, + "learning_rate": 4.021377256001402e-06, + "loss": 8.5266, + "step": 173470 + }, + { + "epoch": 0.8663387350495643, + "grad_norm": 0.09699882566928864, + "learning_rate": 4.019875341059852e-06, + "loss": 8.5314, + "step": 173480 + }, + { + "epoch": 0.8663886738745037, + "grad_norm": 0.08937513083219528, + "learning_rate": 4.0183734261183e-06, + "loss": 8.5286, + "step": 173490 + }, + { + "epoch": 0.8664386126994432, + "grad_norm": 0.0886300727725029, + "learning_rate": 4.0168715111767505e-06, + "loss": 8.519, + "step": 173500 + }, + { + "epoch": 0.8664885515243826, + "grad_norm": 0.08783800154924393, + "learning_rate": 4.0153695962352e-06, + "loss": 8.5191, + "step": 173510 + }, + { + "epoch": 0.8665384903493221, + "grad_norm": 0.09207980334758759, + "learning_rate": 4.013867681293649e-06, + "loss": 8.5026, + "step": 173520 + }, + { + "epoch": 0.8665884291742615, + "grad_norm": 0.09273963421583176, + "learning_rate": 4.012365766352099e-06, + "loss": 8.5182, + "step": 173530 + }, + { + "epoch": 0.866638367999201, + "grad_norm": 0.08379366248846054, + "learning_rate": 4.010863851410549e-06, + "loss": 8.5278, + "step": 173540 + }, + { + "epoch": 0.8666883068241404, + "grad_norm": 0.08762573450803757, + "learning_rate": 4.009361936468998e-06, + "loss": 8.5255, + "step": 173550 + }, + { + "epoch": 0.8667382456490799, + "grad_norm": 0.088675856590271, + "learning_rate": 4.007860021527447e-06, + "loss": 8.5256, + "step": 173560 + }, + { + "epoch": 0.8667881844740193, + "grad_norm": 0.08745112270116806, + "learning_rate": 4.006358106585897e-06, + "loss": 8.5365, + "step": 173570 + }, + { + "epoch": 0.8668381232989588, + "grad_norm": 0.08667652308940887, + "learning_rate": 4.004856191644347e-06, + "loss": 8.5282, + "step": 173580 + }, + { + "epoch": 0.8668880621238982, + "grad_norm": 0.09410617500543594, + "learning_rate": 4.003354276702796e-06, + "loss": 8.5143, + "step": 173590 + }, + { + "epoch": 0.8669380009488377, + "grad_norm": 0.0875682532787323, + "learning_rate": 4.0018523617612455e-06, + "loss": 8.5359, + "step": 173600 + }, + { + "epoch": 0.8669879397737771, + "grad_norm": 0.08985573053359985, + "learning_rate": 4.000350446819695e-06, + "loss": 8.5462, + "step": 173610 + }, + { + "epoch": 0.8670378785987166, + "grad_norm": 0.09056945890188217, + "learning_rate": 3.998848531878145e-06, + "loss": 8.555, + "step": 173620 + }, + { + "epoch": 0.867087817423656, + "grad_norm": 0.09406935423612595, + "learning_rate": 3.997346616936594e-06, + "loss": 8.5018, + "step": 173630 + }, + { + "epoch": 0.8671377562485955, + "grad_norm": 0.08972731977701187, + "learning_rate": 3.995844701995044e-06, + "loss": 8.5286, + "step": 173640 + }, + { + "epoch": 0.8671876950735349, + "grad_norm": 0.09432131797075272, + "learning_rate": 3.994342787053493e-06, + "loss": 8.527, + "step": 173650 + }, + { + "epoch": 0.8672376338984744, + "grad_norm": 0.09251857548952103, + "learning_rate": 3.992840872111943e-06, + "loss": 8.5157, + "step": 173660 + }, + { + "epoch": 0.8672875727234138, + "grad_norm": 0.0903116911649704, + "learning_rate": 3.9913389571703925e-06, + "loss": 8.5218, + "step": 173670 + }, + { + "epoch": 0.8673375115483533, + "grad_norm": 0.09307548403739929, + "learning_rate": 3.989837042228842e-06, + "loss": 8.5296, + "step": 173680 + }, + { + "epoch": 0.8673874503732927, + "grad_norm": 0.08810535818338394, + "learning_rate": 3.988335127287291e-06, + "loss": 8.5257, + "step": 173690 + }, + { + "epoch": 0.8674373891982322, + "grad_norm": 0.09042003750801086, + "learning_rate": 3.986833212345741e-06, + "loss": 8.5462, + "step": 173700 + }, + { + "epoch": 0.8674873280231716, + "grad_norm": 0.09091649949550629, + "learning_rate": 3.985331297404191e-06, + "loss": 8.5249, + "step": 173710 + }, + { + "epoch": 0.867537266848111, + "grad_norm": 0.09188742935657501, + "learning_rate": 3.98382938246264e-06, + "loss": 8.5144, + "step": 173720 + }, + { + "epoch": 0.8675872056730505, + "grad_norm": 0.09003132581710815, + "learning_rate": 3.982327467521089e-06, + "loss": 8.5341, + "step": 173730 + }, + { + "epoch": 0.86763714449799, + "grad_norm": 0.09096615016460419, + "learning_rate": 3.980825552579539e-06, + "loss": 8.532, + "step": 173740 + }, + { + "epoch": 0.8676870833229294, + "grad_norm": 0.09003739058971405, + "learning_rate": 3.979323637637989e-06, + "loss": 8.5205, + "step": 173750 + }, + { + "epoch": 0.8677370221478689, + "grad_norm": 0.09441889822483063, + "learning_rate": 3.977821722696438e-06, + "loss": 8.5302, + "step": 173760 + }, + { + "epoch": 0.8677869609728083, + "grad_norm": 0.09117145091295242, + "learning_rate": 3.9763198077548875e-06, + "loss": 8.5129, + "step": 173770 + }, + { + "epoch": 0.8678368997977478, + "grad_norm": 0.0931628867983818, + "learning_rate": 3.974817892813338e-06, + "loss": 8.5337, + "step": 173780 + }, + { + "epoch": 0.8678868386226872, + "grad_norm": 0.09708821773529053, + "learning_rate": 3.973315977871786e-06, + "loss": 8.5166, + "step": 173790 + }, + { + "epoch": 0.8679367774476267, + "grad_norm": 0.08967326581478119, + "learning_rate": 3.971814062930236e-06, + "loss": 8.5294, + "step": 173800 + }, + { + "epoch": 0.8679867162725661, + "grad_norm": 0.09614343196153641, + "learning_rate": 3.970312147988686e-06, + "loss": 8.5138, + "step": 173810 + }, + { + "epoch": 0.8680366550975056, + "grad_norm": 0.09332430362701416, + "learning_rate": 3.968810233047135e-06, + "loss": 8.5287, + "step": 173820 + }, + { + "epoch": 0.868086593922445, + "grad_norm": 0.09939328581094742, + "learning_rate": 3.967308318105585e-06, + "loss": 8.5366, + "step": 173830 + }, + { + "epoch": 0.8681365327473844, + "grad_norm": 0.09348172694444656, + "learning_rate": 3.965806403164034e-06, + "loss": 8.524, + "step": 173840 + }, + { + "epoch": 0.8681864715723239, + "grad_norm": 0.09327511489391327, + "learning_rate": 3.964304488222484e-06, + "loss": 8.5344, + "step": 173850 + }, + { + "epoch": 0.8682364103972634, + "grad_norm": 0.089813232421875, + "learning_rate": 3.962802573280934e-06, + "loss": 8.5293, + "step": 173860 + }, + { + "epoch": 0.8682863492222028, + "grad_norm": 0.09029410034418106, + "learning_rate": 3.9613006583393825e-06, + "loss": 8.5281, + "step": 173870 + }, + { + "epoch": 0.8683362880471422, + "grad_norm": 0.09171853214502335, + "learning_rate": 3.959798743397833e-06, + "loss": 8.5053, + "step": 173880 + }, + { + "epoch": 0.8683862268720817, + "grad_norm": 0.08777931332588196, + "learning_rate": 3.958296828456281e-06, + "loss": 8.51, + "step": 173890 + }, + { + "epoch": 0.8684361656970212, + "grad_norm": 0.09193766117095947, + "learning_rate": 3.956794913514731e-06, + "loss": 8.5269, + "step": 173900 + }, + { + "epoch": 0.8684861045219606, + "grad_norm": 0.08400430530309677, + "learning_rate": 3.9552929985731815e-06, + "loss": 8.5356, + "step": 173910 + }, + { + "epoch": 0.8685360433469, + "grad_norm": 0.09466172009706497, + "learning_rate": 3.95379108363163e-06, + "loss": 8.5184, + "step": 173920 + }, + { + "epoch": 0.8685859821718395, + "grad_norm": 0.08804745227098465, + "learning_rate": 3.95228916869008e-06, + "loss": 8.5084, + "step": 173930 + }, + { + "epoch": 0.868635920996779, + "grad_norm": 0.08844754099845886, + "learning_rate": 3.9507872537485295e-06, + "loss": 8.5206, + "step": 173940 + }, + { + "epoch": 0.8686858598217184, + "grad_norm": 0.08931338787078857, + "learning_rate": 3.949285338806979e-06, + "loss": 8.5227, + "step": 173950 + }, + { + "epoch": 0.8687357986466578, + "grad_norm": 0.092722587287426, + "learning_rate": 3.947783423865429e-06, + "loss": 8.5357, + "step": 173960 + }, + { + "epoch": 0.8687857374715973, + "grad_norm": 0.08423691242933273, + "learning_rate": 3.9462815089238775e-06, + "loss": 8.5368, + "step": 173970 + }, + { + "epoch": 0.8688356762965368, + "grad_norm": 0.08981618285179138, + "learning_rate": 3.944779593982328e-06, + "loss": 8.5206, + "step": 173980 + }, + { + "epoch": 0.8688856151214762, + "grad_norm": 0.0881986990571022, + "learning_rate": 3.943277679040777e-06, + "loss": 8.5184, + "step": 173990 + }, + { + "epoch": 0.8689355539464156, + "grad_norm": 0.09177501499652863, + "learning_rate": 3.941775764099226e-06, + "loss": 8.5301, + "step": 174000 + }, + { + "epoch": 0.8689854927713551, + "grad_norm": 0.0890003889799118, + "learning_rate": 3.9402738491576766e-06, + "loss": 8.5185, + "step": 174010 + }, + { + "epoch": 0.8690354315962946, + "grad_norm": 0.08756007254123688, + "learning_rate": 3.938771934216126e-06, + "loss": 8.5242, + "step": 174020 + }, + { + "epoch": 0.869085370421234, + "grad_norm": 0.09130045026540756, + "learning_rate": 3.937270019274575e-06, + "loss": 8.5241, + "step": 174030 + }, + { + "epoch": 0.8691353092461734, + "grad_norm": 0.09635766595602036, + "learning_rate": 3.9357681043330246e-06, + "loss": 8.5303, + "step": 174040 + }, + { + "epoch": 0.8691852480711129, + "grad_norm": 0.09500184655189514, + "learning_rate": 3.934266189391474e-06, + "loss": 8.5289, + "step": 174050 + }, + { + "epoch": 0.8692351868960523, + "grad_norm": 0.08810250461101532, + "learning_rate": 3.932764274449924e-06, + "loss": 8.5216, + "step": 174060 + }, + { + "epoch": 0.8692851257209918, + "grad_norm": 0.08967293798923492, + "learning_rate": 3.931262359508373e-06, + "loss": 8.5238, + "step": 174070 + }, + { + "epoch": 0.8693350645459312, + "grad_norm": 0.0923919528722763, + "learning_rate": 3.929760444566823e-06, + "loss": 8.5099, + "step": 174080 + }, + { + "epoch": 0.8693850033708707, + "grad_norm": 0.09465716779232025, + "learning_rate": 3.928258529625272e-06, + "loss": 8.5313, + "step": 174090 + }, + { + "epoch": 0.8694349421958101, + "grad_norm": 0.08958126604557037, + "learning_rate": 3.926756614683722e-06, + "loss": 8.5358, + "step": 174100 + }, + { + "epoch": 0.8694848810207496, + "grad_norm": 0.0895143523812294, + "learning_rate": 3.9252546997421716e-06, + "loss": 8.5239, + "step": 174110 + }, + { + "epoch": 0.869534819845689, + "grad_norm": 0.0893610343337059, + "learning_rate": 3.923752784800621e-06, + "loss": 8.5262, + "step": 174120 + }, + { + "epoch": 0.8695847586706285, + "grad_norm": 0.0977412685751915, + "learning_rate": 3.92225086985907e-06, + "loss": 8.5102, + "step": 174130 + }, + { + "epoch": 0.8696346974955679, + "grad_norm": 0.09177926927804947, + "learning_rate": 3.9207489549175196e-06, + "loss": 8.5045, + "step": 174140 + }, + { + "epoch": 0.8696846363205074, + "grad_norm": 0.08462709933519363, + "learning_rate": 3.91924703997597e-06, + "loss": 8.5163, + "step": 174150 + }, + { + "epoch": 0.8697345751454468, + "grad_norm": 0.09177839756011963, + "learning_rate": 3.917745125034419e-06, + "loss": 8.5165, + "step": 174160 + }, + { + "epoch": 0.8697845139703863, + "grad_norm": 0.08942088484764099, + "learning_rate": 3.916243210092868e-06, + "loss": 8.542, + "step": 174170 + }, + { + "epoch": 0.8698344527953257, + "grad_norm": 0.0959954783320427, + "learning_rate": 3.914741295151319e-06, + "loss": 8.5206, + "step": 174180 + }, + { + "epoch": 0.8698843916202652, + "grad_norm": 0.09030862152576447, + "learning_rate": 3.913239380209767e-06, + "loss": 8.5127, + "step": 174190 + }, + { + "epoch": 0.8699343304452046, + "grad_norm": 0.08896990120410919, + "learning_rate": 3.911737465268217e-06, + "loss": 8.5275, + "step": 174200 + }, + { + "epoch": 0.869984269270144, + "grad_norm": 0.08994099497795105, + "learning_rate": 3.9102355503266666e-06, + "loss": 8.5147, + "step": 174210 + }, + { + "epoch": 0.8700342080950835, + "grad_norm": 0.0879681408405304, + "learning_rate": 3.908733635385116e-06, + "loss": 8.5341, + "step": 174220 + }, + { + "epoch": 0.870084146920023, + "grad_norm": 0.0852343812584877, + "learning_rate": 3.907231720443566e-06, + "loss": 8.5343, + "step": 174230 + }, + { + "epoch": 0.8701340857449624, + "grad_norm": 0.08718617260456085, + "learning_rate": 3.9057298055020146e-06, + "loss": 8.5334, + "step": 174240 + }, + { + "epoch": 0.8701840245699018, + "grad_norm": 0.09171243757009506, + "learning_rate": 3.904227890560465e-06, + "loss": 8.5341, + "step": 174250 + }, + { + "epoch": 0.8702339633948413, + "grad_norm": 0.08820389956235886, + "learning_rate": 3.902725975618915e-06, + "loss": 8.5417, + "step": 174260 + }, + { + "epoch": 0.8702839022197808, + "grad_norm": 0.08782992511987686, + "learning_rate": 3.901224060677363e-06, + "loss": 8.5333, + "step": 174270 + }, + { + "epoch": 0.8703338410447202, + "grad_norm": 0.08638270199298859, + "learning_rate": 3.899722145735814e-06, + "loss": 8.5396, + "step": 174280 + }, + { + "epoch": 0.8703837798696596, + "grad_norm": 0.09227801114320755, + "learning_rate": 3.898220230794262e-06, + "loss": 8.5034, + "step": 174290 + }, + { + "epoch": 0.8704337186945991, + "grad_norm": 0.09477287530899048, + "learning_rate": 3.896718315852712e-06, + "loss": 8.5067, + "step": 174300 + }, + { + "epoch": 0.8704836575195386, + "grad_norm": 0.09321620315313339, + "learning_rate": 3.8952164009111624e-06, + "loss": 8.5343, + "step": 174310 + }, + { + "epoch": 0.870533596344478, + "grad_norm": 0.08606991916894913, + "learning_rate": 3.893714485969611e-06, + "loss": 8.531, + "step": 174320 + }, + { + "epoch": 0.8705835351694174, + "grad_norm": 0.08892033994197845, + "learning_rate": 3.892212571028061e-06, + "loss": 8.531, + "step": 174330 + }, + { + "epoch": 0.8706334739943569, + "grad_norm": 0.08995813876390457, + "learning_rate": 3.89071065608651e-06, + "loss": 8.512, + "step": 174340 + }, + { + "epoch": 0.8706834128192964, + "grad_norm": 0.08822611719369888, + "learning_rate": 3.88920874114496e-06, + "loss": 8.527, + "step": 174350 + }, + { + "epoch": 0.8707333516442358, + "grad_norm": 0.09670012444257736, + "learning_rate": 3.88770682620341e-06, + "loss": 8.5085, + "step": 174360 + }, + { + "epoch": 0.8707832904691752, + "grad_norm": 0.0893353670835495, + "learning_rate": 3.886204911261858e-06, + "loss": 8.5151, + "step": 174370 + }, + { + "epoch": 0.8708332292941147, + "grad_norm": 0.0889049619436264, + "learning_rate": 3.884702996320309e-06, + "loss": 8.4997, + "step": 174380 + }, + { + "epoch": 0.8708831681190542, + "grad_norm": 0.08892576396465302, + "learning_rate": 3.883201081378758e-06, + "loss": 8.5215, + "step": 174390 + }, + { + "epoch": 0.8709331069439936, + "grad_norm": 0.09408024698495865, + "learning_rate": 3.881699166437207e-06, + "loss": 8.5249, + "step": 174400 + }, + { + "epoch": 0.870983045768933, + "grad_norm": 0.09239810705184937, + "learning_rate": 3.8801972514956574e-06, + "loss": 8.5204, + "step": 174410 + }, + { + "epoch": 0.8710329845938725, + "grad_norm": 0.08711476624011993, + "learning_rate": 3.878695336554107e-06, + "loss": 8.5171, + "step": 174420 + }, + { + "epoch": 0.871082923418812, + "grad_norm": 0.08488985896110535, + "learning_rate": 3.877193421612556e-06, + "loss": 8.5197, + "step": 174430 + }, + { + "epoch": 0.8711328622437514, + "grad_norm": 0.0881032943725586, + "learning_rate": 3.8756915066710054e-06, + "loss": 8.5149, + "step": 174440 + }, + { + "epoch": 0.8711828010686908, + "grad_norm": 0.0867091566324234, + "learning_rate": 3.874189591729455e-06, + "loss": 8.5106, + "step": 174450 + }, + { + "epoch": 0.8712327398936303, + "grad_norm": 0.09190431237220764, + "learning_rate": 3.872687676787905e-06, + "loss": 8.524, + "step": 174460 + }, + { + "epoch": 0.8712826787185698, + "grad_norm": 0.08907434344291687, + "learning_rate": 3.871185761846354e-06, + "loss": 8.5324, + "step": 174470 + }, + { + "epoch": 0.8713326175435092, + "grad_norm": 0.09729236364364624, + "learning_rate": 3.869683846904804e-06, + "loss": 8.5182, + "step": 174480 + }, + { + "epoch": 0.8713825563684486, + "grad_norm": 0.09291893243789673, + "learning_rate": 3.868181931963253e-06, + "loss": 8.5277, + "step": 174490 + }, + { + "epoch": 0.8714324951933881, + "grad_norm": 0.08960501104593277, + "learning_rate": 3.866680017021703e-06, + "loss": 8.5335, + "step": 174500 + }, + { + "epoch": 0.8714824340183276, + "grad_norm": 0.09359270334243774, + "learning_rate": 3.8651781020801524e-06, + "loss": 8.5333, + "step": 174510 + }, + { + "epoch": 0.871532372843267, + "grad_norm": 0.09425162523984909, + "learning_rate": 3.863676187138602e-06, + "loss": 8.5144, + "step": 174520 + }, + { + "epoch": 0.8715823116682064, + "grad_norm": 0.09045587480068207, + "learning_rate": 3.862174272197051e-06, + "loss": 8.5378, + "step": 174530 + }, + { + "epoch": 0.8716322504931459, + "grad_norm": 0.0939302071928978, + "learning_rate": 3.8606723572555004e-06, + "loss": 8.5233, + "step": 174540 + }, + { + "epoch": 0.8716821893180854, + "grad_norm": 0.08632202446460724, + "learning_rate": 3.859170442313951e-06, + "loss": 8.534, + "step": 174550 + }, + { + "epoch": 0.8717321281430248, + "grad_norm": 0.08903735131025314, + "learning_rate": 3.8576685273724e-06, + "loss": 8.537, + "step": 174560 + }, + { + "epoch": 0.8717820669679642, + "grad_norm": 0.08908429741859436, + "learning_rate": 3.856166612430849e-06, + "loss": 8.5324, + "step": 174570 + }, + { + "epoch": 0.8718320057929037, + "grad_norm": 0.08882272243499756, + "learning_rate": 3.8546646974892995e-06, + "loss": 8.5367, + "step": 174580 + }, + { + "epoch": 0.8718819446178432, + "grad_norm": 0.08850658684968948, + "learning_rate": 3.853162782547748e-06, + "loss": 8.5214, + "step": 174590 + }, + { + "epoch": 0.8719318834427826, + "grad_norm": 0.08655815571546555, + "learning_rate": 3.851660867606198e-06, + "loss": 8.5412, + "step": 174600 + }, + { + "epoch": 0.871981822267722, + "grad_norm": 0.08702993392944336, + "learning_rate": 3.8501589526646474e-06, + "loss": 8.5364, + "step": 174610 + }, + { + "epoch": 0.8720317610926615, + "grad_norm": 0.08738213032484055, + "learning_rate": 3.848657037723097e-06, + "loss": 8.517, + "step": 174620 + }, + { + "epoch": 0.872081699917601, + "grad_norm": 0.09153701364994049, + "learning_rate": 3.847155122781547e-06, + "loss": 8.504, + "step": 174630 + }, + { + "epoch": 0.8721316387425404, + "grad_norm": 0.09358830749988556, + "learning_rate": 3.8456532078399954e-06, + "loss": 8.5159, + "step": 174640 + }, + { + "epoch": 0.8721815775674798, + "grad_norm": 0.08956989645957947, + "learning_rate": 3.844151292898446e-06, + "loss": 8.5174, + "step": 174650 + }, + { + "epoch": 0.8722315163924192, + "grad_norm": 0.09366779029369354, + "learning_rate": 3.842649377956896e-06, + "loss": 8.5287, + "step": 174660 + }, + { + "epoch": 0.8722814552173588, + "grad_norm": 0.09194490313529968, + "learning_rate": 3.841147463015344e-06, + "loss": 8.5269, + "step": 174670 + }, + { + "epoch": 0.8723313940422982, + "grad_norm": 0.08473306894302368, + "learning_rate": 3.8396455480737945e-06, + "loss": 8.5332, + "step": 174680 + }, + { + "epoch": 0.8723813328672376, + "grad_norm": 0.09393809735774994, + "learning_rate": 3.838143633132243e-06, + "loss": 8.5116, + "step": 174690 + }, + { + "epoch": 0.872431271692177, + "grad_norm": 0.08729804307222366, + "learning_rate": 3.836641718190693e-06, + "loss": 8.5261, + "step": 174700 + }, + { + "epoch": 0.8724812105171166, + "grad_norm": 0.08915986865758896, + "learning_rate": 3.835139803249143e-06, + "loss": 8.5319, + "step": 174710 + }, + { + "epoch": 0.872531149342056, + "grad_norm": 0.08928072452545166, + "learning_rate": 3.833637888307592e-06, + "loss": 8.5346, + "step": 174720 + }, + { + "epoch": 0.8725810881669954, + "grad_norm": 0.09394170343875885, + "learning_rate": 3.832135973366042e-06, + "loss": 8.5138, + "step": 174730 + }, + { + "epoch": 0.8726310269919348, + "grad_norm": 0.09060081094503403, + "learning_rate": 3.830634058424491e-06, + "loss": 8.5273, + "step": 174740 + }, + { + "epoch": 0.8726809658168744, + "grad_norm": 0.08914122730493546, + "learning_rate": 3.829132143482941e-06, + "loss": 8.5229, + "step": 174750 + }, + { + "epoch": 0.8727309046418138, + "grad_norm": 0.09032674878835678, + "learning_rate": 3.827630228541391e-06, + "loss": 8.5249, + "step": 174760 + }, + { + "epoch": 0.8727808434667532, + "grad_norm": 0.09034202992916107, + "learning_rate": 3.826128313599839e-06, + "loss": 8.5362, + "step": 174770 + }, + { + "epoch": 0.8728307822916926, + "grad_norm": 0.08692754805088043, + "learning_rate": 3.8246263986582895e-06, + "loss": 8.5089, + "step": 174780 + }, + { + "epoch": 0.8728807211166322, + "grad_norm": 0.09234897792339325, + "learning_rate": 3.82312448371674e-06, + "loss": 8.5188, + "step": 174790 + }, + { + "epoch": 0.8729306599415716, + "grad_norm": 0.09195923805236816, + "learning_rate": 3.821622568775188e-06, + "loss": 8.5322, + "step": 174800 + }, + { + "epoch": 0.872980598766511, + "grad_norm": 0.0930572971701622, + "learning_rate": 3.820120653833638e-06, + "loss": 8.538, + "step": 174810 + }, + { + "epoch": 0.8730305375914504, + "grad_norm": 0.086077980697155, + "learning_rate": 3.818618738892088e-06, + "loss": 8.5311, + "step": 174820 + }, + { + "epoch": 0.87308047641639, + "grad_norm": 0.09503497928380966, + "learning_rate": 3.817116823950537e-06, + "loss": 8.5202, + "step": 174830 + }, + { + "epoch": 0.8731304152413294, + "grad_norm": 0.09077981114387512, + "learning_rate": 3.815614909008987e-06, + "loss": 8.5122, + "step": 174840 + }, + { + "epoch": 0.8731803540662688, + "grad_norm": 0.08733651787042618, + "learning_rate": 3.8141129940674356e-06, + "loss": 8.5313, + "step": 174850 + }, + { + "epoch": 0.8732302928912082, + "grad_norm": 0.08977235853672028, + "learning_rate": 3.8126110791258854e-06, + "loss": 8.5195, + "step": 174860 + }, + { + "epoch": 0.8732802317161478, + "grad_norm": 0.09327967464923859, + "learning_rate": 3.8111091641843356e-06, + "loss": 8.5095, + "step": 174870 + }, + { + "epoch": 0.8733301705410872, + "grad_norm": 0.08643508702516556, + "learning_rate": 3.8096072492427845e-06, + "loss": 8.5356, + "step": 174880 + }, + { + "epoch": 0.8733801093660266, + "grad_norm": 0.09138485044240952, + "learning_rate": 3.8081053343012342e-06, + "loss": 8.519, + "step": 174890 + }, + { + "epoch": 0.873430048190966, + "grad_norm": 0.09083333611488342, + "learning_rate": 3.806603419359684e-06, + "loss": 8.5285, + "step": 174900 + }, + { + "epoch": 0.8734799870159056, + "grad_norm": 0.09481485933065414, + "learning_rate": 3.805101504418133e-06, + "loss": 8.5086, + "step": 174910 + }, + { + "epoch": 0.873529925840845, + "grad_norm": 0.0904790461063385, + "learning_rate": 3.803599589476583e-06, + "loss": 8.5069, + "step": 174920 + }, + { + "epoch": 0.8735798646657844, + "grad_norm": 0.08950541913509369, + "learning_rate": 3.802097674535032e-06, + "loss": 8.5393, + "step": 174930 + }, + { + "epoch": 0.8736298034907238, + "grad_norm": 0.09654703736305237, + "learning_rate": 3.8005957595934817e-06, + "loss": 8.5304, + "step": 174940 + }, + { + "epoch": 0.8736797423156634, + "grad_norm": 0.09147506207227707, + "learning_rate": 3.7990938446519315e-06, + "loss": 8.5209, + "step": 174950 + }, + { + "epoch": 0.8737296811406028, + "grad_norm": 0.09071029722690582, + "learning_rate": 3.7975919297103804e-06, + "loss": 8.5118, + "step": 174960 + }, + { + "epoch": 0.8737796199655422, + "grad_norm": 0.0901302620768547, + "learning_rate": 3.7960900147688306e-06, + "loss": 8.5154, + "step": 174970 + }, + { + "epoch": 0.8738295587904816, + "grad_norm": 0.0890451967716217, + "learning_rate": 3.7945880998272803e-06, + "loss": 8.5242, + "step": 174980 + }, + { + "epoch": 0.8738794976154212, + "grad_norm": 0.09417781978845596, + "learning_rate": 3.7930861848857292e-06, + "loss": 8.5159, + "step": 174990 + }, + { + "epoch": 0.8739294364403606, + "grad_norm": 0.09130019694566727, + "learning_rate": 3.791584269944179e-06, + "loss": 8.4936, + "step": 175000 + }, + { + "epoch": 0.8739793752653, + "grad_norm": 0.09282857924699783, + "learning_rate": 3.7900823550026283e-06, + "loss": 8.5298, + "step": 175010 + }, + { + "epoch": 0.8740293140902394, + "grad_norm": 0.09820152819156647, + "learning_rate": 3.788580440061078e-06, + "loss": 8.5198, + "step": 175020 + }, + { + "epoch": 0.8740792529151789, + "grad_norm": 0.0940333679318428, + "learning_rate": 3.787078525119528e-06, + "loss": 8.5207, + "step": 175030 + }, + { + "epoch": 0.8741291917401184, + "grad_norm": 0.09212444722652435, + "learning_rate": 3.7855766101779767e-06, + "loss": 8.5327, + "step": 175040 + }, + { + "epoch": 0.8741791305650578, + "grad_norm": 0.09330432116985321, + "learning_rate": 3.7840746952364265e-06, + "loss": 8.5186, + "step": 175050 + }, + { + "epoch": 0.8742290693899972, + "grad_norm": 0.09266900271177292, + "learning_rate": 3.7825727802948767e-06, + "loss": 8.5161, + "step": 175060 + }, + { + "epoch": 0.8742790082149366, + "grad_norm": 0.09173017740249634, + "learning_rate": 3.7810708653533256e-06, + "loss": 8.5348, + "step": 175070 + }, + { + "epoch": 0.8743289470398762, + "grad_norm": 0.09216035157442093, + "learning_rate": 3.7795689504117753e-06, + "loss": 8.5296, + "step": 175080 + }, + { + "epoch": 0.8743788858648156, + "grad_norm": 0.08925978094339371, + "learning_rate": 3.7780670354702242e-06, + "loss": 8.5101, + "step": 175090 + }, + { + "epoch": 0.874428824689755, + "grad_norm": 0.09558253735303879, + "learning_rate": 3.776565120528674e-06, + "loss": 8.5213, + "step": 175100 + }, + { + "epoch": 0.8744787635146944, + "grad_norm": 0.09404308348894119, + "learning_rate": 3.775063205587124e-06, + "loss": 8.5122, + "step": 175110 + }, + { + "epoch": 0.874528702339634, + "grad_norm": 0.09096416085958481, + "learning_rate": 3.773561290645573e-06, + "loss": 8.5048, + "step": 175120 + }, + { + "epoch": 0.8745786411645734, + "grad_norm": 0.0968070700764656, + "learning_rate": 3.772059375704023e-06, + "loss": 8.5088, + "step": 175130 + }, + { + "epoch": 0.8746285799895128, + "grad_norm": 0.09349347651004791, + "learning_rate": 3.7705574607624726e-06, + "loss": 8.5247, + "step": 175140 + }, + { + "epoch": 0.8746785188144522, + "grad_norm": 0.09110265225172043, + "learning_rate": 3.7690555458209215e-06, + "loss": 8.5225, + "step": 175150 + }, + { + "epoch": 0.8747284576393918, + "grad_norm": 0.09158271551132202, + "learning_rate": 3.7675536308793717e-06, + "loss": 8.5216, + "step": 175160 + }, + { + "epoch": 0.8747783964643312, + "grad_norm": 0.09609760344028473, + "learning_rate": 3.7660517159378206e-06, + "loss": 8.5151, + "step": 175170 + }, + { + "epoch": 0.8748283352892706, + "grad_norm": 0.09342726320028305, + "learning_rate": 3.7645498009962703e-06, + "loss": 8.5056, + "step": 175180 + }, + { + "epoch": 0.87487827411421, + "grad_norm": 0.09422183781862259, + "learning_rate": 3.76304788605472e-06, + "loss": 8.5344, + "step": 175190 + }, + { + "epoch": 0.8749282129391496, + "grad_norm": 0.0885339081287384, + "learning_rate": 3.761545971113169e-06, + "loss": 8.5224, + "step": 175200 + }, + { + "epoch": 0.874978151764089, + "grad_norm": 0.09695085138082504, + "learning_rate": 3.760044056171619e-06, + "loss": 8.5084, + "step": 175210 + }, + { + "epoch": 0.8750280905890284, + "grad_norm": 0.08846613019704819, + "learning_rate": 3.758542141230069e-06, + "loss": 8.5175, + "step": 175220 + }, + { + "epoch": 0.8750780294139678, + "grad_norm": 0.09065619111061096, + "learning_rate": 3.757040226288518e-06, + "loss": 8.5186, + "step": 175230 + }, + { + "epoch": 0.8751279682389074, + "grad_norm": 0.09855975955724716, + "learning_rate": 3.7555383113469676e-06, + "loss": 8.5197, + "step": 175240 + }, + { + "epoch": 0.8751779070638468, + "grad_norm": 0.09044459462165833, + "learning_rate": 3.7540363964054165e-06, + "loss": 8.5144, + "step": 175250 + }, + { + "epoch": 0.8752278458887862, + "grad_norm": 0.08905865252017975, + "learning_rate": 3.7525344814638667e-06, + "loss": 8.5222, + "step": 175260 + }, + { + "epoch": 0.8752777847137256, + "grad_norm": 0.11130794882774353, + "learning_rate": 3.7510325665223164e-06, + "loss": 8.5262, + "step": 175270 + }, + { + "epoch": 0.8753277235386652, + "grad_norm": 0.08949120342731476, + "learning_rate": 3.7495306515807658e-06, + "loss": 8.5224, + "step": 175280 + }, + { + "epoch": 0.8753776623636046, + "grad_norm": 0.09382175654172897, + "learning_rate": 3.748028736639215e-06, + "loss": 8.5169, + "step": 175290 + }, + { + "epoch": 0.875427601188544, + "grad_norm": 0.09268946945667267, + "learning_rate": 3.7465268216976644e-06, + "loss": 8.5137, + "step": 175300 + }, + { + "epoch": 0.8754775400134834, + "grad_norm": 0.09075185656547546, + "learning_rate": 3.745024906756114e-06, + "loss": 8.5204, + "step": 175310 + }, + { + "epoch": 0.875527478838423, + "grad_norm": 0.0939631536602974, + "learning_rate": 3.743522991814564e-06, + "loss": 8.519, + "step": 175320 + }, + { + "epoch": 0.8755774176633624, + "grad_norm": 0.08865179121494293, + "learning_rate": 3.7420210768730133e-06, + "loss": 8.5212, + "step": 175330 + }, + { + "epoch": 0.8756273564883018, + "grad_norm": 0.0853448212146759, + "learning_rate": 3.7405191619314626e-06, + "loss": 8.5186, + "step": 175340 + }, + { + "epoch": 0.8756772953132412, + "grad_norm": 0.09132332354784012, + "learning_rate": 3.739017246989912e-06, + "loss": 8.5169, + "step": 175350 + }, + { + "epoch": 0.8757272341381808, + "grad_norm": 0.08930841088294983, + "learning_rate": 3.737515332048362e-06, + "loss": 8.5192, + "step": 175360 + }, + { + "epoch": 0.8757771729631202, + "grad_norm": 0.09462074190378189, + "learning_rate": 3.7360134171068115e-06, + "loss": 8.521, + "step": 175370 + }, + { + "epoch": 0.8758271117880596, + "grad_norm": 0.09071394056081772, + "learning_rate": 3.7345115021652608e-06, + "loss": 8.5251, + "step": 175380 + }, + { + "epoch": 0.875877050612999, + "grad_norm": 0.09404830634593964, + "learning_rate": 3.73300958722371e-06, + "loss": 8.5084, + "step": 175390 + }, + { + "epoch": 0.8759269894379386, + "grad_norm": 0.09391729533672333, + "learning_rate": 3.73150767228216e-06, + "loss": 8.5464, + "step": 175400 + }, + { + "epoch": 0.875976928262878, + "grad_norm": 0.08704748004674911, + "learning_rate": 3.7300057573406096e-06, + "loss": 8.516, + "step": 175410 + }, + { + "epoch": 0.8760268670878174, + "grad_norm": 0.096518374979496, + "learning_rate": 3.728503842399059e-06, + "loss": 8.5001, + "step": 175420 + }, + { + "epoch": 0.8760768059127568, + "grad_norm": 0.09533105045557022, + "learning_rate": 3.7270019274575083e-06, + "loss": 8.517, + "step": 175430 + }, + { + "epoch": 0.8761267447376964, + "grad_norm": 0.09067413210868835, + "learning_rate": 3.725500012515958e-06, + "loss": 8.5323, + "step": 175440 + }, + { + "epoch": 0.8761766835626358, + "grad_norm": 0.09444481879472733, + "learning_rate": 3.7239980975744074e-06, + "loss": 8.5037, + "step": 175450 + }, + { + "epoch": 0.8762266223875752, + "grad_norm": 0.08779852837324142, + "learning_rate": 3.722496182632857e-06, + "loss": 8.5255, + "step": 175460 + }, + { + "epoch": 0.8762765612125146, + "grad_norm": 0.08670119941234589, + "learning_rate": 3.7209942676913065e-06, + "loss": 8.5018, + "step": 175470 + }, + { + "epoch": 0.8763265000374542, + "grad_norm": 0.09427984058856964, + "learning_rate": 3.719492352749756e-06, + "loss": 8.5081, + "step": 175480 + }, + { + "epoch": 0.8763764388623936, + "grad_norm": 0.09233436733484268, + "learning_rate": 3.7179904378082055e-06, + "loss": 8.5266, + "step": 175490 + }, + { + "epoch": 0.876426377687333, + "grad_norm": 0.08909424394369125, + "learning_rate": 3.716488522866655e-06, + "loss": 8.5278, + "step": 175500 + }, + { + "epoch": 0.8764763165122724, + "grad_norm": 0.08954276144504547, + "learning_rate": 3.7149866079251046e-06, + "loss": 8.5341, + "step": 175510 + }, + { + "epoch": 0.876526255337212, + "grad_norm": 0.09071674197912216, + "learning_rate": 3.713484692983554e-06, + "loss": 8.5145, + "step": 175520 + }, + { + "epoch": 0.8765761941621514, + "grad_norm": 0.08852982521057129, + "learning_rate": 3.7119827780420037e-06, + "loss": 8.519, + "step": 175530 + }, + { + "epoch": 0.8766261329870908, + "grad_norm": 0.08784948289394379, + "learning_rate": 3.710480863100453e-06, + "loss": 8.5251, + "step": 175540 + }, + { + "epoch": 0.8766760718120302, + "grad_norm": 0.09313932061195374, + "learning_rate": 3.708978948158903e-06, + "loss": 8.5231, + "step": 175550 + }, + { + "epoch": 0.8767260106369698, + "grad_norm": 0.09138453006744385, + "learning_rate": 3.707477033217352e-06, + "loss": 8.5293, + "step": 175560 + }, + { + "epoch": 0.8767759494619092, + "grad_norm": 0.08735091984272003, + "learning_rate": 3.705975118275802e-06, + "loss": 8.5286, + "step": 175570 + }, + { + "epoch": 0.8768258882868486, + "grad_norm": 0.09122058749198914, + "learning_rate": 3.7044732033342512e-06, + "loss": 8.4957, + "step": 175580 + }, + { + "epoch": 0.876875827111788, + "grad_norm": 0.0900067389011383, + "learning_rate": 3.7029712883927006e-06, + "loss": 8.5311, + "step": 175590 + }, + { + "epoch": 0.8769257659367276, + "grad_norm": 0.08420617878437042, + "learning_rate": 3.7014693734511503e-06, + "loss": 8.5117, + "step": 175600 + }, + { + "epoch": 0.876975704761667, + "grad_norm": 0.08814489841461182, + "learning_rate": 3.6999674585096e-06, + "loss": 8.5307, + "step": 175610 + }, + { + "epoch": 0.8770256435866064, + "grad_norm": 0.09584464132785797, + "learning_rate": 3.6984655435680494e-06, + "loss": 8.525, + "step": 175620 + }, + { + "epoch": 0.8770755824115458, + "grad_norm": 0.08932922780513763, + "learning_rate": 3.6969636286264987e-06, + "loss": 8.5171, + "step": 175630 + }, + { + "epoch": 0.8771255212364854, + "grad_norm": 0.08885538578033447, + "learning_rate": 3.695461713684948e-06, + "loss": 8.5202, + "step": 175640 + }, + { + "epoch": 0.8771754600614248, + "grad_norm": 0.09274578839540482, + "learning_rate": 3.6939597987433982e-06, + "loss": 8.5244, + "step": 175650 + }, + { + "epoch": 0.8772253988863642, + "grad_norm": 0.09348073601722717, + "learning_rate": 3.6924578838018476e-06, + "loss": 8.5243, + "step": 175660 + }, + { + "epoch": 0.8772753377113036, + "grad_norm": 0.0866040512919426, + "learning_rate": 3.690955968860297e-06, + "loss": 8.5139, + "step": 175670 + }, + { + "epoch": 0.8773252765362431, + "grad_norm": 0.08929452300071716, + "learning_rate": 3.6894540539187462e-06, + "loss": 8.5253, + "step": 175680 + }, + { + "epoch": 0.8773752153611826, + "grad_norm": 0.0889781191945076, + "learning_rate": 3.687952138977196e-06, + "loss": 8.5166, + "step": 175690 + }, + { + "epoch": 0.877425154186122, + "grad_norm": 0.09169498085975647, + "learning_rate": 3.6864502240356457e-06, + "loss": 8.5326, + "step": 175700 + }, + { + "epoch": 0.8774750930110614, + "grad_norm": 0.08967631310224533, + "learning_rate": 3.684948309094095e-06, + "loss": 8.5351, + "step": 175710 + }, + { + "epoch": 0.877525031836001, + "grad_norm": 0.09231317043304443, + "learning_rate": 3.6834463941525444e-06, + "loss": 8.4988, + "step": 175720 + }, + { + "epoch": 0.8775749706609404, + "grad_norm": 0.09632280468940735, + "learning_rate": 3.681944479210994e-06, + "loss": 8.5177, + "step": 175730 + }, + { + "epoch": 0.8776249094858798, + "grad_norm": 0.0874035581946373, + "learning_rate": 3.6804425642694435e-06, + "loss": 8.5378, + "step": 175740 + }, + { + "epoch": 0.8776748483108192, + "grad_norm": 0.08852864801883698, + "learning_rate": 3.6789406493278932e-06, + "loss": 8.522, + "step": 175750 + }, + { + "epoch": 0.8777247871357587, + "grad_norm": 0.09550099074840546, + "learning_rate": 3.6774387343863426e-06, + "loss": 8.5171, + "step": 175760 + }, + { + "epoch": 0.8777747259606982, + "grad_norm": 0.09109804779291153, + "learning_rate": 3.6759368194447923e-06, + "loss": 8.5383, + "step": 175770 + }, + { + "epoch": 0.8778246647856376, + "grad_norm": 0.08961420506238937, + "learning_rate": 3.6744349045032417e-06, + "loss": 8.5162, + "step": 175780 + }, + { + "epoch": 0.877874603610577, + "grad_norm": 0.0872698426246643, + "learning_rate": 3.672932989561691e-06, + "loss": 8.5116, + "step": 175790 + }, + { + "epoch": 0.8779245424355165, + "grad_norm": 0.0968591645359993, + "learning_rate": 3.6714310746201407e-06, + "loss": 8.5413, + "step": 175800 + }, + { + "epoch": 0.877974481260456, + "grad_norm": 0.09531614184379578, + "learning_rate": 3.6699291596785905e-06, + "loss": 8.5404, + "step": 175810 + }, + { + "epoch": 0.8780244200853954, + "grad_norm": 0.09465590864419937, + "learning_rate": 3.66842724473704e-06, + "loss": 8.5129, + "step": 175820 + }, + { + "epoch": 0.8780743589103348, + "grad_norm": 0.09236373007297516, + "learning_rate": 3.666925329795489e-06, + "loss": 8.5317, + "step": 175830 + }, + { + "epoch": 0.8781242977352743, + "grad_norm": 0.09544330835342407, + "learning_rate": 3.6654234148539385e-06, + "loss": 8.5133, + "step": 175840 + }, + { + "epoch": 0.8781742365602138, + "grad_norm": 0.08793754130601883, + "learning_rate": 3.6639214999123887e-06, + "loss": 8.519, + "step": 175850 + }, + { + "epoch": 0.8782241753851532, + "grad_norm": 0.09597655385732651, + "learning_rate": 3.662419584970838e-06, + "loss": 8.5266, + "step": 175860 + }, + { + "epoch": 0.8782741142100926, + "grad_norm": 0.09199637174606323, + "learning_rate": 3.6609176700292873e-06, + "loss": 8.5188, + "step": 175870 + }, + { + "epoch": 0.8783240530350321, + "grad_norm": 0.09712479263544083, + "learning_rate": 3.6594157550877367e-06, + "loss": 8.5235, + "step": 175880 + }, + { + "epoch": 0.8783739918599716, + "grad_norm": 0.08960530161857605, + "learning_rate": 3.6579138401461864e-06, + "loss": 8.5275, + "step": 175890 + }, + { + "epoch": 0.878423930684911, + "grad_norm": 0.08847316354513168, + "learning_rate": 3.656411925204636e-06, + "loss": 8.5109, + "step": 175900 + }, + { + "epoch": 0.8784738695098504, + "grad_norm": 0.09034876525402069, + "learning_rate": 3.6549100102630855e-06, + "loss": 8.54, + "step": 175910 + }, + { + "epoch": 0.8785238083347899, + "grad_norm": 0.09454880654811859, + "learning_rate": 3.653408095321535e-06, + "loss": 8.5137, + "step": 175920 + }, + { + "epoch": 0.8785737471597294, + "grad_norm": 0.08720461279153824, + "learning_rate": 3.6519061803799846e-06, + "loss": 8.5258, + "step": 175930 + }, + { + "epoch": 0.8786236859846688, + "grad_norm": 0.08554758876562119, + "learning_rate": 3.650404265438434e-06, + "loss": 8.514, + "step": 175940 + }, + { + "epoch": 0.8786736248096082, + "grad_norm": 0.09344278275966644, + "learning_rate": 3.6489023504968837e-06, + "loss": 8.5226, + "step": 175950 + }, + { + "epoch": 0.8787235636345477, + "grad_norm": 0.09505718946456909, + "learning_rate": 3.647400435555333e-06, + "loss": 8.4881, + "step": 175960 + }, + { + "epoch": 0.8787735024594872, + "grad_norm": 0.08501995354890823, + "learning_rate": 3.6458985206137828e-06, + "loss": 8.5071, + "step": 175970 + }, + { + "epoch": 0.8788234412844266, + "grad_norm": 0.09370040893554688, + "learning_rate": 3.644396605672232e-06, + "loss": 8.5162, + "step": 175980 + }, + { + "epoch": 0.878873380109366, + "grad_norm": 0.08761072903871536, + "learning_rate": 3.6428946907306814e-06, + "loss": 8.5417, + "step": 175990 + }, + { + "epoch": 0.8789233189343055, + "grad_norm": 0.08839986473321915, + "learning_rate": 3.641392775789131e-06, + "loss": 8.5306, + "step": 176000 + }, + { + "epoch": 0.878973257759245, + "grad_norm": 0.09286805242300034, + "learning_rate": 3.639890860847581e-06, + "loss": 8.526, + "step": 176010 + }, + { + "epoch": 0.8790231965841844, + "grad_norm": 0.08983197808265686, + "learning_rate": 3.6383889459060303e-06, + "loss": 8.5081, + "step": 176020 + }, + { + "epoch": 0.8790731354091238, + "grad_norm": 0.09328821301460266, + "learning_rate": 3.6368870309644796e-06, + "loss": 8.5201, + "step": 176030 + }, + { + "epoch": 0.8791230742340632, + "grad_norm": 0.08646514266729355, + "learning_rate": 3.635385116022929e-06, + "loss": 8.522, + "step": 176040 + }, + { + "epoch": 0.8791730130590028, + "grad_norm": 0.09557259827852249, + "learning_rate": 3.633883201081379e-06, + "loss": 8.5124, + "step": 176050 + }, + { + "epoch": 0.8792229518839422, + "grad_norm": 0.08780386298894882, + "learning_rate": 3.6323812861398284e-06, + "loss": 8.5364, + "step": 176060 + }, + { + "epoch": 0.8792728907088816, + "grad_norm": 0.08918431401252747, + "learning_rate": 3.6308793711982778e-06, + "loss": 8.5319, + "step": 176070 + }, + { + "epoch": 0.879322829533821, + "grad_norm": 0.09585704654455185, + "learning_rate": 3.629377456256727e-06, + "loss": 8.5099, + "step": 176080 + }, + { + "epoch": 0.8793727683587605, + "grad_norm": 0.08608123660087585, + "learning_rate": 3.627875541315177e-06, + "loss": 8.5201, + "step": 176090 + }, + { + "epoch": 0.8794227071837, + "grad_norm": 0.09179461002349854, + "learning_rate": 3.6263736263736266e-06, + "loss": 8.5454, + "step": 176100 + }, + { + "epoch": 0.8794726460086394, + "grad_norm": 0.09087244421243668, + "learning_rate": 3.624871711432076e-06, + "loss": 8.5105, + "step": 176110 + }, + { + "epoch": 0.8795225848335788, + "grad_norm": 0.09345267713069916, + "learning_rate": 3.6233697964905253e-06, + "loss": 8.5111, + "step": 176120 + }, + { + "epoch": 0.8795725236585183, + "grad_norm": 0.09425700455904007, + "learning_rate": 3.621867881548975e-06, + "loss": 8.5227, + "step": 176130 + }, + { + "epoch": 0.8796224624834578, + "grad_norm": 0.08979269117116928, + "learning_rate": 3.6203659666074248e-06, + "loss": 8.5186, + "step": 176140 + }, + { + "epoch": 0.8796724013083972, + "grad_norm": 0.09118253737688065, + "learning_rate": 3.618864051665874e-06, + "loss": 8.5299, + "step": 176150 + }, + { + "epoch": 0.8797223401333366, + "grad_norm": 0.09220730513334274, + "learning_rate": 3.6173621367243234e-06, + "loss": 8.5191, + "step": 176160 + }, + { + "epoch": 0.8797722789582761, + "grad_norm": 0.08798353374004364, + "learning_rate": 3.615860221782773e-06, + "loss": 8.5048, + "step": 176170 + }, + { + "epoch": 0.8798222177832156, + "grad_norm": 0.0894918367266655, + "learning_rate": 3.6143583068412225e-06, + "loss": 8.5088, + "step": 176180 + }, + { + "epoch": 0.879872156608155, + "grad_norm": 0.09121794253587723, + "learning_rate": 3.6128563918996723e-06, + "loss": 8.5221, + "step": 176190 + }, + { + "epoch": 0.8799220954330944, + "grad_norm": 0.09195500612258911, + "learning_rate": 3.6113544769581216e-06, + "loss": 8.5301, + "step": 176200 + }, + { + "epoch": 0.8799720342580339, + "grad_norm": 0.08938493579626083, + "learning_rate": 3.6098525620165714e-06, + "loss": 8.5136, + "step": 176210 + }, + { + "epoch": 0.8800219730829734, + "grad_norm": 0.08818230032920837, + "learning_rate": 3.6083506470750207e-06, + "loss": 8.5135, + "step": 176220 + }, + { + "epoch": 0.8800719119079128, + "grad_norm": 0.09114837646484375, + "learning_rate": 3.60684873213347e-06, + "loss": 8.5182, + "step": 176230 + }, + { + "epoch": 0.8801218507328522, + "grad_norm": 0.09198463708162308, + "learning_rate": 3.60534681719192e-06, + "loss": 8.5281, + "step": 176240 + }, + { + "epoch": 0.8801717895577917, + "grad_norm": 0.08858832716941833, + "learning_rate": 3.6038449022503695e-06, + "loss": 8.5173, + "step": 176250 + }, + { + "epoch": 0.8802217283827312, + "grad_norm": 0.09060419350862503, + "learning_rate": 3.602342987308819e-06, + "loss": 8.5146, + "step": 176260 + }, + { + "epoch": 0.8802716672076706, + "grad_norm": 0.09478036314249039, + "learning_rate": 3.600841072367268e-06, + "loss": 8.5338, + "step": 176270 + }, + { + "epoch": 0.88032160603261, + "grad_norm": 0.09485729038715363, + "learning_rate": 3.5993391574257175e-06, + "loss": 8.5161, + "step": 176280 + }, + { + "epoch": 0.8803715448575495, + "grad_norm": 0.08923057466745377, + "learning_rate": 3.5978372424841677e-06, + "loss": 8.512, + "step": 176290 + }, + { + "epoch": 0.880421483682489, + "grad_norm": 0.09072960168123245, + "learning_rate": 3.596335327542617e-06, + "loss": 8.5052, + "step": 176300 + }, + { + "epoch": 0.8804714225074284, + "grad_norm": 0.08910852670669556, + "learning_rate": 3.5948334126010664e-06, + "loss": 8.511, + "step": 176310 + }, + { + "epoch": 0.8805213613323678, + "grad_norm": 0.09066713601350784, + "learning_rate": 3.5933314976595157e-06, + "loss": 8.5136, + "step": 176320 + }, + { + "epoch": 0.8805713001573073, + "grad_norm": 0.08695919066667557, + "learning_rate": 3.5918295827179655e-06, + "loss": 8.5367, + "step": 176330 + }, + { + "epoch": 0.8806212389822468, + "grad_norm": 0.08876381814479828, + "learning_rate": 3.5903276677764152e-06, + "loss": 8.5279, + "step": 176340 + }, + { + "epoch": 0.8806711778071862, + "grad_norm": 0.09524772316217422, + "learning_rate": 3.5888257528348646e-06, + "loss": 8.5178, + "step": 176350 + }, + { + "epoch": 0.8807211166321256, + "grad_norm": 0.0915902629494667, + "learning_rate": 3.587323837893314e-06, + "loss": 8.5005, + "step": 176360 + }, + { + "epoch": 0.8807710554570651, + "grad_norm": 0.091648168861866, + "learning_rate": 3.5858219229517636e-06, + "loss": 8.522, + "step": 176370 + }, + { + "epoch": 0.8808209942820046, + "grad_norm": 0.08661641925573349, + "learning_rate": 3.584320008010213e-06, + "loss": 8.5298, + "step": 176380 + }, + { + "epoch": 0.880870933106944, + "grad_norm": 0.09084923565387726, + "learning_rate": 3.5828180930686627e-06, + "loss": 8.5104, + "step": 176390 + }, + { + "epoch": 0.8809208719318834, + "grad_norm": 0.09532788395881653, + "learning_rate": 3.581316178127112e-06, + "loss": 8.5164, + "step": 176400 + }, + { + "epoch": 0.8809708107568229, + "grad_norm": 0.09041056036949158, + "learning_rate": 3.579814263185562e-06, + "loss": 8.5083, + "step": 176410 + }, + { + "epoch": 0.8810207495817624, + "grad_norm": 0.0913330689072609, + "learning_rate": 3.578312348244011e-06, + "loss": 8.526, + "step": 176420 + }, + { + "epoch": 0.8810706884067018, + "grad_norm": 0.09379500150680542, + "learning_rate": 3.5768104333024605e-06, + "loss": 8.5027, + "step": 176430 + }, + { + "epoch": 0.8811206272316412, + "grad_norm": 0.09111961722373962, + "learning_rate": 3.5753085183609102e-06, + "loss": 8.5041, + "step": 176440 + }, + { + "epoch": 0.8811705660565807, + "grad_norm": 0.09450194984674454, + "learning_rate": 3.57380660341936e-06, + "loss": 8.5164, + "step": 176450 + }, + { + "epoch": 0.8812205048815202, + "grad_norm": 0.08781801164150238, + "learning_rate": 3.5723046884778093e-06, + "loss": 8.5286, + "step": 176460 + }, + { + "epoch": 0.8812704437064596, + "grad_norm": 0.08742903918027878, + "learning_rate": 3.5708027735362586e-06, + "loss": 8.5282, + "step": 176470 + }, + { + "epoch": 0.881320382531399, + "grad_norm": 0.09143572300672531, + "learning_rate": 3.569300858594708e-06, + "loss": 8.5142, + "step": 176480 + }, + { + "epoch": 0.8813703213563385, + "grad_norm": 0.0917043462395668, + "learning_rate": 3.567798943653158e-06, + "loss": 8.5381, + "step": 176490 + }, + { + "epoch": 0.881420260181278, + "grad_norm": 0.08833152055740356, + "learning_rate": 3.5662970287116075e-06, + "loss": 8.5399, + "step": 176500 + }, + { + "epoch": 0.8814701990062174, + "grad_norm": 0.09015662968158722, + "learning_rate": 3.564795113770057e-06, + "loss": 8.5365, + "step": 176510 + }, + { + "epoch": 0.8815201378311568, + "grad_norm": 0.0858858972787857, + "learning_rate": 3.563293198828506e-06, + "loss": 8.5222, + "step": 176520 + }, + { + "epoch": 0.8815700766560963, + "grad_norm": 0.08976481109857559, + "learning_rate": 3.561791283886956e-06, + "loss": 8.5183, + "step": 176530 + }, + { + "epoch": 0.8816200154810357, + "grad_norm": 0.08899136632680893, + "learning_rate": 3.5602893689454057e-06, + "loss": 8.5094, + "step": 176540 + }, + { + "epoch": 0.8816699543059752, + "grad_norm": 0.09167985618114471, + "learning_rate": 3.558787454003855e-06, + "loss": 8.5039, + "step": 176550 + }, + { + "epoch": 0.8817198931309146, + "grad_norm": 0.09655990451574326, + "learning_rate": 3.5572855390623043e-06, + "loss": 8.5208, + "step": 176560 + }, + { + "epoch": 0.8817698319558541, + "grad_norm": 0.0897342711687088, + "learning_rate": 3.555783624120754e-06, + "loss": 8.511, + "step": 176570 + }, + { + "epoch": 0.8818197707807935, + "grad_norm": 0.08929044008255005, + "learning_rate": 3.5542817091792034e-06, + "loss": 8.5278, + "step": 176580 + }, + { + "epoch": 0.881869709605733, + "grad_norm": 0.09658628702163696, + "learning_rate": 3.552779794237653e-06, + "loss": 8.501, + "step": 176590 + }, + { + "epoch": 0.8819196484306724, + "grad_norm": 0.09207036346197128, + "learning_rate": 3.5512778792961025e-06, + "loss": 8.5236, + "step": 176600 + }, + { + "epoch": 0.8819695872556119, + "grad_norm": 0.09149431437253952, + "learning_rate": 3.5497759643545523e-06, + "loss": 8.5151, + "step": 176610 + }, + { + "epoch": 0.8820195260805513, + "grad_norm": 0.08894215524196625, + "learning_rate": 3.5482740494130016e-06, + "loss": 8.5278, + "step": 176620 + }, + { + "epoch": 0.8820694649054908, + "grad_norm": 0.09006328135728836, + "learning_rate": 3.5467721344714513e-06, + "loss": 8.5218, + "step": 176630 + }, + { + "epoch": 0.8821194037304302, + "grad_norm": 0.09437132626771927, + "learning_rate": 3.5452702195299007e-06, + "loss": 8.5154, + "step": 176640 + }, + { + "epoch": 0.8821693425553697, + "grad_norm": 0.09077361971139908, + "learning_rate": 3.5437683045883504e-06, + "loss": 8.5006, + "step": 176650 + }, + { + "epoch": 0.8822192813803091, + "grad_norm": 0.08984161168336868, + "learning_rate": 3.5422663896467998e-06, + "loss": 8.5379, + "step": 176660 + }, + { + "epoch": 0.8822692202052486, + "grad_norm": 0.09607677906751633, + "learning_rate": 3.540764474705249e-06, + "loss": 8.525, + "step": 176670 + }, + { + "epoch": 0.882319159030188, + "grad_norm": 0.09277524054050446, + "learning_rate": 3.539262559763699e-06, + "loss": 8.5213, + "step": 176680 + }, + { + "epoch": 0.8823690978551275, + "grad_norm": 0.09065360575914383, + "learning_rate": 3.5377606448221486e-06, + "loss": 8.5276, + "step": 176690 + }, + { + "epoch": 0.8824190366800669, + "grad_norm": 0.10031012445688248, + "learning_rate": 3.536258729880598e-06, + "loss": 8.4984, + "step": 176700 + }, + { + "epoch": 0.8824689755050064, + "grad_norm": 0.08632688969373703, + "learning_rate": 3.5347568149390473e-06, + "loss": 8.549, + "step": 176710 + }, + { + "epoch": 0.8825189143299458, + "grad_norm": 0.09275210648775101, + "learning_rate": 3.5332548999974966e-06, + "loss": 8.5149, + "step": 176720 + }, + { + "epoch": 0.8825688531548853, + "grad_norm": 0.08953607082366943, + "learning_rate": 3.5317529850559468e-06, + "loss": 8.5107, + "step": 176730 + }, + { + "epoch": 0.8826187919798247, + "grad_norm": 0.09068858623504639, + "learning_rate": 3.530251070114396e-06, + "loss": 8.5048, + "step": 176740 + }, + { + "epoch": 0.8826687308047642, + "grad_norm": 0.08825194090604782, + "learning_rate": 3.5287491551728454e-06, + "loss": 8.5169, + "step": 176750 + }, + { + "epoch": 0.8827186696297036, + "grad_norm": 0.08667933940887451, + "learning_rate": 3.5272472402312948e-06, + "loss": 8.5104, + "step": 176760 + }, + { + "epoch": 0.8827686084546431, + "grad_norm": 0.0986347422003746, + "learning_rate": 3.5257453252897445e-06, + "loss": 8.5012, + "step": 176770 + }, + { + "epoch": 0.8828185472795825, + "grad_norm": 0.09399598091840744, + "learning_rate": 3.5242434103481943e-06, + "loss": 8.5245, + "step": 176780 + }, + { + "epoch": 0.882868486104522, + "grad_norm": 0.0902981236577034, + "learning_rate": 3.5227414954066436e-06, + "loss": 8.5248, + "step": 176790 + }, + { + "epoch": 0.8829184249294614, + "grad_norm": 0.09248172491788864, + "learning_rate": 3.521239580465093e-06, + "loss": 8.526, + "step": 176800 + }, + { + "epoch": 0.8829683637544009, + "grad_norm": 0.08781624585390091, + "learning_rate": 3.5197376655235427e-06, + "loss": 8.5162, + "step": 176810 + }, + { + "epoch": 0.8830183025793403, + "grad_norm": 0.09201288223266602, + "learning_rate": 3.518235750581992e-06, + "loss": 8.5082, + "step": 176820 + }, + { + "epoch": 0.8830682414042798, + "grad_norm": 0.0937962606549263, + "learning_rate": 3.5167338356404418e-06, + "loss": 8.5262, + "step": 176830 + }, + { + "epoch": 0.8831181802292192, + "grad_norm": 0.08981435745954514, + "learning_rate": 3.515231920698891e-06, + "loss": 8.5141, + "step": 176840 + }, + { + "epoch": 0.8831681190541587, + "grad_norm": 0.09788450598716736, + "learning_rate": 3.513730005757341e-06, + "loss": 8.5209, + "step": 176850 + }, + { + "epoch": 0.8832180578790981, + "grad_norm": 0.09376143664121628, + "learning_rate": 3.51222809081579e-06, + "loss": 8.5123, + "step": 176860 + }, + { + "epoch": 0.8832679967040376, + "grad_norm": 0.09434860199689865, + "learning_rate": 3.5107261758742395e-06, + "loss": 8.5131, + "step": 176870 + }, + { + "epoch": 0.883317935528977, + "grad_norm": 0.09667465090751648, + "learning_rate": 3.5092242609326893e-06, + "loss": 8.5213, + "step": 176880 + }, + { + "epoch": 0.8833678743539165, + "grad_norm": 0.09229189902544022, + "learning_rate": 3.507722345991139e-06, + "loss": 8.5217, + "step": 176890 + }, + { + "epoch": 0.8834178131788559, + "grad_norm": 0.08936712145805359, + "learning_rate": 3.5062204310495884e-06, + "loss": 8.5231, + "step": 176900 + }, + { + "epoch": 0.8834677520037953, + "grad_norm": 0.08999984711408615, + "learning_rate": 3.5047185161080377e-06, + "loss": 8.5265, + "step": 176910 + }, + { + "epoch": 0.8835176908287348, + "grad_norm": 0.08811944723129272, + "learning_rate": 3.503216601166487e-06, + "loss": 8.5266, + "step": 176920 + }, + { + "epoch": 0.8835676296536743, + "grad_norm": 0.0963667631149292, + "learning_rate": 3.501714686224937e-06, + "loss": 8.5098, + "step": 176930 + }, + { + "epoch": 0.8836175684786137, + "grad_norm": 0.09169621020555496, + "learning_rate": 3.5002127712833865e-06, + "loss": 8.5253, + "step": 176940 + }, + { + "epoch": 0.8836675073035531, + "grad_norm": 0.095432348549366, + "learning_rate": 3.498710856341836e-06, + "loss": 8.5165, + "step": 176950 + }, + { + "epoch": 0.8837174461284926, + "grad_norm": 0.09653273224830627, + "learning_rate": 3.497208941400285e-06, + "loss": 8.5344, + "step": 176960 + }, + { + "epoch": 0.8837673849534321, + "grad_norm": 0.09007629007101059, + "learning_rate": 3.495707026458735e-06, + "loss": 8.5275, + "step": 176970 + }, + { + "epoch": 0.8838173237783715, + "grad_norm": 0.09573765844106674, + "learning_rate": 3.4942051115171847e-06, + "loss": 8.5092, + "step": 176980 + }, + { + "epoch": 0.8838672626033109, + "grad_norm": 0.09132372587919235, + "learning_rate": 3.492703196575634e-06, + "loss": 8.518, + "step": 176990 + }, + { + "epoch": 0.8839172014282504, + "grad_norm": 0.09207233786582947, + "learning_rate": 3.4912012816340834e-06, + "loss": 8.5078, + "step": 177000 + }, + { + "epoch": 0.8839671402531898, + "grad_norm": 0.09270022809505463, + "learning_rate": 3.489699366692533e-06, + "loss": 8.549, + "step": 177010 + }, + { + "epoch": 0.8840170790781293, + "grad_norm": 0.09225805103778839, + "learning_rate": 3.4881974517509825e-06, + "loss": 8.518, + "step": 177020 + }, + { + "epoch": 0.8840670179030687, + "grad_norm": 0.092227041721344, + "learning_rate": 3.4866955368094322e-06, + "loss": 8.5222, + "step": 177030 + }, + { + "epoch": 0.8841169567280082, + "grad_norm": 0.08810020238161087, + "learning_rate": 3.4851936218678815e-06, + "loss": 8.5089, + "step": 177040 + }, + { + "epoch": 0.8841668955529476, + "grad_norm": 0.08814115077257156, + "learning_rate": 3.4836917069263313e-06, + "loss": 8.5423, + "step": 177050 + }, + { + "epoch": 0.8842168343778871, + "grad_norm": 0.09531360119581223, + "learning_rate": 3.4821897919847806e-06, + "loss": 8.5245, + "step": 177060 + }, + { + "epoch": 0.8842667732028265, + "grad_norm": 0.08866927027702332, + "learning_rate": 3.48068787704323e-06, + "loss": 8.5009, + "step": 177070 + }, + { + "epoch": 0.884316712027766, + "grad_norm": 0.08944374322891235, + "learning_rate": 3.4791859621016797e-06, + "loss": 8.5293, + "step": 177080 + }, + { + "epoch": 0.8843666508527054, + "grad_norm": 0.08847851306200027, + "learning_rate": 3.4776840471601295e-06, + "loss": 8.5336, + "step": 177090 + }, + { + "epoch": 0.8844165896776449, + "grad_norm": 0.0904935672879219, + "learning_rate": 3.476182132218579e-06, + "loss": 8.5157, + "step": 177100 + }, + { + "epoch": 0.8844665285025843, + "grad_norm": 0.0903678685426712, + "learning_rate": 3.474680217277028e-06, + "loss": 8.517, + "step": 177110 + }, + { + "epoch": 0.8845164673275238, + "grad_norm": 0.09201235324144363, + "learning_rate": 3.4731783023354775e-06, + "loss": 8.5221, + "step": 177120 + }, + { + "epoch": 0.8845664061524632, + "grad_norm": 0.09328807145357132, + "learning_rate": 3.4716763873939276e-06, + "loss": 8.5328, + "step": 177130 + }, + { + "epoch": 0.8846163449774027, + "grad_norm": 0.08826225250959396, + "learning_rate": 3.470174472452377e-06, + "loss": 8.5153, + "step": 177140 + }, + { + "epoch": 0.8846662838023421, + "grad_norm": 0.09613511711359024, + "learning_rate": 3.4686725575108263e-06, + "loss": 8.5224, + "step": 177150 + }, + { + "epoch": 0.8847162226272816, + "grad_norm": 0.0888667032122612, + "learning_rate": 3.4671706425692756e-06, + "loss": 8.5245, + "step": 177160 + }, + { + "epoch": 0.884766161452221, + "grad_norm": 0.08986398577690125, + "learning_rate": 3.4656687276277254e-06, + "loss": 8.5181, + "step": 177170 + }, + { + "epoch": 0.8848161002771605, + "grad_norm": 0.09408170729875565, + "learning_rate": 3.464166812686175e-06, + "loss": 8.5075, + "step": 177180 + }, + { + "epoch": 0.8848660391020999, + "grad_norm": 0.08650843799114227, + "learning_rate": 3.4626648977446245e-06, + "loss": 8.5356, + "step": 177190 + }, + { + "epoch": 0.8849159779270394, + "grad_norm": 0.09117493778467178, + "learning_rate": 3.461162982803074e-06, + "loss": 8.5275, + "step": 177200 + }, + { + "epoch": 0.8849659167519788, + "grad_norm": 0.09649976342916489, + "learning_rate": 3.4596610678615236e-06, + "loss": 8.5083, + "step": 177210 + }, + { + "epoch": 0.8850158555769183, + "grad_norm": 0.09083914011716843, + "learning_rate": 3.4581591529199733e-06, + "loss": 8.5271, + "step": 177220 + }, + { + "epoch": 0.8850657944018577, + "grad_norm": 0.0903511494398117, + "learning_rate": 3.4566572379784227e-06, + "loss": 8.5062, + "step": 177230 + }, + { + "epoch": 0.8851157332267972, + "grad_norm": 0.09016105532646179, + "learning_rate": 3.455155323036872e-06, + "loss": 8.5287, + "step": 177240 + }, + { + "epoch": 0.8851656720517366, + "grad_norm": 0.09000012278556824, + "learning_rate": 3.4536534080953217e-06, + "loss": 8.4927, + "step": 177250 + }, + { + "epoch": 0.8852156108766761, + "grad_norm": 0.088304802775383, + "learning_rate": 3.452151493153771e-06, + "loss": 8.5076, + "step": 177260 + }, + { + "epoch": 0.8852655497016155, + "grad_norm": 0.09251948446035385, + "learning_rate": 3.450649578212221e-06, + "loss": 8.5103, + "step": 177270 + }, + { + "epoch": 0.885315488526555, + "grad_norm": 0.08819442987442017, + "learning_rate": 3.44914766327067e-06, + "loss": 8.5039, + "step": 177280 + }, + { + "epoch": 0.8853654273514944, + "grad_norm": 0.0890979915857315, + "learning_rate": 3.44764574832912e-06, + "loss": 8.5171, + "step": 177290 + }, + { + "epoch": 0.8854153661764339, + "grad_norm": 0.0990390032529831, + "learning_rate": 3.4461438333875692e-06, + "loss": 8.52, + "step": 177300 + }, + { + "epoch": 0.8854653050013733, + "grad_norm": 0.08952021598815918, + "learning_rate": 3.4446419184460186e-06, + "loss": 8.5213, + "step": 177310 + }, + { + "epoch": 0.8855152438263127, + "grad_norm": 0.08724192529916763, + "learning_rate": 3.4431400035044683e-06, + "loss": 8.5327, + "step": 177320 + }, + { + "epoch": 0.8855651826512522, + "grad_norm": 0.09806641191244125, + "learning_rate": 3.441638088562918e-06, + "loss": 8.5187, + "step": 177330 + }, + { + "epoch": 0.8856151214761917, + "grad_norm": 0.08996843546628952, + "learning_rate": 3.4401361736213674e-06, + "loss": 8.5578, + "step": 177340 + }, + { + "epoch": 0.8856650603011311, + "grad_norm": 0.09567862004041672, + "learning_rate": 3.4386342586798167e-06, + "loss": 8.5224, + "step": 177350 + }, + { + "epoch": 0.8857149991260705, + "grad_norm": 0.08767600357532501, + "learning_rate": 3.437132343738266e-06, + "loss": 8.5191, + "step": 177360 + }, + { + "epoch": 0.88576493795101, + "grad_norm": 0.09355778992176056, + "learning_rate": 3.4356304287967163e-06, + "loss": 8.5066, + "step": 177370 + }, + { + "epoch": 0.8858148767759495, + "grad_norm": 0.09189339727163315, + "learning_rate": 3.4341285138551656e-06, + "loss": 8.5076, + "step": 177380 + }, + { + "epoch": 0.8858648156008889, + "grad_norm": 0.08906829357147217, + "learning_rate": 3.432626598913615e-06, + "loss": 8.5052, + "step": 177390 + }, + { + "epoch": 0.8859147544258283, + "grad_norm": 0.08834553509950638, + "learning_rate": 3.4311246839720643e-06, + "loss": 8.5115, + "step": 177400 + }, + { + "epoch": 0.8859646932507678, + "grad_norm": 0.09251053631305695, + "learning_rate": 3.429622769030514e-06, + "loss": 8.5233, + "step": 177410 + }, + { + "epoch": 0.8860146320757073, + "grad_norm": 0.08647915720939636, + "learning_rate": 3.4281208540889638e-06, + "loss": 8.5078, + "step": 177420 + }, + { + "epoch": 0.8860645709006467, + "grad_norm": 0.09425395727157593, + "learning_rate": 3.426618939147413e-06, + "loss": 8.5261, + "step": 177430 + }, + { + "epoch": 0.8861145097255861, + "grad_norm": 0.08929485827684402, + "learning_rate": 3.4251170242058624e-06, + "loss": 8.5276, + "step": 177440 + }, + { + "epoch": 0.8861644485505256, + "grad_norm": 0.09134992957115173, + "learning_rate": 3.423615109264312e-06, + "loss": 8.531, + "step": 177450 + }, + { + "epoch": 0.8862143873754651, + "grad_norm": 0.08963406831026077, + "learning_rate": 3.4221131943227615e-06, + "loss": 8.517, + "step": 177460 + }, + { + "epoch": 0.8862643262004045, + "grad_norm": 0.09403959661722183, + "learning_rate": 3.4206112793812113e-06, + "loss": 8.5015, + "step": 177470 + }, + { + "epoch": 0.8863142650253439, + "grad_norm": 0.08884259313344955, + "learning_rate": 3.4191093644396606e-06, + "loss": 8.511, + "step": 177480 + }, + { + "epoch": 0.8863642038502834, + "grad_norm": 0.09389010816812515, + "learning_rate": 3.4176074494981104e-06, + "loss": 8.4983, + "step": 177490 + }, + { + "epoch": 0.8864141426752229, + "grad_norm": 0.08853202313184738, + "learning_rate": 3.4161055345565597e-06, + "loss": 8.5133, + "step": 177500 + }, + { + "epoch": 0.8864640815001623, + "grad_norm": 0.09294895827770233, + "learning_rate": 3.414603619615009e-06, + "loss": 8.5259, + "step": 177510 + }, + { + "epoch": 0.8865140203251017, + "grad_norm": 0.09223725646734238, + "learning_rate": 3.4131017046734588e-06, + "loss": 8.5059, + "step": 177520 + }, + { + "epoch": 0.8865639591500412, + "grad_norm": 0.09366583824157715, + "learning_rate": 3.4115997897319085e-06, + "loss": 8.5378, + "step": 177530 + }, + { + "epoch": 0.8866138979749807, + "grad_norm": 0.08734884113073349, + "learning_rate": 3.410097874790358e-06, + "loss": 8.5117, + "step": 177540 + }, + { + "epoch": 0.8866638367999201, + "grad_norm": 0.09199695289134979, + "learning_rate": 3.408595959848807e-06, + "loss": 8.5281, + "step": 177550 + }, + { + "epoch": 0.8867137756248595, + "grad_norm": 0.09172987192869186, + "learning_rate": 3.4070940449072565e-06, + "loss": 8.523, + "step": 177560 + }, + { + "epoch": 0.886763714449799, + "grad_norm": 0.09687057882547379, + "learning_rate": 3.4055921299657067e-06, + "loss": 8.532, + "step": 177570 + }, + { + "epoch": 0.8868136532747385, + "grad_norm": 0.09877201914787292, + "learning_rate": 3.404090215024156e-06, + "loss": 8.534, + "step": 177580 + }, + { + "epoch": 0.8868635920996779, + "grad_norm": 0.09122820198535919, + "learning_rate": 3.4025883000826054e-06, + "loss": 8.4999, + "step": 177590 + }, + { + "epoch": 0.8869135309246173, + "grad_norm": 0.09195327013731003, + "learning_rate": 3.4010863851410547e-06, + "loss": 8.5035, + "step": 177600 + }, + { + "epoch": 0.8869634697495568, + "grad_norm": 0.09369145333766937, + "learning_rate": 3.3995844701995044e-06, + "loss": 8.4957, + "step": 177610 + }, + { + "epoch": 0.8870134085744963, + "grad_norm": 0.09592372179031372, + "learning_rate": 3.398082555257954e-06, + "loss": 8.5032, + "step": 177620 + }, + { + "epoch": 0.8870633473994357, + "grad_norm": 0.09076791256666183, + "learning_rate": 3.3965806403164035e-06, + "loss": 8.5275, + "step": 177630 + }, + { + "epoch": 0.8871132862243751, + "grad_norm": 0.0895276591181755, + "learning_rate": 3.395078725374853e-06, + "loss": 8.5322, + "step": 177640 + }, + { + "epoch": 0.8871632250493146, + "grad_norm": 0.09077776968479156, + "learning_rate": 3.3935768104333026e-06, + "loss": 8.5201, + "step": 177650 + }, + { + "epoch": 0.8872131638742541, + "grad_norm": 0.09358912706375122, + "learning_rate": 3.392074895491752e-06, + "loss": 8.5083, + "step": 177660 + }, + { + "epoch": 0.8872631026991935, + "grad_norm": 0.09124220907688141, + "learning_rate": 3.3905729805502017e-06, + "loss": 8.514, + "step": 177670 + }, + { + "epoch": 0.8873130415241329, + "grad_norm": 0.09009285271167755, + "learning_rate": 3.389071065608651e-06, + "loss": 8.5199, + "step": 177680 + }, + { + "epoch": 0.8873629803490723, + "grad_norm": 0.08989504724740982, + "learning_rate": 3.3875691506671008e-06, + "loss": 8.525, + "step": 177690 + }, + { + "epoch": 0.8874129191740119, + "grad_norm": 0.09324032813310623, + "learning_rate": 3.38606723572555e-06, + "loss": 8.5225, + "step": 177700 + }, + { + "epoch": 0.8874628579989513, + "grad_norm": 0.09039938449859619, + "learning_rate": 3.3845653207839995e-06, + "loss": 8.5127, + "step": 177710 + }, + { + "epoch": 0.8875127968238907, + "grad_norm": 0.09255453944206238, + "learning_rate": 3.383063405842449e-06, + "loss": 8.5293, + "step": 177720 + }, + { + "epoch": 0.8875627356488301, + "grad_norm": 0.0964113101363182, + "learning_rate": 3.381561490900899e-06, + "loss": 8.5023, + "step": 177730 + }, + { + "epoch": 0.8876126744737697, + "grad_norm": 0.09030985832214355, + "learning_rate": 3.3800595759593483e-06, + "loss": 8.5003, + "step": 177740 + }, + { + "epoch": 0.8876626132987091, + "grad_norm": 0.08826965093612671, + "learning_rate": 3.3785576610177976e-06, + "loss": 8.5222, + "step": 177750 + }, + { + "epoch": 0.8877125521236485, + "grad_norm": 0.09713491797447205, + "learning_rate": 3.3770557460762474e-06, + "loss": 8.5149, + "step": 177760 + }, + { + "epoch": 0.887762490948588, + "grad_norm": 0.09569582343101501, + "learning_rate": 3.375553831134697e-06, + "loss": 8.5064, + "step": 177770 + }, + { + "epoch": 0.8878124297735275, + "grad_norm": 0.09315156936645508, + "learning_rate": 3.3740519161931465e-06, + "loss": 8.5055, + "step": 177780 + }, + { + "epoch": 0.8878623685984669, + "grad_norm": 0.09295808523893356, + "learning_rate": 3.372550001251596e-06, + "loss": 8.5233, + "step": 177790 + }, + { + "epoch": 0.8879123074234063, + "grad_norm": 0.08866126090288162, + "learning_rate": 3.371048086310045e-06, + "loss": 8.5343, + "step": 177800 + }, + { + "epoch": 0.8879622462483457, + "grad_norm": 0.09473447501659393, + "learning_rate": 3.3695461713684953e-06, + "loss": 8.5097, + "step": 177810 + }, + { + "epoch": 0.8880121850732853, + "grad_norm": 0.09259150177240372, + "learning_rate": 3.3680442564269446e-06, + "loss": 8.5188, + "step": 177820 + }, + { + "epoch": 0.8880621238982247, + "grad_norm": 0.09014898538589478, + "learning_rate": 3.366542341485394e-06, + "loss": 8.5231, + "step": 177830 + }, + { + "epoch": 0.8881120627231641, + "grad_norm": 0.09090849757194519, + "learning_rate": 3.3650404265438433e-06, + "loss": 8.5345, + "step": 177840 + }, + { + "epoch": 0.8881620015481035, + "grad_norm": 0.09438339620828629, + "learning_rate": 3.363538511602293e-06, + "loss": 8.5103, + "step": 177850 + }, + { + "epoch": 0.8882119403730431, + "grad_norm": 0.09024190902709961, + "learning_rate": 3.362036596660743e-06, + "loss": 8.5295, + "step": 177860 + }, + { + "epoch": 0.8882618791979825, + "grad_norm": 0.0944947823882103, + "learning_rate": 3.360534681719192e-06, + "loss": 8.5038, + "step": 177870 + }, + { + "epoch": 0.8883118180229219, + "grad_norm": 0.09072814136743546, + "learning_rate": 3.3590327667776415e-06, + "loss": 8.5217, + "step": 177880 + }, + { + "epoch": 0.8883617568478613, + "grad_norm": 0.09160330891609192, + "learning_rate": 3.3575308518360912e-06, + "loss": 8.4917, + "step": 177890 + }, + { + "epoch": 0.8884116956728009, + "grad_norm": 0.08163774758577347, + "learning_rate": 3.3560289368945406e-06, + "loss": 8.5104, + "step": 177900 + }, + { + "epoch": 0.8884616344977403, + "grad_norm": 0.09547600150108337, + "learning_rate": 3.3545270219529903e-06, + "loss": 8.5128, + "step": 177910 + }, + { + "epoch": 0.8885115733226797, + "grad_norm": 0.08949603140354156, + "learning_rate": 3.3530251070114396e-06, + "loss": 8.5251, + "step": 177920 + }, + { + "epoch": 0.8885615121476191, + "grad_norm": 0.09252443164587021, + "learning_rate": 3.3515231920698894e-06, + "loss": 8.5098, + "step": 177930 + }, + { + "epoch": 0.8886114509725587, + "grad_norm": 0.09023504704236984, + "learning_rate": 3.3500212771283387e-06, + "loss": 8.5021, + "step": 177940 + }, + { + "epoch": 0.8886613897974981, + "grad_norm": 0.0936330258846283, + "learning_rate": 3.348519362186788e-06, + "loss": 8.5215, + "step": 177950 + }, + { + "epoch": 0.8887113286224375, + "grad_norm": 0.09109587222337723, + "learning_rate": 3.347017447245238e-06, + "loss": 8.5083, + "step": 177960 + }, + { + "epoch": 0.8887612674473769, + "grad_norm": 0.08897558599710464, + "learning_rate": 3.3455155323036876e-06, + "loss": 8.5156, + "step": 177970 + }, + { + "epoch": 0.8888112062723165, + "grad_norm": 0.09321697801351547, + "learning_rate": 3.344013617362137e-06, + "loss": 8.5194, + "step": 177980 + }, + { + "epoch": 0.8888611450972559, + "grad_norm": 0.08879639208316803, + "learning_rate": 3.3425117024205862e-06, + "loss": 8.5025, + "step": 177990 + }, + { + "epoch": 0.8889110839221953, + "grad_norm": 0.08868400007486343, + "learning_rate": 3.3410097874790356e-06, + "loss": 8.524, + "step": 178000 + }, + { + "epoch": 0.8889610227471347, + "grad_norm": 0.0897117480635643, + "learning_rate": 3.3395078725374857e-06, + "loss": 8.5224, + "step": 178010 + }, + { + "epoch": 0.8890109615720742, + "grad_norm": 0.08525160700082779, + "learning_rate": 3.338005957595935e-06, + "loss": 8.5154, + "step": 178020 + }, + { + "epoch": 0.8890609003970137, + "grad_norm": 0.09332958608865738, + "learning_rate": 3.3365040426543844e-06, + "loss": 8.5251, + "step": 178030 + }, + { + "epoch": 0.8891108392219531, + "grad_norm": 0.08514176309108734, + "learning_rate": 3.3350021277128337e-06, + "loss": 8.5127, + "step": 178040 + }, + { + "epoch": 0.8891607780468925, + "grad_norm": 0.08755916357040405, + "learning_rate": 3.3335002127712835e-06, + "loss": 8.4974, + "step": 178050 + }, + { + "epoch": 0.889210716871832, + "grad_norm": 0.08407943695783615, + "learning_rate": 3.3319982978297332e-06, + "loss": 8.5274, + "step": 178060 + }, + { + "epoch": 0.8892606556967715, + "grad_norm": 0.08439040929079056, + "learning_rate": 3.3304963828881826e-06, + "loss": 8.5451, + "step": 178070 + }, + { + "epoch": 0.8893105945217109, + "grad_norm": 0.09115412086248398, + "learning_rate": 3.328994467946632e-06, + "loss": 8.515, + "step": 178080 + }, + { + "epoch": 0.8893605333466503, + "grad_norm": 0.0921141728758812, + "learning_rate": 3.3274925530050817e-06, + "loss": 8.5164, + "step": 178090 + }, + { + "epoch": 0.8894104721715897, + "grad_norm": 0.09111848473548889, + "learning_rate": 3.325990638063531e-06, + "loss": 8.5319, + "step": 178100 + }, + { + "epoch": 0.8894604109965293, + "grad_norm": 0.0888875350356102, + "learning_rate": 3.3244887231219808e-06, + "loss": 8.4937, + "step": 178110 + }, + { + "epoch": 0.8895103498214687, + "grad_norm": 0.08856623619794846, + "learning_rate": 3.32298680818043e-06, + "loss": 8.5252, + "step": 178120 + }, + { + "epoch": 0.8895602886464081, + "grad_norm": 0.0940069705247879, + "learning_rate": 3.32148489323888e-06, + "loss": 8.514, + "step": 178130 + }, + { + "epoch": 0.8896102274713475, + "grad_norm": 0.09143373370170593, + "learning_rate": 3.319982978297329e-06, + "loss": 8.5126, + "step": 178140 + }, + { + "epoch": 0.8896601662962871, + "grad_norm": 0.08722451329231262, + "learning_rate": 3.3184810633557785e-06, + "loss": 8.5131, + "step": 178150 + }, + { + "epoch": 0.8897101051212265, + "grad_norm": 0.09241468459367752, + "learning_rate": 3.3169791484142283e-06, + "loss": 8.5162, + "step": 178160 + }, + { + "epoch": 0.8897600439461659, + "grad_norm": 0.09892044216394424, + "learning_rate": 3.315477233472678e-06, + "loss": 8.5069, + "step": 178170 + }, + { + "epoch": 0.8898099827711053, + "grad_norm": 0.09388883411884308, + "learning_rate": 3.3139753185311273e-06, + "loss": 8.5245, + "step": 178180 + }, + { + "epoch": 0.8898599215960449, + "grad_norm": 0.08809326589107513, + "learning_rate": 3.3124734035895767e-06, + "loss": 8.5341, + "step": 178190 + }, + { + "epoch": 0.8899098604209843, + "grad_norm": 0.09530996531248093, + "learning_rate": 3.310971488648026e-06, + "loss": 8.4991, + "step": 178200 + }, + { + "epoch": 0.8899597992459237, + "grad_norm": 0.09561774879693985, + "learning_rate": 3.309469573706476e-06, + "loss": 8.5052, + "step": 178210 + }, + { + "epoch": 0.8900097380708631, + "grad_norm": 0.09873489290475845, + "learning_rate": 3.3079676587649255e-06, + "loss": 8.5081, + "step": 178220 + }, + { + "epoch": 0.8900596768958027, + "grad_norm": 0.08937592804431915, + "learning_rate": 3.306465743823375e-06, + "loss": 8.5305, + "step": 178230 + }, + { + "epoch": 0.8901096157207421, + "grad_norm": 0.09414834529161453, + "learning_rate": 3.304963828881824e-06, + "loss": 8.5262, + "step": 178240 + }, + { + "epoch": 0.8901595545456815, + "grad_norm": 0.09100335836410522, + "learning_rate": 3.303461913940274e-06, + "loss": 8.5105, + "step": 178250 + }, + { + "epoch": 0.8902094933706209, + "grad_norm": 0.09098581969738007, + "learning_rate": 3.3019599989987237e-06, + "loss": 8.521, + "step": 178260 + }, + { + "epoch": 0.8902594321955605, + "grad_norm": 0.09130746126174927, + "learning_rate": 3.300458084057173e-06, + "loss": 8.5235, + "step": 178270 + }, + { + "epoch": 0.8903093710204999, + "grad_norm": 0.08628986030817032, + "learning_rate": 3.2989561691156223e-06, + "loss": 8.5126, + "step": 178280 + }, + { + "epoch": 0.8903593098454393, + "grad_norm": 0.09175537526607513, + "learning_rate": 3.297454254174072e-06, + "loss": 8.5197, + "step": 178290 + }, + { + "epoch": 0.8904092486703787, + "grad_norm": 0.09056618064641953, + "learning_rate": 3.295952339232522e-06, + "loss": 8.5097, + "step": 178300 + }, + { + "epoch": 0.8904591874953183, + "grad_norm": 0.09293998777866364, + "learning_rate": 3.294450424290971e-06, + "loss": 8.5344, + "step": 178310 + }, + { + "epoch": 0.8905091263202577, + "grad_norm": 0.09091665595769882, + "learning_rate": 3.2929485093494205e-06, + "loss": 8.5299, + "step": 178320 + }, + { + "epoch": 0.8905590651451971, + "grad_norm": 0.0961182564496994, + "learning_rate": 3.2914465944078703e-06, + "loss": 8.5307, + "step": 178330 + }, + { + "epoch": 0.8906090039701365, + "grad_norm": 0.09689652919769287, + "learning_rate": 3.2899446794663196e-06, + "loss": 8.5363, + "step": 178340 + }, + { + "epoch": 0.8906589427950761, + "grad_norm": 0.09379088878631592, + "learning_rate": 3.2884427645247694e-06, + "loss": 8.5213, + "step": 178350 + }, + { + "epoch": 0.8907088816200155, + "grad_norm": 0.09888619929552078, + "learning_rate": 3.2869408495832187e-06, + "loss": 8.5056, + "step": 178360 + }, + { + "epoch": 0.8907588204449549, + "grad_norm": 0.09750135242938995, + "learning_rate": 3.2854389346416684e-06, + "loss": 8.5178, + "step": 178370 + }, + { + "epoch": 0.8908087592698943, + "grad_norm": 0.09058021754026413, + "learning_rate": 3.2839370197001178e-06, + "loss": 8.5104, + "step": 178380 + }, + { + "epoch": 0.8908586980948339, + "grad_norm": 0.08557943254709244, + "learning_rate": 3.282435104758567e-06, + "loss": 8.5229, + "step": 178390 + }, + { + "epoch": 0.8909086369197733, + "grad_norm": 0.09042729437351227, + "learning_rate": 3.280933189817017e-06, + "loss": 8.5308, + "step": 178400 + }, + { + "epoch": 0.8909585757447127, + "grad_norm": 0.08974535018205643, + "learning_rate": 3.279431274875466e-06, + "loss": 8.5126, + "step": 178410 + }, + { + "epoch": 0.8910085145696521, + "grad_norm": 0.0879034698009491, + "learning_rate": 3.277929359933916e-06, + "loss": 8.5104, + "step": 178420 + }, + { + "epoch": 0.8910584533945917, + "grad_norm": 0.08987148851156235, + "learning_rate": 3.2764274449923653e-06, + "loss": 8.4953, + "step": 178430 + }, + { + "epoch": 0.8911083922195311, + "grad_norm": 0.09033453464508057, + "learning_rate": 3.2749255300508146e-06, + "loss": 8.5089, + "step": 178440 + }, + { + "epoch": 0.8911583310444705, + "grad_norm": 0.09760011732578278, + "learning_rate": 3.2734236151092644e-06, + "loss": 8.4875, + "step": 178450 + }, + { + "epoch": 0.8912082698694099, + "grad_norm": 0.09189391136169434, + "learning_rate": 3.271921700167714e-06, + "loss": 8.5124, + "step": 178460 + }, + { + "epoch": 0.8912582086943495, + "grad_norm": 0.09732663631439209, + "learning_rate": 3.2704197852261635e-06, + "loss": 8.5091, + "step": 178470 + }, + { + "epoch": 0.8913081475192889, + "grad_norm": 0.0918707624077797, + "learning_rate": 3.2689178702846128e-06, + "loss": 8.5142, + "step": 178480 + }, + { + "epoch": 0.8913580863442283, + "grad_norm": 0.09437326341867447, + "learning_rate": 3.267415955343062e-06, + "loss": 8.5027, + "step": 178490 + }, + { + "epoch": 0.8914080251691677, + "grad_norm": 0.09226923435926437, + "learning_rate": 3.2659140404015123e-06, + "loss": 8.5185, + "step": 178500 + }, + { + "epoch": 0.8914579639941073, + "grad_norm": 0.09034575521945953, + "learning_rate": 3.2644121254599616e-06, + "loss": 8.5148, + "step": 178510 + }, + { + "epoch": 0.8915079028190467, + "grad_norm": 0.09127431362867355, + "learning_rate": 3.262910210518411e-06, + "loss": 8.5053, + "step": 178520 + }, + { + "epoch": 0.8915578416439861, + "grad_norm": 0.09689009934663773, + "learning_rate": 3.2614082955768603e-06, + "loss": 8.4981, + "step": 178530 + }, + { + "epoch": 0.8916077804689255, + "grad_norm": 0.09404323995113373, + "learning_rate": 3.25990638063531e-06, + "loss": 8.5289, + "step": 178540 + }, + { + "epoch": 0.8916577192938651, + "grad_norm": 0.08964677900075912, + "learning_rate": 3.25840446569376e-06, + "loss": 8.5246, + "step": 178550 + }, + { + "epoch": 0.8917076581188045, + "grad_norm": 0.08872044086456299, + "learning_rate": 3.256902550752209e-06, + "loss": 8.5265, + "step": 178560 + }, + { + "epoch": 0.8917575969437439, + "grad_norm": 0.08949794620275497, + "learning_rate": 3.2554006358106585e-06, + "loss": 8.5037, + "step": 178570 + }, + { + "epoch": 0.8918075357686833, + "grad_norm": 0.08609029650688171, + "learning_rate": 3.2538987208691082e-06, + "loss": 8.5024, + "step": 178580 + }, + { + "epoch": 0.8918574745936229, + "grad_norm": 0.09221512079238892, + "learning_rate": 3.2523968059275575e-06, + "loss": 8.5054, + "step": 178590 + }, + { + "epoch": 0.8919074134185623, + "grad_norm": 0.08988411724567413, + "learning_rate": 3.2508948909860073e-06, + "loss": 8.5123, + "step": 178600 + }, + { + "epoch": 0.8919573522435017, + "grad_norm": 0.08725704252719879, + "learning_rate": 3.2493929760444566e-06, + "loss": 8.525, + "step": 178610 + }, + { + "epoch": 0.8920072910684411, + "grad_norm": 0.09931132942438126, + "learning_rate": 3.2478910611029064e-06, + "loss": 8.514, + "step": 178620 + }, + { + "epoch": 0.8920572298933807, + "grad_norm": 0.09435875713825226, + "learning_rate": 3.2463891461613557e-06, + "loss": 8.5047, + "step": 178630 + }, + { + "epoch": 0.8921071687183201, + "grad_norm": 0.092837855219841, + "learning_rate": 3.244887231219805e-06, + "loss": 8.5033, + "step": 178640 + }, + { + "epoch": 0.8921571075432595, + "grad_norm": 0.09155397117137909, + "learning_rate": 3.243385316278255e-06, + "loss": 8.5026, + "step": 178650 + }, + { + "epoch": 0.8922070463681989, + "grad_norm": 0.08951444178819656, + "learning_rate": 3.2418834013367046e-06, + "loss": 8.5213, + "step": 178660 + }, + { + "epoch": 0.8922569851931385, + "grad_norm": 0.08599022775888443, + "learning_rate": 3.240381486395154e-06, + "loss": 8.5207, + "step": 178670 + }, + { + "epoch": 0.8923069240180779, + "grad_norm": 0.08356813341379166, + "learning_rate": 3.2388795714536032e-06, + "loss": 8.5287, + "step": 178680 + }, + { + "epoch": 0.8923568628430173, + "grad_norm": 0.09023530781269073, + "learning_rate": 3.2373776565120526e-06, + "loss": 8.525, + "step": 178690 + }, + { + "epoch": 0.8924068016679567, + "grad_norm": 0.09185218065977097, + "learning_rate": 3.2358757415705027e-06, + "loss": 8.5036, + "step": 178700 + }, + { + "epoch": 0.8924567404928962, + "grad_norm": 0.09013650566339493, + "learning_rate": 3.234373826628952e-06, + "loss": 8.5288, + "step": 178710 + }, + { + "epoch": 0.8925066793178357, + "grad_norm": 0.0958433449268341, + "learning_rate": 3.2328719116874014e-06, + "loss": 8.5167, + "step": 178720 + }, + { + "epoch": 0.8925566181427751, + "grad_norm": 0.08722110837697983, + "learning_rate": 3.2313699967458507e-06, + "loss": 8.5105, + "step": 178730 + }, + { + "epoch": 0.8926065569677145, + "grad_norm": 0.09434662014245987, + "learning_rate": 3.2298680818043005e-06, + "loss": 8.5244, + "step": 178740 + }, + { + "epoch": 0.892656495792654, + "grad_norm": 0.09160378575325012, + "learning_rate": 3.2283661668627502e-06, + "loss": 8.5077, + "step": 178750 + }, + { + "epoch": 0.8927064346175935, + "grad_norm": 0.09734846651554108, + "learning_rate": 3.2268642519211996e-06, + "loss": 8.517, + "step": 178760 + }, + { + "epoch": 0.8927563734425329, + "grad_norm": 0.0917469784617424, + "learning_rate": 3.225362336979649e-06, + "loss": 8.5156, + "step": 178770 + }, + { + "epoch": 0.8928063122674723, + "grad_norm": 0.09718631953001022, + "learning_rate": 3.2238604220380987e-06, + "loss": 8.4938, + "step": 178780 + }, + { + "epoch": 0.8928562510924118, + "grad_norm": 0.09104308485984802, + "learning_rate": 3.222358507096548e-06, + "loss": 8.5254, + "step": 178790 + }, + { + "epoch": 0.8929061899173513, + "grad_norm": 0.09059741348028183, + "learning_rate": 3.2208565921549977e-06, + "loss": 8.529, + "step": 178800 + }, + { + "epoch": 0.8929561287422907, + "grad_norm": 0.0890093445777893, + "learning_rate": 3.219354677213447e-06, + "loss": 8.5212, + "step": 178810 + }, + { + "epoch": 0.8930060675672301, + "grad_norm": 0.09546619653701782, + "learning_rate": 3.217852762271897e-06, + "loss": 8.5247, + "step": 178820 + }, + { + "epoch": 0.8930560063921696, + "grad_norm": 0.09299060702323914, + "learning_rate": 3.216350847330346e-06, + "loss": 8.517, + "step": 178830 + }, + { + "epoch": 0.8931059452171091, + "grad_norm": 0.09568481147289276, + "learning_rate": 3.214848932388796e-06, + "loss": 8.5188, + "step": 178840 + }, + { + "epoch": 0.8931558840420485, + "grad_norm": 0.09317091107368469, + "learning_rate": 3.2133470174472452e-06, + "loss": 8.5156, + "step": 178850 + }, + { + "epoch": 0.8932058228669879, + "grad_norm": 0.08648492395877838, + "learning_rate": 3.211845102505695e-06, + "loss": 8.5408, + "step": 178860 + }, + { + "epoch": 0.8932557616919274, + "grad_norm": 0.09608660638332367, + "learning_rate": 3.2103431875641443e-06, + "loss": 8.5176, + "step": 178870 + }, + { + "epoch": 0.8933057005168669, + "grad_norm": 0.09281721711158752, + "learning_rate": 3.2088412726225937e-06, + "loss": 8.5124, + "step": 178880 + }, + { + "epoch": 0.8933556393418063, + "grad_norm": 0.08907417207956314, + "learning_rate": 3.2073393576810434e-06, + "loss": 8.5096, + "step": 178890 + }, + { + "epoch": 0.8934055781667457, + "grad_norm": 0.09067781269550323, + "learning_rate": 3.205837442739493e-06, + "loss": 8.5257, + "step": 178900 + }, + { + "epoch": 0.8934555169916852, + "grad_norm": 0.08892224729061127, + "learning_rate": 3.2043355277979425e-06, + "loss": 8.5168, + "step": 178910 + }, + { + "epoch": 0.8935054558166247, + "grad_norm": 0.08958274871110916, + "learning_rate": 3.202833612856392e-06, + "loss": 8.5153, + "step": 178920 + }, + { + "epoch": 0.8935553946415641, + "grad_norm": 0.08994346857070923, + "learning_rate": 3.201331697914841e-06, + "loss": 8.5179, + "step": 178930 + }, + { + "epoch": 0.8936053334665035, + "grad_norm": 0.09714482724666595, + "learning_rate": 3.1998297829732913e-06, + "loss": 8.5238, + "step": 178940 + }, + { + "epoch": 0.893655272291443, + "grad_norm": 0.09184648096561432, + "learning_rate": 3.1983278680317407e-06, + "loss": 8.5161, + "step": 178950 + }, + { + "epoch": 0.8937052111163825, + "grad_norm": 0.0893314927816391, + "learning_rate": 3.19682595309019e-06, + "loss": 8.5033, + "step": 178960 + }, + { + "epoch": 0.8937551499413219, + "grad_norm": 0.09436094015836716, + "learning_rate": 3.1953240381486393e-06, + "loss": 8.4939, + "step": 178970 + }, + { + "epoch": 0.8938050887662613, + "grad_norm": 0.08903580904006958, + "learning_rate": 3.193822123207089e-06, + "loss": 8.5137, + "step": 178980 + }, + { + "epoch": 0.8938550275912007, + "grad_norm": 0.089007668197155, + "learning_rate": 3.192320208265539e-06, + "loss": 8.5193, + "step": 178990 + }, + { + "epoch": 0.8939049664161403, + "grad_norm": 0.09373140335083008, + "learning_rate": 3.190818293323988e-06, + "loss": 8.4969, + "step": 179000 + }, + { + "epoch": 0.8939549052410797, + "grad_norm": 0.08744634687900543, + "learning_rate": 3.1893163783824375e-06, + "loss": 8.5342, + "step": 179010 + }, + { + "epoch": 0.8940048440660191, + "grad_norm": 0.0939723551273346, + "learning_rate": 3.1878144634408873e-06, + "loss": 8.5059, + "step": 179020 + }, + { + "epoch": 0.8940547828909585, + "grad_norm": 0.09691902250051498, + "learning_rate": 3.1863125484993366e-06, + "loss": 8.5022, + "step": 179030 + }, + { + "epoch": 0.894104721715898, + "grad_norm": 0.09357941895723343, + "learning_rate": 3.1848106335577864e-06, + "loss": 8.5155, + "step": 179040 + }, + { + "epoch": 0.8941546605408375, + "grad_norm": 0.09528280794620514, + "learning_rate": 3.1833087186162357e-06, + "loss": 8.5088, + "step": 179050 + }, + { + "epoch": 0.8942045993657769, + "grad_norm": 0.09074392914772034, + "learning_rate": 3.1818068036746854e-06, + "loss": 8.4962, + "step": 179060 + }, + { + "epoch": 0.8942545381907163, + "grad_norm": 0.09400563687086105, + "learning_rate": 3.1803048887331348e-06, + "loss": 8.5115, + "step": 179070 + }, + { + "epoch": 0.8943044770156559, + "grad_norm": 0.09290362149477005, + "learning_rate": 3.178802973791584e-06, + "loss": 8.5178, + "step": 179080 + }, + { + "epoch": 0.8943544158405953, + "grad_norm": 0.09567950665950775, + "learning_rate": 3.177301058850034e-06, + "loss": 8.5186, + "step": 179090 + }, + { + "epoch": 0.8944043546655347, + "grad_norm": 0.09070657938718796, + "learning_rate": 3.1757991439084836e-06, + "loss": 8.5161, + "step": 179100 + }, + { + "epoch": 0.8944542934904741, + "grad_norm": 0.08824972063302994, + "learning_rate": 3.174297228966933e-06, + "loss": 8.5189, + "step": 179110 + }, + { + "epoch": 0.8945042323154136, + "grad_norm": 0.09949322789907455, + "learning_rate": 3.1727953140253823e-06, + "loss": 8.5051, + "step": 179120 + }, + { + "epoch": 0.8945541711403531, + "grad_norm": 0.09183038771152496, + "learning_rate": 3.1712933990838316e-06, + "loss": 8.5139, + "step": 179130 + }, + { + "epoch": 0.8946041099652925, + "grad_norm": 0.09553587436676025, + "learning_rate": 3.1697914841422818e-06, + "loss": 8.4899, + "step": 179140 + }, + { + "epoch": 0.8946540487902319, + "grad_norm": 0.089163638651371, + "learning_rate": 3.168289569200731e-06, + "loss": 8.5241, + "step": 179150 + }, + { + "epoch": 0.8947039876151714, + "grad_norm": 0.09962226450443268, + "learning_rate": 3.1667876542591804e-06, + "loss": 8.5203, + "step": 179160 + }, + { + "epoch": 0.8947539264401109, + "grad_norm": 0.09414087980985641, + "learning_rate": 3.1652857393176298e-06, + "loss": 8.5183, + "step": 179170 + }, + { + "epoch": 0.8948038652650503, + "grad_norm": 0.09427635371685028, + "learning_rate": 3.1637838243760795e-06, + "loss": 8.5292, + "step": 179180 + }, + { + "epoch": 0.8948538040899897, + "grad_norm": 0.08661588281393051, + "learning_rate": 3.1622819094345293e-06, + "loss": 8.5173, + "step": 179190 + }, + { + "epoch": 0.8949037429149292, + "grad_norm": 0.09423363953828812, + "learning_rate": 3.1607799944929786e-06, + "loss": 8.5142, + "step": 179200 + }, + { + "epoch": 0.8949536817398687, + "grad_norm": 0.08995388448238373, + "learning_rate": 3.159278079551428e-06, + "loss": 8.529, + "step": 179210 + }, + { + "epoch": 0.8950036205648081, + "grad_norm": 0.09153537452220917, + "learning_rate": 3.1577761646098777e-06, + "loss": 8.5241, + "step": 179220 + }, + { + "epoch": 0.8950535593897475, + "grad_norm": 0.09005282819271088, + "learning_rate": 3.156274249668327e-06, + "loss": 8.5244, + "step": 179230 + }, + { + "epoch": 0.895103498214687, + "grad_norm": 0.08991970866918564, + "learning_rate": 3.154772334726777e-06, + "loss": 8.5312, + "step": 179240 + }, + { + "epoch": 0.8951534370396265, + "grad_norm": 0.08929390460252762, + "learning_rate": 3.153270419785226e-06, + "loss": 8.5027, + "step": 179250 + }, + { + "epoch": 0.8952033758645659, + "grad_norm": 0.08898353576660156, + "learning_rate": 3.151768504843676e-06, + "loss": 8.5349, + "step": 179260 + }, + { + "epoch": 0.8952533146895053, + "grad_norm": 0.08719191700220108, + "learning_rate": 3.150266589902125e-06, + "loss": 8.5089, + "step": 179270 + }, + { + "epoch": 0.8953032535144448, + "grad_norm": 0.09334076195955276, + "learning_rate": 3.1487646749605745e-06, + "loss": 8.5229, + "step": 179280 + }, + { + "epoch": 0.8953531923393843, + "grad_norm": 0.09189531207084656, + "learning_rate": 3.1472627600190243e-06, + "loss": 8.518, + "step": 179290 + }, + { + "epoch": 0.8954031311643237, + "grad_norm": 0.08680342137813568, + "learning_rate": 3.145760845077474e-06, + "loss": 8.5283, + "step": 179300 + }, + { + "epoch": 0.8954530699892631, + "grad_norm": 0.0863579586148262, + "learning_rate": 3.1442589301359234e-06, + "loss": 8.5031, + "step": 179310 + }, + { + "epoch": 0.8955030088142026, + "grad_norm": 0.09513013064861298, + "learning_rate": 3.1427570151943727e-06, + "loss": 8.5172, + "step": 179320 + }, + { + "epoch": 0.8955529476391421, + "grad_norm": 0.08853243291378021, + "learning_rate": 3.1412551002528225e-06, + "loss": 8.5315, + "step": 179330 + }, + { + "epoch": 0.8956028864640815, + "grad_norm": 0.09000103175640106, + "learning_rate": 3.1397531853112722e-06, + "loss": 8.5139, + "step": 179340 + }, + { + "epoch": 0.8956528252890209, + "grad_norm": 0.0949261337518692, + "learning_rate": 3.1382512703697216e-06, + "loss": 8.5104, + "step": 179350 + }, + { + "epoch": 0.8957027641139604, + "grad_norm": 0.09614192694425583, + "learning_rate": 3.136749355428171e-06, + "loss": 8.5061, + "step": 179360 + }, + { + "epoch": 0.8957527029388999, + "grad_norm": 0.09426548331975937, + "learning_rate": 3.1352474404866202e-06, + "loss": 8.5072, + "step": 179370 + }, + { + "epoch": 0.8958026417638393, + "grad_norm": 0.09127119183540344, + "learning_rate": 3.1337455255450704e-06, + "loss": 8.5092, + "step": 179380 + }, + { + "epoch": 0.8958525805887787, + "grad_norm": 0.09280092269182205, + "learning_rate": 3.1322436106035197e-06, + "loss": 8.5003, + "step": 179390 + }, + { + "epoch": 0.8959025194137182, + "grad_norm": 0.09022334963083267, + "learning_rate": 3.130741695661969e-06, + "loss": 8.5157, + "step": 179400 + }, + { + "epoch": 0.8959524582386577, + "grad_norm": 0.08918463438749313, + "learning_rate": 3.1292397807204184e-06, + "loss": 8.5252, + "step": 179410 + }, + { + "epoch": 0.8960023970635971, + "grad_norm": 0.09359756857156754, + "learning_rate": 3.127737865778868e-06, + "loss": 8.5137, + "step": 179420 + }, + { + "epoch": 0.8960523358885365, + "grad_norm": 0.08726957440376282, + "learning_rate": 3.126235950837318e-06, + "loss": 8.5027, + "step": 179430 + }, + { + "epoch": 0.896102274713476, + "grad_norm": 0.08867673575878143, + "learning_rate": 3.1247340358957672e-06, + "loss": 8.5117, + "step": 179440 + }, + { + "epoch": 0.8961522135384155, + "grad_norm": 0.0853889063000679, + "learning_rate": 3.1232321209542166e-06, + "loss": 8.5262, + "step": 179450 + }, + { + "epoch": 0.8962021523633549, + "grad_norm": 0.08606785535812378, + "learning_rate": 3.1217302060126663e-06, + "loss": 8.5221, + "step": 179460 + }, + { + "epoch": 0.8962520911882943, + "grad_norm": 0.08745083957910538, + "learning_rate": 3.1202282910711156e-06, + "loss": 8.5107, + "step": 179470 + }, + { + "epoch": 0.8963020300132338, + "grad_norm": 0.09155963361263275, + "learning_rate": 3.1187263761295654e-06, + "loss": 8.4951, + "step": 179480 + }, + { + "epoch": 0.8963519688381733, + "grad_norm": 0.09494645148515701, + "learning_rate": 3.1172244611880147e-06, + "loss": 8.509, + "step": 179490 + }, + { + "epoch": 0.8964019076631127, + "grad_norm": 0.0906292051076889, + "learning_rate": 3.1157225462464645e-06, + "loss": 8.5103, + "step": 179500 + }, + { + "epoch": 0.8964518464880521, + "grad_norm": 0.08988522738218307, + "learning_rate": 3.114220631304914e-06, + "loss": 8.5154, + "step": 179510 + }, + { + "epoch": 0.8965017853129916, + "grad_norm": 0.09074413031339645, + "learning_rate": 3.112718716363363e-06, + "loss": 8.5289, + "step": 179520 + }, + { + "epoch": 0.896551724137931, + "grad_norm": 0.0895250216126442, + "learning_rate": 3.111216801421813e-06, + "loss": 8.5203, + "step": 179530 + }, + { + "epoch": 0.8966016629628705, + "grad_norm": 0.09052935242652893, + "learning_rate": 3.1097148864802627e-06, + "loss": 8.5309, + "step": 179540 + }, + { + "epoch": 0.8966516017878099, + "grad_norm": 0.09444441646337509, + "learning_rate": 3.108212971538712e-06, + "loss": 8.515, + "step": 179550 + }, + { + "epoch": 0.8967015406127494, + "grad_norm": 0.09313017874956131, + "learning_rate": 3.1067110565971613e-06, + "loss": 8.5094, + "step": 179560 + }, + { + "epoch": 0.8967514794376888, + "grad_norm": 0.09455044567584991, + "learning_rate": 3.1052091416556107e-06, + "loss": 8.5063, + "step": 179570 + }, + { + "epoch": 0.8968014182626283, + "grad_norm": 0.09298877418041229, + "learning_rate": 3.103707226714061e-06, + "loss": 8.5096, + "step": 179580 + }, + { + "epoch": 0.8968513570875677, + "grad_norm": 0.09229998290538788, + "learning_rate": 3.10220531177251e-06, + "loss": 8.5201, + "step": 179590 + }, + { + "epoch": 0.8969012959125072, + "grad_norm": 0.08736059069633484, + "learning_rate": 3.1007033968309595e-06, + "loss": 8.4993, + "step": 179600 + }, + { + "epoch": 0.8969512347374466, + "grad_norm": 0.09250369668006897, + "learning_rate": 3.099201481889409e-06, + "loss": 8.494, + "step": 179610 + }, + { + "epoch": 0.8970011735623861, + "grad_norm": 0.08871186524629593, + "learning_rate": 3.0976995669478586e-06, + "loss": 8.5137, + "step": 179620 + }, + { + "epoch": 0.8970511123873255, + "grad_norm": 0.09203743189573288, + "learning_rate": 3.0961976520063083e-06, + "loss": 8.5138, + "step": 179630 + }, + { + "epoch": 0.897101051212265, + "grad_norm": 0.09076462686061859, + "learning_rate": 3.0946957370647577e-06, + "loss": 8.5117, + "step": 179640 + }, + { + "epoch": 0.8971509900372044, + "grad_norm": 0.09100484848022461, + "learning_rate": 3.093193822123207e-06, + "loss": 8.5129, + "step": 179650 + }, + { + "epoch": 0.8972009288621439, + "grad_norm": 0.0954102948307991, + "learning_rate": 3.0916919071816568e-06, + "loss": 8.4978, + "step": 179660 + }, + { + "epoch": 0.8972508676870833, + "grad_norm": 0.09269285947084427, + "learning_rate": 3.090189992240106e-06, + "loss": 8.5116, + "step": 179670 + }, + { + "epoch": 0.8973008065120228, + "grad_norm": 0.08553628623485565, + "learning_rate": 3.088688077298556e-06, + "loss": 8.5297, + "step": 179680 + }, + { + "epoch": 0.8973507453369622, + "grad_norm": 0.08914821594953537, + "learning_rate": 3.087186162357005e-06, + "loss": 8.5084, + "step": 179690 + }, + { + "epoch": 0.8974006841619017, + "grad_norm": 0.08601313084363937, + "learning_rate": 3.085684247415455e-06, + "loss": 8.507, + "step": 179700 + }, + { + "epoch": 0.8974506229868411, + "grad_norm": 0.09413706511259079, + "learning_rate": 3.0841823324739043e-06, + "loss": 8.5197, + "step": 179710 + }, + { + "epoch": 0.8975005618117806, + "grad_norm": 0.09138408303260803, + "learning_rate": 3.0826804175323536e-06, + "loss": 8.5177, + "step": 179720 + }, + { + "epoch": 0.89755050063672, + "grad_norm": 0.09180725365877151, + "learning_rate": 3.0811785025908033e-06, + "loss": 8.5083, + "step": 179730 + }, + { + "epoch": 0.8976004394616595, + "grad_norm": 0.09291674196720123, + "learning_rate": 3.079676587649253e-06, + "loss": 8.5193, + "step": 179740 + }, + { + "epoch": 0.8976503782865989, + "grad_norm": 0.08994516730308533, + "learning_rate": 3.0781746727077024e-06, + "loss": 8.519, + "step": 179750 + }, + { + "epoch": 0.8977003171115384, + "grad_norm": 0.09701023995876312, + "learning_rate": 3.0766727577661518e-06, + "loss": 8.5088, + "step": 179760 + }, + { + "epoch": 0.8977502559364778, + "grad_norm": 0.0889933779835701, + "learning_rate": 3.075170842824601e-06, + "loss": 8.5115, + "step": 179770 + }, + { + "epoch": 0.8978001947614173, + "grad_norm": 0.09396040439605713, + "learning_rate": 3.0736689278830513e-06, + "loss": 8.5349, + "step": 179780 + }, + { + "epoch": 0.8978501335863567, + "grad_norm": 0.09631121158599854, + "learning_rate": 3.0721670129415006e-06, + "loss": 8.4926, + "step": 179790 + }, + { + "epoch": 0.8979000724112962, + "grad_norm": 0.08764296770095825, + "learning_rate": 3.07066509799995e-06, + "loss": 8.524, + "step": 179800 + }, + { + "epoch": 0.8979500112362356, + "grad_norm": 0.09530488401651382, + "learning_rate": 3.0691631830583993e-06, + "loss": 8.513, + "step": 179810 + }, + { + "epoch": 0.8979999500611751, + "grad_norm": 0.08981040865182877, + "learning_rate": 3.067661268116849e-06, + "loss": 8.5024, + "step": 179820 + }, + { + "epoch": 0.8980498888861145, + "grad_norm": 0.09306181967258453, + "learning_rate": 3.0661593531752988e-06, + "loss": 8.5005, + "step": 179830 + }, + { + "epoch": 0.898099827711054, + "grad_norm": 0.09456732124090195, + "learning_rate": 3.064657438233748e-06, + "loss": 8.5144, + "step": 179840 + }, + { + "epoch": 0.8981497665359934, + "grad_norm": 0.08984339982271194, + "learning_rate": 3.0631555232921974e-06, + "loss": 8.5388, + "step": 179850 + }, + { + "epoch": 0.8981997053609329, + "grad_norm": 0.08446503430604935, + "learning_rate": 3.061653608350647e-06, + "loss": 8.5152, + "step": 179860 + }, + { + "epoch": 0.8982496441858723, + "grad_norm": 0.09314480423927307, + "learning_rate": 3.0601516934090965e-06, + "loss": 8.5108, + "step": 179870 + }, + { + "epoch": 0.8982995830108118, + "grad_norm": 0.09413187950849533, + "learning_rate": 3.0586497784675463e-06, + "loss": 8.5027, + "step": 179880 + }, + { + "epoch": 0.8983495218357512, + "grad_norm": 0.09056896716356277, + "learning_rate": 3.0571478635259956e-06, + "loss": 8.5067, + "step": 179890 + }, + { + "epoch": 0.8983994606606907, + "grad_norm": 0.08975179493427277, + "learning_rate": 3.0556459485844454e-06, + "loss": 8.5157, + "step": 179900 + }, + { + "epoch": 0.8984493994856301, + "grad_norm": 0.08911077678203583, + "learning_rate": 3.0541440336428947e-06, + "loss": 8.516, + "step": 179910 + }, + { + "epoch": 0.8984993383105696, + "grad_norm": 0.09924951940774918, + "learning_rate": 3.0526421187013444e-06, + "loss": 8.4984, + "step": 179920 + }, + { + "epoch": 0.898549277135509, + "grad_norm": 0.09128329157829285, + "learning_rate": 3.0511402037597938e-06, + "loss": 8.5007, + "step": 179930 + }, + { + "epoch": 0.8985992159604484, + "grad_norm": 0.08956824243068695, + "learning_rate": 3.0496382888182435e-06, + "loss": 8.5084, + "step": 179940 + }, + { + "epoch": 0.8986491547853879, + "grad_norm": 0.09009304642677307, + "learning_rate": 3.048136373876693e-06, + "loss": 8.5131, + "step": 179950 + }, + { + "epoch": 0.8986990936103274, + "grad_norm": 0.0977887287735939, + "learning_rate": 3.046634458935142e-06, + "loss": 8.5111, + "step": 179960 + }, + { + "epoch": 0.8987490324352668, + "grad_norm": 0.08121143281459808, + "learning_rate": 3.045132543993592e-06, + "loss": 8.512, + "step": 179970 + }, + { + "epoch": 0.8987989712602062, + "grad_norm": 0.09262192249298096, + "learning_rate": 3.0436306290520417e-06, + "loss": 8.523, + "step": 179980 + }, + { + "epoch": 0.8988489100851457, + "grad_norm": 0.09229442477226257, + "learning_rate": 3.042128714110491e-06, + "loss": 8.5225, + "step": 179990 + }, + { + "epoch": 0.8988988489100851, + "grad_norm": 0.092495396733284, + "learning_rate": 3.0406267991689404e-06, + "loss": 8.5128, + "step": 180000 + }, + { + "epoch": 0.8989487877350246, + "grad_norm": 0.09082264453172684, + "learning_rate": 3.0391248842273897e-06, + "loss": 8.5133, + "step": 180010 + }, + { + "epoch": 0.898998726559964, + "grad_norm": 0.09642348438501358, + "learning_rate": 3.03762296928584e-06, + "loss": 8.4977, + "step": 180020 + }, + { + "epoch": 0.8990486653849035, + "grad_norm": 0.08731796592473984, + "learning_rate": 3.036121054344289e-06, + "loss": 8.5145, + "step": 180030 + }, + { + "epoch": 0.8990986042098429, + "grad_norm": 0.0900919958949089, + "learning_rate": 3.0346191394027385e-06, + "loss": 8.5136, + "step": 180040 + }, + { + "epoch": 0.8991485430347824, + "grad_norm": 0.09062129259109497, + "learning_rate": 3.033117224461188e-06, + "loss": 8.5201, + "step": 180050 + }, + { + "epoch": 0.8991984818597218, + "grad_norm": 0.09137134999036789, + "learning_rate": 3.0316153095196376e-06, + "loss": 8.5163, + "step": 180060 + }, + { + "epoch": 0.8992484206846613, + "grad_norm": 0.0882347971200943, + "learning_rate": 3.0301133945780874e-06, + "loss": 8.516, + "step": 180070 + }, + { + "epoch": 0.8992983595096007, + "grad_norm": 0.08691363036632538, + "learning_rate": 3.0286114796365367e-06, + "loss": 8.5077, + "step": 180080 + }, + { + "epoch": 0.8993482983345402, + "grad_norm": 0.08639916777610779, + "learning_rate": 3.027109564694986e-06, + "loss": 8.5125, + "step": 180090 + }, + { + "epoch": 0.8993982371594796, + "grad_norm": 0.08904005587100983, + "learning_rate": 3.025607649753436e-06, + "loss": 8.5207, + "step": 180100 + }, + { + "epoch": 0.8994481759844191, + "grad_norm": 0.09132896363735199, + "learning_rate": 3.024105734811885e-06, + "loss": 8.5062, + "step": 180110 + }, + { + "epoch": 0.8994981148093585, + "grad_norm": 0.08482132852077484, + "learning_rate": 3.022603819870335e-06, + "loss": 8.5185, + "step": 180120 + }, + { + "epoch": 0.899548053634298, + "grad_norm": 0.08881242573261261, + "learning_rate": 3.0211019049287842e-06, + "loss": 8.5312, + "step": 180130 + }, + { + "epoch": 0.8995979924592374, + "grad_norm": 0.08985382318496704, + "learning_rate": 3.019599989987234e-06, + "loss": 8.5021, + "step": 180140 + }, + { + "epoch": 0.8996479312841769, + "grad_norm": 0.08999388664960861, + "learning_rate": 3.0180980750456833e-06, + "loss": 8.4929, + "step": 180150 + }, + { + "epoch": 0.8996978701091163, + "grad_norm": 0.092741958796978, + "learning_rate": 3.0165961601041326e-06, + "loss": 8.5206, + "step": 180160 + }, + { + "epoch": 0.8997478089340558, + "grad_norm": 0.08737698942422867, + "learning_rate": 3.0150942451625824e-06, + "loss": 8.516, + "step": 180170 + }, + { + "epoch": 0.8997977477589952, + "grad_norm": 0.09328185021877289, + "learning_rate": 3.013592330221032e-06, + "loss": 8.5143, + "step": 180180 + }, + { + "epoch": 0.8998476865839347, + "grad_norm": 0.09485041350126266, + "learning_rate": 3.0120904152794815e-06, + "loss": 8.522, + "step": 180190 + }, + { + "epoch": 0.8998976254088741, + "grad_norm": 0.0960816740989685, + "learning_rate": 3.010588500337931e-06, + "loss": 8.5241, + "step": 180200 + }, + { + "epoch": 0.8999475642338136, + "grad_norm": 0.08661402761936188, + "learning_rate": 3.00908658539638e-06, + "loss": 8.5088, + "step": 180210 + }, + { + "epoch": 0.899997503058753, + "grad_norm": 0.08585493266582489, + "learning_rate": 3.0075846704548303e-06, + "loss": 8.5145, + "step": 180220 + }, + { + "epoch": 0.9000474418836925, + "grad_norm": 0.09099698811769485, + "learning_rate": 3.0060827555132796e-06, + "loss": 8.5141, + "step": 180230 + }, + { + "epoch": 0.9000973807086319, + "grad_norm": 0.0931340754032135, + "learning_rate": 3.004580840571729e-06, + "loss": 8.4905, + "step": 180240 + }, + { + "epoch": 0.9001473195335714, + "grad_norm": 0.08374540507793427, + "learning_rate": 3.0030789256301783e-06, + "loss": 8.5174, + "step": 180250 + }, + { + "epoch": 0.9001972583585108, + "grad_norm": 0.09065917134284973, + "learning_rate": 3.001577010688628e-06, + "loss": 8.5069, + "step": 180260 + }, + { + "epoch": 0.9002471971834503, + "grad_norm": 0.09299372136592865, + "learning_rate": 3.000075095747078e-06, + "loss": 8.5176, + "step": 180270 + }, + { + "epoch": 0.9002971360083897, + "grad_norm": 0.08948089927434921, + "learning_rate": 2.998573180805527e-06, + "loss": 8.4984, + "step": 180280 + }, + { + "epoch": 0.9003470748333292, + "grad_norm": 0.0947575643658638, + "learning_rate": 2.9970712658639765e-06, + "loss": 8.5057, + "step": 180290 + }, + { + "epoch": 0.9003970136582686, + "grad_norm": 0.09377171099185944, + "learning_rate": 2.9955693509224262e-06, + "loss": 8.5169, + "step": 180300 + }, + { + "epoch": 0.900446952483208, + "grad_norm": 0.08988793194293976, + "learning_rate": 2.9940674359808756e-06, + "loss": 8.4968, + "step": 180310 + }, + { + "epoch": 0.9004968913081475, + "grad_norm": 0.09155865758657455, + "learning_rate": 2.9925655210393253e-06, + "loss": 8.5183, + "step": 180320 + }, + { + "epoch": 0.900546830133087, + "grad_norm": 0.09261929988861084, + "learning_rate": 2.9910636060977747e-06, + "loss": 8.4968, + "step": 180330 + }, + { + "epoch": 0.9005967689580264, + "grad_norm": 0.09239667654037476, + "learning_rate": 2.9895616911562244e-06, + "loss": 8.5143, + "step": 180340 + }, + { + "epoch": 0.9006467077829658, + "grad_norm": 0.09450769424438477, + "learning_rate": 2.9880597762146737e-06, + "loss": 8.504, + "step": 180350 + }, + { + "epoch": 0.9006966466079053, + "grad_norm": 0.0904548168182373, + "learning_rate": 2.986557861273123e-06, + "loss": 8.5121, + "step": 180360 + }, + { + "epoch": 0.9007465854328448, + "grad_norm": 0.09444373100996017, + "learning_rate": 2.985055946331573e-06, + "loss": 8.5214, + "step": 180370 + }, + { + "epoch": 0.9007965242577842, + "grad_norm": 0.09165211021900177, + "learning_rate": 2.9835540313900226e-06, + "loss": 8.5255, + "step": 180380 + }, + { + "epoch": 0.9008464630827236, + "grad_norm": 0.09335034340620041, + "learning_rate": 2.982052116448472e-06, + "loss": 8.4975, + "step": 180390 + }, + { + "epoch": 0.9008964019076631, + "grad_norm": 0.09302797168493271, + "learning_rate": 2.9805502015069212e-06, + "loss": 8.5096, + "step": 180400 + }, + { + "epoch": 0.9009463407326026, + "grad_norm": 0.09198260307312012, + "learning_rate": 2.9790482865653706e-06, + "loss": 8.4975, + "step": 180410 + }, + { + "epoch": 0.900996279557542, + "grad_norm": 0.08965457230806351, + "learning_rate": 2.9775463716238208e-06, + "loss": 8.5025, + "step": 180420 + }, + { + "epoch": 0.9010462183824814, + "grad_norm": 0.08751804381608963, + "learning_rate": 2.97604445668227e-06, + "loss": 8.4958, + "step": 180430 + }, + { + "epoch": 0.9010961572074209, + "grad_norm": 0.09645684063434601, + "learning_rate": 2.9745425417407194e-06, + "loss": 8.5389, + "step": 180440 + }, + { + "epoch": 0.9011460960323604, + "grad_norm": 0.09055258333683014, + "learning_rate": 2.9730406267991687e-06, + "loss": 8.501, + "step": 180450 + }, + { + "epoch": 0.9011960348572998, + "grad_norm": 0.0941043570637703, + "learning_rate": 2.971538711857619e-06, + "loss": 8.5124, + "step": 180460 + }, + { + "epoch": 0.9012459736822392, + "grad_norm": 0.09053096920251846, + "learning_rate": 2.9700367969160683e-06, + "loss": 8.4932, + "step": 180470 + }, + { + "epoch": 0.9012959125071787, + "grad_norm": 0.08705537766218185, + "learning_rate": 2.9685348819745176e-06, + "loss": 8.5147, + "step": 180480 + }, + { + "epoch": 0.9013458513321182, + "grad_norm": 0.09120982885360718, + "learning_rate": 2.967032967032967e-06, + "loss": 8.536, + "step": 180490 + }, + { + "epoch": 0.9013957901570576, + "grad_norm": 0.09626072645187378, + "learning_rate": 2.9655310520914167e-06, + "loss": 8.5223, + "step": 180500 + }, + { + "epoch": 0.901445728981997, + "grad_norm": 0.09363730996847153, + "learning_rate": 2.9640291371498664e-06, + "loss": 8.5234, + "step": 180510 + }, + { + "epoch": 0.9014956678069365, + "grad_norm": 0.09183649718761444, + "learning_rate": 2.9625272222083158e-06, + "loss": 8.5249, + "step": 180520 + }, + { + "epoch": 0.901545606631876, + "grad_norm": 0.08634661138057709, + "learning_rate": 2.961025307266765e-06, + "loss": 8.514, + "step": 180530 + }, + { + "epoch": 0.9015955454568154, + "grad_norm": 0.08858415484428406, + "learning_rate": 2.959523392325215e-06, + "loss": 8.5206, + "step": 180540 + }, + { + "epoch": 0.9016454842817548, + "grad_norm": 0.08977922797203064, + "learning_rate": 2.958021477383664e-06, + "loss": 8.5043, + "step": 180550 + }, + { + "epoch": 0.9016954231066943, + "grad_norm": 0.0880313590168953, + "learning_rate": 2.956519562442114e-06, + "loss": 8.5144, + "step": 180560 + }, + { + "epoch": 0.9017453619316338, + "grad_norm": 0.08856451511383057, + "learning_rate": 2.9550176475005633e-06, + "loss": 8.5266, + "step": 180570 + }, + { + "epoch": 0.9017953007565732, + "grad_norm": 0.09039363265037537, + "learning_rate": 2.953515732559013e-06, + "loss": 8.5042, + "step": 180580 + }, + { + "epoch": 0.9018452395815126, + "grad_norm": 0.08735457062721252, + "learning_rate": 2.9520138176174624e-06, + "loss": 8.5157, + "step": 180590 + }, + { + "epoch": 0.9018951784064521, + "grad_norm": 0.0854557529091835, + "learning_rate": 2.9505119026759117e-06, + "loss": 8.514, + "step": 180600 + }, + { + "epoch": 0.9019451172313916, + "grad_norm": 0.086369588971138, + "learning_rate": 2.9490099877343614e-06, + "loss": 8.5198, + "step": 180610 + }, + { + "epoch": 0.901995056056331, + "grad_norm": 0.096776083111763, + "learning_rate": 2.947508072792811e-06, + "loss": 8.4991, + "step": 180620 + }, + { + "epoch": 0.9020449948812704, + "grad_norm": 0.08912768959999084, + "learning_rate": 2.9460061578512605e-06, + "loss": 8.5191, + "step": 180630 + }, + { + "epoch": 0.9020949337062099, + "grad_norm": 0.09278588742017746, + "learning_rate": 2.94450424290971e-06, + "loss": 8.5229, + "step": 180640 + }, + { + "epoch": 0.9021448725311494, + "grad_norm": 0.09282492101192474, + "learning_rate": 2.943002327968159e-06, + "loss": 8.5159, + "step": 180650 + }, + { + "epoch": 0.9021948113560888, + "grad_norm": 0.08819557726383209, + "learning_rate": 2.9415004130266094e-06, + "loss": 8.5337, + "step": 180660 + }, + { + "epoch": 0.9022447501810282, + "grad_norm": 0.08956322073936462, + "learning_rate": 2.9399984980850587e-06, + "loss": 8.5141, + "step": 180670 + }, + { + "epoch": 0.9022946890059677, + "grad_norm": 0.0941721498966217, + "learning_rate": 2.938496583143508e-06, + "loss": 8.5227, + "step": 180680 + }, + { + "epoch": 0.9023446278309072, + "grad_norm": 0.09089160710573196, + "learning_rate": 2.9369946682019574e-06, + "loss": 8.5036, + "step": 180690 + }, + { + "epoch": 0.9023945666558466, + "grad_norm": 0.09959226101636887, + "learning_rate": 2.935492753260407e-06, + "loss": 8.4941, + "step": 180700 + }, + { + "epoch": 0.902444505480786, + "grad_norm": 0.0919126570224762, + "learning_rate": 2.933990838318857e-06, + "loss": 8.4893, + "step": 180710 + }, + { + "epoch": 0.9024944443057255, + "grad_norm": 0.0832803025841713, + "learning_rate": 2.932488923377306e-06, + "loss": 8.5169, + "step": 180720 + }, + { + "epoch": 0.902544383130665, + "grad_norm": 0.09311427175998688, + "learning_rate": 2.9309870084357555e-06, + "loss": 8.4991, + "step": 180730 + }, + { + "epoch": 0.9025943219556044, + "grad_norm": 0.09170939028263092, + "learning_rate": 2.9294850934942053e-06, + "loss": 8.4904, + "step": 180740 + }, + { + "epoch": 0.9026442607805438, + "grad_norm": 0.09490250796079636, + "learning_rate": 2.9279831785526546e-06, + "loss": 8.5021, + "step": 180750 + }, + { + "epoch": 0.9026941996054832, + "grad_norm": 0.09353826195001602, + "learning_rate": 2.9264812636111044e-06, + "loss": 8.517, + "step": 180760 + }, + { + "epoch": 0.9027441384304228, + "grad_norm": 0.09115730971097946, + "learning_rate": 2.9249793486695537e-06, + "loss": 8.4986, + "step": 180770 + }, + { + "epoch": 0.9027940772553622, + "grad_norm": 0.09605268388986588, + "learning_rate": 2.9234774337280035e-06, + "loss": 8.5088, + "step": 180780 + }, + { + "epoch": 0.9028440160803016, + "grad_norm": 0.09390175342559814, + "learning_rate": 2.921975518786453e-06, + "loss": 8.5063, + "step": 180790 + }, + { + "epoch": 0.902893954905241, + "grad_norm": 0.09228114783763885, + "learning_rate": 2.920473603844902e-06, + "loss": 8.5095, + "step": 180800 + }, + { + "epoch": 0.9029438937301806, + "grad_norm": 0.09767407178878784, + "learning_rate": 2.918971688903352e-06, + "loss": 8.5066, + "step": 180810 + }, + { + "epoch": 0.90299383255512, + "grad_norm": 0.08856954425573349, + "learning_rate": 2.9174697739618016e-06, + "loss": 8.5368, + "step": 180820 + }, + { + "epoch": 0.9030437713800594, + "grad_norm": 0.09355857968330383, + "learning_rate": 2.915967859020251e-06, + "loss": 8.5208, + "step": 180830 + }, + { + "epoch": 0.9030937102049988, + "grad_norm": 0.08886077255010605, + "learning_rate": 2.9144659440787003e-06, + "loss": 8.5253, + "step": 180840 + }, + { + "epoch": 0.9031436490299384, + "grad_norm": 0.09166674315929413, + "learning_rate": 2.9129640291371496e-06, + "loss": 8.5249, + "step": 180850 + }, + { + "epoch": 0.9031935878548778, + "grad_norm": 0.08610928058624268, + "learning_rate": 2.9114621141956e-06, + "loss": 8.5264, + "step": 180860 + }, + { + "epoch": 0.9032435266798172, + "grad_norm": 0.08989940583705902, + "learning_rate": 2.909960199254049e-06, + "loss": 8.5241, + "step": 180870 + }, + { + "epoch": 0.9032934655047566, + "grad_norm": 0.09518139809370041, + "learning_rate": 2.9084582843124985e-06, + "loss": 8.5228, + "step": 180880 + }, + { + "epoch": 0.9033434043296962, + "grad_norm": 0.09098067134618759, + "learning_rate": 2.906956369370948e-06, + "loss": 8.5047, + "step": 180890 + }, + { + "epoch": 0.9033933431546356, + "grad_norm": 0.0919727236032486, + "learning_rate": 2.9054544544293976e-06, + "loss": 8.5145, + "step": 180900 + }, + { + "epoch": 0.903443281979575, + "grad_norm": 0.09157610684633255, + "learning_rate": 2.9039525394878473e-06, + "loss": 8.4891, + "step": 180910 + }, + { + "epoch": 0.9034932208045144, + "grad_norm": 0.09556107968091965, + "learning_rate": 2.9024506245462966e-06, + "loss": 8.5144, + "step": 180920 + }, + { + "epoch": 0.903543159629454, + "grad_norm": 0.08390878885984421, + "learning_rate": 2.900948709604746e-06, + "loss": 8.5181, + "step": 180930 + }, + { + "epoch": 0.9035930984543934, + "grad_norm": 0.09191958606243134, + "learning_rate": 2.8994467946631957e-06, + "loss": 8.5095, + "step": 180940 + }, + { + "epoch": 0.9036430372793328, + "grad_norm": 0.09150879085063934, + "learning_rate": 2.897944879721645e-06, + "loss": 8.5215, + "step": 180950 + }, + { + "epoch": 0.9036929761042722, + "grad_norm": 0.0931938886642456, + "learning_rate": 2.896442964780095e-06, + "loss": 8.4941, + "step": 180960 + }, + { + "epoch": 0.9037429149292117, + "grad_norm": 0.09147433936595917, + "learning_rate": 2.894941049838544e-06, + "loss": 8.5119, + "step": 180970 + }, + { + "epoch": 0.9037928537541512, + "grad_norm": 0.09065255522727966, + "learning_rate": 2.893439134896994e-06, + "loss": 8.5166, + "step": 180980 + }, + { + "epoch": 0.9038427925790906, + "grad_norm": 0.0912211537361145, + "learning_rate": 2.8919372199554432e-06, + "loss": 8.4907, + "step": 180990 + }, + { + "epoch": 0.90389273140403, + "grad_norm": 0.09441815316677094, + "learning_rate": 2.890435305013893e-06, + "loss": 8.5097, + "step": 181000 + }, + { + "epoch": 0.9039426702289695, + "grad_norm": 0.08536705374717712, + "learning_rate": 2.8889333900723423e-06, + "loss": 8.5144, + "step": 181010 + }, + { + "epoch": 0.903992609053909, + "grad_norm": 0.08860597014427185, + "learning_rate": 2.887431475130792e-06, + "loss": 8.5068, + "step": 181020 + }, + { + "epoch": 0.9040425478788484, + "grad_norm": 0.09018053114414215, + "learning_rate": 2.8859295601892414e-06, + "loss": 8.5083, + "step": 181030 + }, + { + "epoch": 0.9040924867037878, + "grad_norm": 0.08966843783855438, + "learning_rate": 2.8844276452476907e-06, + "loss": 8.4964, + "step": 181040 + }, + { + "epoch": 0.9041424255287273, + "grad_norm": 0.08810800313949585, + "learning_rate": 2.8829257303061405e-06, + "loss": 8.5215, + "step": 181050 + }, + { + "epoch": 0.9041923643536668, + "grad_norm": 0.0902119129896164, + "learning_rate": 2.8814238153645902e-06, + "loss": 8.5246, + "step": 181060 + }, + { + "epoch": 0.9042423031786062, + "grad_norm": 0.09436272829771042, + "learning_rate": 2.8799219004230396e-06, + "loss": 8.4939, + "step": 181070 + }, + { + "epoch": 0.9042922420035456, + "grad_norm": 0.09024659544229507, + "learning_rate": 2.878419985481489e-06, + "loss": 8.5095, + "step": 181080 + }, + { + "epoch": 0.904342180828485, + "grad_norm": 0.089921735227108, + "learning_rate": 2.8769180705399382e-06, + "loss": 8.5008, + "step": 181090 + }, + { + "epoch": 0.9043921196534246, + "grad_norm": 0.08670993894338608, + "learning_rate": 2.8754161555983884e-06, + "loss": 8.5017, + "step": 181100 + }, + { + "epoch": 0.904442058478364, + "grad_norm": 0.09145168215036392, + "learning_rate": 2.8739142406568377e-06, + "loss": 8.5277, + "step": 181110 + }, + { + "epoch": 0.9044919973033034, + "grad_norm": 0.09542462974786758, + "learning_rate": 2.872412325715287e-06, + "loss": 8.5008, + "step": 181120 + }, + { + "epoch": 0.9045419361282429, + "grad_norm": 0.09122487902641296, + "learning_rate": 2.8709104107737364e-06, + "loss": 8.5092, + "step": 181130 + }, + { + "epoch": 0.9045918749531824, + "grad_norm": 0.08779449760913849, + "learning_rate": 2.869408495832186e-06, + "loss": 8.4968, + "step": 181140 + }, + { + "epoch": 0.9046418137781218, + "grad_norm": 0.08945740014314651, + "learning_rate": 2.867906580890636e-06, + "loss": 8.5206, + "step": 181150 + }, + { + "epoch": 0.9046917526030612, + "grad_norm": 0.08815411478281021, + "learning_rate": 2.8664046659490852e-06, + "loss": 8.512, + "step": 181160 + }, + { + "epoch": 0.9047416914280006, + "grad_norm": 0.09432817250490189, + "learning_rate": 2.8649027510075346e-06, + "loss": 8.5159, + "step": 181170 + }, + { + "epoch": 0.9047916302529402, + "grad_norm": 0.09125339239835739, + "learning_rate": 2.8634008360659843e-06, + "loss": 8.5162, + "step": 181180 + }, + { + "epoch": 0.9048415690778796, + "grad_norm": 0.08769851177930832, + "learning_rate": 2.8618989211244337e-06, + "loss": 8.5112, + "step": 181190 + }, + { + "epoch": 0.904891507902819, + "grad_norm": 0.09156162291765213, + "learning_rate": 2.8603970061828834e-06, + "loss": 8.5177, + "step": 181200 + }, + { + "epoch": 0.9049414467277584, + "grad_norm": 0.095248281955719, + "learning_rate": 2.8588950912413328e-06, + "loss": 8.4942, + "step": 181210 + }, + { + "epoch": 0.904991385552698, + "grad_norm": 0.09011649340391159, + "learning_rate": 2.8573931762997825e-06, + "loss": 8.5229, + "step": 181220 + }, + { + "epoch": 0.9050413243776374, + "grad_norm": 0.08675964921712875, + "learning_rate": 2.855891261358232e-06, + "loss": 8.5231, + "step": 181230 + }, + { + "epoch": 0.9050912632025768, + "grad_norm": 0.0876312181353569, + "learning_rate": 2.854389346416681e-06, + "loss": 8.5145, + "step": 181240 + }, + { + "epoch": 0.9051412020275162, + "grad_norm": 0.0970059260725975, + "learning_rate": 2.852887431475131e-06, + "loss": 8.5047, + "step": 181250 + }, + { + "epoch": 0.9051911408524558, + "grad_norm": 0.0930013656616211, + "learning_rate": 2.8513855165335807e-06, + "loss": 8.4996, + "step": 181260 + }, + { + "epoch": 0.9052410796773952, + "grad_norm": 0.09447172284126282, + "learning_rate": 2.84988360159203e-06, + "loss": 8.5009, + "step": 181270 + }, + { + "epoch": 0.9052910185023346, + "grad_norm": 0.0933113843202591, + "learning_rate": 2.8483816866504793e-06, + "loss": 8.4953, + "step": 181280 + }, + { + "epoch": 0.905340957327274, + "grad_norm": 0.09435321390628815, + "learning_rate": 2.8468797717089287e-06, + "loss": 8.4999, + "step": 181290 + }, + { + "epoch": 0.9053908961522136, + "grad_norm": 0.08828049153089523, + "learning_rate": 2.8453778567673784e-06, + "loss": 8.5145, + "step": 181300 + }, + { + "epoch": 0.905440834977153, + "grad_norm": 0.08913722634315491, + "learning_rate": 2.843875941825828e-06, + "loss": 8.4944, + "step": 181310 + }, + { + "epoch": 0.9054907738020924, + "grad_norm": 0.09632635116577148, + "learning_rate": 2.8423740268842775e-06, + "loss": 8.493, + "step": 181320 + }, + { + "epoch": 0.9055407126270318, + "grad_norm": 0.0897509828209877, + "learning_rate": 2.840872111942727e-06, + "loss": 8.4977, + "step": 181330 + }, + { + "epoch": 0.9055906514519714, + "grad_norm": 0.09208657592535019, + "learning_rate": 2.839370197001176e-06, + "loss": 8.5314, + "step": 181340 + }, + { + "epoch": 0.9056405902769108, + "grad_norm": 0.09006085991859436, + "learning_rate": 2.8378682820596264e-06, + "loss": 8.5163, + "step": 181350 + }, + { + "epoch": 0.9056905291018502, + "grad_norm": 0.09176279604434967, + "learning_rate": 2.8363663671180757e-06, + "loss": 8.5148, + "step": 181360 + }, + { + "epoch": 0.9057404679267896, + "grad_norm": 0.09576241672039032, + "learning_rate": 2.834864452176525e-06, + "loss": 8.5346, + "step": 181370 + }, + { + "epoch": 0.9057904067517292, + "grad_norm": 0.09277977049350739, + "learning_rate": 2.8333625372349744e-06, + "loss": 8.5067, + "step": 181380 + }, + { + "epoch": 0.9058403455766686, + "grad_norm": 0.0882072001695633, + "learning_rate": 2.831860622293424e-06, + "loss": 8.5046, + "step": 181390 + }, + { + "epoch": 0.905890284401608, + "grad_norm": 0.08874956518411636, + "learning_rate": 2.830358707351874e-06, + "loss": 8.5382, + "step": 181400 + }, + { + "epoch": 0.9059402232265474, + "grad_norm": 0.08898142725229263, + "learning_rate": 2.828856792410323e-06, + "loss": 8.5067, + "step": 181410 + }, + { + "epoch": 0.905990162051487, + "grad_norm": 0.08590088039636612, + "learning_rate": 2.8273548774687725e-06, + "loss": 8.5023, + "step": 181420 + }, + { + "epoch": 0.9060401008764264, + "grad_norm": 0.08650851249694824, + "learning_rate": 2.8258529625272223e-06, + "loss": 8.5202, + "step": 181430 + }, + { + "epoch": 0.9060900397013658, + "grad_norm": 0.09431696683168411, + "learning_rate": 2.8243510475856716e-06, + "loss": 8.5401, + "step": 181440 + }, + { + "epoch": 0.9061399785263052, + "grad_norm": 0.09654943645000458, + "learning_rate": 2.8228491326441214e-06, + "loss": 8.5078, + "step": 181450 + }, + { + "epoch": 0.9061899173512448, + "grad_norm": 0.08821980655193329, + "learning_rate": 2.8213472177025707e-06, + "loss": 8.4975, + "step": 181460 + }, + { + "epoch": 0.9062398561761842, + "grad_norm": 0.0911247506737709, + "learning_rate": 2.8198453027610204e-06, + "loss": 8.5129, + "step": 181470 + }, + { + "epoch": 0.9062897950011236, + "grad_norm": 0.08649583905935287, + "learning_rate": 2.8183433878194698e-06, + "loss": 8.5025, + "step": 181480 + }, + { + "epoch": 0.906339733826063, + "grad_norm": 0.09082172811031342, + "learning_rate": 2.816841472877919e-06, + "loss": 8.5101, + "step": 181490 + }, + { + "epoch": 0.9063896726510026, + "grad_norm": 0.09340551495552063, + "learning_rate": 2.815339557936369e-06, + "loss": 8.5192, + "step": 181500 + }, + { + "epoch": 0.906439611475942, + "grad_norm": 0.09474127739667892, + "learning_rate": 2.8138376429948186e-06, + "loss": 8.5036, + "step": 181510 + }, + { + "epoch": 0.9064895503008814, + "grad_norm": 0.08885298669338226, + "learning_rate": 2.812335728053268e-06, + "loss": 8.4964, + "step": 181520 + }, + { + "epoch": 0.9065394891258208, + "grad_norm": 0.09157950431108475, + "learning_rate": 2.8108338131117173e-06, + "loss": 8.4965, + "step": 181530 + }, + { + "epoch": 0.9065894279507604, + "grad_norm": 0.08861027657985687, + "learning_rate": 2.809331898170167e-06, + "loss": 8.518, + "step": 181540 + }, + { + "epoch": 0.9066393667756998, + "grad_norm": 0.0883537232875824, + "learning_rate": 2.807829983228617e-06, + "loss": 8.5024, + "step": 181550 + }, + { + "epoch": 0.9066893056006392, + "grad_norm": 0.0927857756614685, + "learning_rate": 2.806328068287066e-06, + "loss": 8.4976, + "step": 181560 + }, + { + "epoch": 0.9067392444255786, + "grad_norm": 0.09182663261890411, + "learning_rate": 2.8048261533455155e-06, + "loss": 8.4891, + "step": 181570 + }, + { + "epoch": 0.9067891832505182, + "grad_norm": 0.09595274925231934, + "learning_rate": 2.803324238403965e-06, + "loss": 8.5132, + "step": 181580 + }, + { + "epoch": 0.9068391220754576, + "grad_norm": 0.09273464232683182, + "learning_rate": 2.801822323462415e-06, + "loss": 8.5065, + "step": 181590 + }, + { + "epoch": 0.906889060900397, + "grad_norm": 0.09522940963506699, + "learning_rate": 2.8003204085208643e-06, + "loss": 8.5137, + "step": 181600 + }, + { + "epoch": 0.9069389997253364, + "grad_norm": 0.08875519037246704, + "learning_rate": 2.7988184935793136e-06, + "loss": 8.5101, + "step": 181610 + }, + { + "epoch": 0.906988938550276, + "grad_norm": 0.09621170908212662, + "learning_rate": 2.797316578637763e-06, + "loss": 8.5001, + "step": 181620 + }, + { + "epoch": 0.9070388773752154, + "grad_norm": 0.09134541451931, + "learning_rate": 2.7958146636962127e-06, + "loss": 8.5145, + "step": 181630 + }, + { + "epoch": 0.9070888162001548, + "grad_norm": 0.09547372162342072, + "learning_rate": 2.7943127487546625e-06, + "loss": 8.4756, + "step": 181640 + }, + { + "epoch": 0.9071387550250942, + "grad_norm": 0.09366821497678757, + "learning_rate": 2.792810833813112e-06, + "loss": 8.5362, + "step": 181650 + }, + { + "epoch": 0.9071886938500338, + "grad_norm": 0.09829097241163254, + "learning_rate": 2.791308918871561e-06, + "loss": 8.5254, + "step": 181660 + }, + { + "epoch": 0.9072386326749732, + "grad_norm": 0.09187762439250946, + "learning_rate": 2.789807003930011e-06, + "loss": 8.5192, + "step": 181670 + }, + { + "epoch": 0.9072885714999126, + "grad_norm": 0.09273143857717514, + "learning_rate": 2.7883050889884602e-06, + "loss": 8.4973, + "step": 181680 + }, + { + "epoch": 0.907338510324852, + "grad_norm": 0.09177732467651367, + "learning_rate": 2.78680317404691e-06, + "loss": 8.5055, + "step": 181690 + }, + { + "epoch": 0.9073884491497916, + "grad_norm": 0.089385986328125, + "learning_rate": 2.7853012591053593e-06, + "loss": 8.5095, + "step": 181700 + }, + { + "epoch": 0.907438387974731, + "grad_norm": 0.09638892114162445, + "learning_rate": 2.783799344163809e-06, + "loss": 8.4961, + "step": 181710 + }, + { + "epoch": 0.9074883267996704, + "grad_norm": 0.08958081156015396, + "learning_rate": 2.7822974292222584e-06, + "loss": 8.5128, + "step": 181720 + }, + { + "epoch": 0.9075382656246098, + "grad_norm": 0.09109731018543243, + "learning_rate": 2.7807955142807077e-06, + "loss": 8.5253, + "step": 181730 + }, + { + "epoch": 0.9075882044495494, + "grad_norm": 0.08715473860502243, + "learning_rate": 2.7792935993391575e-06, + "loss": 8.5185, + "step": 181740 + }, + { + "epoch": 0.9076381432744888, + "grad_norm": 0.09187250584363937, + "learning_rate": 2.7777916843976072e-06, + "loss": 8.5065, + "step": 181750 + }, + { + "epoch": 0.9076880820994282, + "grad_norm": 0.09143325686454773, + "learning_rate": 2.7762897694560566e-06, + "loss": 8.4969, + "step": 181760 + }, + { + "epoch": 0.9077380209243676, + "grad_norm": 0.08711639046669006, + "learning_rate": 2.774787854514506e-06, + "loss": 8.5134, + "step": 181770 + }, + { + "epoch": 0.9077879597493071, + "grad_norm": 0.09443769603967667, + "learning_rate": 2.7732859395729552e-06, + "loss": 8.5037, + "step": 181780 + }, + { + "epoch": 0.9078378985742466, + "grad_norm": 0.09021743386983871, + "learning_rate": 2.7717840246314054e-06, + "loss": 8.4901, + "step": 181790 + }, + { + "epoch": 0.907887837399186, + "grad_norm": 0.08807482570409775, + "learning_rate": 2.7702821096898547e-06, + "loss": 8.529, + "step": 181800 + }, + { + "epoch": 0.9079377762241254, + "grad_norm": 0.09367617219686508, + "learning_rate": 2.768780194748304e-06, + "loss": 8.534, + "step": 181810 + }, + { + "epoch": 0.907987715049065, + "grad_norm": 0.09450183063745499, + "learning_rate": 2.7672782798067534e-06, + "loss": 8.5267, + "step": 181820 + }, + { + "epoch": 0.9080376538740044, + "grad_norm": 0.09131249040365219, + "learning_rate": 2.765776364865203e-06, + "loss": 8.5342, + "step": 181830 + }, + { + "epoch": 0.9080875926989438, + "grad_norm": 0.09016092121601105, + "learning_rate": 2.764274449923653e-06, + "loss": 8.5188, + "step": 181840 + }, + { + "epoch": 0.9081375315238832, + "grad_norm": 0.09213612228631973, + "learning_rate": 2.7627725349821022e-06, + "loss": 8.5291, + "step": 181850 + }, + { + "epoch": 0.9081874703488227, + "grad_norm": 0.09062368422746658, + "learning_rate": 2.7612706200405516e-06, + "loss": 8.5298, + "step": 181860 + }, + { + "epoch": 0.9082374091737622, + "grad_norm": 0.08791705220937729, + "learning_rate": 2.7597687050990013e-06, + "loss": 8.5115, + "step": 181870 + }, + { + "epoch": 0.9082873479987016, + "grad_norm": 0.09510977566242218, + "learning_rate": 2.7582667901574507e-06, + "loss": 8.5174, + "step": 181880 + }, + { + "epoch": 0.908337286823641, + "grad_norm": 0.0934426486492157, + "learning_rate": 2.7567648752159004e-06, + "loss": 8.5261, + "step": 181890 + }, + { + "epoch": 0.9083872256485805, + "grad_norm": 0.09243565052747726, + "learning_rate": 2.7552629602743497e-06, + "loss": 8.5155, + "step": 181900 + }, + { + "epoch": 0.90843716447352, + "grad_norm": 0.09363988786935806, + "learning_rate": 2.7537610453327995e-06, + "loss": 8.5313, + "step": 181910 + }, + { + "epoch": 0.9084871032984594, + "grad_norm": 0.09643617272377014, + "learning_rate": 2.752259130391249e-06, + "loss": 8.5019, + "step": 181920 + }, + { + "epoch": 0.9085370421233988, + "grad_norm": 0.09652823954820633, + "learning_rate": 2.750757215449698e-06, + "loss": 8.5114, + "step": 181930 + }, + { + "epoch": 0.9085869809483383, + "grad_norm": 0.0902838185429573, + "learning_rate": 2.749255300508148e-06, + "loss": 8.5336, + "step": 181940 + }, + { + "epoch": 0.9086369197732778, + "grad_norm": 0.0898505225777626, + "learning_rate": 2.7477533855665977e-06, + "loss": 8.5068, + "step": 181950 + }, + { + "epoch": 0.9086868585982172, + "grad_norm": 0.0900585874915123, + "learning_rate": 2.746251470625047e-06, + "loss": 8.4995, + "step": 181960 + }, + { + "epoch": 0.9087367974231566, + "grad_norm": 0.09357380867004395, + "learning_rate": 2.7447495556834963e-06, + "loss": 8.5153, + "step": 181970 + }, + { + "epoch": 0.908786736248096, + "grad_norm": 0.08812058717012405, + "learning_rate": 2.7432476407419457e-06, + "loss": 8.5055, + "step": 181980 + }, + { + "epoch": 0.9088366750730356, + "grad_norm": 0.08812303096055984, + "learning_rate": 2.741745725800396e-06, + "loss": 8.5064, + "step": 181990 + }, + { + "epoch": 0.908886613897975, + "grad_norm": 0.09171146154403687, + "learning_rate": 2.740243810858845e-06, + "loss": 8.5135, + "step": 182000 + }, + { + "epoch": 0.9089365527229144, + "grad_norm": 0.09255418926477432, + "learning_rate": 2.7387418959172945e-06, + "loss": 8.5029, + "step": 182010 + }, + { + "epoch": 0.9089864915478538, + "grad_norm": 0.08891849964857101, + "learning_rate": 2.737239980975744e-06, + "loss": 8.5038, + "step": 182020 + }, + { + "epoch": 0.9090364303727934, + "grad_norm": 0.09265652298927307, + "learning_rate": 2.7357380660341936e-06, + "loss": 8.5129, + "step": 182030 + }, + { + "epoch": 0.9090863691977328, + "grad_norm": 0.08749819546937943, + "learning_rate": 2.7342361510926433e-06, + "loss": 8.5183, + "step": 182040 + }, + { + "epoch": 0.9091363080226722, + "grad_norm": 0.08859850466251373, + "learning_rate": 2.7327342361510927e-06, + "loss": 8.5137, + "step": 182050 + }, + { + "epoch": 0.9091862468476116, + "grad_norm": 0.09206672012805939, + "learning_rate": 2.731232321209542e-06, + "loss": 8.5156, + "step": 182060 + }, + { + "epoch": 0.9092361856725512, + "grad_norm": 0.08951518684625626, + "learning_rate": 2.7297304062679918e-06, + "loss": 8.4979, + "step": 182070 + }, + { + "epoch": 0.9092861244974906, + "grad_norm": 0.09024099260568619, + "learning_rate": 2.7282284913264415e-06, + "loss": 8.5223, + "step": 182080 + }, + { + "epoch": 0.90933606332243, + "grad_norm": 0.08952950686216354, + "learning_rate": 2.726726576384891e-06, + "loss": 8.4998, + "step": 182090 + }, + { + "epoch": 0.9093860021473694, + "grad_norm": 0.09278613328933716, + "learning_rate": 2.72522466144334e-06, + "loss": 8.5078, + "step": 182100 + }, + { + "epoch": 0.909435940972309, + "grad_norm": 0.08384092897176743, + "learning_rate": 2.72372274650179e-06, + "loss": 8.5027, + "step": 182110 + }, + { + "epoch": 0.9094858797972484, + "grad_norm": 0.08967365324497223, + "learning_rate": 2.7222208315602393e-06, + "loss": 8.5171, + "step": 182120 + }, + { + "epoch": 0.9095358186221878, + "grad_norm": 0.09536395221948624, + "learning_rate": 2.720718916618689e-06, + "loss": 8.5055, + "step": 182130 + }, + { + "epoch": 0.9095857574471272, + "grad_norm": 0.09401460736989975, + "learning_rate": 2.7192170016771384e-06, + "loss": 8.5148, + "step": 182140 + }, + { + "epoch": 0.9096356962720668, + "grad_norm": 0.0905858501791954, + "learning_rate": 2.717715086735588e-06, + "loss": 8.5112, + "step": 182150 + }, + { + "epoch": 0.9096856350970062, + "grad_norm": 0.09035810828208923, + "learning_rate": 2.7162131717940374e-06, + "loss": 8.4981, + "step": 182160 + }, + { + "epoch": 0.9097355739219456, + "grad_norm": 0.09156506508588791, + "learning_rate": 2.7147112568524868e-06, + "loss": 8.5072, + "step": 182170 + }, + { + "epoch": 0.909785512746885, + "grad_norm": 0.09411493688821793, + "learning_rate": 2.7132093419109365e-06, + "loss": 8.5061, + "step": 182180 + }, + { + "epoch": 0.9098354515718245, + "grad_norm": 0.08917105942964554, + "learning_rate": 2.7117074269693863e-06, + "loss": 8.5034, + "step": 182190 + }, + { + "epoch": 0.909885390396764, + "grad_norm": 0.08715265989303589, + "learning_rate": 2.7102055120278356e-06, + "loss": 8.507, + "step": 182200 + }, + { + "epoch": 0.9099353292217034, + "grad_norm": 0.09055981785058975, + "learning_rate": 2.708703597086285e-06, + "loss": 8.5144, + "step": 182210 + }, + { + "epoch": 0.9099852680466428, + "grad_norm": 0.09121181070804596, + "learning_rate": 2.7072016821447343e-06, + "loss": 8.5191, + "step": 182220 + }, + { + "epoch": 0.9100352068715823, + "grad_norm": 0.09437250345945358, + "learning_rate": 2.7056997672031845e-06, + "loss": 8.5132, + "step": 182230 + }, + { + "epoch": 0.9100851456965218, + "grad_norm": 0.09657744318246841, + "learning_rate": 2.7041978522616338e-06, + "loss": 8.5328, + "step": 182240 + }, + { + "epoch": 0.9101350845214612, + "grad_norm": 0.09253308176994324, + "learning_rate": 2.702695937320083e-06, + "loss": 8.5081, + "step": 182250 + }, + { + "epoch": 0.9101850233464006, + "grad_norm": 0.09130608290433884, + "learning_rate": 2.7011940223785324e-06, + "loss": 8.4981, + "step": 182260 + }, + { + "epoch": 0.9102349621713401, + "grad_norm": 0.08788429945707321, + "learning_rate": 2.699692107436982e-06, + "loss": 8.4895, + "step": 182270 + }, + { + "epoch": 0.9102849009962796, + "grad_norm": 0.09099657833576202, + "learning_rate": 2.698190192495432e-06, + "loss": 8.508, + "step": 182280 + }, + { + "epoch": 0.910334839821219, + "grad_norm": 0.08800596743822098, + "learning_rate": 2.6966882775538813e-06, + "loss": 8.5297, + "step": 182290 + }, + { + "epoch": 0.9103847786461584, + "grad_norm": 0.09730488806962967, + "learning_rate": 2.6951863626123306e-06, + "loss": 8.5148, + "step": 182300 + }, + { + "epoch": 0.9104347174710979, + "grad_norm": 0.09206937998533249, + "learning_rate": 2.6936844476707804e-06, + "loss": 8.5049, + "step": 182310 + }, + { + "epoch": 0.9104846562960374, + "grad_norm": 0.08866451680660248, + "learning_rate": 2.6921825327292297e-06, + "loss": 8.512, + "step": 182320 + }, + { + "epoch": 0.9105345951209768, + "grad_norm": 0.09090999513864517, + "learning_rate": 2.6906806177876795e-06, + "loss": 8.4796, + "step": 182330 + }, + { + "epoch": 0.9105845339459162, + "grad_norm": 0.08853746205568314, + "learning_rate": 2.689178702846129e-06, + "loss": 8.5107, + "step": 182340 + }, + { + "epoch": 0.9106344727708557, + "grad_norm": 0.09544672071933746, + "learning_rate": 2.6876767879045785e-06, + "loss": 8.5033, + "step": 182350 + }, + { + "epoch": 0.9106844115957952, + "grad_norm": 0.09194538742303848, + "learning_rate": 2.686174872963028e-06, + "loss": 8.5087, + "step": 182360 + }, + { + "epoch": 0.9107343504207346, + "grad_norm": 0.08848384767770767, + "learning_rate": 2.684672958021477e-06, + "loss": 8.5096, + "step": 182370 + }, + { + "epoch": 0.910784289245674, + "grad_norm": 0.09186051785945892, + "learning_rate": 2.683171043079927e-06, + "loss": 8.5115, + "step": 182380 + }, + { + "epoch": 0.9108342280706135, + "grad_norm": 0.09041396528482437, + "learning_rate": 2.6816691281383767e-06, + "loss": 8.5148, + "step": 182390 + }, + { + "epoch": 0.910884166895553, + "grad_norm": 0.0917699784040451, + "learning_rate": 2.680167213196826e-06, + "loss": 8.5289, + "step": 182400 + }, + { + "epoch": 0.9109341057204924, + "grad_norm": 0.09223365783691406, + "learning_rate": 2.6786652982552754e-06, + "loss": 8.509, + "step": 182410 + }, + { + "epoch": 0.9109840445454318, + "grad_norm": 0.08908481150865555, + "learning_rate": 2.6771633833137247e-06, + "loss": 8.519, + "step": 182420 + }, + { + "epoch": 0.9110339833703713, + "grad_norm": 0.08989152312278748, + "learning_rate": 2.675661468372175e-06, + "loss": 8.5255, + "step": 182430 + }, + { + "epoch": 0.9110839221953108, + "grad_norm": 0.0932261049747467, + "learning_rate": 2.6741595534306242e-06, + "loss": 8.5118, + "step": 182440 + }, + { + "epoch": 0.9111338610202502, + "grad_norm": 0.08758267015218735, + "learning_rate": 2.6726576384890736e-06, + "loss": 8.528, + "step": 182450 + }, + { + "epoch": 0.9111837998451896, + "grad_norm": 0.09773239493370056, + "learning_rate": 2.671155723547523e-06, + "loss": 8.5038, + "step": 182460 + }, + { + "epoch": 0.9112337386701291, + "grad_norm": 0.09452714025974274, + "learning_rate": 2.6696538086059726e-06, + "loss": 8.512, + "step": 182470 + }, + { + "epoch": 0.9112836774950686, + "grad_norm": 0.09245564788579941, + "learning_rate": 2.6681518936644224e-06, + "loss": 8.5157, + "step": 182480 + }, + { + "epoch": 0.911333616320008, + "grad_norm": 0.08789669722318649, + "learning_rate": 2.6666499787228717e-06, + "loss": 8.5091, + "step": 182490 + }, + { + "epoch": 0.9113835551449474, + "grad_norm": 0.08953165262937546, + "learning_rate": 2.665148063781321e-06, + "loss": 8.4934, + "step": 182500 + }, + { + "epoch": 0.9114334939698869, + "grad_norm": 0.09100998938083649, + "learning_rate": 2.663646148839771e-06, + "loss": 8.5173, + "step": 182510 + }, + { + "epoch": 0.9114834327948264, + "grad_norm": 0.10075462609529495, + "learning_rate": 2.66214423389822e-06, + "loss": 8.5088, + "step": 182520 + }, + { + "epoch": 0.9115333716197658, + "grad_norm": 0.09430301189422607, + "learning_rate": 2.66064231895667e-06, + "loss": 8.4971, + "step": 182530 + }, + { + "epoch": 0.9115833104447052, + "grad_norm": 0.08746136724948883, + "learning_rate": 2.6591404040151192e-06, + "loss": 8.4993, + "step": 182540 + }, + { + "epoch": 0.9116332492696447, + "grad_norm": 0.08813217282295227, + "learning_rate": 2.657638489073569e-06, + "loss": 8.5164, + "step": 182550 + }, + { + "epoch": 0.9116831880945842, + "grad_norm": 0.08637049794197083, + "learning_rate": 2.6561365741320183e-06, + "loss": 8.508, + "step": 182560 + }, + { + "epoch": 0.9117331269195236, + "grad_norm": 0.09196769446134567, + "learning_rate": 2.6546346591904676e-06, + "loss": 8.498, + "step": 182570 + }, + { + "epoch": 0.911783065744463, + "grad_norm": 0.09665251523256302, + "learning_rate": 2.6531327442489174e-06, + "loss": 8.5179, + "step": 182580 + }, + { + "epoch": 0.9118330045694025, + "grad_norm": 0.08948566764593124, + "learning_rate": 2.651630829307367e-06, + "loss": 8.5084, + "step": 182590 + }, + { + "epoch": 0.911882943394342, + "grad_norm": 0.08373957872390747, + "learning_rate": 2.6501289143658165e-06, + "loss": 8.5356, + "step": 182600 + }, + { + "epoch": 0.9119328822192814, + "grad_norm": 0.08785004913806915, + "learning_rate": 2.648626999424266e-06, + "loss": 8.5137, + "step": 182610 + }, + { + "epoch": 0.9119828210442208, + "grad_norm": 0.09454427659511566, + "learning_rate": 2.6471250844827156e-06, + "loss": 8.503, + "step": 182620 + }, + { + "epoch": 0.9120327598691603, + "grad_norm": 0.0859336405992508, + "learning_rate": 2.6456231695411653e-06, + "loss": 8.5194, + "step": 182630 + }, + { + "epoch": 0.9120826986940997, + "grad_norm": 0.0904291421175003, + "learning_rate": 2.6441212545996147e-06, + "loss": 8.5232, + "step": 182640 + }, + { + "epoch": 0.9121326375190392, + "grad_norm": 0.0902220755815506, + "learning_rate": 2.642619339658064e-06, + "loss": 8.4902, + "step": 182650 + }, + { + "epoch": 0.9121825763439786, + "grad_norm": 0.09232915937900543, + "learning_rate": 2.6411174247165133e-06, + "loss": 8.5124, + "step": 182660 + }, + { + "epoch": 0.9122325151689181, + "grad_norm": 0.0919727310538292, + "learning_rate": 2.6396155097749635e-06, + "loss": 8.5165, + "step": 182670 + }, + { + "epoch": 0.9122824539938575, + "grad_norm": 0.09011544287204742, + "learning_rate": 2.638113594833413e-06, + "loss": 8.534, + "step": 182680 + }, + { + "epoch": 0.912332392818797, + "grad_norm": 0.08686014264822006, + "learning_rate": 2.636611679891862e-06, + "loss": 8.5142, + "step": 182690 + }, + { + "epoch": 0.9123823316437364, + "grad_norm": 0.09502139687538147, + "learning_rate": 2.6351097649503115e-06, + "loss": 8.5061, + "step": 182700 + }, + { + "epoch": 0.9124322704686759, + "grad_norm": 0.08637500554323196, + "learning_rate": 2.6336078500087613e-06, + "loss": 8.5126, + "step": 182710 + }, + { + "epoch": 0.9124822092936153, + "grad_norm": 0.08577697724103928, + "learning_rate": 2.632105935067211e-06, + "loss": 8.5109, + "step": 182720 + }, + { + "epoch": 0.9125321481185548, + "grad_norm": 0.08816831558942795, + "learning_rate": 2.6306040201256603e-06, + "loss": 8.5045, + "step": 182730 + }, + { + "epoch": 0.9125820869434942, + "grad_norm": 0.09027297049760818, + "learning_rate": 2.6291021051841097e-06, + "loss": 8.5062, + "step": 182740 + }, + { + "epoch": 0.9126320257684337, + "grad_norm": 0.08729089051485062, + "learning_rate": 2.6276001902425594e-06, + "loss": 8.4982, + "step": 182750 + }, + { + "epoch": 0.9126819645933731, + "grad_norm": 0.09011752158403397, + "learning_rate": 2.6260982753010088e-06, + "loss": 8.5129, + "step": 182760 + }, + { + "epoch": 0.9127319034183126, + "grad_norm": 0.09230612218379974, + "learning_rate": 2.6245963603594585e-06, + "loss": 8.5134, + "step": 182770 + }, + { + "epoch": 0.912781842243252, + "grad_norm": 0.08646968007087708, + "learning_rate": 2.623094445417908e-06, + "loss": 8.5196, + "step": 182780 + }, + { + "epoch": 0.9128317810681915, + "grad_norm": 0.09204564988613129, + "learning_rate": 2.6215925304763576e-06, + "loss": 8.5168, + "step": 182790 + }, + { + "epoch": 0.9128817198931309, + "grad_norm": 0.08996839821338654, + "learning_rate": 2.620090615534807e-06, + "loss": 8.5249, + "step": 182800 + }, + { + "epoch": 0.9129316587180704, + "grad_norm": 0.09080091118812561, + "learning_rate": 2.6185887005932563e-06, + "loss": 8.5052, + "step": 182810 + }, + { + "epoch": 0.9129815975430098, + "grad_norm": 0.09134030342102051, + "learning_rate": 2.617086785651706e-06, + "loss": 8.5065, + "step": 182820 + }, + { + "epoch": 0.9130315363679493, + "grad_norm": 0.08714727312326431, + "learning_rate": 2.6155848707101558e-06, + "loss": 8.4966, + "step": 182830 + }, + { + "epoch": 0.9130814751928887, + "grad_norm": 0.08802255988121033, + "learning_rate": 2.614082955768605e-06, + "loss": 8.5049, + "step": 182840 + }, + { + "epoch": 0.9131314140178282, + "grad_norm": 0.091342493891716, + "learning_rate": 2.6125810408270544e-06, + "loss": 8.5057, + "step": 182850 + }, + { + "epoch": 0.9131813528427676, + "grad_norm": 0.0987749993801117, + "learning_rate": 2.6110791258855038e-06, + "loss": 8.5085, + "step": 182860 + }, + { + "epoch": 0.9132312916677071, + "grad_norm": 0.08858326077461243, + "learning_rate": 2.609577210943954e-06, + "loss": 8.5, + "step": 182870 + }, + { + "epoch": 0.9132812304926465, + "grad_norm": 0.09258518368005753, + "learning_rate": 2.6080752960024033e-06, + "loss": 8.4916, + "step": 182880 + }, + { + "epoch": 0.913331169317586, + "grad_norm": 0.09570000320672989, + "learning_rate": 2.6065733810608526e-06, + "loss": 8.5024, + "step": 182890 + }, + { + "epoch": 0.9133811081425254, + "grad_norm": 0.09027928858995438, + "learning_rate": 2.605071466119302e-06, + "loss": 8.5172, + "step": 182900 + }, + { + "epoch": 0.9134310469674649, + "grad_norm": 0.09241816401481628, + "learning_rate": 2.6035695511777517e-06, + "loss": 8.5002, + "step": 182910 + }, + { + "epoch": 0.9134809857924043, + "grad_norm": 0.08846849203109741, + "learning_rate": 2.6020676362362014e-06, + "loss": 8.5361, + "step": 182920 + }, + { + "epoch": 0.9135309246173438, + "grad_norm": 0.09381444752216339, + "learning_rate": 2.6005657212946508e-06, + "loss": 8.5218, + "step": 182930 + }, + { + "epoch": 0.9135808634422832, + "grad_norm": 0.09875883162021637, + "learning_rate": 2.5990638063531e-06, + "loss": 8.5085, + "step": 182940 + }, + { + "epoch": 0.9136308022672226, + "grad_norm": 0.0989549458026886, + "learning_rate": 2.59756189141155e-06, + "loss": 8.4929, + "step": 182950 + }, + { + "epoch": 0.9136807410921621, + "grad_norm": 0.08980342745780945, + "learning_rate": 2.596059976469999e-06, + "loss": 8.501, + "step": 182960 + }, + { + "epoch": 0.9137306799171016, + "grad_norm": 0.0909147560596466, + "learning_rate": 2.594558061528449e-06, + "loss": 8.5148, + "step": 182970 + }, + { + "epoch": 0.913780618742041, + "grad_norm": 0.091719850897789, + "learning_rate": 2.5930561465868983e-06, + "loss": 8.5061, + "step": 182980 + }, + { + "epoch": 0.9138305575669804, + "grad_norm": 0.09617502242326736, + "learning_rate": 2.591554231645348e-06, + "loss": 8.5005, + "step": 182990 + }, + { + "epoch": 0.9138804963919199, + "grad_norm": 0.08993982523679733, + "learning_rate": 2.5900523167037974e-06, + "loss": 8.5062, + "step": 183000 + }, + { + "epoch": 0.9139304352168593, + "grad_norm": 0.08975507318973541, + "learning_rate": 2.5885504017622467e-06, + "loss": 8.5052, + "step": 183010 + }, + { + "epoch": 0.9139803740417988, + "grad_norm": 0.09407518804073334, + "learning_rate": 2.5870484868206965e-06, + "loss": 8.5038, + "step": 183020 + }, + { + "epoch": 0.9140303128667382, + "grad_norm": 0.09323114156723022, + "learning_rate": 2.585546571879146e-06, + "loss": 8.5105, + "step": 183030 + }, + { + "epoch": 0.9140802516916777, + "grad_norm": 0.08849375694990158, + "learning_rate": 2.5840446569375955e-06, + "loss": 8.5113, + "step": 183040 + }, + { + "epoch": 0.9141301905166171, + "grad_norm": 0.08718482404947281, + "learning_rate": 2.582542741996045e-06, + "loss": 8.51, + "step": 183050 + }, + { + "epoch": 0.9141801293415566, + "grad_norm": 0.09222862124443054, + "learning_rate": 2.581040827054494e-06, + "loss": 8.503, + "step": 183060 + }, + { + "epoch": 0.914230068166496, + "grad_norm": 0.09302924573421478, + "learning_rate": 2.5795389121129444e-06, + "loss": 8.5162, + "step": 183070 + }, + { + "epoch": 0.9142800069914355, + "grad_norm": 0.09386178851127625, + "learning_rate": 2.5780369971713937e-06, + "loss": 8.5088, + "step": 183080 + }, + { + "epoch": 0.914329945816375, + "grad_norm": 0.09588906168937683, + "learning_rate": 2.576535082229843e-06, + "loss": 8.4878, + "step": 183090 + }, + { + "epoch": 0.9143798846413144, + "grad_norm": 0.09002941846847534, + "learning_rate": 2.5750331672882924e-06, + "loss": 8.4933, + "step": 183100 + }, + { + "epoch": 0.9144298234662538, + "grad_norm": 0.08975888043642044, + "learning_rate": 2.573531252346742e-06, + "loss": 8.4971, + "step": 183110 + }, + { + "epoch": 0.9144797622911933, + "grad_norm": 0.09490811824798584, + "learning_rate": 2.572029337405192e-06, + "loss": 8.522, + "step": 183120 + }, + { + "epoch": 0.9145297011161327, + "grad_norm": 0.09320612251758575, + "learning_rate": 2.5705274224636412e-06, + "loss": 8.5048, + "step": 183130 + }, + { + "epoch": 0.9145796399410722, + "grad_norm": 0.08958461880683899, + "learning_rate": 2.5690255075220905e-06, + "loss": 8.511, + "step": 183140 + }, + { + "epoch": 0.9146295787660116, + "grad_norm": 0.09119990468025208, + "learning_rate": 2.5675235925805403e-06, + "loss": 8.4993, + "step": 183150 + }, + { + "epoch": 0.9146795175909511, + "grad_norm": 0.093448705971241, + "learning_rate": 2.56602167763899e-06, + "loss": 8.4941, + "step": 183160 + }, + { + "epoch": 0.9147294564158905, + "grad_norm": 0.0903758853673935, + "learning_rate": 2.5645197626974394e-06, + "loss": 8.5067, + "step": 183170 + }, + { + "epoch": 0.91477939524083, + "grad_norm": 0.09165302664041519, + "learning_rate": 2.5630178477558887e-06, + "loss": 8.5058, + "step": 183180 + }, + { + "epoch": 0.9148293340657694, + "grad_norm": 0.08942514657974243, + "learning_rate": 2.5615159328143385e-06, + "loss": 8.5319, + "step": 183190 + }, + { + "epoch": 0.9148792728907089, + "grad_norm": 0.08656205981969833, + "learning_rate": 2.560014017872788e-06, + "loss": 8.5003, + "step": 183200 + }, + { + "epoch": 0.9149292117156483, + "grad_norm": 0.08952057361602783, + "learning_rate": 2.5585121029312376e-06, + "loss": 8.5137, + "step": 183210 + }, + { + "epoch": 0.9149791505405878, + "grad_norm": 0.09631547331809998, + "learning_rate": 2.557010187989687e-06, + "loss": 8.5028, + "step": 183220 + }, + { + "epoch": 0.9150290893655272, + "grad_norm": 0.0935741513967514, + "learning_rate": 2.5555082730481366e-06, + "loss": 8.5188, + "step": 183230 + }, + { + "epoch": 0.9150790281904667, + "grad_norm": 0.09073397517204285, + "learning_rate": 2.554006358106586e-06, + "loss": 8.5276, + "step": 183240 + }, + { + "epoch": 0.9151289670154061, + "grad_norm": 0.09225539863109589, + "learning_rate": 2.5525044431650353e-06, + "loss": 8.5081, + "step": 183250 + }, + { + "epoch": 0.9151789058403456, + "grad_norm": 0.09920675307512283, + "learning_rate": 2.551002528223485e-06, + "loss": 8.5123, + "step": 183260 + }, + { + "epoch": 0.915228844665285, + "grad_norm": 0.09345440566539764, + "learning_rate": 2.549500613281935e-06, + "loss": 8.505, + "step": 183270 + }, + { + "epoch": 0.9152787834902245, + "grad_norm": 0.09154894202947617, + "learning_rate": 2.547998698340384e-06, + "loss": 8.4907, + "step": 183280 + }, + { + "epoch": 0.9153287223151639, + "grad_norm": 0.09122487902641296, + "learning_rate": 2.5464967833988335e-06, + "loss": 8.5205, + "step": 183290 + }, + { + "epoch": 0.9153786611401034, + "grad_norm": 0.09112253040075302, + "learning_rate": 2.544994868457283e-06, + "loss": 8.4846, + "step": 183300 + }, + { + "epoch": 0.9154285999650428, + "grad_norm": 0.08982587605714798, + "learning_rate": 2.543492953515733e-06, + "loss": 8.5097, + "step": 183310 + }, + { + "epoch": 0.9154785387899823, + "grad_norm": 0.09354843199253082, + "learning_rate": 2.5419910385741823e-06, + "loss": 8.5127, + "step": 183320 + }, + { + "epoch": 0.9155284776149217, + "grad_norm": 0.08977293968200684, + "learning_rate": 2.5404891236326317e-06, + "loss": 8.5254, + "step": 183330 + }, + { + "epoch": 0.9155784164398612, + "grad_norm": 0.0911603569984436, + "learning_rate": 2.538987208691081e-06, + "loss": 8.5047, + "step": 183340 + }, + { + "epoch": 0.9156283552648006, + "grad_norm": 0.09315786510705948, + "learning_rate": 2.5374852937495307e-06, + "loss": 8.4967, + "step": 183350 + }, + { + "epoch": 0.9156782940897401, + "grad_norm": 0.09075506776571274, + "learning_rate": 2.5359833788079805e-06, + "loss": 8.4941, + "step": 183360 + }, + { + "epoch": 0.9157282329146795, + "grad_norm": 0.0923544391989708, + "learning_rate": 2.53448146386643e-06, + "loss": 8.5156, + "step": 183370 + }, + { + "epoch": 0.915778171739619, + "grad_norm": 0.09413059055805206, + "learning_rate": 2.532979548924879e-06, + "loss": 8.4979, + "step": 183380 + }, + { + "epoch": 0.9158281105645584, + "grad_norm": 0.09276200085878372, + "learning_rate": 2.531477633983329e-06, + "loss": 8.5112, + "step": 183390 + }, + { + "epoch": 0.9158780493894979, + "grad_norm": 0.09218557178974152, + "learning_rate": 2.5299757190417782e-06, + "loss": 8.5049, + "step": 183400 + }, + { + "epoch": 0.9159279882144373, + "grad_norm": 0.09127632528543472, + "learning_rate": 2.528473804100228e-06, + "loss": 8.4994, + "step": 183410 + }, + { + "epoch": 0.9159779270393767, + "grad_norm": 0.09124505519866943, + "learning_rate": 2.5269718891586773e-06, + "loss": 8.5037, + "step": 183420 + }, + { + "epoch": 0.9160278658643162, + "grad_norm": 0.09244653582572937, + "learning_rate": 2.525469974217127e-06, + "loss": 8.5091, + "step": 183430 + }, + { + "epoch": 0.9160778046892557, + "grad_norm": 0.09359874576330185, + "learning_rate": 2.5239680592755764e-06, + "loss": 8.5033, + "step": 183440 + }, + { + "epoch": 0.9161277435141951, + "grad_norm": 0.08860906213521957, + "learning_rate": 2.5224661443340257e-06, + "loss": 8.5175, + "step": 183450 + }, + { + "epoch": 0.9161776823391345, + "grad_norm": 0.08711325377225876, + "learning_rate": 2.5209642293924755e-06, + "loss": 8.4991, + "step": 183460 + }, + { + "epoch": 0.916227621164074, + "grad_norm": 0.0916765034198761, + "learning_rate": 2.5194623144509253e-06, + "loss": 8.5133, + "step": 183470 + }, + { + "epoch": 0.9162775599890135, + "grad_norm": 0.0957622155547142, + "learning_rate": 2.5179603995093746e-06, + "loss": 8.5013, + "step": 183480 + }, + { + "epoch": 0.9163274988139529, + "grad_norm": 0.08793076127767563, + "learning_rate": 2.516458484567824e-06, + "loss": 8.5154, + "step": 183490 + }, + { + "epoch": 0.9163774376388923, + "grad_norm": 0.08785158395767212, + "learning_rate": 2.5149565696262732e-06, + "loss": 8.5007, + "step": 183500 + }, + { + "epoch": 0.9164273764638318, + "grad_norm": 0.09278276562690735, + "learning_rate": 2.5134546546847234e-06, + "loss": 8.5113, + "step": 183510 + }, + { + "epoch": 0.9164773152887713, + "grad_norm": 0.08464596420526505, + "learning_rate": 2.5119527397431728e-06, + "loss": 8.5296, + "step": 183520 + }, + { + "epoch": 0.9165272541137107, + "grad_norm": 0.08865651488304138, + "learning_rate": 2.510450824801622e-06, + "loss": 8.5137, + "step": 183530 + }, + { + "epoch": 0.9165771929386501, + "grad_norm": 0.09250415861606598, + "learning_rate": 2.5089489098600714e-06, + "loss": 8.5173, + "step": 183540 + }, + { + "epoch": 0.9166271317635896, + "grad_norm": 0.09271103143692017, + "learning_rate": 2.507446994918521e-06, + "loss": 8.4864, + "step": 183550 + }, + { + "epoch": 0.9166770705885291, + "grad_norm": 0.09764368087053299, + "learning_rate": 2.505945079976971e-06, + "loss": 8.5003, + "step": 183560 + }, + { + "epoch": 0.9167270094134685, + "grad_norm": 0.09379107505083084, + "learning_rate": 2.5044431650354203e-06, + "loss": 8.5047, + "step": 183570 + }, + { + "epoch": 0.9167769482384079, + "grad_norm": 0.08978167921304703, + "learning_rate": 2.5029412500938696e-06, + "loss": 8.5087, + "step": 183580 + }, + { + "epoch": 0.9168268870633474, + "grad_norm": 0.0897134467959404, + "learning_rate": 2.5014393351523193e-06, + "loss": 8.5065, + "step": 183590 + }, + { + "epoch": 0.9168768258882869, + "grad_norm": 0.09094201773405075, + "learning_rate": 2.4999374202107687e-06, + "loss": 8.5085, + "step": 183600 + }, + { + "epoch": 0.9169267647132263, + "grad_norm": 0.09478742629289627, + "learning_rate": 2.4984355052692184e-06, + "loss": 8.5052, + "step": 183610 + }, + { + "epoch": 0.9169767035381657, + "grad_norm": 0.0915999785065651, + "learning_rate": 2.4969335903276678e-06, + "loss": 8.5097, + "step": 183620 + }, + { + "epoch": 0.9170266423631052, + "grad_norm": 0.08944950252771378, + "learning_rate": 2.4954316753861175e-06, + "loss": 8.4869, + "step": 183630 + }, + { + "epoch": 0.9170765811880447, + "grad_norm": 0.09154719859361649, + "learning_rate": 2.493929760444567e-06, + "loss": 8.5146, + "step": 183640 + }, + { + "epoch": 0.9171265200129841, + "grad_norm": 0.0902072861790657, + "learning_rate": 2.492427845503016e-06, + "loss": 8.5102, + "step": 183650 + }, + { + "epoch": 0.9171764588379235, + "grad_norm": 0.09381358325481415, + "learning_rate": 2.490925930561466e-06, + "loss": 8.5281, + "step": 183660 + }, + { + "epoch": 0.917226397662863, + "grad_norm": 0.0945509746670723, + "learning_rate": 2.4894240156199157e-06, + "loss": 8.5014, + "step": 183670 + }, + { + "epoch": 0.9172763364878025, + "grad_norm": 0.09148582816123962, + "learning_rate": 2.487922100678365e-06, + "loss": 8.51, + "step": 183680 + }, + { + "epoch": 0.9173262753127419, + "grad_norm": 0.09244761615991592, + "learning_rate": 2.4864201857368144e-06, + "loss": 8.5024, + "step": 183690 + }, + { + "epoch": 0.9173762141376813, + "grad_norm": 0.08857100456953049, + "learning_rate": 2.484918270795264e-06, + "loss": 8.5167, + "step": 183700 + }, + { + "epoch": 0.9174261529626208, + "grad_norm": 0.09274089336395264, + "learning_rate": 2.483416355853714e-06, + "loss": 8.5078, + "step": 183710 + }, + { + "epoch": 0.9174760917875603, + "grad_norm": 0.08995862305164337, + "learning_rate": 2.481914440912163e-06, + "loss": 8.5171, + "step": 183720 + }, + { + "epoch": 0.9175260306124997, + "grad_norm": 0.08714859187602997, + "learning_rate": 2.4804125259706125e-06, + "loss": 8.4972, + "step": 183730 + }, + { + "epoch": 0.9175759694374391, + "grad_norm": 0.09085182845592499, + "learning_rate": 2.478910611029062e-06, + "loss": 8.5136, + "step": 183740 + }, + { + "epoch": 0.9176259082623786, + "grad_norm": 0.09306159615516663, + "learning_rate": 2.477408696087512e-06, + "loss": 8.5094, + "step": 183750 + }, + { + "epoch": 0.9176758470873181, + "grad_norm": 0.08627147227525711, + "learning_rate": 2.4759067811459614e-06, + "loss": 8.5231, + "step": 183760 + }, + { + "epoch": 0.9177257859122575, + "grad_norm": 0.09129956364631653, + "learning_rate": 2.4744048662044107e-06, + "loss": 8.4941, + "step": 183770 + }, + { + "epoch": 0.9177757247371969, + "grad_norm": 0.09526669234037399, + "learning_rate": 2.47290295126286e-06, + "loss": 8.4966, + "step": 183780 + }, + { + "epoch": 0.9178256635621364, + "grad_norm": 0.09122246503829956, + "learning_rate": 2.4714010363213098e-06, + "loss": 8.4972, + "step": 183790 + }, + { + "epoch": 0.9178756023870759, + "grad_norm": 0.09195857495069504, + "learning_rate": 2.4698991213797595e-06, + "loss": 8.5057, + "step": 183800 + }, + { + "epoch": 0.9179255412120153, + "grad_norm": 0.090829037129879, + "learning_rate": 2.468397206438209e-06, + "loss": 8.5169, + "step": 183810 + }, + { + "epoch": 0.9179754800369547, + "grad_norm": 0.09058675169944763, + "learning_rate": 2.466895291496658e-06, + "loss": 8.511, + "step": 183820 + }, + { + "epoch": 0.9180254188618941, + "grad_norm": 0.08892668038606644, + "learning_rate": 2.465393376555108e-06, + "loss": 8.5127, + "step": 183830 + }, + { + "epoch": 0.9180753576868337, + "grad_norm": 0.09829755127429962, + "learning_rate": 2.4638914616135573e-06, + "loss": 8.4872, + "step": 183840 + }, + { + "epoch": 0.9181252965117731, + "grad_norm": 0.08881474286317825, + "learning_rate": 2.462389546672007e-06, + "loss": 8.4938, + "step": 183850 + }, + { + "epoch": 0.9181752353367125, + "grad_norm": 0.08773206174373627, + "learning_rate": 2.4608876317304564e-06, + "loss": 8.5175, + "step": 183860 + }, + { + "epoch": 0.918225174161652, + "grad_norm": 0.08635301142930984, + "learning_rate": 2.459385716788906e-06, + "loss": 8.5228, + "step": 183870 + }, + { + "epoch": 0.9182751129865915, + "grad_norm": 0.08536515384912491, + "learning_rate": 2.4578838018473555e-06, + "loss": 8.5215, + "step": 183880 + }, + { + "epoch": 0.9183250518115309, + "grad_norm": 0.08720611780881882, + "learning_rate": 2.456381886905805e-06, + "loss": 8.5034, + "step": 183890 + }, + { + "epoch": 0.9183749906364703, + "grad_norm": 0.09371429681777954, + "learning_rate": 2.4548799719642545e-06, + "loss": 8.5029, + "step": 183900 + }, + { + "epoch": 0.9184249294614097, + "grad_norm": 0.08567806333303452, + "learning_rate": 2.4533780570227043e-06, + "loss": 8.5001, + "step": 183910 + }, + { + "epoch": 0.9184748682863493, + "grad_norm": 0.09422826766967773, + "learning_rate": 2.4518761420811536e-06, + "loss": 8.5079, + "step": 183920 + }, + { + "epoch": 0.9185248071112887, + "grad_norm": 0.0892164334654808, + "learning_rate": 2.450374227139603e-06, + "loss": 8.4915, + "step": 183930 + }, + { + "epoch": 0.9185747459362281, + "grad_norm": 0.09322153776884079, + "learning_rate": 2.4488723121980523e-06, + "loss": 8.5038, + "step": 183940 + }, + { + "epoch": 0.9186246847611675, + "grad_norm": 0.0924592837691307, + "learning_rate": 2.4473703972565025e-06, + "loss": 8.5004, + "step": 183950 + }, + { + "epoch": 0.918674623586107, + "grad_norm": 0.0903005376458168, + "learning_rate": 2.445868482314952e-06, + "loss": 8.5133, + "step": 183960 + }, + { + "epoch": 0.9187245624110465, + "grad_norm": 0.09269312024116516, + "learning_rate": 2.444366567373401e-06, + "loss": 8.5266, + "step": 183970 + }, + { + "epoch": 0.9187745012359859, + "grad_norm": 0.0908818319439888, + "learning_rate": 2.4428646524318505e-06, + "loss": 8.5057, + "step": 183980 + }, + { + "epoch": 0.9188244400609253, + "grad_norm": 0.09337764978408813, + "learning_rate": 2.4413627374903002e-06, + "loss": 8.5112, + "step": 183990 + }, + { + "epoch": 0.9188743788858648, + "grad_norm": 0.09528010338544846, + "learning_rate": 2.43986082254875e-06, + "loss": 8.503, + "step": 184000 + }, + { + "epoch": 0.9189243177108043, + "grad_norm": 0.09190403670072556, + "learning_rate": 2.4383589076071993e-06, + "loss": 8.5116, + "step": 184010 + }, + { + "epoch": 0.9189742565357437, + "grad_norm": 0.09150946140289307, + "learning_rate": 2.4368569926656486e-06, + "loss": 8.5047, + "step": 184020 + }, + { + "epoch": 0.9190241953606831, + "grad_norm": 0.09026480466127396, + "learning_rate": 2.4353550777240984e-06, + "loss": 8.5045, + "step": 184030 + }, + { + "epoch": 0.9190741341856226, + "grad_norm": 0.08819565176963806, + "learning_rate": 2.4338531627825477e-06, + "loss": 8.5203, + "step": 184040 + }, + { + "epoch": 0.9191240730105621, + "grad_norm": 0.09232760965824127, + "learning_rate": 2.4323512478409975e-06, + "loss": 8.5237, + "step": 184050 + }, + { + "epoch": 0.9191740118355015, + "grad_norm": 0.08861426264047623, + "learning_rate": 2.430849332899447e-06, + "loss": 8.4923, + "step": 184060 + }, + { + "epoch": 0.9192239506604409, + "grad_norm": 0.09452666342258453, + "learning_rate": 2.4293474179578966e-06, + "loss": 8.513, + "step": 184070 + }, + { + "epoch": 0.9192738894853804, + "grad_norm": 0.09179414063692093, + "learning_rate": 2.427845503016346e-06, + "loss": 8.4931, + "step": 184080 + }, + { + "epoch": 0.9193238283103199, + "grad_norm": 0.09043210744857788, + "learning_rate": 2.4263435880747952e-06, + "loss": 8.5092, + "step": 184090 + }, + { + "epoch": 0.9193737671352593, + "grad_norm": 0.09003657847642899, + "learning_rate": 2.424841673133245e-06, + "loss": 8.517, + "step": 184100 + }, + { + "epoch": 0.9194237059601987, + "grad_norm": 0.09282027930021286, + "learning_rate": 2.4233397581916947e-06, + "loss": 8.5116, + "step": 184110 + }, + { + "epoch": 0.9194736447851382, + "grad_norm": 0.08989115059375763, + "learning_rate": 2.421837843250144e-06, + "loss": 8.4954, + "step": 184120 + }, + { + "epoch": 0.9195235836100777, + "grad_norm": 0.09200627356767654, + "learning_rate": 2.4203359283085934e-06, + "loss": 8.5153, + "step": 184130 + }, + { + "epoch": 0.9195735224350171, + "grad_norm": 0.09356432408094406, + "learning_rate": 2.4188340133670427e-06, + "loss": 8.5144, + "step": 184140 + }, + { + "epoch": 0.9196234612599565, + "grad_norm": 0.08702446520328522, + "learning_rate": 2.417332098425493e-06, + "loss": 8.4921, + "step": 184150 + }, + { + "epoch": 0.919673400084896, + "grad_norm": 0.09164425730705261, + "learning_rate": 2.4158301834839422e-06, + "loss": 8.5054, + "step": 184160 + }, + { + "epoch": 0.9197233389098355, + "grad_norm": 0.09433116018772125, + "learning_rate": 2.4143282685423916e-06, + "loss": 8.5058, + "step": 184170 + }, + { + "epoch": 0.9197732777347749, + "grad_norm": 0.09576484560966492, + "learning_rate": 2.412826353600841e-06, + "loss": 8.5018, + "step": 184180 + }, + { + "epoch": 0.9198232165597143, + "grad_norm": 0.09483747184276581, + "learning_rate": 2.4113244386592907e-06, + "loss": 8.5188, + "step": 184190 + }, + { + "epoch": 0.9198731553846538, + "grad_norm": 0.09752367436885834, + "learning_rate": 2.4098225237177404e-06, + "loss": 8.5201, + "step": 184200 + }, + { + "epoch": 0.9199230942095933, + "grad_norm": 0.0914745032787323, + "learning_rate": 2.4083206087761897e-06, + "loss": 8.5139, + "step": 184210 + }, + { + "epoch": 0.9199730330345327, + "grad_norm": 0.08465293794870377, + "learning_rate": 2.406818693834639e-06, + "loss": 8.5177, + "step": 184220 + }, + { + "epoch": 0.9200229718594721, + "grad_norm": 0.09186521172523499, + "learning_rate": 2.4053167788930884e-06, + "loss": 8.512, + "step": 184230 + }, + { + "epoch": 0.9200729106844115, + "grad_norm": 0.09527092427015305, + "learning_rate": 2.403814863951538e-06, + "loss": 8.5174, + "step": 184240 + }, + { + "epoch": 0.9201228495093511, + "grad_norm": 0.08546338975429535, + "learning_rate": 2.402312949009988e-06, + "loss": 8.5272, + "step": 184250 + }, + { + "epoch": 0.9201727883342905, + "grad_norm": 0.09446820616722107, + "learning_rate": 2.4008110340684373e-06, + "loss": 8.5021, + "step": 184260 + }, + { + "epoch": 0.9202227271592299, + "grad_norm": 0.09014095366001129, + "learning_rate": 2.3993091191268866e-06, + "loss": 8.5166, + "step": 184270 + }, + { + "epoch": 0.9202726659841693, + "grad_norm": 0.09346702694892883, + "learning_rate": 2.3978072041853363e-06, + "loss": 8.5163, + "step": 184280 + }, + { + "epoch": 0.9203226048091089, + "grad_norm": 0.09391672164201736, + "learning_rate": 2.396305289243786e-06, + "loss": 8.5082, + "step": 184290 + }, + { + "epoch": 0.9203725436340483, + "grad_norm": 0.09350437670946121, + "learning_rate": 2.3948033743022354e-06, + "loss": 8.5018, + "step": 184300 + }, + { + "epoch": 0.9204224824589877, + "grad_norm": 0.09567178785800934, + "learning_rate": 2.3933014593606848e-06, + "loss": 8.5052, + "step": 184310 + }, + { + "epoch": 0.9204724212839271, + "grad_norm": 0.09135613590478897, + "learning_rate": 2.3917995444191345e-06, + "loss": 8.5153, + "step": 184320 + }, + { + "epoch": 0.9205223601088667, + "grad_norm": 0.08975464105606079, + "learning_rate": 2.390297629477584e-06, + "loss": 8.5067, + "step": 184330 + }, + { + "epoch": 0.9205722989338061, + "grad_norm": 0.0906573086977005, + "learning_rate": 2.3887957145360336e-06, + "loss": 8.5185, + "step": 184340 + }, + { + "epoch": 0.9206222377587455, + "grad_norm": 0.09258557856082916, + "learning_rate": 2.387293799594483e-06, + "loss": 8.5008, + "step": 184350 + }, + { + "epoch": 0.9206721765836849, + "grad_norm": 0.09056249260902405, + "learning_rate": 2.3857918846529327e-06, + "loss": 8.5061, + "step": 184360 + }, + { + "epoch": 0.9207221154086245, + "grad_norm": 0.09652728587388992, + "learning_rate": 2.384289969711382e-06, + "loss": 8.4943, + "step": 184370 + }, + { + "epoch": 0.9207720542335639, + "grad_norm": 0.08950930088758469, + "learning_rate": 2.3827880547698313e-06, + "loss": 8.5062, + "step": 184380 + }, + { + "epoch": 0.9208219930585033, + "grad_norm": 0.09195245802402496, + "learning_rate": 2.381286139828281e-06, + "loss": 8.4987, + "step": 184390 + }, + { + "epoch": 0.9208719318834427, + "grad_norm": 0.08976045995950699, + "learning_rate": 2.379784224886731e-06, + "loss": 8.5078, + "step": 184400 + }, + { + "epoch": 0.9209218707083823, + "grad_norm": 0.08620437234640121, + "learning_rate": 2.37828230994518e-06, + "loss": 8.5356, + "step": 184410 + }, + { + "epoch": 0.9209718095333217, + "grad_norm": 0.08748499304056168, + "learning_rate": 2.3767803950036295e-06, + "loss": 8.5128, + "step": 184420 + }, + { + "epoch": 0.9210217483582611, + "grad_norm": 0.08978407084941864, + "learning_rate": 2.375278480062079e-06, + "loss": 8.508, + "step": 184430 + }, + { + "epoch": 0.9210716871832005, + "grad_norm": 0.09201885014772415, + "learning_rate": 2.373776565120529e-06, + "loss": 8.4949, + "step": 184440 + }, + { + "epoch": 0.9211216260081401, + "grad_norm": 0.09136571735143661, + "learning_rate": 2.3722746501789784e-06, + "loss": 8.5061, + "step": 184450 + }, + { + "epoch": 0.9211715648330795, + "grad_norm": 0.09248650074005127, + "learning_rate": 2.3707727352374277e-06, + "loss": 8.4964, + "step": 184460 + }, + { + "epoch": 0.9212215036580189, + "grad_norm": 0.08990224450826645, + "learning_rate": 2.369270820295877e-06, + "loss": 8.51, + "step": 184470 + }, + { + "epoch": 0.9212714424829583, + "grad_norm": 0.09131959080696106, + "learning_rate": 2.3677689053543268e-06, + "loss": 8.5172, + "step": 184480 + }, + { + "epoch": 0.9213213813078979, + "grad_norm": 0.09705401957035065, + "learning_rate": 2.3662669904127765e-06, + "loss": 8.5212, + "step": 184490 + }, + { + "epoch": 0.9213713201328373, + "grad_norm": 0.09412173926830292, + "learning_rate": 2.364765075471226e-06, + "loss": 8.4953, + "step": 184500 + }, + { + "epoch": 0.9214212589577767, + "grad_norm": 0.08390575647354126, + "learning_rate": 2.363263160529675e-06, + "loss": 8.5015, + "step": 184510 + }, + { + "epoch": 0.9214711977827161, + "grad_norm": 0.09075505286455154, + "learning_rate": 2.361761245588125e-06, + "loss": 8.488, + "step": 184520 + }, + { + "epoch": 0.9215211366076557, + "grad_norm": 0.08866789937019348, + "learning_rate": 2.3602593306465743e-06, + "loss": 8.5028, + "step": 184530 + }, + { + "epoch": 0.9215710754325951, + "grad_norm": 0.09287255257368088, + "learning_rate": 2.358757415705024e-06, + "loss": 8.5005, + "step": 184540 + }, + { + "epoch": 0.9216210142575345, + "grad_norm": 0.08919494599103928, + "learning_rate": 2.3572555007634734e-06, + "loss": 8.5096, + "step": 184550 + }, + { + "epoch": 0.9216709530824739, + "grad_norm": 0.09100577980279922, + "learning_rate": 2.355753585821923e-06, + "loss": 8.4997, + "step": 184560 + }, + { + "epoch": 0.9217208919074135, + "grad_norm": 0.09456155449151993, + "learning_rate": 2.3542516708803725e-06, + "loss": 8.4924, + "step": 184570 + }, + { + "epoch": 0.9217708307323529, + "grad_norm": 0.09156771004199982, + "learning_rate": 2.3527497559388218e-06, + "loss": 8.5267, + "step": 184580 + }, + { + "epoch": 0.9218207695572923, + "grad_norm": 0.08734113723039627, + "learning_rate": 2.3512478409972715e-06, + "loss": 8.4983, + "step": 184590 + }, + { + "epoch": 0.9218707083822317, + "grad_norm": 0.09091001003980637, + "learning_rate": 2.3497459260557213e-06, + "loss": 8.518, + "step": 184600 + }, + { + "epoch": 0.9219206472071713, + "grad_norm": 0.08664854615926743, + "learning_rate": 2.3482440111141706e-06, + "loss": 8.5167, + "step": 184610 + }, + { + "epoch": 0.9219705860321107, + "grad_norm": 0.08995333313941956, + "learning_rate": 2.34674209617262e-06, + "loss": 8.4968, + "step": 184620 + }, + { + "epoch": 0.9220205248570501, + "grad_norm": 0.09020540118217468, + "learning_rate": 2.3452401812310693e-06, + "loss": 8.4935, + "step": 184630 + }, + { + "epoch": 0.9220704636819895, + "grad_norm": 0.08525863289833069, + "learning_rate": 2.3437382662895195e-06, + "loss": 8.512, + "step": 184640 + }, + { + "epoch": 0.9221204025069291, + "grad_norm": 0.09016373753547668, + "learning_rate": 2.342236351347969e-06, + "loss": 8.4992, + "step": 184650 + }, + { + "epoch": 0.9221703413318685, + "grad_norm": 0.09463771432638168, + "learning_rate": 2.340734436406418e-06, + "loss": 8.4842, + "step": 184660 + }, + { + "epoch": 0.9222202801568079, + "grad_norm": 0.08823353797197342, + "learning_rate": 2.3392325214648675e-06, + "loss": 8.4996, + "step": 184670 + }, + { + "epoch": 0.9222702189817473, + "grad_norm": 0.09164861589670181, + "learning_rate": 2.3377306065233172e-06, + "loss": 8.5169, + "step": 184680 + }, + { + "epoch": 0.9223201578066869, + "grad_norm": 0.092039093375206, + "learning_rate": 2.336228691581767e-06, + "loss": 8.5056, + "step": 184690 + }, + { + "epoch": 0.9223700966316263, + "grad_norm": 0.09081801027059555, + "learning_rate": 2.3347267766402163e-06, + "loss": 8.4922, + "step": 184700 + }, + { + "epoch": 0.9224200354565657, + "grad_norm": 0.09290160983800888, + "learning_rate": 2.3332248616986656e-06, + "loss": 8.5182, + "step": 184710 + }, + { + "epoch": 0.9224699742815051, + "grad_norm": 0.08207748085260391, + "learning_rate": 2.3317229467571154e-06, + "loss": 8.5093, + "step": 184720 + }, + { + "epoch": 0.9225199131064447, + "grad_norm": 0.09553519636392593, + "learning_rate": 2.3302210318155647e-06, + "loss": 8.5143, + "step": 184730 + }, + { + "epoch": 0.9225698519313841, + "grad_norm": 0.09908508509397507, + "learning_rate": 2.3287191168740145e-06, + "loss": 8.5059, + "step": 184740 + }, + { + "epoch": 0.9226197907563235, + "grad_norm": 0.0906306803226471, + "learning_rate": 2.327217201932464e-06, + "loss": 8.5063, + "step": 184750 + }, + { + "epoch": 0.9226697295812629, + "grad_norm": 0.09021761268377304, + "learning_rate": 2.3257152869909136e-06, + "loss": 8.51, + "step": 184760 + }, + { + "epoch": 0.9227196684062025, + "grad_norm": 0.09584904462099075, + "learning_rate": 2.324213372049363e-06, + "loss": 8.4976, + "step": 184770 + }, + { + "epoch": 0.9227696072311419, + "grad_norm": 0.08946247398853302, + "learning_rate": 2.3227114571078126e-06, + "loss": 8.5217, + "step": 184780 + }, + { + "epoch": 0.9228195460560813, + "grad_norm": 0.09100887924432755, + "learning_rate": 2.321209542166262e-06, + "loss": 8.511, + "step": 184790 + }, + { + "epoch": 0.9228694848810207, + "grad_norm": 0.08670665323734283, + "learning_rate": 2.3197076272247117e-06, + "loss": 8.5134, + "step": 184800 + }, + { + "epoch": 0.9229194237059603, + "grad_norm": 0.09500810503959656, + "learning_rate": 2.318205712283161e-06, + "loss": 8.4904, + "step": 184810 + }, + { + "epoch": 0.9229693625308997, + "grad_norm": 0.09684941917657852, + "learning_rate": 2.3167037973416104e-06, + "loss": 8.5025, + "step": 184820 + }, + { + "epoch": 0.9230193013558391, + "grad_norm": 0.08873332291841507, + "learning_rate": 2.31520188240006e-06, + "loss": 8.4923, + "step": 184830 + }, + { + "epoch": 0.9230692401807785, + "grad_norm": 0.0964055061340332, + "learning_rate": 2.31369996745851e-06, + "loss": 8.5126, + "step": 184840 + }, + { + "epoch": 0.923119179005718, + "grad_norm": 0.09410259127616882, + "learning_rate": 2.3121980525169592e-06, + "loss": 8.5124, + "step": 184850 + }, + { + "epoch": 0.9231691178306575, + "grad_norm": 0.09654883295297623, + "learning_rate": 2.3106961375754086e-06, + "loss": 8.5073, + "step": 184860 + }, + { + "epoch": 0.9232190566555969, + "grad_norm": 0.08777954429388046, + "learning_rate": 2.309194222633858e-06, + "loss": 8.5067, + "step": 184870 + }, + { + "epoch": 0.9232689954805363, + "grad_norm": 0.09537537395954132, + "learning_rate": 2.307692307692308e-06, + "loss": 8.4888, + "step": 184880 + }, + { + "epoch": 0.9233189343054758, + "grad_norm": 0.08902040123939514, + "learning_rate": 2.3061903927507574e-06, + "loss": 8.5042, + "step": 184890 + }, + { + "epoch": 0.9233688731304153, + "grad_norm": 0.09366223216056824, + "learning_rate": 2.3046884778092067e-06, + "loss": 8.5255, + "step": 184900 + }, + { + "epoch": 0.9234188119553547, + "grad_norm": 0.08827183395624161, + "learning_rate": 2.303186562867656e-06, + "loss": 8.5054, + "step": 184910 + }, + { + "epoch": 0.9234687507802941, + "grad_norm": 0.09339195489883423, + "learning_rate": 2.301684647926106e-06, + "loss": 8.5159, + "step": 184920 + }, + { + "epoch": 0.9235186896052335, + "grad_norm": 0.09930474311113358, + "learning_rate": 2.3001827329845556e-06, + "loss": 8.4914, + "step": 184930 + }, + { + "epoch": 0.9235686284301731, + "grad_norm": 0.09311912208795547, + "learning_rate": 2.298680818043005e-06, + "loss": 8.513, + "step": 184940 + }, + { + "epoch": 0.9236185672551125, + "grad_norm": 0.09054923057556152, + "learning_rate": 2.2971789031014542e-06, + "loss": 8.5079, + "step": 184950 + }, + { + "epoch": 0.9236685060800519, + "grad_norm": 0.0914381593465805, + "learning_rate": 2.295676988159904e-06, + "loss": 8.5086, + "step": 184960 + }, + { + "epoch": 0.9237184449049913, + "grad_norm": 0.09014654159545898, + "learning_rate": 2.2941750732183533e-06, + "loss": 8.5171, + "step": 184970 + }, + { + "epoch": 0.9237683837299309, + "grad_norm": 0.0892617329955101, + "learning_rate": 2.292673158276803e-06, + "loss": 8.5193, + "step": 184980 + }, + { + "epoch": 0.9238183225548703, + "grad_norm": 0.09248284995555878, + "learning_rate": 2.2911712433352524e-06, + "loss": 8.5127, + "step": 184990 + }, + { + "epoch": 0.9238682613798097, + "grad_norm": 0.08743773400783539, + "learning_rate": 2.289669328393702e-06, + "loss": 8.523, + "step": 185000 + }, + { + "epoch": 0.9239182002047491, + "grad_norm": 0.08780092000961304, + "learning_rate": 2.2881674134521515e-06, + "loss": 8.5022, + "step": 185010 + }, + { + "epoch": 0.9239681390296887, + "grad_norm": 0.09048131853342056, + "learning_rate": 2.286665498510601e-06, + "loss": 8.5004, + "step": 185020 + }, + { + "epoch": 0.9240180778546281, + "grad_norm": 0.089464470744133, + "learning_rate": 2.2851635835690506e-06, + "loss": 8.51, + "step": 185030 + }, + { + "epoch": 0.9240680166795675, + "grad_norm": 0.09030899405479431, + "learning_rate": 2.2836616686275003e-06, + "loss": 8.5044, + "step": 185040 + }, + { + "epoch": 0.9241179555045069, + "grad_norm": 0.08874811232089996, + "learning_rate": 2.2821597536859497e-06, + "loss": 8.5056, + "step": 185050 + }, + { + "epoch": 0.9241678943294465, + "grad_norm": 0.08877728879451752, + "learning_rate": 2.280657838744399e-06, + "loss": 8.5067, + "step": 185060 + }, + { + "epoch": 0.9242178331543859, + "grad_norm": 0.09442177414894104, + "learning_rate": 2.2791559238028483e-06, + "loss": 8.5198, + "step": 185070 + }, + { + "epoch": 0.9242677719793253, + "grad_norm": 0.09836707264184952, + "learning_rate": 2.2776540088612985e-06, + "loss": 8.4915, + "step": 185080 + }, + { + "epoch": 0.9243177108042647, + "grad_norm": 0.08862404525279999, + "learning_rate": 2.276152093919748e-06, + "loss": 8.4892, + "step": 185090 + }, + { + "epoch": 0.9243676496292043, + "grad_norm": 0.09015829116106033, + "learning_rate": 2.274650178978197e-06, + "loss": 8.5203, + "step": 185100 + }, + { + "epoch": 0.9244175884541437, + "grad_norm": 0.0880509465932846, + "learning_rate": 2.2731482640366465e-06, + "loss": 8.5141, + "step": 185110 + }, + { + "epoch": 0.9244675272790831, + "grad_norm": 0.09141357243061066, + "learning_rate": 2.2716463490950963e-06, + "loss": 8.5056, + "step": 185120 + }, + { + "epoch": 0.9245174661040225, + "grad_norm": 0.09560871124267578, + "learning_rate": 2.270144434153546e-06, + "loss": 8.4859, + "step": 185130 + }, + { + "epoch": 0.9245674049289621, + "grad_norm": 0.09067047387361526, + "learning_rate": 2.2686425192119953e-06, + "loss": 8.5161, + "step": 185140 + }, + { + "epoch": 0.9246173437539015, + "grad_norm": 0.08690574020147324, + "learning_rate": 2.2671406042704447e-06, + "loss": 8.5297, + "step": 185150 + }, + { + "epoch": 0.9246672825788409, + "grad_norm": 0.09214247763156891, + "learning_rate": 2.2656386893288944e-06, + "loss": 8.5163, + "step": 185160 + }, + { + "epoch": 0.9247172214037803, + "grad_norm": 0.08881056308746338, + "learning_rate": 2.2641367743873438e-06, + "loss": 8.5007, + "step": 185170 + }, + { + "epoch": 0.9247671602287199, + "grad_norm": 0.09248036891222, + "learning_rate": 2.2626348594457935e-06, + "loss": 8.4997, + "step": 185180 + }, + { + "epoch": 0.9248170990536593, + "grad_norm": 0.08973632007837296, + "learning_rate": 2.261132944504243e-06, + "loss": 8.5202, + "step": 185190 + }, + { + "epoch": 0.9248670378785987, + "grad_norm": 0.09075845032930374, + "learning_rate": 2.2596310295626926e-06, + "loss": 8.4875, + "step": 185200 + }, + { + "epoch": 0.9249169767035381, + "grad_norm": 0.09289133548736572, + "learning_rate": 2.258129114621142e-06, + "loss": 8.5093, + "step": 185210 + }, + { + "epoch": 0.9249669155284777, + "grad_norm": 0.0958157405257225, + "learning_rate": 2.2566271996795913e-06, + "loss": 8.5047, + "step": 185220 + }, + { + "epoch": 0.9250168543534171, + "grad_norm": 0.09263189136981964, + "learning_rate": 2.255125284738041e-06, + "loss": 8.5116, + "step": 185230 + }, + { + "epoch": 0.9250667931783565, + "grad_norm": 0.08901172876358032, + "learning_rate": 2.2536233697964908e-06, + "loss": 8.4862, + "step": 185240 + }, + { + "epoch": 0.9251167320032959, + "grad_norm": 0.09158726781606674, + "learning_rate": 2.25212145485494e-06, + "loss": 8.5025, + "step": 185250 + }, + { + "epoch": 0.9251666708282354, + "grad_norm": 0.08551083505153656, + "learning_rate": 2.2506195399133894e-06, + "loss": 8.5062, + "step": 185260 + }, + { + "epoch": 0.9252166096531749, + "grad_norm": 0.09838327020406723, + "learning_rate": 2.2491176249718388e-06, + "loss": 8.5046, + "step": 185270 + }, + { + "epoch": 0.9252665484781143, + "grad_norm": 0.0905366763472557, + "learning_rate": 2.247615710030289e-06, + "loss": 8.5041, + "step": 185280 + }, + { + "epoch": 0.9253164873030537, + "grad_norm": 0.09381816536188126, + "learning_rate": 2.2461137950887383e-06, + "loss": 8.5001, + "step": 185290 + }, + { + "epoch": 0.9253664261279932, + "grad_norm": 0.09462891519069672, + "learning_rate": 2.2446118801471876e-06, + "loss": 8.505, + "step": 185300 + }, + { + "epoch": 0.9254163649529327, + "grad_norm": 0.09308251738548279, + "learning_rate": 2.243109965205637e-06, + "loss": 8.5048, + "step": 185310 + }, + { + "epoch": 0.9254663037778721, + "grad_norm": 0.09172924607992172, + "learning_rate": 2.2416080502640867e-06, + "loss": 8.5165, + "step": 185320 + }, + { + "epoch": 0.9255162426028115, + "grad_norm": 0.09043785184621811, + "learning_rate": 2.2401061353225365e-06, + "loss": 8.5263, + "step": 185330 + }, + { + "epoch": 0.925566181427751, + "grad_norm": 0.08776098489761353, + "learning_rate": 2.2386042203809858e-06, + "loss": 8.5158, + "step": 185340 + }, + { + "epoch": 0.9256161202526905, + "grad_norm": 0.09275338798761368, + "learning_rate": 2.237102305439435e-06, + "loss": 8.5181, + "step": 185350 + }, + { + "epoch": 0.9256660590776299, + "grad_norm": 0.09385108202695847, + "learning_rate": 2.235600390497885e-06, + "loss": 8.4931, + "step": 185360 + }, + { + "epoch": 0.9257159979025693, + "grad_norm": 0.0922423005104065, + "learning_rate": 2.2340984755563346e-06, + "loss": 8.5035, + "step": 185370 + }, + { + "epoch": 0.9257659367275088, + "grad_norm": 0.08742625266313553, + "learning_rate": 2.232596560614784e-06, + "loss": 8.5218, + "step": 185380 + }, + { + "epoch": 0.9258158755524483, + "grad_norm": 0.08818572014570236, + "learning_rate": 2.2310946456732333e-06, + "loss": 8.4944, + "step": 185390 + }, + { + "epoch": 0.9258658143773877, + "grad_norm": 0.0831325426697731, + "learning_rate": 2.229592730731683e-06, + "loss": 8.5027, + "step": 185400 + }, + { + "epoch": 0.9259157532023271, + "grad_norm": 0.09564302861690521, + "learning_rate": 2.2280908157901324e-06, + "loss": 8.485, + "step": 185410 + }, + { + "epoch": 0.9259656920272666, + "grad_norm": 0.08953244984149933, + "learning_rate": 2.226588900848582e-06, + "loss": 8.5, + "step": 185420 + }, + { + "epoch": 0.9260156308522061, + "grad_norm": 0.09346967190504074, + "learning_rate": 2.2250869859070315e-06, + "loss": 8.5088, + "step": 185430 + }, + { + "epoch": 0.9260655696771455, + "grad_norm": 0.09185745567083359, + "learning_rate": 2.2235850709654812e-06, + "loss": 8.5048, + "step": 185440 + }, + { + "epoch": 0.9261155085020849, + "grad_norm": 0.08855016529560089, + "learning_rate": 2.2220831560239305e-06, + "loss": 8.5062, + "step": 185450 + }, + { + "epoch": 0.9261654473270244, + "grad_norm": 0.09117954969406128, + "learning_rate": 2.22058124108238e-06, + "loss": 8.5, + "step": 185460 + }, + { + "epoch": 0.9262153861519639, + "grad_norm": 0.09156261384487152, + "learning_rate": 2.2190793261408296e-06, + "loss": 8.5025, + "step": 185470 + }, + { + "epoch": 0.9262653249769033, + "grad_norm": 0.09079194068908691, + "learning_rate": 2.2175774111992794e-06, + "loss": 8.5112, + "step": 185480 + }, + { + "epoch": 0.9263152638018427, + "grad_norm": 0.09392797201871872, + "learning_rate": 2.2160754962577287e-06, + "loss": 8.5097, + "step": 185490 + }, + { + "epoch": 0.9263652026267822, + "grad_norm": 0.09081744402647018, + "learning_rate": 2.214573581316178e-06, + "loss": 8.5035, + "step": 185500 + }, + { + "epoch": 0.9264151414517217, + "grad_norm": 0.09045954048633575, + "learning_rate": 2.2130716663746274e-06, + "loss": 8.4953, + "step": 185510 + }, + { + "epoch": 0.9264650802766611, + "grad_norm": 0.08934304118156433, + "learning_rate": 2.2115697514330776e-06, + "loss": 8.4913, + "step": 185520 + }, + { + "epoch": 0.9265150191016005, + "grad_norm": 0.0947798490524292, + "learning_rate": 2.210067836491527e-06, + "loss": 8.4957, + "step": 185530 + }, + { + "epoch": 0.92656495792654, + "grad_norm": 0.09003657102584839, + "learning_rate": 2.2085659215499762e-06, + "loss": 8.4949, + "step": 185540 + }, + { + "epoch": 0.9266148967514795, + "grad_norm": 0.08870958536863327, + "learning_rate": 2.2070640066084256e-06, + "loss": 8.5088, + "step": 185550 + }, + { + "epoch": 0.9266648355764189, + "grad_norm": 0.09150302410125732, + "learning_rate": 2.2055620916668753e-06, + "loss": 8.5054, + "step": 185560 + }, + { + "epoch": 0.9267147744013583, + "grad_norm": 0.08964120596647263, + "learning_rate": 2.204060176725325e-06, + "loss": 8.5195, + "step": 185570 + }, + { + "epoch": 0.9267647132262978, + "grad_norm": 0.0831717923283577, + "learning_rate": 2.2025582617837744e-06, + "loss": 8.5152, + "step": 185580 + }, + { + "epoch": 0.9268146520512373, + "grad_norm": 0.09838960319757462, + "learning_rate": 2.2010563468422237e-06, + "loss": 8.4911, + "step": 185590 + }, + { + "epoch": 0.9268645908761767, + "grad_norm": 0.08773133903741837, + "learning_rate": 2.1995544319006735e-06, + "loss": 8.5124, + "step": 185600 + }, + { + "epoch": 0.9269145297011161, + "grad_norm": 0.09229285269975662, + "learning_rate": 2.198052516959123e-06, + "loss": 8.4904, + "step": 185610 + }, + { + "epoch": 0.9269644685260556, + "grad_norm": 0.087303526699543, + "learning_rate": 2.1965506020175726e-06, + "loss": 8.5135, + "step": 185620 + }, + { + "epoch": 0.927014407350995, + "grad_norm": 0.09083783626556396, + "learning_rate": 2.195048687076022e-06, + "loss": 8.4835, + "step": 185630 + }, + { + "epoch": 0.9270643461759345, + "grad_norm": 0.09213201701641083, + "learning_rate": 2.1935467721344717e-06, + "loss": 8.5163, + "step": 185640 + }, + { + "epoch": 0.9271142850008739, + "grad_norm": 0.09071563184261322, + "learning_rate": 2.192044857192921e-06, + "loss": 8.5098, + "step": 185650 + }, + { + "epoch": 0.9271642238258134, + "grad_norm": 0.08385904878377914, + "learning_rate": 2.1905429422513703e-06, + "loss": 8.503, + "step": 185660 + }, + { + "epoch": 0.9272141626507528, + "grad_norm": 0.09102963656187057, + "learning_rate": 2.18904102730982e-06, + "loss": 8.4991, + "step": 185670 + }, + { + "epoch": 0.9272641014756923, + "grad_norm": 0.08781067281961441, + "learning_rate": 2.18753911236827e-06, + "loss": 8.5112, + "step": 185680 + }, + { + "epoch": 0.9273140403006317, + "grad_norm": 0.09064462780952454, + "learning_rate": 2.186037197426719e-06, + "loss": 8.5103, + "step": 185690 + }, + { + "epoch": 0.9273639791255712, + "grad_norm": 0.09395837038755417, + "learning_rate": 2.1845352824851685e-06, + "loss": 8.5011, + "step": 185700 + }, + { + "epoch": 0.9274139179505106, + "grad_norm": 0.08788859099149704, + "learning_rate": 2.183033367543618e-06, + "loss": 8.4993, + "step": 185710 + }, + { + "epoch": 0.9274638567754501, + "grad_norm": 0.09015844762325287, + "learning_rate": 2.181531452602068e-06, + "loss": 8.5091, + "step": 185720 + }, + { + "epoch": 0.9275137956003895, + "grad_norm": 0.08770440518856049, + "learning_rate": 2.1800295376605173e-06, + "loss": 8.4965, + "step": 185730 + }, + { + "epoch": 0.927563734425329, + "grad_norm": 0.09498754143714905, + "learning_rate": 2.1785276227189667e-06, + "loss": 8.4999, + "step": 185740 + }, + { + "epoch": 0.9276136732502684, + "grad_norm": 0.09174376726150513, + "learning_rate": 2.177025707777416e-06, + "loss": 8.509, + "step": 185750 + }, + { + "epoch": 0.9276636120752079, + "grad_norm": 0.08917669951915741, + "learning_rate": 2.1755237928358658e-06, + "loss": 8.5181, + "step": 185760 + }, + { + "epoch": 0.9277135509001473, + "grad_norm": 0.0941305160522461, + "learning_rate": 2.1740218778943155e-06, + "loss": 8.5051, + "step": 185770 + }, + { + "epoch": 0.9277634897250868, + "grad_norm": 0.09079430997371674, + "learning_rate": 2.172519962952765e-06, + "loss": 8.515, + "step": 185780 + }, + { + "epoch": 0.9278134285500262, + "grad_norm": 0.09007134288549423, + "learning_rate": 2.171018048011214e-06, + "loss": 8.4849, + "step": 185790 + }, + { + "epoch": 0.9278633673749657, + "grad_norm": 0.09367641806602478, + "learning_rate": 2.169516133069664e-06, + "loss": 8.5065, + "step": 185800 + }, + { + "epoch": 0.9279133061999051, + "grad_norm": 0.08902136236429214, + "learning_rate": 2.1680142181281133e-06, + "loss": 8.5059, + "step": 185810 + }, + { + "epoch": 0.9279632450248446, + "grad_norm": 0.08988285809755325, + "learning_rate": 2.166512303186563e-06, + "loss": 8.5104, + "step": 185820 + }, + { + "epoch": 0.928013183849784, + "grad_norm": 0.09199918806552887, + "learning_rate": 2.1650103882450123e-06, + "loss": 8.5187, + "step": 185830 + }, + { + "epoch": 0.9280631226747235, + "grad_norm": 0.08583081513643265, + "learning_rate": 2.163508473303462e-06, + "loss": 8.4948, + "step": 185840 + }, + { + "epoch": 0.9281130614996629, + "grad_norm": 0.09111568331718445, + "learning_rate": 2.1620065583619114e-06, + "loss": 8.5025, + "step": 185850 + }, + { + "epoch": 0.9281630003246024, + "grad_norm": 0.08850061148405075, + "learning_rate": 2.160504643420361e-06, + "loss": 8.5003, + "step": 185860 + }, + { + "epoch": 0.9282129391495418, + "grad_norm": 0.09337007999420166, + "learning_rate": 2.1590027284788105e-06, + "loss": 8.5119, + "step": 185870 + }, + { + "epoch": 0.9282628779744813, + "grad_norm": 0.08924791216850281, + "learning_rate": 2.1575008135372603e-06, + "loss": 8.5174, + "step": 185880 + }, + { + "epoch": 0.9283128167994207, + "grad_norm": 0.09130074083805084, + "learning_rate": 2.1559988985957096e-06, + "loss": 8.5219, + "step": 185890 + }, + { + "epoch": 0.9283627556243602, + "grad_norm": 0.0915064811706543, + "learning_rate": 2.154496983654159e-06, + "loss": 8.5056, + "step": 185900 + }, + { + "epoch": 0.9284126944492996, + "grad_norm": 0.09288337826728821, + "learning_rate": 2.1529950687126087e-06, + "loss": 8.4999, + "step": 185910 + }, + { + "epoch": 0.9284626332742391, + "grad_norm": 0.08676610141992569, + "learning_rate": 2.1514931537710584e-06, + "loss": 8.514, + "step": 185920 + }, + { + "epoch": 0.9285125720991785, + "grad_norm": 0.08682654052972794, + "learning_rate": 2.1499912388295078e-06, + "loss": 8.5044, + "step": 185930 + }, + { + "epoch": 0.9285625109241179, + "grad_norm": 0.0899660736322403, + "learning_rate": 2.148489323887957e-06, + "loss": 8.5023, + "step": 185940 + }, + { + "epoch": 0.9286124497490574, + "grad_norm": 0.09490537643432617, + "learning_rate": 2.1469874089464064e-06, + "loss": 8.4954, + "step": 185950 + }, + { + "epoch": 0.9286623885739969, + "grad_norm": 0.08999119699001312, + "learning_rate": 2.1454854940048566e-06, + "loss": 8.4927, + "step": 185960 + }, + { + "epoch": 0.9287123273989363, + "grad_norm": 0.09169169515371323, + "learning_rate": 2.143983579063306e-06, + "loss": 8.5092, + "step": 185970 + }, + { + "epoch": 0.9287622662238757, + "grad_norm": 0.09515541046857834, + "learning_rate": 2.1424816641217553e-06, + "loss": 8.4973, + "step": 185980 + }, + { + "epoch": 0.9288122050488152, + "grad_norm": 0.09675678610801697, + "learning_rate": 2.1409797491802046e-06, + "loss": 8.49, + "step": 185990 + }, + { + "epoch": 0.9288621438737547, + "grad_norm": 0.08823744207620621, + "learning_rate": 2.1394778342386544e-06, + "loss": 8.512, + "step": 186000 + }, + { + "epoch": 0.9289120826986941, + "grad_norm": 0.09296853095293045, + "learning_rate": 2.137975919297104e-06, + "loss": 8.5053, + "step": 186010 + }, + { + "epoch": 0.9289620215236335, + "grad_norm": 0.08902104943990707, + "learning_rate": 2.1364740043555534e-06, + "loss": 8.5037, + "step": 186020 + }, + { + "epoch": 0.929011960348573, + "grad_norm": 0.094472736120224, + "learning_rate": 2.1349720894140028e-06, + "loss": 8.4983, + "step": 186030 + }, + { + "epoch": 0.9290618991735125, + "grad_norm": 0.09332358092069626, + "learning_rate": 2.1334701744724525e-06, + "loss": 8.5144, + "step": 186040 + }, + { + "epoch": 0.9291118379984519, + "grad_norm": 0.08705665916204453, + "learning_rate": 2.131968259530902e-06, + "loss": 8.5012, + "step": 186050 + }, + { + "epoch": 0.9291617768233913, + "grad_norm": 0.08535271137952805, + "learning_rate": 2.1304663445893516e-06, + "loss": 8.5141, + "step": 186060 + }, + { + "epoch": 0.9292117156483308, + "grad_norm": 0.09060922265052795, + "learning_rate": 2.128964429647801e-06, + "loss": 8.507, + "step": 186070 + }, + { + "epoch": 0.9292616544732702, + "grad_norm": 0.09175438433885574, + "learning_rate": 2.1274625147062507e-06, + "loss": 8.5066, + "step": 186080 + }, + { + "epoch": 0.9293115932982097, + "grad_norm": 0.08872448652982712, + "learning_rate": 2.1259605997647e-06, + "loss": 8.5203, + "step": 186090 + }, + { + "epoch": 0.9293615321231491, + "grad_norm": 0.0916357934474945, + "learning_rate": 2.1244586848231494e-06, + "loss": 8.5022, + "step": 186100 + }, + { + "epoch": 0.9294114709480886, + "grad_norm": 0.08553195744752884, + "learning_rate": 2.122956769881599e-06, + "loss": 8.5101, + "step": 186110 + }, + { + "epoch": 0.929461409773028, + "grad_norm": 0.09251870959997177, + "learning_rate": 2.121454854940049e-06, + "loss": 8.5139, + "step": 186120 + }, + { + "epoch": 0.9295113485979675, + "grad_norm": 0.0898064374923706, + "learning_rate": 2.119952939998498e-06, + "loss": 8.5184, + "step": 186130 + }, + { + "epoch": 0.9295612874229069, + "grad_norm": 0.09209618717432022, + "learning_rate": 2.1184510250569475e-06, + "loss": 8.5068, + "step": 186140 + }, + { + "epoch": 0.9296112262478464, + "grad_norm": 0.08947913348674774, + "learning_rate": 2.116949110115397e-06, + "loss": 8.512, + "step": 186150 + }, + { + "epoch": 0.9296611650727858, + "grad_norm": 0.08615361899137497, + "learning_rate": 2.115447195173847e-06, + "loss": 8.5051, + "step": 186160 + }, + { + "epoch": 0.9297111038977253, + "grad_norm": 0.08832980692386627, + "learning_rate": 2.1139452802322964e-06, + "loss": 8.5106, + "step": 186170 + }, + { + "epoch": 0.9297610427226647, + "grad_norm": 0.09645520150661469, + "learning_rate": 2.1124433652907457e-06, + "loss": 8.4926, + "step": 186180 + }, + { + "epoch": 0.9298109815476042, + "grad_norm": 0.09340712428092957, + "learning_rate": 2.110941450349195e-06, + "loss": 8.4987, + "step": 186190 + }, + { + "epoch": 0.9298609203725436, + "grad_norm": 0.08868103474378586, + "learning_rate": 2.109439535407645e-06, + "loss": 8.4996, + "step": 186200 + }, + { + "epoch": 0.9299108591974831, + "grad_norm": 0.09115692973136902, + "learning_rate": 2.1079376204660946e-06, + "loss": 8.5084, + "step": 186210 + }, + { + "epoch": 0.9299607980224225, + "grad_norm": 0.0927361398935318, + "learning_rate": 2.106435705524544e-06, + "loss": 8.5019, + "step": 186220 + }, + { + "epoch": 0.930010736847362, + "grad_norm": 0.08556483685970306, + "learning_rate": 2.1049337905829932e-06, + "loss": 8.5105, + "step": 186230 + }, + { + "epoch": 0.9300606756723014, + "grad_norm": 0.09068150818347931, + "learning_rate": 2.103431875641443e-06, + "loss": 8.5143, + "step": 186240 + }, + { + "epoch": 0.9301106144972409, + "grad_norm": 0.09075603634119034, + "learning_rate": 2.1019299606998923e-06, + "loss": 8.5138, + "step": 186250 + }, + { + "epoch": 0.9301605533221803, + "grad_norm": 0.09026709944009781, + "learning_rate": 2.100428045758342e-06, + "loss": 8.5206, + "step": 186260 + }, + { + "epoch": 0.9302104921471198, + "grad_norm": 0.09280247241258621, + "learning_rate": 2.0989261308167914e-06, + "loss": 8.5057, + "step": 186270 + }, + { + "epoch": 0.9302604309720592, + "grad_norm": 0.08981787413358688, + "learning_rate": 2.097424215875241e-06, + "loss": 8.5119, + "step": 186280 + }, + { + "epoch": 0.9303103697969987, + "grad_norm": 0.09122589975595474, + "learning_rate": 2.0959223009336905e-06, + "loss": 8.4926, + "step": 186290 + }, + { + "epoch": 0.9303603086219381, + "grad_norm": 0.09164507687091827, + "learning_rate": 2.09442038599214e-06, + "loss": 8.5077, + "step": 186300 + }, + { + "epoch": 0.9304102474468776, + "grad_norm": 0.089156374335289, + "learning_rate": 2.0929184710505896e-06, + "loss": 8.5004, + "step": 186310 + }, + { + "epoch": 0.930460186271817, + "grad_norm": 0.08851976692676544, + "learning_rate": 2.0914165561090393e-06, + "loss": 8.5076, + "step": 186320 + }, + { + "epoch": 0.9305101250967565, + "grad_norm": 0.08928805589675903, + "learning_rate": 2.0899146411674886e-06, + "loss": 8.5166, + "step": 186330 + }, + { + "epoch": 0.9305600639216959, + "grad_norm": 0.09302154183387756, + "learning_rate": 2.088412726225938e-06, + "loss": 8.504, + "step": 186340 + }, + { + "epoch": 0.9306100027466354, + "grad_norm": 0.09193503111600876, + "learning_rate": 2.0869108112843873e-06, + "loss": 8.5065, + "step": 186350 + }, + { + "epoch": 0.9306599415715748, + "grad_norm": 0.09432563930749893, + "learning_rate": 2.0854088963428375e-06, + "loss": 8.5003, + "step": 186360 + }, + { + "epoch": 0.9307098803965143, + "grad_norm": 0.0901406854391098, + "learning_rate": 2.083906981401287e-06, + "loss": 8.5008, + "step": 186370 + }, + { + "epoch": 0.9307598192214537, + "grad_norm": 0.09133270382881165, + "learning_rate": 2.082405066459736e-06, + "loss": 8.4996, + "step": 186380 + }, + { + "epoch": 0.9308097580463932, + "grad_norm": 0.08889655023813248, + "learning_rate": 2.0809031515181855e-06, + "loss": 8.5021, + "step": 186390 + }, + { + "epoch": 0.9308596968713326, + "grad_norm": 0.08690587431192398, + "learning_rate": 2.0794012365766352e-06, + "loss": 8.4902, + "step": 186400 + }, + { + "epoch": 0.930909635696272, + "grad_norm": 0.08836494386196136, + "learning_rate": 2.077899321635085e-06, + "loss": 8.5141, + "step": 186410 + }, + { + "epoch": 0.9309595745212115, + "grad_norm": 0.09551975876092911, + "learning_rate": 2.0763974066935343e-06, + "loss": 8.4937, + "step": 186420 + }, + { + "epoch": 0.931009513346151, + "grad_norm": 0.08524799346923828, + "learning_rate": 2.0748954917519837e-06, + "loss": 8.5133, + "step": 186430 + }, + { + "epoch": 0.9310594521710904, + "grad_norm": 0.09045319259166718, + "learning_rate": 2.0733935768104334e-06, + "loss": 8.5013, + "step": 186440 + }, + { + "epoch": 0.9311093909960299, + "grad_norm": 0.08845016360282898, + "learning_rate": 2.071891661868883e-06, + "loss": 8.5065, + "step": 186450 + }, + { + "epoch": 0.9311593298209693, + "grad_norm": 0.08880577981472015, + "learning_rate": 2.0703897469273325e-06, + "loss": 8.4971, + "step": 186460 + }, + { + "epoch": 0.9312092686459088, + "grad_norm": 0.08805317431688309, + "learning_rate": 2.068887831985782e-06, + "loss": 8.4976, + "step": 186470 + }, + { + "epoch": 0.9312592074708482, + "grad_norm": 0.08898577839136124, + "learning_rate": 2.0673859170442316e-06, + "loss": 8.4908, + "step": 186480 + }, + { + "epoch": 0.9313091462957876, + "grad_norm": 0.08940175920724869, + "learning_rate": 2.065884002102681e-06, + "loss": 8.5027, + "step": 186490 + }, + { + "epoch": 0.9313590851207271, + "grad_norm": 0.08669328689575195, + "learning_rate": 2.0643820871611307e-06, + "loss": 8.5035, + "step": 186500 + }, + { + "epoch": 0.9314090239456666, + "grad_norm": 0.08564577996730804, + "learning_rate": 2.06288017221958e-06, + "loss": 8.4989, + "step": 186510 + }, + { + "epoch": 0.931458962770606, + "grad_norm": 0.09756655246019363, + "learning_rate": 2.0613782572780298e-06, + "loss": 8.5036, + "step": 186520 + }, + { + "epoch": 0.9315089015955454, + "grad_norm": 0.08926945179700851, + "learning_rate": 2.059876342336479e-06, + "loss": 8.5037, + "step": 186530 + }, + { + "epoch": 0.9315588404204849, + "grad_norm": 0.08959157764911652, + "learning_rate": 2.0583744273949284e-06, + "loss": 8.492, + "step": 186540 + }, + { + "epoch": 0.9316087792454244, + "grad_norm": 0.09301578253507614, + "learning_rate": 2.056872512453378e-06, + "loss": 8.5004, + "step": 186550 + }, + { + "epoch": 0.9316587180703638, + "grad_norm": 0.09195592999458313, + "learning_rate": 2.055370597511828e-06, + "loss": 8.5058, + "step": 186560 + }, + { + "epoch": 0.9317086568953032, + "grad_norm": 0.08996932953596115, + "learning_rate": 2.0538686825702773e-06, + "loss": 8.504, + "step": 186570 + }, + { + "epoch": 0.9317585957202427, + "grad_norm": 0.09332447499036789, + "learning_rate": 2.0523667676287266e-06, + "loss": 8.4953, + "step": 186580 + }, + { + "epoch": 0.9318085345451822, + "grad_norm": 0.08750541508197784, + "learning_rate": 2.050864852687176e-06, + "loss": 8.5028, + "step": 186590 + }, + { + "epoch": 0.9318584733701216, + "grad_norm": 0.08709759265184402, + "learning_rate": 2.049362937745626e-06, + "loss": 8.5079, + "step": 186600 + }, + { + "epoch": 0.931908412195061, + "grad_norm": 0.09282954037189484, + "learning_rate": 2.0478610228040754e-06, + "loss": 8.5121, + "step": 186610 + }, + { + "epoch": 0.9319583510200005, + "grad_norm": 0.08732730150222778, + "learning_rate": 2.0463591078625248e-06, + "loss": 8.5083, + "step": 186620 + }, + { + "epoch": 0.93200828984494, + "grad_norm": 0.09398452192544937, + "learning_rate": 2.044857192920974e-06, + "loss": 8.5088, + "step": 186630 + }, + { + "epoch": 0.9320582286698794, + "grad_norm": 0.09803569316864014, + "learning_rate": 2.043355277979424e-06, + "loss": 8.5001, + "step": 186640 + }, + { + "epoch": 0.9321081674948188, + "grad_norm": 0.08622153103351593, + "learning_rate": 2.0418533630378736e-06, + "loss": 8.5101, + "step": 186650 + }, + { + "epoch": 0.9321581063197583, + "grad_norm": 0.0959283858537674, + "learning_rate": 2.040351448096323e-06, + "loss": 8.5072, + "step": 186660 + }, + { + "epoch": 0.9322080451446978, + "grad_norm": 0.08934350311756134, + "learning_rate": 2.0388495331547723e-06, + "loss": 8.4896, + "step": 186670 + }, + { + "epoch": 0.9322579839696372, + "grad_norm": 0.09462158381938934, + "learning_rate": 2.037347618213222e-06, + "loss": 8.5083, + "step": 186680 + }, + { + "epoch": 0.9323079227945766, + "grad_norm": 0.09019871056079865, + "learning_rate": 2.0358457032716714e-06, + "loss": 8.5093, + "step": 186690 + }, + { + "epoch": 0.9323578616195161, + "grad_norm": 0.0971764400601387, + "learning_rate": 2.034343788330121e-06, + "loss": 8.5032, + "step": 186700 + }, + { + "epoch": 0.9324078004444556, + "grad_norm": 0.09243464469909668, + "learning_rate": 2.0328418733885704e-06, + "loss": 8.5077, + "step": 186710 + }, + { + "epoch": 0.932457739269395, + "grad_norm": 0.09595301002264023, + "learning_rate": 2.03133995844702e-06, + "loss": 8.4983, + "step": 186720 + }, + { + "epoch": 0.9325076780943344, + "grad_norm": 0.09078612178564072, + "learning_rate": 2.0298380435054695e-06, + "loss": 8.5082, + "step": 186730 + }, + { + "epoch": 0.9325576169192739, + "grad_norm": 0.09375286102294922, + "learning_rate": 2.028336128563919e-06, + "loss": 8.518, + "step": 186740 + }, + { + "epoch": 0.9326075557442134, + "grad_norm": 0.08845958113670349, + "learning_rate": 2.0268342136223686e-06, + "loss": 8.5098, + "step": 186750 + }, + { + "epoch": 0.9326574945691528, + "grad_norm": 0.08810067176818848, + "learning_rate": 2.0253322986808184e-06, + "loss": 8.4912, + "step": 186760 + }, + { + "epoch": 0.9327074333940922, + "grad_norm": 0.08972468227148056, + "learning_rate": 2.0238303837392677e-06, + "loss": 8.52, + "step": 186770 + }, + { + "epoch": 0.9327573722190317, + "grad_norm": 0.09303756058216095, + "learning_rate": 2.022328468797717e-06, + "loss": 8.5024, + "step": 186780 + }, + { + "epoch": 0.9328073110439712, + "grad_norm": 0.09837578982114792, + "learning_rate": 2.0208265538561664e-06, + "loss": 8.5024, + "step": 186790 + }, + { + "epoch": 0.9328572498689106, + "grad_norm": 0.08584380894899368, + "learning_rate": 2.0193246389146165e-06, + "loss": 8.5302, + "step": 186800 + }, + { + "epoch": 0.93290718869385, + "grad_norm": 0.09214811772108078, + "learning_rate": 2.017822723973066e-06, + "loss": 8.5084, + "step": 186810 + }, + { + "epoch": 0.9329571275187895, + "grad_norm": 0.08620260655879974, + "learning_rate": 2.016320809031515e-06, + "loss": 8.5275, + "step": 186820 + }, + { + "epoch": 0.933007066343729, + "grad_norm": 0.09670999646186829, + "learning_rate": 2.0148188940899645e-06, + "loss": 8.493, + "step": 186830 + }, + { + "epoch": 0.9330570051686684, + "grad_norm": 0.08751846104860306, + "learning_rate": 2.0133169791484143e-06, + "loss": 8.5126, + "step": 186840 + }, + { + "epoch": 0.9331069439936078, + "grad_norm": 0.0873984843492508, + "learning_rate": 2.011815064206864e-06, + "loss": 8.5206, + "step": 186850 + }, + { + "epoch": 0.9331568828185473, + "grad_norm": 0.0933077484369278, + "learning_rate": 2.0103131492653134e-06, + "loss": 8.5069, + "step": 186860 + }, + { + "epoch": 0.9332068216434868, + "grad_norm": 0.09100789576768875, + "learning_rate": 2.0088112343237627e-06, + "loss": 8.5076, + "step": 186870 + }, + { + "epoch": 0.9332567604684262, + "grad_norm": 0.09495274722576141, + "learning_rate": 2.0073093193822125e-06, + "loss": 8.5025, + "step": 186880 + }, + { + "epoch": 0.9333066992933656, + "grad_norm": 0.08778039366006851, + "learning_rate": 2.005807404440662e-06, + "loss": 8.5001, + "step": 186890 + }, + { + "epoch": 0.933356638118305, + "grad_norm": 0.09381724148988724, + "learning_rate": 2.0043054894991115e-06, + "loss": 8.5296, + "step": 186900 + }, + { + "epoch": 0.9334065769432445, + "grad_norm": 0.08771809935569763, + "learning_rate": 2.002803574557561e-06, + "loss": 8.5012, + "step": 186910 + }, + { + "epoch": 0.933456515768184, + "grad_norm": 0.08947025239467621, + "learning_rate": 2.0013016596160106e-06, + "loss": 8.5203, + "step": 186920 + }, + { + "epoch": 0.9335064545931234, + "grad_norm": 0.09270720183849335, + "learning_rate": 1.99979974467446e-06, + "loss": 8.5097, + "step": 186930 + }, + { + "epoch": 0.9335563934180628, + "grad_norm": 0.09039568901062012, + "learning_rate": 1.9982978297329093e-06, + "loss": 8.4951, + "step": 186940 + }, + { + "epoch": 0.9336063322430023, + "grad_norm": 0.09502692520618439, + "learning_rate": 1.996795914791359e-06, + "loss": 8.4986, + "step": 186950 + }, + { + "epoch": 0.9336562710679418, + "grad_norm": 0.0894678607583046, + "learning_rate": 1.995293999849809e-06, + "loss": 8.5048, + "step": 186960 + }, + { + "epoch": 0.9337062098928812, + "grad_norm": 0.08719871193170547, + "learning_rate": 1.993792084908258e-06, + "loss": 8.5154, + "step": 186970 + }, + { + "epoch": 0.9337561487178206, + "grad_norm": 0.09486103057861328, + "learning_rate": 1.9922901699667075e-06, + "loss": 8.523, + "step": 186980 + }, + { + "epoch": 0.9338060875427601, + "grad_norm": 0.09258001297712326, + "learning_rate": 1.9907882550251572e-06, + "loss": 8.5054, + "step": 186990 + }, + { + "epoch": 0.9338560263676996, + "grad_norm": 0.08995097130537033, + "learning_rate": 1.989286340083607e-06, + "loss": 8.4992, + "step": 187000 + }, + { + "epoch": 0.933905965192639, + "grad_norm": 0.09509983658790588, + "learning_rate": 1.9877844251420563e-06, + "loss": 8.5136, + "step": 187010 + }, + { + "epoch": 0.9339559040175784, + "grad_norm": 0.09256593883037567, + "learning_rate": 1.9862825102005056e-06, + "loss": 8.524, + "step": 187020 + }, + { + "epoch": 0.9340058428425179, + "grad_norm": 0.09413289278745651, + "learning_rate": 1.984780595258955e-06, + "loss": 8.4867, + "step": 187030 + }, + { + "epoch": 0.9340557816674574, + "grad_norm": 0.09102970361709595, + "learning_rate": 1.983278680317405e-06, + "loss": 8.4949, + "step": 187040 + }, + { + "epoch": 0.9341057204923968, + "grad_norm": 0.09890950471162796, + "learning_rate": 1.9817767653758545e-06, + "loss": 8.506, + "step": 187050 + }, + { + "epoch": 0.9341556593173362, + "grad_norm": 0.09198985993862152, + "learning_rate": 1.980274850434304e-06, + "loss": 8.5197, + "step": 187060 + }, + { + "epoch": 0.9342055981422757, + "grad_norm": 0.09256508946418762, + "learning_rate": 1.978772935492753e-06, + "loss": 8.501, + "step": 187070 + }, + { + "epoch": 0.9342555369672152, + "grad_norm": 0.08654509484767914, + "learning_rate": 1.977271020551203e-06, + "loss": 8.5061, + "step": 187080 + }, + { + "epoch": 0.9343054757921546, + "grad_norm": 0.08977734297513962, + "learning_rate": 1.9757691056096527e-06, + "loss": 8.5108, + "step": 187090 + }, + { + "epoch": 0.934355414617094, + "grad_norm": 0.08983021229505539, + "learning_rate": 1.974267190668102e-06, + "loss": 8.4969, + "step": 187100 + }, + { + "epoch": 0.9344053534420335, + "grad_norm": 0.09085972607135773, + "learning_rate": 1.9727652757265513e-06, + "loss": 8.5194, + "step": 187110 + }, + { + "epoch": 0.934455292266973, + "grad_norm": 0.08963587135076523, + "learning_rate": 1.971263360785001e-06, + "loss": 8.4978, + "step": 187120 + }, + { + "epoch": 0.9345052310919124, + "grad_norm": 0.08823879063129425, + "learning_rate": 1.9697614458434504e-06, + "loss": 8.5022, + "step": 187130 + }, + { + "epoch": 0.9345551699168518, + "grad_norm": 0.08996618539094925, + "learning_rate": 1.9682595309019e-06, + "loss": 8.5172, + "step": 187140 + }, + { + "epoch": 0.9346051087417913, + "grad_norm": 0.08975294977426529, + "learning_rate": 1.9667576159603495e-06, + "loss": 8.4987, + "step": 187150 + }, + { + "epoch": 0.9346550475667308, + "grad_norm": 0.08694805204868317, + "learning_rate": 1.965255701018799e-06, + "loss": 8.5241, + "step": 187160 + }, + { + "epoch": 0.9347049863916702, + "grad_norm": 0.09013424068689346, + "learning_rate": 1.9637537860772486e-06, + "loss": 8.5266, + "step": 187170 + }, + { + "epoch": 0.9347549252166096, + "grad_norm": 0.09890510141849518, + "learning_rate": 1.962251871135698e-06, + "loss": 8.514, + "step": 187180 + }, + { + "epoch": 0.934804864041549, + "grad_norm": 0.08957573026418686, + "learning_rate": 1.9607499561941477e-06, + "loss": 8.5077, + "step": 187190 + }, + { + "epoch": 0.9348548028664886, + "grad_norm": 0.09044113010168076, + "learning_rate": 1.959248041252597e-06, + "loss": 8.4931, + "step": 187200 + }, + { + "epoch": 0.934904741691428, + "grad_norm": 0.08695122599601746, + "learning_rate": 1.9577461263110467e-06, + "loss": 8.4954, + "step": 187210 + }, + { + "epoch": 0.9349546805163674, + "grad_norm": 0.09316051751375198, + "learning_rate": 1.956244211369496e-06, + "loss": 8.515, + "step": 187220 + }, + { + "epoch": 0.9350046193413069, + "grad_norm": 0.0904255136847496, + "learning_rate": 1.9547422964279454e-06, + "loss": 8.4974, + "step": 187230 + }, + { + "epoch": 0.9350545581662464, + "grad_norm": 0.09191304445266724, + "learning_rate": 1.953240381486395e-06, + "loss": 8.5016, + "step": 187240 + }, + { + "epoch": 0.9351044969911858, + "grad_norm": 0.09369964152574539, + "learning_rate": 1.951738466544845e-06, + "loss": 8.5113, + "step": 187250 + }, + { + "epoch": 0.9351544358161252, + "grad_norm": 0.08856088668107986, + "learning_rate": 1.9502365516032942e-06, + "loss": 8.4985, + "step": 187260 + }, + { + "epoch": 0.9352043746410647, + "grad_norm": 0.08918076008558273, + "learning_rate": 1.9487346366617436e-06, + "loss": 8.4963, + "step": 187270 + }, + { + "epoch": 0.9352543134660042, + "grad_norm": 0.08736125379800797, + "learning_rate": 1.947232721720193e-06, + "loss": 8.5068, + "step": 187280 + }, + { + "epoch": 0.9353042522909436, + "grad_norm": 0.08733315765857697, + "learning_rate": 1.945730806778643e-06, + "loss": 8.5016, + "step": 187290 + }, + { + "epoch": 0.935354191115883, + "grad_norm": 0.0980258360505104, + "learning_rate": 1.9442288918370924e-06, + "loss": 8.4994, + "step": 187300 + }, + { + "epoch": 0.9354041299408224, + "grad_norm": 0.09128105640411377, + "learning_rate": 1.9427269768955418e-06, + "loss": 8.508, + "step": 187310 + }, + { + "epoch": 0.935454068765762, + "grad_norm": 0.08948434889316559, + "learning_rate": 1.941225061953991e-06, + "loss": 8.4966, + "step": 187320 + }, + { + "epoch": 0.9355040075907014, + "grad_norm": 0.09193699061870575, + "learning_rate": 1.939723147012441e-06, + "loss": 8.4927, + "step": 187330 + }, + { + "epoch": 0.9355539464156408, + "grad_norm": 0.09148573130369186, + "learning_rate": 1.9382212320708906e-06, + "loss": 8.4928, + "step": 187340 + }, + { + "epoch": 0.9356038852405802, + "grad_norm": 0.09217602759599686, + "learning_rate": 1.93671931712934e-06, + "loss": 8.5084, + "step": 187350 + }, + { + "epoch": 0.9356538240655198, + "grad_norm": 0.09518623352050781, + "learning_rate": 1.9352174021877893e-06, + "loss": 8.5043, + "step": 187360 + }, + { + "epoch": 0.9357037628904592, + "grad_norm": 0.08786571770906448, + "learning_rate": 1.933715487246239e-06, + "loss": 8.5051, + "step": 187370 + }, + { + "epoch": 0.9357537017153986, + "grad_norm": 0.0917324647307396, + "learning_rate": 1.9322135723046883e-06, + "loss": 8.5073, + "step": 187380 + }, + { + "epoch": 0.935803640540338, + "grad_norm": 0.08800705522298813, + "learning_rate": 1.930711657363138e-06, + "loss": 8.5151, + "step": 187390 + }, + { + "epoch": 0.9358535793652776, + "grad_norm": 0.09400518983602524, + "learning_rate": 1.9292097424215874e-06, + "loss": 8.4974, + "step": 187400 + }, + { + "epoch": 0.935903518190217, + "grad_norm": 0.08761896938085556, + "learning_rate": 1.927707827480037e-06, + "loss": 8.507, + "step": 187410 + }, + { + "epoch": 0.9359534570151564, + "grad_norm": 0.09526966512203217, + "learning_rate": 1.9262059125384865e-06, + "loss": 8.494, + "step": 187420 + }, + { + "epoch": 0.9360033958400958, + "grad_norm": 0.08607209473848343, + "learning_rate": 1.924703997596936e-06, + "loss": 8.5211, + "step": 187430 + }, + { + "epoch": 0.9360533346650354, + "grad_norm": 0.0938340425491333, + "learning_rate": 1.9232020826553856e-06, + "loss": 8.5096, + "step": 187440 + }, + { + "epoch": 0.9361032734899748, + "grad_norm": 0.0961814746260643, + "learning_rate": 1.9217001677138354e-06, + "loss": 8.5094, + "step": 187450 + }, + { + "epoch": 0.9361532123149142, + "grad_norm": 0.09486842155456543, + "learning_rate": 1.9201982527722847e-06, + "loss": 8.5038, + "step": 187460 + }, + { + "epoch": 0.9362031511398536, + "grad_norm": 0.09420774132013321, + "learning_rate": 1.918696337830734e-06, + "loss": 8.4886, + "step": 187470 + }, + { + "epoch": 0.9362530899647932, + "grad_norm": 0.09382013976573944, + "learning_rate": 1.9171944228891838e-06, + "loss": 8.4905, + "step": 187480 + }, + { + "epoch": 0.9363030287897326, + "grad_norm": 0.08685174584388733, + "learning_rate": 1.9156925079476335e-06, + "loss": 8.5125, + "step": 187490 + }, + { + "epoch": 0.936352967614672, + "grad_norm": 0.09120272099971771, + "learning_rate": 1.914190593006083e-06, + "loss": 8.5024, + "step": 187500 + }, + { + "epoch": 0.9364029064396114, + "grad_norm": 0.09364549070596695, + "learning_rate": 1.912688678064532e-06, + "loss": 8.5126, + "step": 187510 + }, + { + "epoch": 0.936452845264551, + "grad_norm": 0.0939672514796257, + "learning_rate": 1.9111867631229815e-06, + "loss": 8.4961, + "step": 187520 + }, + { + "epoch": 0.9365027840894904, + "grad_norm": 0.08717799186706543, + "learning_rate": 1.9096848481814317e-06, + "loss": 8.5275, + "step": 187530 + }, + { + "epoch": 0.9365527229144298, + "grad_norm": 0.09963074326515198, + "learning_rate": 1.908182933239881e-06, + "loss": 8.5056, + "step": 187540 + }, + { + "epoch": 0.9366026617393692, + "grad_norm": 0.08945607393980026, + "learning_rate": 1.9066810182983304e-06, + "loss": 8.4934, + "step": 187550 + }, + { + "epoch": 0.9366526005643088, + "grad_norm": 0.093963123857975, + "learning_rate": 1.9051791033567797e-06, + "loss": 8.5153, + "step": 187560 + }, + { + "epoch": 0.9367025393892482, + "grad_norm": 0.09400065243244171, + "learning_rate": 1.9036771884152297e-06, + "loss": 8.4959, + "step": 187570 + }, + { + "epoch": 0.9367524782141876, + "grad_norm": 0.09188316017389297, + "learning_rate": 1.902175273473679e-06, + "loss": 8.5011, + "step": 187580 + }, + { + "epoch": 0.936802417039127, + "grad_norm": 0.09157782793045044, + "learning_rate": 1.9006733585321283e-06, + "loss": 8.4999, + "step": 187590 + }, + { + "epoch": 0.9368523558640666, + "grad_norm": 0.08901472389698029, + "learning_rate": 1.8991714435905779e-06, + "loss": 8.5101, + "step": 187600 + }, + { + "epoch": 0.936902294689006, + "grad_norm": 0.08923201262950897, + "learning_rate": 1.8976695286490276e-06, + "loss": 8.4988, + "step": 187610 + }, + { + "epoch": 0.9369522335139454, + "grad_norm": 0.09177745878696442, + "learning_rate": 1.8961676137074772e-06, + "loss": 8.5018, + "step": 187620 + }, + { + "epoch": 0.9370021723388848, + "grad_norm": 0.09261074662208557, + "learning_rate": 1.8946656987659265e-06, + "loss": 8.5063, + "step": 187630 + }, + { + "epoch": 0.9370521111638244, + "grad_norm": 0.09517964720726013, + "learning_rate": 1.893163783824376e-06, + "loss": 8.4984, + "step": 187640 + }, + { + "epoch": 0.9371020499887638, + "grad_norm": 0.09262313693761826, + "learning_rate": 1.8916618688828258e-06, + "loss": 8.4988, + "step": 187650 + }, + { + "epoch": 0.9371519888137032, + "grad_norm": 0.08536347001791, + "learning_rate": 1.8901599539412751e-06, + "loss": 8.5007, + "step": 187660 + }, + { + "epoch": 0.9372019276386426, + "grad_norm": 0.08875380456447601, + "learning_rate": 1.8886580389997247e-06, + "loss": 8.5065, + "step": 187670 + }, + { + "epoch": 0.9372518664635822, + "grad_norm": 0.08886587619781494, + "learning_rate": 1.887156124058174e-06, + "loss": 8.5045, + "step": 187680 + }, + { + "epoch": 0.9373018052885216, + "grad_norm": 0.09413565695285797, + "learning_rate": 1.885654209116624e-06, + "loss": 8.5019, + "step": 187690 + }, + { + "epoch": 0.937351744113461, + "grad_norm": 0.0883907675743103, + "learning_rate": 1.8841522941750733e-06, + "loss": 8.4836, + "step": 187700 + }, + { + "epoch": 0.9374016829384004, + "grad_norm": 0.09106942266225815, + "learning_rate": 1.8826503792335226e-06, + "loss": 8.5187, + "step": 187710 + }, + { + "epoch": 0.93745162176334, + "grad_norm": 0.08953803032636642, + "learning_rate": 1.8811484642919722e-06, + "loss": 8.505, + "step": 187720 + }, + { + "epoch": 0.9375015605882794, + "grad_norm": 0.09178190678358078, + "learning_rate": 1.879646549350422e-06, + "loss": 8.5085, + "step": 187730 + }, + { + "epoch": 0.9375514994132188, + "grad_norm": 0.09529706090688705, + "learning_rate": 1.8781446344088715e-06, + "loss": 8.502, + "step": 187740 + }, + { + "epoch": 0.9376014382381582, + "grad_norm": 0.08537425100803375, + "learning_rate": 1.8766427194673208e-06, + "loss": 8.4914, + "step": 187750 + }, + { + "epoch": 0.9376513770630978, + "grad_norm": 0.0965605229139328, + "learning_rate": 1.8751408045257701e-06, + "loss": 8.5017, + "step": 187760 + }, + { + "epoch": 0.9377013158880372, + "grad_norm": 0.08984053879976273, + "learning_rate": 1.8736388895842199e-06, + "loss": 8.5113, + "step": 187770 + }, + { + "epoch": 0.9377512547129766, + "grad_norm": 0.0879005640745163, + "learning_rate": 1.8721369746426694e-06, + "loss": 8.5056, + "step": 187780 + }, + { + "epoch": 0.937801193537916, + "grad_norm": 0.09019943326711655, + "learning_rate": 1.870635059701119e-06, + "loss": 8.4996, + "step": 187790 + }, + { + "epoch": 0.9378511323628556, + "grad_norm": 0.08988826721906662, + "learning_rate": 1.8691331447595685e-06, + "loss": 8.5003, + "step": 187800 + }, + { + "epoch": 0.937901071187795, + "grad_norm": 0.0893273875117302, + "learning_rate": 1.8676312298180178e-06, + "loss": 8.5022, + "step": 187810 + }, + { + "epoch": 0.9379510100127344, + "grad_norm": 0.09221018105745316, + "learning_rate": 1.8661293148764676e-06, + "loss": 8.5128, + "step": 187820 + }, + { + "epoch": 0.9380009488376738, + "grad_norm": 0.09288901835680008, + "learning_rate": 1.864627399934917e-06, + "loss": 8.4876, + "step": 187830 + }, + { + "epoch": 0.9380508876626134, + "grad_norm": 0.09186063706874847, + "learning_rate": 1.8631254849933667e-06, + "loss": 8.4982, + "step": 187840 + }, + { + "epoch": 0.9381008264875528, + "grad_norm": 0.09561605751514435, + "learning_rate": 1.861623570051816e-06, + "loss": 8.507, + "step": 187850 + }, + { + "epoch": 0.9381507653124922, + "grad_norm": 0.09141556173563004, + "learning_rate": 1.8601216551102656e-06, + "loss": 8.5184, + "step": 187860 + }, + { + "epoch": 0.9382007041374316, + "grad_norm": 0.08741176128387451, + "learning_rate": 1.858619740168715e-06, + "loss": 8.5107, + "step": 187870 + }, + { + "epoch": 0.9382506429623712, + "grad_norm": 0.0920015349984169, + "learning_rate": 1.8571178252271646e-06, + "loss": 8.5134, + "step": 187880 + }, + { + "epoch": 0.9383005817873106, + "grad_norm": 0.09082834422588348, + "learning_rate": 1.8556159102856142e-06, + "loss": 8.5058, + "step": 187890 + }, + { + "epoch": 0.93835052061225, + "grad_norm": 0.093767449259758, + "learning_rate": 1.8541139953440637e-06, + "loss": 8.5058, + "step": 187900 + }, + { + "epoch": 0.9384004594371894, + "grad_norm": 0.08792780339717865, + "learning_rate": 1.8526120804025133e-06, + "loss": 8.5191, + "step": 187910 + }, + { + "epoch": 0.9384503982621288, + "grad_norm": 0.08794878423213959, + "learning_rate": 1.8511101654609628e-06, + "loss": 8.5125, + "step": 187920 + }, + { + "epoch": 0.9385003370870684, + "grad_norm": 0.08979849517345428, + "learning_rate": 1.8496082505194122e-06, + "loss": 8.5039, + "step": 187930 + }, + { + "epoch": 0.9385502759120078, + "grad_norm": 0.08824494481086731, + "learning_rate": 1.848106335577862e-06, + "loss": 8.4959, + "step": 187940 + }, + { + "epoch": 0.9386002147369472, + "grad_norm": 0.08635149151086807, + "learning_rate": 1.8466044206363112e-06, + "loss": 8.4952, + "step": 187950 + }, + { + "epoch": 0.9386501535618866, + "grad_norm": 0.09149076044559479, + "learning_rate": 1.845102505694761e-06, + "loss": 8.4943, + "step": 187960 + }, + { + "epoch": 0.9387000923868262, + "grad_norm": 0.0918533205986023, + "learning_rate": 1.8436005907532103e-06, + "loss": 8.5059, + "step": 187970 + }, + { + "epoch": 0.9387500312117656, + "grad_norm": 0.09328784048557281, + "learning_rate": 1.8420986758116599e-06, + "loss": 8.494, + "step": 187980 + }, + { + "epoch": 0.938799970036705, + "grad_norm": 0.087800532579422, + "learning_rate": 1.8405967608701094e-06, + "loss": 8.5055, + "step": 187990 + }, + { + "epoch": 0.9388499088616444, + "grad_norm": 0.0951266810297966, + "learning_rate": 1.839094845928559e-06, + "loss": 8.4924, + "step": 188000 + }, + { + "epoch": 0.938899847686584, + "grad_norm": 0.0940432995557785, + "learning_rate": 1.8375929309870085e-06, + "loss": 8.4983, + "step": 188010 + }, + { + "epoch": 0.9389497865115234, + "grad_norm": 0.08336132764816284, + "learning_rate": 1.836091016045458e-06, + "loss": 8.5174, + "step": 188020 + }, + { + "epoch": 0.9389997253364628, + "grad_norm": 0.09214475005865097, + "learning_rate": 1.8345891011039074e-06, + "loss": 8.5017, + "step": 188030 + }, + { + "epoch": 0.9390496641614022, + "grad_norm": 0.0941125676035881, + "learning_rate": 1.8330871861623571e-06, + "loss": 8.5126, + "step": 188040 + }, + { + "epoch": 0.9390996029863418, + "grad_norm": 0.08968133479356766, + "learning_rate": 1.8315852712208065e-06, + "loss": 8.5143, + "step": 188050 + }, + { + "epoch": 0.9391495418112812, + "grad_norm": 0.08894607424736023, + "learning_rate": 1.8300833562792562e-06, + "loss": 8.5021, + "step": 188060 + }, + { + "epoch": 0.9391994806362206, + "grad_norm": 0.09236035495996475, + "learning_rate": 1.8285814413377055e-06, + "loss": 8.4871, + "step": 188070 + }, + { + "epoch": 0.93924941946116, + "grad_norm": 0.09041870385408401, + "learning_rate": 1.827079526396155e-06, + "loss": 8.4946, + "step": 188080 + }, + { + "epoch": 0.9392993582860996, + "grad_norm": 0.08861184120178223, + "learning_rate": 1.8255776114546046e-06, + "loss": 8.485, + "step": 188090 + }, + { + "epoch": 0.939349297111039, + "grad_norm": 0.09006591141223907, + "learning_rate": 1.8240756965130542e-06, + "loss": 8.5131, + "step": 188100 + }, + { + "epoch": 0.9393992359359784, + "grad_norm": 0.09157989919185638, + "learning_rate": 1.8225737815715037e-06, + "loss": 8.5131, + "step": 188110 + }, + { + "epoch": 0.9394491747609178, + "grad_norm": 0.09308748692274094, + "learning_rate": 1.8210718666299533e-06, + "loss": 8.4979, + "step": 188120 + }, + { + "epoch": 0.9394991135858574, + "grad_norm": 0.10365467518568039, + "learning_rate": 1.8195699516884026e-06, + "loss": 8.4922, + "step": 188130 + }, + { + "epoch": 0.9395490524107968, + "grad_norm": 0.08948034048080444, + "learning_rate": 1.8180680367468523e-06, + "loss": 8.4991, + "step": 188140 + }, + { + "epoch": 0.9395989912357362, + "grad_norm": 0.08552392572164536, + "learning_rate": 1.8165661218053017e-06, + "loss": 8.525, + "step": 188150 + }, + { + "epoch": 0.9396489300606756, + "grad_norm": 0.09680628776550293, + "learning_rate": 1.8150642068637514e-06, + "loss": 8.5061, + "step": 188160 + }, + { + "epoch": 0.9396988688856152, + "grad_norm": 0.08866409212350845, + "learning_rate": 1.8135622919222008e-06, + "loss": 8.5054, + "step": 188170 + }, + { + "epoch": 0.9397488077105546, + "grad_norm": 0.09396608918905258, + "learning_rate": 1.8120603769806505e-06, + "loss": 8.515, + "step": 188180 + }, + { + "epoch": 0.939798746535494, + "grad_norm": 0.08692758530378342, + "learning_rate": 1.8105584620390998e-06, + "loss": 8.5197, + "step": 188190 + }, + { + "epoch": 0.9398486853604334, + "grad_norm": 0.0874529555439949, + "learning_rate": 1.8090565470975494e-06, + "loss": 8.509, + "step": 188200 + }, + { + "epoch": 0.939898624185373, + "grad_norm": 0.08824202418327332, + "learning_rate": 1.807554632155999e-06, + "loss": 8.5272, + "step": 188210 + }, + { + "epoch": 0.9399485630103124, + "grad_norm": 0.0905284509062767, + "learning_rate": 1.8060527172144485e-06, + "loss": 8.5125, + "step": 188220 + }, + { + "epoch": 0.9399985018352518, + "grad_norm": 0.0894288644194603, + "learning_rate": 1.804550802272898e-06, + "loss": 8.4988, + "step": 188230 + }, + { + "epoch": 0.9400484406601912, + "grad_norm": 0.09066865593194962, + "learning_rate": 1.8030488873313476e-06, + "loss": 8.5031, + "step": 188240 + }, + { + "epoch": 0.9400983794851308, + "grad_norm": 0.08707483112812042, + "learning_rate": 1.801546972389797e-06, + "loss": 8.49, + "step": 188250 + }, + { + "epoch": 0.9401483183100702, + "grad_norm": 0.08790065348148346, + "learning_rate": 1.8000450574482467e-06, + "loss": 8.4957, + "step": 188260 + }, + { + "epoch": 0.9401982571350096, + "grad_norm": 0.09469541907310486, + "learning_rate": 1.798543142506696e-06, + "loss": 8.5037, + "step": 188270 + }, + { + "epoch": 0.940248195959949, + "grad_norm": 0.0937683954834938, + "learning_rate": 1.7970412275651457e-06, + "loss": 8.5359, + "step": 188280 + }, + { + "epoch": 0.9402981347848886, + "grad_norm": 0.08924166113138199, + "learning_rate": 1.795539312623595e-06, + "loss": 8.5029, + "step": 188290 + }, + { + "epoch": 0.940348073609828, + "grad_norm": 0.08660190552473068, + "learning_rate": 1.7940373976820446e-06, + "loss": 8.5128, + "step": 188300 + }, + { + "epoch": 0.9403980124347674, + "grad_norm": 0.0949644148349762, + "learning_rate": 1.7925354827404942e-06, + "loss": 8.5099, + "step": 188310 + }, + { + "epoch": 0.9404479512597068, + "grad_norm": 0.08653347194194794, + "learning_rate": 1.7910335677989437e-06, + "loss": 8.4979, + "step": 188320 + }, + { + "epoch": 0.9404978900846463, + "grad_norm": 0.09062409400939941, + "learning_rate": 1.7895316528573932e-06, + "loss": 8.5025, + "step": 188330 + }, + { + "epoch": 0.9405478289095858, + "grad_norm": 0.09127449989318848, + "learning_rate": 1.7880297379158428e-06, + "loss": 8.4861, + "step": 188340 + }, + { + "epoch": 0.9405977677345252, + "grad_norm": 0.09160736948251724, + "learning_rate": 1.7865278229742921e-06, + "loss": 8.498, + "step": 188350 + }, + { + "epoch": 0.9406477065594646, + "grad_norm": 0.08944227546453476, + "learning_rate": 1.7850259080327419e-06, + "loss": 8.4942, + "step": 188360 + }, + { + "epoch": 0.9406976453844041, + "grad_norm": 0.08706548810005188, + "learning_rate": 1.7835239930911912e-06, + "loss": 8.5159, + "step": 188370 + }, + { + "epoch": 0.9407475842093436, + "grad_norm": 0.08913147449493408, + "learning_rate": 1.782022078149641e-06, + "loss": 8.4955, + "step": 188380 + }, + { + "epoch": 0.940797523034283, + "grad_norm": 0.09037254750728607, + "learning_rate": 1.7805201632080903e-06, + "loss": 8.5251, + "step": 188390 + }, + { + "epoch": 0.9408474618592224, + "grad_norm": 0.09032527357339859, + "learning_rate": 1.7790182482665398e-06, + "loss": 8.522, + "step": 188400 + }, + { + "epoch": 0.9408974006841619, + "grad_norm": 0.08700371533632278, + "learning_rate": 1.7775163333249894e-06, + "loss": 8.4951, + "step": 188410 + }, + { + "epoch": 0.9409473395091014, + "grad_norm": 0.09280155599117279, + "learning_rate": 1.776014418383439e-06, + "loss": 8.5057, + "step": 188420 + }, + { + "epoch": 0.9409972783340408, + "grad_norm": 0.09698103368282318, + "learning_rate": 1.7745125034418885e-06, + "loss": 8.4896, + "step": 188430 + }, + { + "epoch": 0.9410472171589802, + "grad_norm": 0.09256529062986374, + "learning_rate": 1.773010588500338e-06, + "loss": 8.5083, + "step": 188440 + }, + { + "epoch": 0.9410971559839197, + "grad_norm": 0.0863165482878685, + "learning_rate": 1.7715086735587875e-06, + "loss": 8.5024, + "step": 188450 + }, + { + "epoch": 0.9411470948088592, + "grad_norm": 0.08784864097833633, + "learning_rate": 1.770006758617237e-06, + "loss": 8.5086, + "step": 188460 + }, + { + "epoch": 0.9411970336337986, + "grad_norm": 0.09332052618265152, + "learning_rate": 1.7685048436756864e-06, + "loss": 8.5082, + "step": 188470 + }, + { + "epoch": 0.941246972458738, + "grad_norm": 0.08981883525848389, + "learning_rate": 1.7670029287341362e-06, + "loss": 8.5098, + "step": 188480 + }, + { + "epoch": 0.9412969112836775, + "grad_norm": 0.08930005878210068, + "learning_rate": 1.7655010137925855e-06, + "loss": 8.5293, + "step": 188490 + }, + { + "epoch": 0.941346850108617, + "grad_norm": 0.08967043459415436, + "learning_rate": 1.7639990988510353e-06, + "loss": 8.5243, + "step": 188500 + }, + { + "epoch": 0.9413967889335564, + "grad_norm": 0.09381302446126938, + "learning_rate": 1.7624971839094846e-06, + "loss": 8.4928, + "step": 188510 + }, + { + "epoch": 0.9414467277584958, + "grad_norm": 0.0840909332036972, + "learning_rate": 1.7609952689679341e-06, + "loss": 8.5254, + "step": 188520 + }, + { + "epoch": 0.9414966665834353, + "grad_norm": 0.09190535545349121, + "learning_rate": 1.7594933540263837e-06, + "loss": 8.5259, + "step": 188530 + }, + { + "epoch": 0.9415466054083748, + "grad_norm": 0.0981719046831131, + "learning_rate": 1.7579914390848332e-06, + "loss": 8.5103, + "step": 188540 + }, + { + "epoch": 0.9415965442333142, + "grad_norm": 0.08787670731544495, + "learning_rate": 1.7564895241432828e-06, + "loss": 8.5087, + "step": 188550 + }, + { + "epoch": 0.9416464830582536, + "grad_norm": 0.09270340204238892, + "learning_rate": 1.7549876092017323e-06, + "loss": 8.5074, + "step": 188560 + }, + { + "epoch": 0.9416964218831931, + "grad_norm": 0.09285027533769608, + "learning_rate": 1.7534856942601816e-06, + "loss": 8.5098, + "step": 188570 + }, + { + "epoch": 0.9417463607081326, + "grad_norm": 0.0908166691660881, + "learning_rate": 1.7519837793186314e-06, + "loss": 8.4977, + "step": 188580 + }, + { + "epoch": 0.941796299533072, + "grad_norm": 0.09321475028991699, + "learning_rate": 1.7504818643770807e-06, + "loss": 8.5135, + "step": 188590 + }, + { + "epoch": 0.9418462383580114, + "grad_norm": 0.08778084069490433, + "learning_rate": 1.7489799494355305e-06, + "loss": 8.5213, + "step": 188600 + }, + { + "epoch": 0.9418961771829509, + "grad_norm": 0.09464406967163086, + "learning_rate": 1.7474780344939798e-06, + "loss": 8.5029, + "step": 188610 + }, + { + "epoch": 0.9419461160078904, + "grad_norm": 0.09429196268320084, + "learning_rate": 1.7459761195524294e-06, + "loss": 8.4987, + "step": 188620 + }, + { + "epoch": 0.9419960548328298, + "grad_norm": 0.09396494925022125, + "learning_rate": 1.744474204610879e-06, + "loss": 8.5222, + "step": 188630 + }, + { + "epoch": 0.9420459936577692, + "grad_norm": 0.08751998841762543, + "learning_rate": 1.7429722896693284e-06, + "loss": 8.4981, + "step": 188640 + }, + { + "epoch": 0.9420959324827087, + "grad_norm": 0.08901175856590271, + "learning_rate": 1.741470374727778e-06, + "loss": 8.5163, + "step": 188650 + }, + { + "epoch": 0.9421458713076482, + "grad_norm": 0.08800294995307922, + "learning_rate": 1.7399684597862275e-06, + "loss": 8.5103, + "step": 188660 + }, + { + "epoch": 0.9421958101325876, + "grad_norm": 0.09194714576005936, + "learning_rate": 1.7384665448446769e-06, + "loss": 8.4861, + "step": 188670 + }, + { + "epoch": 0.942245748957527, + "grad_norm": 0.10198995471000671, + "learning_rate": 1.7369646299031266e-06, + "loss": 8.511, + "step": 188680 + }, + { + "epoch": 0.9422956877824665, + "grad_norm": 0.09428546577692032, + "learning_rate": 1.735462714961576e-06, + "loss": 8.5202, + "step": 188690 + }, + { + "epoch": 0.942345626607406, + "grad_norm": 0.09078855067491531, + "learning_rate": 1.7339608000200257e-06, + "loss": 8.5044, + "step": 188700 + }, + { + "epoch": 0.9423955654323454, + "grad_norm": 0.09226953983306885, + "learning_rate": 1.732458885078475e-06, + "loss": 8.5043, + "step": 188710 + }, + { + "epoch": 0.9424455042572848, + "grad_norm": 0.09067227691411972, + "learning_rate": 1.7309569701369248e-06, + "loss": 8.5106, + "step": 188720 + }, + { + "epoch": 0.9424954430822243, + "grad_norm": 0.09468891471624374, + "learning_rate": 1.7294550551953741e-06, + "loss": 8.4964, + "step": 188730 + }, + { + "epoch": 0.9425453819071637, + "grad_norm": 0.0890989601612091, + "learning_rate": 1.7279531402538237e-06, + "loss": 8.5198, + "step": 188740 + }, + { + "epoch": 0.9425953207321032, + "grad_norm": 0.09626948088407516, + "learning_rate": 1.7264512253122732e-06, + "loss": 8.5006, + "step": 188750 + }, + { + "epoch": 0.9426452595570426, + "grad_norm": 0.08856556564569473, + "learning_rate": 1.7249493103707227e-06, + "loss": 8.487, + "step": 188760 + }, + { + "epoch": 0.9426951983819821, + "grad_norm": 0.09216383099555969, + "learning_rate": 1.7234473954291723e-06, + "loss": 8.5164, + "step": 188770 + }, + { + "epoch": 0.9427451372069215, + "grad_norm": 0.08988320082426071, + "learning_rate": 1.7219454804876218e-06, + "loss": 8.5159, + "step": 188780 + }, + { + "epoch": 0.942795076031861, + "grad_norm": 0.09181145578622818, + "learning_rate": 1.7204435655460712e-06, + "loss": 8.4966, + "step": 188790 + }, + { + "epoch": 0.9428450148568004, + "grad_norm": 0.08662037551403046, + "learning_rate": 1.718941650604521e-06, + "loss": 8.5168, + "step": 188800 + }, + { + "epoch": 0.9428949536817399, + "grad_norm": 0.09044136106967926, + "learning_rate": 1.7174397356629702e-06, + "loss": 8.5264, + "step": 188810 + }, + { + "epoch": 0.9429448925066793, + "grad_norm": 0.09236202389001846, + "learning_rate": 1.71593782072142e-06, + "loss": 8.4977, + "step": 188820 + }, + { + "epoch": 0.9429948313316188, + "grad_norm": 0.08900753408670425, + "learning_rate": 1.7144359057798693e-06, + "loss": 8.5269, + "step": 188830 + }, + { + "epoch": 0.9430447701565582, + "grad_norm": 0.09463819116353989, + "learning_rate": 1.7129339908383189e-06, + "loss": 8.5008, + "step": 188840 + }, + { + "epoch": 0.9430947089814977, + "grad_norm": 0.09038529545068741, + "learning_rate": 1.7114320758967684e-06, + "loss": 8.5102, + "step": 188850 + }, + { + "epoch": 0.9431446478064371, + "grad_norm": 0.0910005047917366, + "learning_rate": 1.709930160955218e-06, + "loss": 8.4995, + "step": 188860 + }, + { + "epoch": 0.9431945866313766, + "grad_norm": 0.09129851311445236, + "learning_rate": 1.7084282460136675e-06, + "loss": 8.5172, + "step": 188870 + }, + { + "epoch": 0.943244525456316, + "grad_norm": 0.09213831275701523, + "learning_rate": 1.706926331072117e-06, + "loss": 8.5083, + "step": 188880 + }, + { + "epoch": 0.9432944642812554, + "grad_norm": 0.09839709848165512, + "learning_rate": 1.7054244161305664e-06, + "loss": 8.4996, + "step": 188890 + }, + { + "epoch": 0.9433444031061949, + "grad_norm": 0.09194820374250412, + "learning_rate": 1.7039225011890161e-06, + "loss": 8.493, + "step": 188900 + }, + { + "epoch": 0.9433943419311344, + "grad_norm": 0.0895412266254425, + "learning_rate": 1.7024205862474655e-06, + "loss": 8.5171, + "step": 188910 + }, + { + "epoch": 0.9434442807560738, + "grad_norm": 0.08914782851934433, + "learning_rate": 1.7009186713059152e-06, + "loss": 8.4905, + "step": 188920 + }, + { + "epoch": 0.9434942195810132, + "grad_norm": 0.09766605496406555, + "learning_rate": 1.6994167563643646e-06, + "loss": 8.5128, + "step": 188930 + }, + { + "epoch": 0.9435441584059527, + "grad_norm": 0.0920957624912262, + "learning_rate": 1.697914841422814e-06, + "loss": 8.4913, + "step": 188940 + }, + { + "epoch": 0.9435940972308922, + "grad_norm": 0.08751115202903748, + "learning_rate": 1.6964129264812636e-06, + "loss": 8.5104, + "step": 188950 + }, + { + "epoch": 0.9436440360558316, + "grad_norm": 0.09014736860990524, + "learning_rate": 1.6949110115397132e-06, + "loss": 8.5043, + "step": 188960 + }, + { + "epoch": 0.943693974880771, + "grad_norm": 0.08869920670986176, + "learning_rate": 1.6934090965981627e-06, + "loss": 8.4973, + "step": 188970 + }, + { + "epoch": 0.9437439137057105, + "grad_norm": 0.088557668030262, + "learning_rate": 1.6919071816566123e-06, + "loss": 8.5082, + "step": 188980 + }, + { + "epoch": 0.94379385253065, + "grad_norm": 0.08449137955904007, + "learning_rate": 1.6904052667150618e-06, + "loss": 8.5086, + "step": 188990 + }, + { + "epoch": 0.9438437913555894, + "grad_norm": 0.08829719573259354, + "learning_rate": 1.6889033517735114e-06, + "loss": 8.5273, + "step": 189000 + }, + { + "epoch": 0.9438937301805288, + "grad_norm": 0.09361531585454941, + "learning_rate": 1.6874014368319607e-06, + "loss": 8.4817, + "step": 189010 + }, + { + "epoch": 0.9439436690054683, + "grad_norm": 0.09768420457839966, + "learning_rate": 1.6858995218904104e-06, + "loss": 8.5082, + "step": 189020 + }, + { + "epoch": 0.9439936078304078, + "grad_norm": 0.0924157053232193, + "learning_rate": 1.6843976069488598e-06, + "loss": 8.5025, + "step": 189030 + }, + { + "epoch": 0.9440435466553472, + "grad_norm": 0.09079400449991226, + "learning_rate": 1.6828956920073095e-06, + "loss": 8.5017, + "step": 189040 + }, + { + "epoch": 0.9440934854802866, + "grad_norm": 0.09591791033744812, + "learning_rate": 1.6813937770657589e-06, + "loss": 8.5061, + "step": 189050 + }, + { + "epoch": 0.9441434243052261, + "grad_norm": 0.0918201357126236, + "learning_rate": 1.6798918621242084e-06, + "loss": 8.515, + "step": 189060 + }, + { + "epoch": 0.9441933631301656, + "grad_norm": 0.09003403782844543, + "learning_rate": 1.678389947182658e-06, + "loss": 8.5041, + "step": 189070 + }, + { + "epoch": 0.944243301955105, + "grad_norm": 0.09714401513338089, + "learning_rate": 1.6768880322411075e-06, + "loss": 8.4791, + "step": 189080 + }, + { + "epoch": 0.9442932407800444, + "grad_norm": 0.08682556450366974, + "learning_rate": 1.675386117299557e-06, + "loss": 8.5136, + "step": 189090 + }, + { + "epoch": 0.9443431796049839, + "grad_norm": 0.08617331087589264, + "learning_rate": 1.6738842023580066e-06, + "loss": 8.5153, + "step": 189100 + }, + { + "epoch": 0.9443931184299234, + "grad_norm": 0.09631218761205673, + "learning_rate": 1.672382287416456e-06, + "loss": 8.4938, + "step": 189110 + }, + { + "epoch": 0.9444430572548628, + "grad_norm": 0.09221839159727097, + "learning_rate": 1.6708803724749057e-06, + "loss": 8.5093, + "step": 189120 + }, + { + "epoch": 0.9444929960798022, + "grad_norm": 0.08441857248544693, + "learning_rate": 1.669378457533355e-06, + "loss": 8.5082, + "step": 189130 + }, + { + "epoch": 0.9445429349047417, + "grad_norm": 0.0894455835223198, + "learning_rate": 1.6678765425918047e-06, + "loss": 8.4961, + "step": 189140 + }, + { + "epoch": 0.9445928737296811, + "grad_norm": 0.08852816373109818, + "learning_rate": 1.666374627650254e-06, + "loss": 8.4978, + "step": 189150 + }, + { + "epoch": 0.9446428125546206, + "grad_norm": 0.09122937172651291, + "learning_rate": 1.6648727127087036e-06, + "loss": 8.5011, + "step": 189160 + }, + { + "epoch": 0.94469275137956, + "grad_norm": 0.09438908100128174, + "learning_rate": 1.6633707977671532e-06, + "loss": 8.5027, + "step": 189170 + }, + { + "epoch": 0.9447426902044995, + "grad_norm": 0.08923160284757614, + "learning_rate": 1.6618688828256027e-06, + "loss": 8.4992, + "step": 189180 + }, + { + "epoch": 0.944792629029439, + "grad_norm": 0.08867133408784866, + "learning_rate": 1.6603669678840523e-06, + "loss": 8.4922, + "step": 189190 + }, + { + "epoch": 0.9448425678543784, + "grad_norm": 0.0948406532406807, + "learning_rate": 1.6588650529425018e-06, + "loss": 8.5108, + "step": 189200 + }, + { + "epoch": 0.9448925066793178, + "grad_norm": 0.08932336419820786, + "learning_rate": 1.6573631380009511e-06, + "loss": 8.5091, + "step": 189210 + }, + { + "epoch": 0.9449424455042573, + "grad_norm": 0.09022360295057297, + "learning_rate": 1.6558612230594009e-06, + "loss": 8.5191, + "step": 189220 + }, + { + "epoch": 0.9449923843291967, + "grad_norm": 0.09157562255859375, + "learning_rate": 1.6543593081178502e-06, + "loss": 8.5045, + "step": 189230 + }, + { + "epoch": 0.9450423231541362, + "grad_norm": 0.09014785289764404, + "learning_rate": 1.6528573931763e-06, + "loss": 8.5158, + "step": 189240 + }, + { + "epoch": 0.9450922619790756, + "grad_norm": 0.09535112977027893, + "learning_rate": 1.6513554782347493e-06, + "loss": 8.5037, + "step": 189250 + }, + { + "epoch": 0.9451422008040151, + "grad_norm": 0.09707490354776382, + "learning_rate": 1.6498535632931988e-06, + "loss": 8.5056, + "step": 189260 + }, + { + "epoch": 0.9451921396289545, + "grad_norm": 0.09138431400060654, + "learning_rate": 1.6483516483516484e-06, + "loss": 8.4955, + "step": 189270 + }, + { + "epoch": 0.945242078453894, + "grad_norm": 0.0848042219877243, + "learning_rate": 1.646849733410098e-06, + "loss": 8.5018, + "step": 189280 + }, + { + "epoch": 0.9452920172788334, + "grad_norm": 0.08952652662992477, + "learning_rate": 1.6453478184685475e-06, + "loss": 8.5142, + "step": 189290 + }, + { + "epoch": 0.9453419561037729, + "grad_norm": 0.09085434675216675, + "learning_rate": 1.643845903526997e-06, + "loss": 8.5238, + "step": 189300 + }, + { + "epoch": 0.9453918949287123, + "grad_norm": 0.08714916557073593, + "learning_rate": 1.6423439885854466e-06, + "loss": 8.531, + "step": 189310 + }, + { + "epoch": 0.9454418337536518, + "grad_norm": 0.09484901279211044, + "learning_rate": 1.640842073643896e-06, + "loss": 8.4926, + "step": 189320 + }, + { + "epoch": 0.9454917725785912, + "grad_norm": 0.08953174948692322, + "learning_rate": 1.6393401587023454e-06, + "loss": 8.5142, + "step": 189330 + }, + { + "epoch": 0.9455417114035307, + "grad_norm": 0.09061595797538757, + "learning_rate": 1.6378382437607952e-06, + "loss": 8.4869, + "step": 189340 + }, + { + "epoch": 0.9455916502284701, + "grad_norm": 0.08741827309131622, + "learning_rate": 1.6363363288192445e-06, + "loss": 8.506, + "step": 189350 + }, + { + "epoch": 0.9456415890534096, + "grad_norm": 0.0936361625790596, + "learning_rate": 1.6348344138776943e-06, + "loss": 8.4956, + "step": 189360 + }, + { + "epoch": 0.945691527878349, + "grad_norm": 0.0899646058678627, + "learning_rate": 1.6333324989361436e-06, + "loss": 8.5156, + "step": 189370 + }, + { + "epoch": 0.9457414667032885, + "grad_norm": 0.09246868640184402, + "learning_rate": 1.6318305839945931e-06, + "loss": 8.5201, + "step": 189380 + }, + { + "epoch": 0.9457914055282279, + "grad_norm": 0.08746492117643356, + "learning_rate": 1.6303286690530427e-06, + "loss": 8.509, + "step": 189390 + }, + { + "epoch": 0.9458413443531674, + "grad_norm": 0.09183592349290848, + "learning_rate": 1.6288267541114922e-06, + "loss": 8.5058, + "step": 189400 + }, + { + "epoch": 0.9458912831781068, + "grad_norm": 0.088591568171978, + "learning_rate": 1.6273248391699418e-06, + "loss": 8.5103, + "step": 189410 + }, + { + "epoch": 0.9459412220030463, + "grad_norm": 0.09180916845798492, + "learning_rate": 1.6258229242283913e-06, + "loss": 8.505, + "step": 189420 + }, + { + "epoch": 0.9459911608279857, + "grad_norm": 0.0878295823931694, + "learning_rate": 1.6243210092868406e-06, + "loss": 8.4924, + "step": 189430 + }, + { + "epoch": 0.9460410996529252, + "grad_norm": 0.09343598783016205, + "learning_rate": 1.6228190943452904e-06, + "loss": 8.4996, + "step": 189440 + }, + { + "epoch": 0.9460910384778646, + "grad_norm": 0.08655836433172226, + "learning_rate": 1.6213171794037397e-06, + "loss": 8.4907, + "step": 189450 + }, + { + "epoch": 0.9461409773028041, + "grad_norm": 0.09079290181398392, + "learning_rate": 1.6198152644621895e-06, + "loss": 8.5051, + "step": 189460 + }, + { + "epoch": 0.9461909161277435, + "grad_norm": 0.09278516471385956, + "learning_rate": 1.6183133495206388e-06, + "loss": 8.5262, + "step": 189470 + }, + { + "epoch": 0.946240854952683, + "grad_norm": 0.0923718810081482, + "learning_rate": 1.6168114345790884e-06, + "loss": 8.4926, + "step": 189480 + }, + { + "epoch": 0.9462907937776224, + "grad_norm": 0.0890062153339386, + "learning_rate": 1.615309519637538e-06, + "loss": 8.5063, + "step": 189490 + }, + { + "epoch": 0.9463407326025619, + "grad_norm": 0.0915982574224472, + "learning_rate": 1.6138076046959875e-06, + "loss": 8.5034, + "step": 189500 + }, + { + "epoch": 0.9463906714275013, + "grad_norm": 0.0939352884888649, + "learning_rate": 1.612305689754437e-06, + "loss": 8.4948, + "step": 189510 + }, + { + "epoch": 0.9464406102524408, + "grad_norm": 0.09317021816968918, + "learning_rate": 1.6108037748128865e-06, + "loss": 8.5007, + "step": 189520 + }, + { + "epoch": 0.9464905490773802, + "grad_norm": 0.08831048011779785, + "learning_rate": 1.609301859871336e-06, + "loss": 8.4932, + "step": 189530 + }, + { + "epoch": 0.9465404879023197, + "grad_norm": 0.1017037183046341, + "learning_rate": 1.6077999449297856e-06, + "loss": 8.5018, + "step": 189540 + }, + { + "epoch": 0.9465904267272591, + "grad_norm": 0.09273435175418854, + "learning_rate": 1.606298029988235e-06, + "loss": 8.4929, + "step": 189550 + }, + { + "epoch": 0.9466403655521985, + "grad_norm": 0.09123826771974564, + "learning_rate": 1.6047961150466847e-06, + "loss": 8.4936, + "step": 189560 + }, + { + "epoch": 0.946690304377138, + "grad_norm": 0.09415683150291443, + "learning_rate": 1.603294200105134e-06, + "loss": 8.4812, + "step": 189570 + }, + { + "epoch": 0.9467402432020775, + "grad_norm": 0.08812573552131653, + "learning_rate": 1.6017922851635838e-06, + "loss": 8.5189, + "step": 189580 + }, + { + "epoch": 0.9467901820270169, + "grad_norm": 0.0949978157877922, + "learning_rate": 1.6002903702220331e-06, + "loss": 8.4971, + "step": 189590 + }, + { + "epoch": 0.9468401208519563, + "grad_norm": 0.09173561632633209, + "learning_rate": 1.5987884552804827e-06, + "loss": 8.4943, + "step": 189600 + }, + { + "epoch": 0.9468900596768958, + "grad_norm": 0.09235606342554092, + "learning_rate": 1.5972865403389322e-06, + "loss": 8.4975, + "step": 189610 + }, + { + "epoch": 0.9469399985018353, + "grad_norm": 0.09276280552148819, + "learning_rate": 1.5957846253973818e-06, + "loss": 8.5019, + "step": 189620 + }, + { + "epoch": 0.9469899373267747, + "grad_norm": 0.09055425226688385, + "learning_rate": 1.5942827104558313e-06, + "loss": 8.5128, + "step": 189630 + }, + { + "epoch": 0.9470398761517141, + "grad_norm": 0.09334809333086014, + "learning_rate": 1.5927807955142808e-06, + "loss": 8.4991, + "step": 189640 + }, + { + "epoch": 0.9470898149766536, + "grad_norm": 0.09053409099578857, + "learning_rate": 1.5912788805727302e-06, + "loss": 8.4977, + "step": 189650 + }, + { + "epoch": 0.9471397538015931, + "grad_norm": 0.09045706689357758, + "learning_rate": 1.58977696563118e-06, + "loss": 8.4876, + "step": 189660 + }, + { + "epoch": 0.9471896926265325, + "grad_norm": 0.0899735763669014, + "learning_rate": 1.5882750506896293e-06, + "loss": 8.5024, + "step": 189670 + }, + { + "epoch": 0.9472396314514719, + "grad_norm": 0.09022071212530136, + "learning_rate": 1.586773135748079e-06, + "loss": 8.5059, + "step": 189680 + }, + { + "epoch": 0.9472895702764114, + "grad_norm": 0.08928265422582626, + "learning_rate": 1.5852712208065283e-06, + "loss": 8.5079, + "step": 189690 + }, + { + "epoch": 0.9473395091013509, + "grad_norm": 0.08643801510334015, + "learning_rate": 1.5837693058649779e-06, + "loss": 8.4948, + "step": 189700 + }, + { + "epoch": 0.9473894479262903, + "grad_norm": 0.09584169834852219, + "learning_rate": 1.5822673909234274e-06, + "loss": 8.4735, + "step": 189710 + }, + { + "epoch": 0.9474393867512297, + "grad_norm": 0.08683201670646667, + "learning_rate": 1.580765475981877e-06, + "loss": 8.4989, + "step": 189720 + }, + { + "epoch": 0.9474893255761692, + "grad_norm": 0.08900432288646698, + "learning_rate": 1.5792635610403265e-06, + "loss": 8.5159, + "step": 189730 + }, + { + "epoch": 0.9475392644011087, + "grad_norm": 0.0945209488272667, + "learning_rate": 1.577761646098776e-06, + "loss": 8.4977, + "step": 189740 + }, + { + "epoch": 0.9475892032260481, + "grad_norm": 0.09035822004079819, + "learning_rate": 1.5762597311572254e-06, + "loss": 8.4902, + "step": 189750 + }, + { + "epoch": 0.9476391420509875, + "grad_norm": 0.09136451780796051, + "learning_rate": 1.5747578162156751e-06, + "loss": 8.5167, + "step": 189760 + }, + { + "epoch": 0.947689080875927, + "grad_norm": 0.09294602274894714, + "learning_rate": 1.5732559012741245e-06, + "loss": 8.4919, + "step": 189770 + }, + { + "epoch": 0.9477390197008665, + "grad_norm": 0.08745280653238297, + "learning_rate": 1.5717539863325742e-06, + "loss": 8.5084, + "step": 189780 + }, + { + "epoch": 0.9477889585258059, + "grad_norm": 0.08939157426357269, + "learning_rate": 1.5702520713910236e-06, + "loss": 8.5243, + "step": 189790 + }, + { + "epoch": 0.9478388973507453, + "grad_norm": 0.09142594784498215, + "learning_rate": 1.5687501564494731e-06, + "loss": 8.5088, + "step": 189800 + }, + { + "epoch": 0.9478888361756848, + "grad_norm": 0.08955658972263336, + "learning_rate": 1.5672482415079227e-06, + "loss": 8.5227, + "step": 189810 + }, + { + "epoch": 0.9479387750006243, + "grad_norm": 0.08954078704118729, + "learning_rate": 1.5657463265663722e-06, + "loss": 8.5083, + "step": 189820 + }, + { + "epoch": 0.9479887138255637, + "grad_norm": 0.09221227467060089, + "learning_rate": 1.5642444116248217e-06, + "loss": 8.5135, + "step": 189830 + }, + { + "epoch": 0.9480386526505031, + "grad_norm": 0.09133052825927734, + "learning_rate": 1.5627424966832713e-06, + "loss": 8.5026, + "step": 189840 + }, + { + "epoch": 0.9480885914754426, + "grad_norm": 0.0908413901925087, + "learning_rate": 1.5612405817417208e-06, + "loss": 8.5082, + "step": 189850 + }, + { + "epoch": 0.9481385303003821, + "grad_norm": 0.09032785892486572, + "learning_rate": 1.5597386668001704e-06, + "loss": 8.5031, + "step": 189860 + }, + { + "epoch": 0.9481884691253215, + "grad_norm": 0.09097219258546829, + "learning_rate": 1.5582367518586197e-06, + "loss": 8.5025, + "step": 189870 + }, + { + "epoch": 0.9482384079502609, + "grad_norm": 0.08840753883123398, + "learning_rate": 1.5567348369170695e-06, + "loss": 8.4975, + "step": 189880 + }, + { + "epoch": 0.9482883467752004, + "grad_norm": 0.08880984038114548, + "learning_rate": 1.5552329219755188e-06, + "loss": 8.5018, + "step": 189890 + }, + { + "epoch": 0.9483382856001398, + "grad_norm": 0.08809378743171692, + "learning_rate": 1.5537310070339685e-06, + "loss": 8.5057, + "step": 189900 + }, + { + "epoch": 0.9483882244250793, + "grad_norm": 0.09054802358150482, + "learning_rate": 1.5522290920924179e-06, + "loss": 8.4954, + "step": 189910 + }, + { + "epoch": 0.9484381632500187, + "grad_norm": 0.08676068484783173, + "learning_rate": 1.5507271771508674e-06, + "loss": 8.4857, + "step": 189920 + }, + { + "epoch": 0.9484881020749582, + "grad_norm": 0.0902305543422699, + "learning_rate": 1.549225262209317e-06, + "loss": 8.4918, + "step": 189930 + }, + { + "epoch": 0.9485380408998976, + "grad_norm": 0.08811389654874802, + "learning_rate": 1.5477233472677665e-06, + "loss": 8.5045, + "step": 189940 + }, + { + "epoch": 0.9485879797248371, + "grad_norm": 0.08572709560394287, + "learning_rate": 1.546221432326216e-06, + "loss": 8.5071, + "step": 189950 + }, + { + "epoch": 0.9486379185497765, + "grad_norm": 0.09473098814487457, + "learning_rate": 1.5447195173846656e-06, + "loss": 8.5074, + "step": 189960 + }, + { + "epoch": 0.948687857374716, + "grad_norm": 0.08549437671899796, + "learning_rate": 1.543217602443115e-06, + "loss": 8.5064, + "step": 189970 + }, + { + "epoch": 0.9487377961996554, + "grad_norm": 0.08814796805381775, + "learning_rate": 1.5417156875015647e-06, + "loss": 8.5284, + "step": 189980 + }, + { + "epoch": 0.9487877350245949, + "grad_norm": 0.0959198847413063, + "learning_rate": 1.540213772560014e-06, + "loss": 8.501, + "step": 189990 + }, + { + "epoch": 0.9488376738495343, + "grad_norm": 0.08826687932014465, + "learning_rate": 1.5387118576184638e-06, + "loss": 8.5092, + "step": 190000 + }, + { + "epoch": 0.9488876126744737, + "grad_norm": 0.09331593662500381, + "learning_rate": 1.537209942676913e-06, + "loss": 8.4996, + "step": 190010 + }, + { + "epoch": 0.9489375514994132, + "grad_norm": 0.09332361817359924, + "learning_rate": 1.5357080277353626e-06, + "loss": 8.5019, + "step": 190020 + }, + { + "epoch": 0.9489874903243527, + "grad_norm": 0.09323418140411377, + "learning_rate": 1.5342061127938122e-06, + "loss": 8.5008, + "step": 190030 + }, + { + "epoch": 0.9490374291492921, + "grad_norm": 0.09189082682132721, + "learning_rate": 1.5327041978522615e-06, + "loss": 8.5162, + "step": 190040 + }, + { + "epoch": 0.9490873679742315, + "grad_norm": 0.0889974981546402, + "learning_rate": 1.5312022829107113e-06, + "loss": 8.4987, + "step": 190050 + }, + { + "epoch": 0.949137306799171, + "grad_norm": 0.08503816276788712, + "learning_rate": 1.5297003679691606e-06, + "loss": 8.5133, + "step": 190060 + }, + { + "epoch": 0.9491872456241105, + "grad_norm": 0.09503103792667389, + "learning_rate": 1.5281984530276103e-06, + "loss": 8.4963, + "step": 190070 + }, + { + "epoch": 0.9492371844490499, + "grad_norm": 0.08845657110214233, + "learning_rate": 1.5266965380860597e-06, + "loss": 8.4878, + "step": 190080 + }, + { + "epoch": 0.9492871232739893, + "grad_norm": 0.08706570416688919, + "learning_rate": 1.5251946231445092e-06, + "loss": 8.5065, + "step": 190090 + }, + { + "epoch": 0.9493370620989288, + "grad_norm": 0.08456262946128845, + "learning_rate": 1.5236927082029588e-06, + "loss": 8.502, + "step": 190100 + }, + { + "epoch": 0.9493870009238683, + "grad_norm": 0.09131083637475967, + "learning_rate": 1.5221907932614083e-06, + "loss": 8.4949, + "step": 190110 + }, + { + "epoch": 0.9494369397488077, + "grad_norm": 0.09142878651618958, + "learning_rate": 1.5206888783198579e-06, + "loss": 8.5019, + "step": 190120 + }, + { + "epoch": 0.9494868785737471, + "grad_norm": 0.08769478648900986, + "learning_rate": 1.5191869633783074e-06, + "loss": 8.5095, + "step": 190130 + }, + { + "epoch": 0.9495368173986866, + "grad_norm": 0.09371474385261536, + "learning_rate": 1.5176850484367567e-06, + "loss": 8.5074, + "step": 190140 + }, + { + "epoch": 0.9495867562236261, + "grad_norm": 0.08654850721359253, + "learning_rate": 1.5161831334952065e-06, + "loss": 8.5099, + "step": 190150 + }, + { + "epoch": 0.9496366950485655, + "grad_norm": 0.09408658742904663, + "learning_rate": 1.5146812185536558e-06, + "loss": 8.4846, + "step": 190160 + }, + { + "epoch": 0.9496866338735049, + "grad_norm": 0.0925004854798317, + "learning_rate": 1.5131793036121056e-06, + "loss": 8.4927, + "step": 190170 + }, + { + "epoch": 0.9497365726984444, + "grad_norm": 0.09245412796735764, + "learning_rate": 1.511677388670555e-06, + "loss": 8.5093, + "step": 190180 + }, + { + "epoch": 0.9497865115233839, + "grad_norm": 0.0886409729719162, + "learning_rate": 1.5101754737290044e-06, + "loss": 8.5086, + "step": 190190 + }, + { + "epoch": 0.9498364503483233, + "grad_norm": 0.08819697797298431, + "learning_rate": 1.508673558787454e-06, + "loss": 8.4979, + "step": 190200 + }, + { + "epoch": 0.9498863891732627, + "grad_norm": 0.09439955651760101, + "learning_rate": 1.5071716438459035e-06, + "loss": 8.5077, + "step": 190210 + }, + { + "epoch": 0.9499363279982022, + "grad_norm": 0.09136414527893066, + "learning_rate": 1.505669728904353e-06, + "loss": 8.5202, + "step": 190220 + }, + { + "epoch": 0.9499862668231417, + "grad_norm": 0.09268520772457123, + "learning_rate": 1.5041678139628026e-06, + "loss": 8.498, + "step": 190230 + }, + { + "epoch": 0.9500362056480811, + "grad_norm": 0.08827424794435501, + "learning_rate": 1.502665899021252e-06, + "loss": 8.5254, + "step": 190240 + }, + { + "epoch": 0.9500861444730205, + "grad_norm": 0.09765148162841797, + "learning_rate": 1.5011639840797017e-06, + "loss": 8.4877, + "step": 190250 + }, + { + "epoch": 0.95013608329796, + "grad_norm": 0.09497714787721634, + "learning_rate": 1.499662069138151e-06, + "loss": 8.495, + "step": 190260 + }, + { + "epoch": 0.9501860221228995, + "grad_norm": 0.091663658618927, + "learning_rate": 1.4981601541966008e-06, + "loss": 8.5161, + "step": 190270 + }, + { + "epoch": 0.9502359609478389, + "grad_norm": 0.09366423636674881, + "learning_rate": 1.4966582392550501e-06, + "loss": 8.4835, + "step": 190280 + }, + { + "epoch": 0.9502858997727783, + "grad_norm": 0.09287559986114502, + "learning_rate": 1.4951563243134997e-06, + "loss": 8.4897, + "step": 190290 + }, + { + "epoch": 0.9503358385977178, + "grad_norm": 0.09357742220163345, + "learning_rate": 1.4936544093719492e-06, + "loss": 8.4976, + "step": 190300 + }, + { + "epoch": 0.9503857774226573, + "grad_norm": 0.09103827178478241, + "learning_rate": 1.4921524944303987e-06, + "loss": 8.4842, + "step": 190310 + }, + { + "epoch": 0.9504357162475967, + "grad_norm": 0.08826170116662979, + "learning_rate": 1.4906505794888483e-06, + "loss": 8.5147, + "step": 190320 + }, + { + "epoch": 0.9504856550725361, + "grad_norm": 0.08988435566425323, + "learning_rate": 1.4891486645472978e-06, + "loss": 8.4985, + "step": 190330 + }, + { + "epoch": 0.9505355938974756, + "grad_norm": 0.09322842955589294, + "learning_rate": 1.4876467496057474e-06, + "loss": 8.5058, + "step": 190340 + }, + { + "epoch": 0.9505855327224151, + "grad_norm": 0.09746944904327393, + "learning_rate": 1.486144834664197e-06, + "loss": 8.4953, + "step": 190350 + }, + { + "epoch": 0.9506354715473545, + "grad_norm": 0.08793298900127411, + "learning_rate": 1.4846429197226463e-06, + "loss": 8.495, + "step": 190360 + }, + { + "epoch": 0.9506854103722939, + "grad_norm": 0.08952205628156662, + "learning_rate": 1.483141004781096e-06, + "loss": 8.5206, + "step": 190370 + }, + { + "epoch": 0.9507353491972333, + "grad_norm": 0.09018826484680176, + "learning_rate": 1.4816390898395453e-06, + "loss": 8.4989, + "step": 190380 + }, + { + "epoch": 0.9507852880221729, + "grad_norm": 0.08909120410680771, + "learning_rate": 1.480137174897995e-06, + "loss": 8.5011, + "step": 190390 + }, + { + "epoch": 0.9508352268471123, + "grad_norm": 0.0869319885969162, + "learning_rate": 1.4786352599564444e-06, + "loss": 8.4915, + "step": 190400 + }, + { + "epoch": 0.9508851656720517, + "grad_norm": 0.0899181067943573, + "learning_rate": 1.477133345014894e-06, + "loss": 8.4897, + "step": 190410 + }, + { + "epoch": 0.9509351044969911, + "grad_norm": 0.0868888720870018, + "learning_rate": 1.4756314300733435e-06, + "loss": 8.5152, + "step": 190420 + }, + { + "epoch": 0.9509850433219307, + "grad_norm": 0.09073241055011749, + "learning_rate": 1.474129515131793e-06, + "loss": 8.5101, + "step": 190430 + }, + { + "epoch": 0.9510349821468701, + "grad_norm": 0.08949577063322067, + "learning_rate": 1.4726276001902426e-06, + "loss": 8.4997, + "step": 190440 + }, + { + "epoch": 0.9510849209718095, + "grad_norm": 0.09318594634532928, + "learning_rate": 1.4711256852486921e-06, + "loss": 8.4928, + "step": 190450 + }, + { + "epoch": 0.9511348597967489, + "grad_norm": 0.08922556787729263, + "learning_rate": 1.4696237703071415e-06, + "loss": 8.5018, + "step": 190460 + }, + { + "epoch": 0.9511847986216885, + "grad_norm": 0.08539392054080963, + "learning_rate": 1.4681218553655912e-06, + "loss": 8.4915, + "step": 190470 + }, + { + "epoch": 0.9512347374466279, + "grad_norm": 0.09017814695835114, + "learning_rate": 1.4666199404240406e-06, + "loss": 8.4938, + "step": 190480 + }, + { + "epoch": 0.9512846762715673, + "grad_norm": 0.08659885823726654, + "learning_rate": 1.4651180254824903e-06, + "loss": 8.487, + "step": 190490 + }, + { + "epoch": 0.9513346150965067, + "grad_norm": 0.09293199330568314, + "learning_rate": 1.4636161105409396e-06, + "loss": 8.5041, + "step": 190500 + }, + { + "epoch": 0.9513845539214463, + "grad_norm": 0.08663637191057205, + "learning_rate": 1.4621141955993892e-06, + "loss": 8.49, + "step": 190510 + }, + { + "epoch": 0.9514344927463857, + "grad_norm": 0.09226275235414505, + "learning_rate": 1.4606122806578387e-06, + "loss": 8.4864, + "step": 190520 + }, + { + "epoch": 0.9514844315713251, + "grad_norm": 0.09186282753944397, + "learning_rate": 1.4591103657162883e-06, + "loss": 8.5054, + "step": 190530 + }, + { + "epoch": 0.9515343703962645, + "grad_norm": 0.09182189404964447, + "learning_rate": 1.4576084507747378e-06, + "loss": 8.5114, + "step": 190540 + }, + { + "epoch": 0.9515843092212041, + "grad_norm": 0.09429624676704407, + "learning_rate": 1.4561065358331874e-06, + "loss": 8.5163, + "step": 190550 + }, + { + "epoch": 0.9516342480461435, + "grad_norm": 0.09003430604934692, + "learning_rate": 1.4546046208916367e-06, + "loss": 8.4997, + "step": 190560 + }, + { + "epoch": 0.9516841868710829, + "grad_norm": 0.0899423211812973, + "learning_rate": 1.4531027059500864e-06, + "loss": 8.5114, + "step": 190570 + }, + { + "epoch": 0.9517341256960223, + "grad_norm": 0.0908636823296547, + "learning_rate": 1.4516007910085358e-06, + "loss": 8.5043, + "step": 190580 + }, + { + "epoch": 0.9517840645209619, + "grad_norm": 0.095972940325737, + "learning_rate": 1.4500988760669855e-06, + "loss": 8.5164, + "step": 190590 + }, + { + "epoch": 0.9518340033459013, + "grad_norm": 0.09233302623033524, + "learning_rate": 1.4485969611254349e-06, + "loss": 8.5112, + "step": 190600 + }, + { + "epoch": 0.9518839421708407, + "grad_norm": 0.0877123698592186, + "learning_rate": 1.4470950461838844e-06, + "loss": 8.5003, + "step": 190610 + }, + { + "epoch": 0.9519338809957801, + "grad_norm": 0.08805521577596664, + "learning_rate": 1.445593131242334e-06, + "loss": 8.5093, + "step": 190620 + }, + { + "epoch": 0.9519838198207197, + "grad_norm": 0.09961655735969543, + "learning_rate": 1.4440912163007835e-06, + "loss": 8.4953, + "step": 190630 + }, + { + "epoch": 0.9520337586456591, + "grad_norm": 0.09043627232313156, + "learning_rate": 1.442589301359233e-06, + "loss": 8.4995, + "step": 190640 + }, + { + "epoch": 0.9520836974705985, + "grad_norm": 0.08915705978870392, + "learning_rate": 1.4410873864176826e-06, + "loss": 8.502, + "step": 190650 + }, + { + "epoch": 0.9521336362955379, + "grad_norm": 0.08573364466428757, + "learning_rate": 1.4395854714761321e-06, + "loss": 8.5038, + "step": 190660 + }, + { + "epoch": 0.9521835751204775, + "grad_norm": 0.08786933869123459, + "learning_rate": 1.4380835565345817e-06, + "loss": 8.5091, + "step": 190670 + }, + { + "epoch": 0.9522335139454169, + "grad_norm": 0.08725972473621368, + "learning_rate": 1.436581641593031e-06, + "loss": 8.4832, + "step": 190680 + }, + { + "epoch": 0.9522834527703563, + "grad_norm": 0.08919131010770798, + "learning_rate": 1.4350797266514807e-06, + "loss": 8.4989, + "step": 190690 + }, + { + "epoch": 0.9523333915952957, + "grad_norm": 0.08816230297088623, + "learning_rate": 1.43357781170993e-06, + "loss": 8.4939, + "step": 190700 + }, + { + "epoch": 0.9523833304202353, + "grad_norm": 0.08805028349161148, + "learning_rate": 1.4320758967683798e-06, + "loss": 8.5108, + "step": 190710 + }, + { + "epoch": 0.9524332692451747, + "grad_norm": 0.08876121044158936, + "learning_rate": 1.4305739818268292e-06, + "loss": 8.5145, + "step": 190720 + }, + { + "epoch": 0.9524832080701141, + "grad_norm": 0.09014929085969925, + "learning_rate": 1.4290720668852787e-06, + "loss": 8.5085, + "step": 190730 + }, + { + "epoch": 0.9525331468950535, + "grad_norm": 0.09153331071138382, + "learning_rate": 1.4275701519437283e-06, + "loss": 8.4938, + "step": 190740 + }, + { + "epoch": 0.9525830857199931, + "grad_norm": 0.09297018498182297, + "learning_rate": 1.4260682370021778e-06, + "loss": 8.5057, + "step": 190750 + }, + { + "epoch": 0.9526330245449325, + "grad_norm": 0.09277532249689102, + "learning_rate": 1.4245663220606273e-06, + "loss": 8.5036, + "step": 190760 + }, + { + "epoch": 0.9526829633698719, + "grad_norm": 0.08956871181726456, + "learning_rate": 1.4230644071190769e-06, + "loss": 8.4772, + "step": 190770 + }, + { + "epoch": 0.9527329021948113, + "grad_norm": 0.09383154660463333, + "learning_rate": 1.4215624921775262e-06, + "loss": 8.5021, + "step": 190780 + }, + { + "epoch": 0.9527828410197509, + "grad_norm": 0.08647419512271881, + "learning_rate": 1.420060577235976e-06, + "loss": 8.4969, + "step": 190790 + }, + { + "epoch": 0.9528327798446903, + "grad_norm": 0.08593643456697464, + "learning_rate": 1.4185586622944253e-06, + "loss": 8.496, + "step": 190800 + }, + { + "epoch": 0.9528827186696297, + "grad_norm": 0.09408606588840485, + "learning_rate": 1.417056747352875e-06, + "loss": 8.4957, + "step": 190810 + }, + { + "epoch": 0.9529326574945691, + "grad_norm": 0.08725640922784805, + "learning_rate": 1.4155548324113244e-06, + "loss": 8.5259, + "step": 190820 + }, + { + "epoch": 0.9529825963195087, + "grad_norm": 0.09070757776498795, + "learning_rate": 1.414052917469774e-06, + "loss": 8.4818, + "step": 190830 + }, + { + "epoch": 0.9530325351444481, + "grad_norm": 0.08983830362558365, + "learning_rate": 1.4125510025282235e-06, + "loss": 8.4856, + "step": 190840 + }, + { + "epoch": 0.9530824739693875, + "grad_norm": 0.08678752928972244, + "learning_rate": 1.411049087586673e-06, + "loss": 8.513, + "step": 190850 + }, + { + "epoch": 0.9531324127943269, + "grad_norm": 0.09017140418291092, + "learning_rate": 1.4095471726451226e-06, + "loss": 8.518, + "step": 190860 + }, + { + "epoch": 0.9531823516192663, + "grad_norm": 0.09126543253660202, + "learning_rate": 1.408045257703572e-06, + "loss": 8.508, + "step": 190870 + }, + { + "epoch": 0.9532322904442059, + "grad_norm": 0.09493157267570496, + "learning_rate": 1.4065433427620216e-06, + "loss": 8.5058, + "step": 190880 + }, + { + "epoch": 0.9532822292691453, + "grad_norm": 0.09498272091150284, + "learning_rate": 1.4050414278204712e-06, + "loss": 8.5069, + "step": 190890 + }, + { + "epoch": 0.9533321680940847, + "grad_norm": 0.08452911674976349, + "learning_rate": 1.4035395128789205e-06, + "loss": 8.5176, + "step": 190900 + }, + { + "epoch": 0.9533821069190241, + "grad_norm": 0.08910375088453293, + "learning_rate": 1.4020375979373703e-06, + "loss": 8.5208, + "step": 190910 + }, + { + "epoch": 0.9534320457439637, + "grad_norm": 0.10094549506902695, + "learning_rate": 1.4005356829958196e-06, + "loss": 8.5057, + "step": 190920 + }, + { + "epoch": 0.9534819845689031, + "grad_norm": 0.08806487917900085, + "learning_rate": 1.3990337680542694e-06, + "loss": 8.4974, + "step": 190930 + }, + { + "epoch": 0.9535319233938425, + "grad_norm": 0.09561792016029358, + "learning_rate": 1.3975318531127187e-06, + "loss": 8.4931, + "step": 190940 + }, + { + "epoch": 0.9535818622187819, + "grad_norm": 0.08623037487268448, + "learning_rate": 1.3960299381711682e-06, + "loss": 8.496, + "step": 190950 + }, + { + "epoch": 0.9536318010437215, + "grad_norm": 0.08998066931962967, + "learning_rate": 1.3945280232296178e-06, + "loss": 8.5056, + "step": 190960 + }, + { + "epoch": 0.9536817398686609, + "grad_norm": 0.10207744687795639, + "learning_rate": 1.3930261082880673e-06, + "loss": 8.4875, + "step": 190970 + }, + { + "epoch": 0.9537316786936003, + "grad_norm": 0.09375947713851929, + "learning_rate": 1.3915241933465169e-06, + "loss": 8.4791, + "step": 190980 + }, + { + "epoch": 0.9537816175185397, + "grad_norm": 0.09087695181369781, + "learning_rate": 1.3900222784049664e-06, + "loss": 8.4961, + "step": 190990 + }, + { + "epoch": 0.9538315563434793, + "grad_norm": 0.09509552270174026, + "learning_rate": 1.3885203634634157e-06, + "loss": 8.4978, + "step": 191000 + }, + { + "epoch": 0.9538814951684187, + "grad_norm": 0.08731069415807724, + "learning_rate": 1.3870184485218655e-06, + "loss": 8.5142, + "step": 191010 + }, + { + "epoch": 0.9539314339933581, + "grad_norm": 0.09380168467760086, + "learning_rate": 1.3855165335803148e-06, + "loss": 8.4932, + "step": 191020 + }, + { + "epoch": 0.9539813728182975, + "grad_norm": 0.09554290771484375, + "learning_rate": 1.3840146186387646e-06, + "loss": 8.4965, + "step": 191030 + }, + { + "epoch": 0.9540313116432371, + "grad_norm": 0.09323696792125702, + "learning_rate": 1.382512703697214e-06, + "loss": 8.509, + "step": 191040 + }, + { + "epoch": 0.9540812504681765, + "grad_norm": 0.09462062269449234, + "learning_rate": 1.3810107887556635e-06, + "loss": 8.5129, + "step": 191050 + }, + { + "epoch": 0.9541311892931159, + "grad_norm": 0.08865117281675339, + "learning_rate": 1.379508873814113e-06, + "loss": 8.5054, + "step": 191060 + }, + { + "epoch": 0.9541811281180553, + "grad_norm": 0.09803714603185654, + "learning_rate": 1.3780069588725625e-06, + "loss": 8.5022, + "step": 191070 + }, + { + "epoch": 0.9542310669429949, + "grad_norm": 0.09523754566907883, + "learning_rate": 1.376505043931012e-06, + "loss": 8.4978, + "step": 191080 + }, + { + "epoch": 0.9542810057679343, + "grad_norm": 0.09519781172275543, + "learning_rate": 1.3750031289894616e-06, + "loss": 8.4971, + "step": 191090 + }, + { + "epoch": 0.9543309445928737, + "grad_norm": 0.09077487140893936, + "learning_rate": 1.373501214047911e-06, + "loss": 8.5253, + "step": 191100 + }, + { + "epoch": 0.9543808834178131, + "grad_norm": 0.08766099810600281, + "learning_rate": 1.3719992991063607e-06, + "loss": 8.4914, + "step": 191110 + }, + { + "epoch": 0.9544308222427527, + "grad_norm": 0.08919545263051987, + "learning_rate": 1.37049738416481e-06, + "loss": 8.4901, + "step": 191120 + }, + { + "epoch": 0.9544807610676921, + "grad_norm": 0.091376394033432, + "learning_rate": 1.3689954692232598e-06, + "loss": 8.4782, + "step": 191130 + }, + { + "epoch": 0.9545306998926315, + "grad_norm": 0.09148900955915451, + "learning_rate": 1.3674935542817091e-06, + "loss": 8.5037, + "step": 191140 + }, + { + "epoch": 0.9545806387175709, + "grad_norm": 0.08681777864694595, + "learning_rate": 1.3659916393401587e-06, + "loss": 8.4946, + "step": 191150 + }, + { + "epoch": 0.9546305775425105, + "grad_norm": 0.09286382794380188, + "learning_rate": 1.3644897243986082e-06, + "loss": 8.4937, + "step": 191160 + }, + { + "epoch": 0.9546805163674499, + "grad_norm": 0.08869923651218414, + "learning_rate": 1.3629878094570578e-06, + "loss": 8.5241, + "step": 191170 + }, + { + "epoch": 0.9547304551923893, + "grad_norm": 0.09262396395206451, + "learning_rate": 1.3614858945155073e-06, + "loss": 8.4772, + "step": 191180 + }, + { + "epoch": 0.9547803940173287, + "grad_norm": 0.08976420015096664, + "learning_rate": 1.3599839795739568e-06, + "loss": 8.5162, + "step": 191190 + }, + { + "epoch": 0.9548303328422683, + "grad_norm": 0.08680476248264313, + "learning_rate": 1.3584820646324064e-06, + "loss": 8.5119, + "step": 191200 + }, + { + "epoch": 0.9548802716672077, + "grad_norm": 0.09427861869335175, + "learning_rate": 1.356980149690856e-06, + "loss": 8.5139, + "step": 191210 + }, + { + "epoch": 0.9549302104921471, + "grad_norm": 0.08783675730228424, + "learning_rate": 1.3554782347493053e-06, + "loss": 8.5041, + "step": 191220 + }, + { + "epoch": 0.9549801493170865, + "grad_norm": 0.09209182858467102, + "learning_rate": 1.353976319807755e-06, + "loss": 8.5103, + "step": 191230 + }, + { + "epoch": 0.9550300881420261, + "grad_norm": 0.08795662224292755, + "learning_rate": 1.3524744048662043e-06, + "loss": 8.5108, + "step": 191240 + }, + { + "epoch": 0.9550800269669655, + "grad_norm": 0.09797421097755432, + "learning_rate": 1.350972489924654e-06, + "loss": 8.4915, + "step": 191250 + }, + { + "epoch": 0.9551299657919049, + "grad_norm": 0.0975293442606926, + "learning_rate": 1.3494705749831034e-06, + "loss": 8.5011, + "step": 191260 + }, + { + "epoch": 0.9551799046168443, + "grad_norm": 0.09412509948015213, + "learning_rate": 1.347968660041553e-06, + "loss": 8.5011, + "step": 191270 + }, + { + "epoch": 0.9552298434417839, + "grad_norm": 0.09029638022184372, + "learning_rate": 1.3464667451000025e-06, + "loss": 8.5052, + "step": 191280 + }, + { + "epoch": 0.9552797822667233, + "grad_norm": 0.09370943903923035, + "learning_rate": 1.344964830158452e-06, + "loss": 8.4932, + "step": 191290 + }, + { + "epoch": 0.9553297210916627, + "grad_norm": 0.09150079637765884, + "learning_rate": 1.3434629152169016e-06, + "loss": 8.5044, + "step": 191300 + }, + { + "epoch": 0.9553796599166021, + "grad_norm": 0.09412145614624023, + "learning_rate": 1.3419610002753511e-06, + "loss": 8.5145, + "step": 191310 + }, + { + "epoch": 0.9554295987415417, + "grad_norm": 0.09421419352293015, + "learning_rate": 1.3404590853338005e-06, + "loss": 8.5175, + "step": 191320 + }, + { + "epoch": 0.9554795375664811, + "grad_norm": 0.08627758920192719, + "learning_rate": 1.3389571703922502e-06, + "loss": 8.497, + "step": 191330 + }, + { + "epoch": 0.9555294763914205, + "grad_norm": 0.0896778404712677, + "learning_rate": 1.3374552554506996e-06, + "loss": 8.4944, + "step": 191340 + }, + { + "epoch": 0.9555794152163599, + "grad_norm": 0.09137307852506638, + "learning_rate": 1.3359533405091493e-06, + "loss": 8.4975, + "step": 191350 + }, + { + "epoch": 0.9556293540412995, + "grad_norm": 0.0910387709736824, + "learning_rate": 1.3344514255675987e-06, + "loss": 8.5027, + "step": 191360 + }, + { + "epoch": 0.9556792928662389, + "grad_norm": 0.09388083219528198, + "learning_rate": 1.3329495106260482e-06, + "loss": 8.5204, + "step": 191370 + }, + { + "epoch": 0.9557292316911783, + "grad_norm": 0.0911535695195198, + "learning_rate": 1.3314475956844977e-06, + "loss": 8.5079, + "step": 191380 + }, + { + "epoch": 0.9557791705161177, + "grad_norm": 0.0940667912364006, + "learning_rate": 1.3299456807429473e-06, + "loss": 8.5054, + "step": 191390 + }, + { + "epoch": 0.9558291093410572, + "grad_norm": 0.08681704849004745, + "learning_rate": 1.3284437658013968e-06, + "loss": 8.4992, + "step": 191400 + }, + { + "epoch": 0.9558790481659967, + "grad_norm": 0.09067001193761826, + "learning_rate": 1.3269418508598464e-06, + "loss": 8.5076, + "step": 191410 + }, + { + "epoch": 0.9559289869909361, + "grad_norm": 0.08941010385751724, + "learning_rate": 1.325439935918296e-06, + "loss": 8.4981, + "step": 191420 + }, + { + "epoch": 0.9559789258158755, + "grad_norm": 0.08782663941383362, + "learning_rate": 1.3239380209767455e-06, + "loss": 8.4801, + "step": 191430 + }, + { + "epoch": 0.956028864640815, + "grad_norm": 0.0957464948296547, + "learning_rate": 1.3224361060351948e-06, + "loss": 8.4795, + "step": 191440 + }, + { + "epoch": 0.9560788034657545, + "grad_norm": 0.0979733094573021, + "learning_rate": 1.3209341910936445e-06, + "loss": 8.4909, + "step": 191450 + }, + { + "epoch": 0.9561287422906939, + "grad_norm": 0.09294435381889343, + "learning_rate": 1.3194322761520939e-06, + "loss": 8.4827, + "step": 191460 + }, + { + "epoch": 0.9561786811156333, + "grad_norm": 0.09183111786842346, + "learning_rate": 1.3179303612105436e-06, + "loss": 8.4947, + "step": 191470 + }, + { + "epoch": 0.9562286199405728, + "grad_norm": 0.0917709469795227, + "learning_rate": 1.316428446268993e-06, + "loss": 8.5117, + "step": 191480 + }, + { + "epoch": 0.9562785587655123, + "grad_norm": 0.0903320387005806, + "learning_rate": 1.3149265313274425e-06, + "loss": 8.4863, + "step": 191490 + }, + { + "epoch": 0.9563284975904517, + "grad_norm": 0.09910201281309128, + "learning_rate": 1.313424616385892e-06, + "loss": 8.519, + "step": 191500 + }, + { + "epoch": 0.9563784364153911, + "grad_norm": 0.08993766456842422, + "learning_rate": 1.3119227014443416e-06, + "loss": 8.5283, + "step": 191510 + }, + { + "epoch": 0.9564283752403306, + "grad_norm": 0.09398555755615234, + "learning_rate": 1.3104207865027911e-06, + "loss": 8.4969, + "step": 191520 + }, + { + "epoch": 0.9564783140652701, + "grad_norm": 0.08856987208127975, + "learning_rate": 1.3089188715612407e-06, + "loss": 8.5304, + "step": 191530 + }, + { + "epoch": 0.9565282528902095, + "grad_norm": 0.09370505064725876, + "learning_rate": 1.30741695661969e-06, + "loss": 8.5073, + "step": 191540 + }, + { + "epoch": 0.9565781917151489, + "grad_norm": 0.08654750138521194, + "learning_rate": 1.3059150416781398e-06, + "loss": 8.502, + "step": 191550 + }, + { + "epoch": 0.9566281305400884, + "grad_norm": 0.08909036964178085, + "learning_rate": 1.304413126736589e-06, + "loss": 8.5031, + "step": 191560 + }, + { + "epoch": 0.9566780693650279, + "grad_norm": 0.09467563778162003, + "learning_rate": 1.3029112117950388e-06, + "loss": 8.4999, + "step": 191570 + }, + { + "epoch": 0.9567280081899673, + "grad_norm": 0.08794749528169632, + "learning_rate": 1.3014092968534882e-06, + "loss": 8.5046, + "step": 191580 + }, + { + "epoch": 0.9567779470149067, + "grad_norm": 0.08709059655666351, + "learning_rate": 1.2999073819119377e-06, + "loss": 8.4894, + "step": 191590 + }, + { + "epoch": 0.9568278858398462, + "grad_norm": 0.09558872878551483, + "learning_rate": 1.2984054669703873e-06, + "loss": 8.5192, + "step": 191600 + }, + { + "epoch": 0.9568778246647857, + "grad_norm": 0.09214767813682556, + "learning_rate": 1.2969035520288368e-06, + "loss": 8.5062, + "step": 191610 + }, + { + "epoch": 0.9569277634897251, + "grad_norm": 0.08921287953853607, + "learning_rate": 1.2954016370872863e-06, + "loss": 8.5002, + "step": 191620 + }, + { + "epoch": 0.9569777023146645, + "grad_norm": 0.09433959424495697, + "learning_rate": 1.2938997221457359e-06, + "loss": 8.497, + "step": 191630 + }, + { + "epoch": 0.957027641139604, + "grad_norm": 0.08520886301994324, + "learning_rate": 1.2923978072041852e-06, + "loss": 8.4983, + "step": 191640 + }, + { + "epoch": 0.9570775799645435, + "grad_norm": 0.0877714678645134, + "learning_rate": 1.290895892262635e-06, + "loss": 8.513, + "step": 191650 + }, + { + "epoch": 0.9571275187894829, + "grad_norm": 0.09116245806217194, + "learning_rate": 1.2893939773210843e-06, + "loss": 8.4974, + "step": 191660 + }, + { + "epoch": 0.9571774576144223, + "grad_norm": 0.08422242105007172, + "learning_rate": 1.287892062379534e-06, + "loss": 8.5094, + "step": 191670 + }, + { + "epoch": 0.9572273964393618, + "grad_norm": 0.08759858459234238, + "learning_rate": 1.2863901474379834e-06, + "loss": 8.4916, + "step": 191680 + }, + { + "epoch": 0.9572773352643013, + "grad_norm": 0.08717787265777588, + "learning_rate": 1.284888232496433e-06, + "loss": 8.5063, + "step": 191690 + }, + { + "epoch": 0.9573272740892407, + "grad_norm": 0.09190387278795242, + "learning_rate": 1.2833863175548825e-06, + "loss": 8.4875, + "step": 191700 + }, + { + "epoch": 0.9573772129141801, + "grad_norm": 0.09158718585968018, + "learning_rate": 1.281884402613332e-06, + "loss": 8.5172, + "step": 191710 + }, + { + "epoch": 0.9574271517391196, + "grad_norm": 0.09621883928775787, + "learning_rate": 1.2803824876717816e-06, + "loss": 8.5133, + "step": 191720 + }, + { + "epoch": 0.957477090564059, + "grad_norm": 0.08670492470264435, + "learning_rate": 1.2788805727302311e-06, + "loss": 8.5062, + "step": 191730 + }, + { + "epoch": 0.9575270293889985, + "grad_norm": 0.08832064270973206, + "learning_rate": 1.2773786577886807e-06, + "loss": 8.4969, + "step": 191740 + }, + { + "epoch": 0.9575769682139379, + "grad_norm": 0.08412336558103561, + "learning_rate": 1.2758767428471302e-06, + "loss": 8.5298, + "step": 191750 + }, + { + "epoch": 0.9576269070388774, + "grad_norm": 0.09177365899085999, + "learning_rate": 1.2743748279055795e-06, + "loss": 8.494, + "step": 191760 + }, + { + "epoch": 0.9576768458638169, + "grad_norm": 0.08740728348493576, + "learning_rate": 1.2728729129640293e-06, + "loss": 8.4938, + "step": 191770 + }, + { + "epoch": 0.9577267846887563, + "grad_norm": 0.08786563575267792, + "learning_rate": 1.2713709980224786e-06, + "loss": 8.5155, + "step": 191780 + }, + { + "epoch": 0.9577767235136957, + "grad_norm": 0.09397321939468384, + "learning_rate": 1.2698690830809284e-06, + "loss": 8.5062, + "step": 191790 + }, + { + "epoch": 0.9578266623386352, + "grad_norm": 0.08770589530467987, + "learning_rate": 1.2683671681393777e-06, + "loss": 8.485, + "step": 191800 + }, + { + "epoch": 0.9578766011635746, + "grad_norm": 0.08947274088859558, + "learning_rate": 1.2668652531978272e-06, + "loss": 8.4993, + "step": 191810 + }, + { + "epoch": 0.9579265399885141, + "grad_norm": 0.0907110795378685, + "learning_rate": 1.2653633382562768e-06, + "loss": 8.5106, + "step": 191820 + }, + { + "epoch": 0.9579764788134535, + "grad_norm": 0.08664959669113159, + "learning_rate": 1.2638614233147263e-06, + "loss": 8.4997, + "step": 191830 + }, + { + "epoch": 0.9580264176383929, + "grad_norm": 0.09159454703330994, + "learning_rate": 1.2623595083731759e-06, + "loss": 8.5125, + "step": 191840 + }, + { + "epoch": 0.9580763564633324, + "grad_norm": 0.09214504808187485, + "learning_rate": 1.2608575934316254e-06, + "loss": 8.4985, + "step": 191850 + }, + { + "epoch": 0.9581262952882719, + "grad_norm": 0.09041109681129456, + "learning_rate": 1.2593556784900747e-06, + "loss": 8.4857, + "step": 191860 + }, + { + "epoch": 0.9581762341132113, + "grad_norm": 0.09253381192684174, + "learning_rate": 1.2578537635485245e-06, + "loss": 8.4968, + "step": 191870 + }, + { + "epoch": 0.9582261729381507, + "grad_norm": 0.08627009391784668, + "learning_rate": 1.2563518486069738e-06, + "loss": 8.4892, + "step": 191880 + }, + { + "epoch": 0.9582761117630902, + "grad_norm": 0.08717307448387146, + "learning_rate": 1.2548499336654236e-06, + "loss": 8.4982, + "step": 191890 + }, + { + "epoch": 0.9583260505880297, + "grad_norm": 0.08845217525959015, + "learning_rate": 1.253348018723873e-06, + "loss": 8.4926, + "step": 191900 + }, + { + "epoch": 0.9583759894129691, + "grad_norm": 0.09016247093677521, + "learning_rate": 1.2518461037823225e-06, + "loss": 8.4968, + "step": 191910 + }, + { + "epoch": 0.9584259282379085, + "grad_norm": 0.08623816072940826, + "learning_rate": 1.250344188840772e-06, + "loss": 8.4969, + "step": 191920 + }, + { + "epoch": 0.958475867062848, + "grad_norm": 0.0954393595457077, + "learning_rate": 1.2488422738992216e-06, + "loss": 8.517, + "step": 191930 + }, + { + "epoch": 0.9585258058877875, + "grad_norm": 0.09252654761075974, + "learning_rate": 1.247340358957671e-06, + "loss": 8.4971, + "step": 191940 + }, + { + "epoch": 0.9585757447127269, + "grad_norm": 0.08835329860448837, + "learning_rate": 1.2458384440161206e-06, + "loss": 8.5138, + "step": 191950 + }, + { + "epoch": 0.9586256835376663, + "grad_norm": 0.08202789723873138, + "learning_rate": 1.24433652907457e-06, + "loss": 8.4846, + "step": 191960 + }, + { + "epoch": 0.9586756223626058, + "grad_norm": 0.09334851056337357, + "learning_rate": 1.2428346141330197e-06, + "loss": 8.5079, + "step": 191970 + }, + { + "epoch": 0.9587255611875453, + "grad_norm": 0.08949562907218933, + "learning_rate": 1.241332699191469e-06, + "loss": 8.4951, + "step": 191980 + }, + { + "epoch": 0.9587755000124847, + "grad_norm": 0.09301643818616867, + "learning_rate": 1.2398307842499188e-06, + "loss": 8.5035, + "step": 191990 + }, + { + "epoch": 0.9588254388374241, + "grad_norm": 0.08926796913146973, + "learning_rate": 1.2383288693083681e-06, + "loss": 8.5123, + "step": 192000 + }, + { + "epoch": 0.9588753776623636, + "grad_norm": 0.09037186205387115, + "learning_rate": 1.236826954366818e-06, + "loss": 8.5027, + "step": 192010 + }, + { + "epoch": 0.9589253164873031, + "grad_norm": 0.09102381020784378, + "learning_rate": 1.2353250394252672e-06, + "loss": 8.5107, + "step": 192020 + }, + { + "epoch": 0.9589752553122425, + "grad_norm": 0.08897027373313904, + "learning_rate": 1.2338231244837168e-06, + "loss": 8.5083, + "step": 192030 + }, + { + "epoch": 0.9590251941371819, + "grad_norm": 0.08504099398851395, + "learning_rate": 1.2323212095421663e-06, + "loss": 8.5118, + "step": 192040 + }, + { + "epoch": 0.9590751329621214, + "grad_norm": 0.09035465121269226, + "learning_rate": 1.2308192946006159e-06, + "loss": 8.502, + "step": 192050 + }, + { + "epoch": 0.9591250717870609, + "grad_norm": 0.0969761461019516, + "learning_rate": 1.2293173796590654e-06, + "loss": 8.5028, + "step": 192060 + }, + { + "epoch": 0.9591750106120003, + "grad_norm": 0.0866863876581192, + "learning_rate": 1.227815464717515e-06, + "loss": 8.5155, + "step": 192070 + }, + { + "epoch": 0.9592249494369397, + "grad_norm": 0.08988199383020401, + "learning_rate": 1.2263135497759643e-06, + "loss": 8.4989, + "step": 192080 + }, + { + "epoch": 0.9592748882618792, + "grad_norm": 0.09326018393039703, + "learning_rate": 1.224811634834414e-06, + "loss": 8.5031, + "step": 192090 + }, + { + "epoch": 0.9593248270868187, + "grad_norm": 0.0929257795214653, + "learning_rate": 1.2233097198928634e-06, + "loss": 8.4906, + "step": 192100 + }, + { + "epoch": 0.9593747659117581, + "grad_norm": 0.09300316870212555, + "learning_rate": 1.2218078049513131e-06, + "loss": 8.5025, + "step": 192110 + }, + { + "epoch": 0.9594247047366975, + "grad_norm": 0.09423361718654633, + "learning_rate": 1.2203058900097624e-06, + "loss": 8.5001, + "step": 192120 + }, + { + "epoch": 0.959474643561637, + "grad_norm": 0.09319435060024261, + "learning_rate": 1.218803975068212e-06, + "loss": 8.4935, + "step": 192130 + }, + { + "epoch": 0.9595245823865765, + "grad_norm": 0.09030423313379288, + "learning_rate": 1.2173020601266615e-06, + "loss": 8.4956, + "step": 192140 + }, + { + "epoch": 0.9595745212115159, + "grad_norm": 0.08913615345954895, + "learning_rate": 1.215800145185111e-06, + "loss": 8.5366, + "step": 192150 + }, + { + "epoch": 0.9596244600364553, + "grad_norm": 0.09200779348611832, + "learning_rate": 1.2142982302435606e-06, + "loss": 8.5005, + "step": 192160 + }, + { + "epoch": 0.9596743988613948, + "grad_norm": 0.08741243183612823, + "learning_rate": 1.2127963153020102e-06, + "loss": 8.4892, + "step": 192170 + }, + { + "epoch": 0.9597243376863343, + "grad_norm": 0.09448469430208206, + "learning_rate": 1.2112944003604595e-06, + "loss": 8.4926, + "step": 192180 + }, + { + "epoch": 0.9597742765112737, + "grad_norm": 0.0888211578130722, + "learning_rate": 1.2097924854189092e-06, + "loss": 8.4874, + "step": 192190 + }, + { + "epoch": 0.9598242153362131, + "grad_norm": 0.0948527380824089, + "learning_rate": 1.2082905704773586e-06, + "loss": 8.5015, + "step": 192200 + }, + { + "epoch": 0.9598741541611526, + "grad_norm": 0.09242790937423706, + "learning_rate": 1.2067886555358083e-06, + "loss": 8.4939, + "step": 192210 + }, + { + "epoch": 0.959924092986092, + "grad_norm": 0.09047332406044006, + "learning_rate": 1.2052867405942577e-06, + "loss": 8.4966, + "step": 192220 + }, + { + "epoch": 0.9599740318110315, + "grad_norm": 0.09905733168125153, + "learning_rate": 1.2037848256527072e-06, + "loss": 8.5018, + "step": 192230 + }, + { + "epoch": 0.9600239706359709, + "grad_norm": 0.09126092493534088, + "learning_rate": 1.2022829107111568e-06, + "loss": 8.5119, + "step": 192240 + }, + { + "epoch": 0.9600739094609104, + "grad_norm": 0.09281742572784424, + "learning_rate": 1.2007809957696063e-06, + "loss": 8.5087, + "step": 192250 + }, + { + "epoch": 0.9601238482858498, + "grad_norm": 0.09464964270591736, + "learning_rate": 1.1992790808280558e-06, + "loss": 8.505, + "step": 192260 + }, + { + "epoch": 0.9601737871107893, + "grad_norm": 0.09487392008304596, + "learning_rate": 1.1977771658865054e-06, + "loss": 8.4981, + "step": 192270 + }, + { + "epoch": 0.9602237259357287, + "grad_norm": 0.09476413577795029, + "learning_rate": 1.196275250944955e-06, + "loss": 8.4889, + "step": 192280 + }, + { + "epoch": 0.9602736647606682, + "grad_norm": 0.09175974875688553, + "learning_rate": 1.1947733360034045e-06, + "loss": 8.4984, + "step": 192290 + }, + { + "epoch": 0.9603236035856076, + "grad_norm": 0.09201910346746445, + "learning_rate": 1.1932714210618538e-06, + "loss": 8.4981, + "step": 192300 + }, + { + "epoch": 0.9603735424105471, + "grad_norm": 0.0915134996175766, + "learning_rate": 1.1917695061203036e-06, + "loss": 8.5039, + "step": 192310 + }, + { + "epoch": 0.9604234812354865, + "grad_norm": 0.08767704665660858, + "learning_rate": 1.1902675911787529e-06, + "loss": 8.5014, + "step": 192320 + }, + { + "epoch": 0.960473420060426, + "grad_norm": 0.0916207805275917, + "learning_rate": 1.1887656762372026e-06, + "loss": 8.5048, + "step": 192330 + }, + { + "epoch": 0.9605233588853654, + "grad_norm": 0.09325848519802094, + "learning_rate": 1.187263761295652e-06, + "loss": 8.5005, + "step": 192340 + }, + { + "epoch": 0.9605732977103049, + "grad_norm": 0.08897262811660767, + "learning_rate": 1.1857618463541015e-06, + "loss": 8.5037, + "step": 192350 + }, + { + "epoch": 0.9606232365352443, + "grad_norm": 0.08885764330625534, + "learning_rate": 1.184259931412551e-06, + "loss": 8.5089, + "step": 192360 + }, + { + "epoch": 0.9606731753601838, + "grad_norm": 0.085177481174469, + "learning_rate": 1.1827580164710006e-06, + "loss": 8.5113, + "step": 192370 + }, + { + "epoch": 0.9607231141851232, + "grad_norm": 0.08419841527938843, + "learning_rate": 1.1812561015294501e-06, + "loss": 8.48, + "step": 192380 + }, + { + "epoch": 0.9607730530100627, + "grad_norm": 0.08771277964115143, + "learning_rate": 1.1797541865878997e-06, + "loss": 8.5046, + "step": 192390 + }, + { + "epoch": 0.9608229918350021, + "grad_norm": 0.09421413391828537, + "learning_rate": 1.178252271646349e-06, + "loss": 8.479, + "step": 192400 + }, + { + "epoch": 0.9608729306599416, + "grad_norm": 0.09432334452867508, + "learning_rate": 1.1767503567047988e-06, + "loss": 8.4848, + "step": 192410 + }, + { + "epoch": 0.960922869484881, + "grad_norm": 0.09068858623504639, + "learning_rate": 1.175248441763248e-06, + "loss": 8.5012, + "step": 192420 + }, + { + "epoch": 0.9609728083098205, + "grad_norm": 0.09556064754724503, + "learning_rate": 1.1737465268216979e-06, + "loss": 8.5111, + "step": 192430 + }, + { + "epoch": 0.9610227471347599, + "grad_norm": 0.08801019191741943, + "learning_rate": 1.1722446118801472e-06, + "loss": 8.4905, + "step": 192440 + }, + { + "epoch": 0.9610726859596994, + "grad_norm": 0.09224274009466171, + "learning_rate": 1.1707426969385967e-06, + "loss": 8.4729, + "step": 192450 + }, + { + "epoch": 0.9611226247846388, + "grad_norm": 0.08931384235620499, + "learning_rate": 1.1692407819970463e-06, + "loss": 8.5073, + "step": 192460 + }, + { + "epoch": 0.9611725636095783, + "grad_norm": 0.09076393395662308, + "learning_rate": 1.1677388670554958e-06, + "loss": 8.4992, + "step": 192470 + }, + { + "epoch": 0.9612225024345177, + "grad_norm": 0.08941461890935898, + "learning_rate": 1.1662369521139454e-06, + "loss": 8.5019, + "step": 192480 + }, + { + "epoch": 0.9612724412594572, + "grad_norm": 0.08615435659885406, + "learning_rate": 1.164735037172395e-06, + "loss": 8.4891, + "step": 192490 + }, + { + "epoch": 0.9613223800843966, + "grad_norm": 0.08833363652229309, + "learning_rate": 1.1632331222308442e-06, + "loss": 8.5133, + "step": 192500 + }, + { + "epoch": 0.961372318909336, + "grad_norm": 0.09384980797767639, + "learning_rate": 1.161731207289294e-06, + "loss": 8.5135, + "step": 192510 + }, + { + "epoch": 0.9614222577342755, + "grad_norm": 0.09676310420036316, + "learning_rate": 1.1602292923477433e-06, + "loss": 8.4996, + "step": 192520 + }, + { + "epoch": 0.961472196559215, + "grad_norm": 0.09096337109804153, + "learning_rate": 1.158727377406193e-06, + "loss": 8.4903, + "step": 192530 + }, + { + "epoch": 0.9615221353841544, + "grad_norm": 0.09308335185050964, + "learning_rate": 1.1572254624646424e-06, + "loss": 8.5021, + "step": 192540 + }, + { + "epoch": 0.9615720742090939, + "grad_norm": 0.0906360074877739, + "learning_rate": 1.1557235475230922e-06, + "loss": 8.5127, + "step": 192550 + }, + { + "epoch": 0.9616220130340333, + "grad_norm": 0.09376540780067444, + "learning_rate": 1.1542216325815415e-06, + "loss": 8.5081, + "step": 192560 + }, + { + "epoch": 0.9616719518589728, + "grad_norm": 0.09033221006393433, + "learning_rate": 1.152719717639991e-06, + "loss": 8.5068, + "step": 192570 + }, + { + "epoch": 0.9617218906839122, + "grad_norm": 0.0966610535979271, + "learning_rate": 1.1512178026984406e-06, + "loss": 8.4919, + "step": 192580 + }, + { + "epoch": 0.9617718295088517, + "grad_norm": 0.09105295687913895, + "learning_rate": 1.1497158877568901e-06, + "loss": 8.5013, + "step": 192590 + }, + { + "epoch": 0.9618217683337911, + "grad_norm": 0.08788570016622543, + "learning_rate": 1.1482139728153397e-06, + "loss": 8.4736, + "step": 192600 + }, + { + "epoch": 0.9618717071587306, + "grad_norm": 0.09203591197729111, + "learning_rate": 1.1467120578737892e-06, + "loss": 8.493, + "step": 192610 + }, + { + "epoch": 0.96192164598367, + "grad_norm": 0.09675536304712296, + "learning_rate": 1.1452101429322385e-06, + "loss": 8.5179, + "step": 192620 + }, + { + "epoch": 0.9619715848086094, + "grad_norm": 0.09318403899669647, + "learning_rate": 1.1437082279906883e-06, + "loss": 8.5007, + "step": 192630 + }, + { + "epoch": 0.9620215236335489, + "grad_norm": 0.09018708020448685, + "learning_rate": 1.1422063130491376e-06, + "loss": 8.4895, + "step": 192640 + }, + { + "epoch": 0.9620714624584884, + "grad_norm": 0.09459145367145538, + "learning_rate": 1.1407043981075874e-06, + "loss": 8.5098, + "step": 192650 + }, + { + "epoch": 0.9621214012834278, + "grad_norm": 0.09730951488018036, + "learning_rate": 1.1392024831660367e-06, + "loss": 8.5015, + "step": 192660 + }, + { + "epoch": 0.9621713401083672, + "grad_norm": 0.09757350385189056, + "learning_rate": 1.1377005682244863e-06, + "loss": 8.5212, + "step": 192670 + }, + { + "epoch": 0.9622212789333067, + "grad_norm": 0.08909895271062851, + "learning_rate": 1.1361986532829358e-06, + "loss": 8.508, + "step": 192680 + }, + { + "epoch": 0.9622712177582462, + "grad_norm": 0.09438234567642212, + "learning_rate": 1.1346967383413853e-06, + "loss": 8.4956, + "step": 192690 + }, + { + "epoch": 0.9623211565831856, + "grad_norm": 0.08857671916484833, + "learning_rate": 1.1331948233998349e-06, + "loss": 8.5006, + "step": 192700 + }, + { + "epoch": 0.962371095408125, + "grad_norm": 0.0914953425526619, + "learning_rate": 1.1316929084582844e-06, + "loss": 8.5024, + "step": 192710 + }, + { + "epoch": 0.9624210342330645, + "grad_norm": 0.08889735490083694, + "learning_rate": 1.1301909935167338e-06, + "loss": 8.5009, + "step": 192720 + }, + { + "epoch": 0.962470973058004, + "grad_norm": 0.09225352108478546, + "learning_rate": 1.1286890785751835e-06, + "loss": 8.5067, + "step": 192730 + }, + { + "epoch": 0.9625209118829434, + "grad_norm": 0.09539781510829926, + "learning_rate": 1.1271871636336328e-06, + "loss": 8.5049, + "step": 192740 + }, + { + "epoch": 0.9625708507078828, + "grad_norm": 0.0937805026769638, + "learning_rate": 1.1256852486920826e-06, + "loss": 8.5277, + "step": 192750 + }, + { + "epoch": 0.9626207895328223, + "grad_norm": 0.08874644339084625, + "learning_rate": 1.124183333750532e-06, + "loss": 8.5063, + "step": 192760 + }, + { + "epoch": 0.9626707283577618, + "grad_norm": 0.0926111564040184, + "learning_rate": 1.1226814188089815e-06, + "loss": 8.495, + "step": 192770 + }, + { + "epoch": 0.9627206671827012, + "grad_norm": 0.09661145508289337, + "learning_rate": 1.121179503867431e-06, + "loss": 8.5039, + "step": 192780 + }, + { + "epoch": 0.9627706060076406, + "grad_norm": 0.09471116960048676, + "learning_rate": 1.1196775889258806e-06, + "loss": 8.4877, + "step": 192790 + }, + { + "epoch": 0.9628205448325801, + "grad_norm": 0.09448757022619247, + "learning_rate": 1.11817567398433e-06, + "loss": 8.4881, + "step": 192800 + }, + { + "epoch": 0.9628704836575196, + "grad_norm": 0.09081356972455978, + "learning_rate": 1.1166737590427796e-06, + "loss": 8.5098, + "step": 192810 + }, + { + "epoch": 0.962920422482459, + "grad_norm": 0.08594221621751785, + "learning_rate": 1.1151718441012292e-06, + "loss": 8.51, + "step": 192820 + }, + { + "epoch": 0.9629703613073984, + "grad_norm": 0.08855900168418884, + "learning_rate": 1.1136699291596787e-06, + "loss": 8.5147, + "step": 192830 + }, + { + "epoch": 0.9630203001323379, + "grad_norm": 0.09176114946603775, + "learning_rate": 1.112168014218128e-06, + "loss": 8.5023, + "step": 192840 + }, + { + "epoch": 0.9630702389572773, + "grad_norm": 0.09412090480327606, + "learning_rate": 1.1106660992765778e-06, + "loss": 8.5108, + "step": 192850 + }, + { + "epoch": 0.9631201777822168, + "grad_norm": 0.0874660536646843, + "learning_rate": 1.1091641843350272e-06, + "loss": 8.5087, + "step": 192860 + }, + { + "epoch": 0.9631701166071562, + "grad_norm": 0.08917557448148727, + "learning_rate": 1.107662269393477e-06, + "loss": 8.502, + "step": 192870 + }, + { + "epoch": 0.9632200554320957, + "grad_norm": 0.09307212382555008, + "learning_rate": 1.1061603544519262e-06, + "loss": 8.5, + "step": 192880 + }, + { + "epoch": 0.9632699942570351, + "grad_norm": 0.0892016664147377, + "learning_rate": 1.1046584395103758e-06, + "loss": 8.4979, + "step": 192890 + }, + { + "epoch": 0.9633199330819746, + "grad_norm": 0.08102846145629883, + "learning_rate": 1.1031565245688253e-06, + "loss": 8.511, + "step": 192900 + }, + { + "epoch": 0.963369871906914, + "grad_norm": 0.09051341563463211, + "learning_rate": 1.1016546096272749e-06, + "loss": 8.5003, + "step": 192910 + }, + { + "epoch": 0.9634198107318535, + "grad_norm": 0.09337877482175827, + "learning_rate": 1.1001526946857244e-06, + "loss": 8.5054, + "step": 192920 + }, + { + "epoch": 0.9634697495567929, + "grad_norm": 0.0894298255443573, + "learning_rate": 1.098650779744174e-06, + "loss": 8.5, + "step": 192930 + }, + { + "epoch": 0.9635196883817324, + "grad_norm": 0.09010350704193115, + "learning_rate": 1.0971488648026233e-06, + "loss": 8.5062, + "step": 192940 + }, + { + "epoch": 0.9635696272066718, + "grad_norm": 0.08480501919984818, + "learning_rate": 1.095646949861073e-06, + "loss": 8.5046, + "step": 192950 + }, + { + "epoch": 0.9636195660316113, + "grad_norm": 0.09088391810655594, + "learning_rate": 1.0941450349195224e-06, + "loss": 8.4948, + "step": 192960 + }, + { + "epoch": 0.9636695048565507, + "grad_norm": 0.09037703275680542, + "learning_rate": 1.092643119977972e-06, + "loss": 8.4929, + "step": 192970 + }, + { + "epoch": 0.9637194436814902, + "grad_norm": 0.08855674415826797, + "learning_rate": 1.0911412050364215e-06, + "loss": 8.508, + "step": 192980 + }, + { + "epoch": 0.9637693825064296, + "grad_norm": 0.08638061583042145, + "learning_rate": 1.0896392900948708e-06, + "loss": 8.5144, + "step": 192990 + }, + { + "epoch": 0.963819321331369, + "grad_norm": 0.09214214235544205, + "learning_rate": 1.0881373751533205e-06, + "loss": 8.4967, + "step": 193000 + }, + { + "epoch": 0.9638692601563085, + "grad_norm": 0.09081536531448364, + "learning_rate": 1.0866354602117699e-06, + "loss": 8.5044, + "step": 193010 + }, + { + "epoch": 0.963919198981248, + "grad_norm": 0.09031979739665985, + "learning_rate": 1.0851335452702196e-06, + "loss": 8.5013, + "step": 193020 + }, + { + "epoch": 0.9639691378061874, + "grad_norm": 0.08983229100704193, + "learning_rate": 1.083631630328669e-06, + "loss": 8.4903, + "step": 193030 + }, + { + "epoch": 0.9640190766311268, + "grad_norm": 0.08677353709936142, + "learning_rate": 1.0821297153871185e-06, + "loss": 8.5, + "step": 193040 + }, + { + "epoch": 0.9640690154560663, + "grad_norm": 0.09196817129850388, + "learning_rate": 1.080627800445568e-06, + "loss": 8.508, + "step": 193050 + }, + { + "epoch": 0.9641189542810058, + "grad_norm": 0.09112430363893509, + "learning_rate": 1.0791258855040176e-06, + "loss": 8.5005, + "step": 193060 + }, + { + "epoch": 0.9641688931059452, + "grad_norm": 0.09055691957473755, + "learning_rate": 1.0776239705624671e-06, + "loss": 8.4864, + "step": 193070 + }, + { + "epoch": 0.9642188319308846, + "grad_norm": 0.09631075710058212, + "learning_rate": 1.0761220556209167e-06, + "loss": 8.5011, + "step": 193080 + }, + { + "epoch": 0.9642687707558241, + "grad_norm": 0.091032475233078, + "learning_rate": 1.0746201406793662e-06, + "loss": 8.4968, + "step": 193090 + }, + { + "epoch": 0.9643187095807636, + "grad_norm": 0.09456492960453033, + "learning_rate": 1.0731182257378158e-06, + "loss": 8.5076, + "step": 193100 + }, + { + "epoch": 0.964368648405703, + "grad_norm": 0.09016124159097672, + "learning_rate": 1.071616310796265e-06, + "loss": 8.5205, + "step": 193110 + }, + { + "epoch": 0.9644185872306424, + "grad_norm": 0.09598606824874878, + "learning_rate": 1.0701143958547148e-06, + "loss": 8.483, + "step": 193120 + }, + { + "epoch": 0.9644685260555819, + "grad_norm": 0.08811622112989426, + "learning_rate": 1.0686124809131642e-06, + "loss": 8.4907, + "step": 193130 + }, + { + "epoch": 0.9645184648805214, + "grad_norm": 0.0883399173617363, + "learning_rate": 1.067110565971614e-06, + "loss": 8.5047, + "step": 193140 + }, + { + "epoch": 0.9645684037054608, + "grad_norm": 0.0905977189540863, + "learning_rate": 1.0656086510300633e-06, + "loss": 8.4943, + "step": 193150 + }, + { + "epoch": 0.9646183425304002, + "grad_norm": 0.09106041491031647, + "learning_rate": 1.0641067360885128e-06, + "loss": 8.5189, + "step": 193160 + }, + { + "epoch": 0.9646682813553397, + "grad_norm": 0.08970261365175247, + "learning_rate": 1.0626048211469624e-06, + "loss": 8.5162, + "step": 193170 + }, + { + "epoch": 0.9647182201802792, + "grad_norm": 0.08954989910125732, + "learning_rate": 1.061102906205412e-06, + "loss": 8.4932, + "step": 193180 + }, + { + "epoch": 0.9647681590052186, + "grad_norm": 0.08976080268621445, + "learning_rate": 1.0596009912638614e-06, + "loss": 8.4927, + "step": 193190 + }, + { + "epoch": 0.964818097830158, + "grad_norm": 0.09053555130958557, + "learning_rate": 1.058099076322311e-06, + "loss": 8.4998, + "step": 193200 + }, + { + "epoch": 0.9648680366550975, + "grad_norm": 0.09599043428897858, + "learning_rate": 1.0565971613807603e-06, + "loss": 8.5194, + "step": 193210 + }, + { + "epoch": 0.964917975480037, + "grad_norm": 0.09338483214378357, + "learning_rate": 1.05509524643921e-06, + "loss": 8.4981, + "step": 193220 + }, + { + "epoch": 0.9649679143049764, + "grad_norm": 0.09092381596565247, + "learning_rate": 1.0535933314976594e-06, + "loss": 8.5404, + "step": 193230 + }, + { + "epoch": 0.9650178531299158, + "grad_norm": 0.0899573415517807, + "learning_rate": 1.0520914165561092e-06, + "loss": 8.5163, + "step": 193240 + }, + { + "epoch": 0.9650677919548553, + "grad_norm": 0.09256323426961899, + "learning_rate": 1.0505895016145585e-06, + "loss": 8.4776, + "step": 193250 + }, + { + "epoch": 0.9651177307797948, + "grad_norm": 0.08706999570131302, + "learning_rate": 1.049087586673008e-06, + "loss": 8.487, + "step": 193260 + }, + { + "epoch": 0.9651676696047342, + "grad_norm": 0.09370410442352295, + "learning_rate": 1.0475856717314576e-06, + "loss": 8.5015, + "step": 193270 + }, + { + "epoch": 0.9652176084296736, + "grad_norm": 0.09068583697080612, + "learning_rate": 1.0460837567899071e-06, + "loss": 8.4835, + "step": 193280 + }, + { + "epoch": 0.9652675472546131, + "grad_norm": 0.09026405960321426, + "learning_rate": 1.0445818418483567e-06, + "loss": 8.4975, + "step": 193290 + }, + { + "epoch": 0.9653174860795526, + "grad_norm": 0.0878196433186531, + "learning_rate": 1.0430799269068062e-06, + "loss": 8.4881, + "step": 193300 + }, + { + "epoch": 0.965367424904492, + "grad_norm": 0.08937402069568634, + "learning_rate": 1.0415780119652555e-06, + "loss": 8.497, + "step": 193310 + }, + { + "epoch": 0.9654173637294314, + "grad_norm": 0.09177373349666595, + "learning_rate": 1.0400760970237053e-06, + "loss": 8.4949, + "step": 193320 + }, + { + "epoch": 0.9654673025543709, + "grad_norm": 0.09359072893857956, + "learning_rate": 1.0385741820821546e-06, + "loss": 8.5039, + "step": 193330 + }, + { + "epoch": 0.9655172413793104, + "grad_norm": 0.09130553156137466, + "learning_rate": 1.0370722671406044e-06, + "loss": 8.489, + "step": 193340 + }, + { + "epoch": 0.9655671802042498, + "grad_norm": 0.09181410074234009, + "learning_rate": 1.0355703521990537e-06, + "loss": 8.5007, + "step": 193350 + }, + { + "epoch": 0.9656171190291892, + "grad_norm": 0.09474316984415054, + "learning_rate": 1.0340684372575035e-06, + "loss": 8.5026, + "step": 193360 + }, + { + "epoch": 0.9656670578541287, + "grad_norm": 0.09248656779527664, + "learning_rate": 1.0325665223159528e-06, + "loss": 8.5032, + "step": 193370 + }, + { + "epoch": 0.9657169966790682, + "grad_norm": 0.09201966971158981, + "learning_rate": 1.0310646073744023e-06, + "loss": 8.5016, + "step": 193380 + }, + { + "epoch": 0.9657669355040076, + "grad_norm": 0.09128998965024948, + "learning_rate": 1.0295626924328519e-06, + "loss": 8.4941, + "step": 193390 + }, + { + "epoch": 0.965816874328947, + "grad_norm": 0.08449841290712357, + "learning_rate": 1.0280607774913014e-06, + "loss": 8.5381, + "step": 193400 + }, + { + "epoch": 0.9658668131538864, + "grad_norm": 0.09496010094881058, + "learning_rate": 1.026558862549751e-06, + "loss": 8.5214, + "step": 193410 + }, + { + "epoch": 0.965916751978826, + "grad_norm": 0.08874188363552094, + "learning_rate": 1.0250569476082005e-06, + "loss": 8.512, + "step": 193420 + }, + { + "epoch": 0.9659666908037654, + "grad_norm": 0.08817078918218613, + "learning_rate": 1.0235550326666498e-06, + "loss": 8.4784, + "step": 193430 + }, + { + "epoch": 0.9660166296287048, + "grad_norm": 0.09461791068315506, + "learning_rate": 1.0220531177250996e-06, + "loss": 8.4886, + "step": 193440 + }, + { + "epoch": 0.9660665684536442, + "grad_norm": 0.09889914095401764, + "learning_rate": 1.020551202783549e-06, + "loss": 8.506, + "step": 193450 + }, + { + "epoch": 0.9661165072785838, + "grad_norm": 0.09483031183481216, + "learning_rate": 1.0190492878419987e-06, + "loss": 8.5026, + "step": 193460 + }, + { + "epoch": 0.9661664461035232, + "grad_norm": 0.09063508361577988, + "learning_rate": 1.017547372900448e-06, + "loss": 8.4843, + "step": 193470 + }, + { + "epoch": 0.9662163849284626, + "grad_norm": 0.09315601736307144, + "learning_rate": 1.0160454579588976e-06, + "loss": 8.4879, + "step": 193480 + }, + { + "epoch": 0.966266323753402, + "grad_norm": 0.09240519255399704, + "learning_rate": 1.014543543017347e-06, + "loss": 8.4912, + "step": 193490 + }, + { + "epoch": 0.9663162625783416, + "grad_norm": 0.09266150742769241, + "learning_rate": 1.0130416280757966e-06, + "loss": 8.5122, + "step": 193500 + }, + { + "epoch": 0.966366201403281, + "grad_norm": 0.08804892003536224, + "learning_rate": 1.0115397131342462e-06, + "loss": 8.498, + "step": 193510 + }, + { + "epoch": 0.9664161402282204, + "grad_norm": 0.0914304181933403, + "learning_rate": 1.0100377981926957e-06, + "loss": 8.5013, + "step": 193520 + }, + { + "epoch": 0.9664660790531598, + "grad_norm": 0.08695808053016663, + "learning_rate": 1.008535883251145e-06, + "loss": 8.5127, + "step": 193530 + }, + { + "epoch": 0.9665160178780994, + "grad_norm": 0.09132501482963562, + "learning_rate": 1.0070339683095948e-06, + "loss": 8.4965, + "step": 193540 + }, + { + "epoch": 0.9665659567030388, + "grad_norm": 0.08505229651927948, + "learning_rate": 1.0055320533680441e-06, + "loss": 8.5024, + "step": 193550 + }, + { + "epoch": 0.9666158955279782, + "grad_norm": 0.09099217504262924, + "learning_rate": 1.004030138426494e-06, + "loss": 8.5031, + "step": 193560 + }, + { + "epoch": 0.9666658343529176, + "grad_norm": 0.09543818980455399, + "learning_rate": 1.0025282234849432e-06, + "loss": 8.497, + "step": 193570 + }, + { + "epoch": 0.9667157731778572, + "grad_norm": 0.09109151363372803, + "learning_rate": 1.0010263085433928e-06, + "loss": 8.4983, + "step": 193580 + }, + { + "epoch": 0.9667657120027966, + "grad_norm": 0.09007328748703003, + "learning_rate": 9.995243936018423e-07, + "loss": 8.5035, + "step": 193590 + }, + { + "epoch": 0.966815650827736, + "grad_norm": 0.09476472437381744, + "learning_rate": 9.980224786602919e-07, + "loss": 8.4901, + "step": 193600 + }, + { + "epoch": 0.9668655896526754, + "grad_norm": 0.09251927584409714, + "learning_rate": 9.965205637187414e-07, + "loss": 8.5123, + "step": 193610 + }, + { + "epoch": 0.966915528477615, + "grad_norm": 0.08989694714546204, + "learning_rate": 9.95018648777191e-07, + "loss": 8.5028, + "step": 193620 + }, + { + "epoch": 0.9669654673025544, + "grad_norm": 0.09202085435390472, + "learning_rate": 9.935167338356405e-07, + "loss": 8.5208, + "step": 193630 + }, + { + "epoch": 0.9670154061274938, + "grad_norm": 0.09178955852985382, + "learning_rate": 9.9201481889409e-07, + "loss": 8.5038, + "step": 193640 + }, + { + "epoch": 0.9670653449524332, + "grad_norm": 0.08983652293682098, + "learning_rate": 9.905129039525394e-07, + "loss": 8.4953, + "step": 193650 + }, + { + "epoch": 0.9671152837773728, + "grad_norm": 0.08969952166080475, + "learning_rate": 9.890109890109891e-07, + "loss": 8.5043, + "step": 193660 + }, + { + "epoch": 0.9671652226023122, + "grad_norm": 0.09150168299674988, + "learning_rate": 9.875090740694384e-07, + "loss": 8.5058, + "step": 193670 + }, + { + "epoch": 0.9672151614272516, + "grad_norm": 0.09063497930765152, + "learning_rate": 9.860071591278882e-07, + "loss": 8.5009, + "step": 193680 + }, + { + "epoch": 0.967265100252191, + "grad_norm": 0.08984601497650146, + "learning_rate": 9.845052441863375e-07, + "loss": 8.5082, + "step": 193690 + }, + { + "epoch": 0.9673150390771306, + "grad_norm": 0.09148889034986496, + "learning_rate": 9.83003329244787e-07, + "loss": 8.5017, + "step": 193700 + }, + { + "epoch": 0.96736497790207, + "grad_norm": 0.09478826075792313, + "learning_rate": 9.815014143032366e-07, + "loss": 8.4963, + "step": 193710 + }, + { + "epoch": 0.9674149167270094, + "grad_norm": 0.0906151756644249, + "learning_rate": 9.799994993616862e-07, + "loss": 8.4947, + "step": 193720 + }, + { + "epoch": 0.9674648555519488, + "grad_norm": 0.0921965166926384, + "learning_rate": 9.784975844201357e-07, + "loss": 8.5126, + "step": 193730 + }, + { + "epoch": 0.9675147943768884, + "grad_norm": 0.09627021849155426, + "learning_rate": 9.769956694785852e-07, + "loss": 8.4987, + "step": 193740 + }, + { + "epoch": 0.9675647332018278, + "grad_norm": 0.0912567749619484, + "learning_rate": 9.754937545370346e-07, + "loss": 8.5012, + "step": 193750 + }, + { + "epoch": 0.9676146720267672, + "grad_norm": 0.09098992496728897, + "learning_rate": 9.739918395954843e-07, + "loss": 8.4986, + "step": 193760 + }, + { + "epoch": 0.9676646108517066, + "grad_norm": 0.09001246094703674, + "learning_rate": 9.724899246539337e-07, + "loss": 8.4776, + "step": 193770 + }, + { + "epoch": 0.9677145496766462, + "grad_norm": 0.09463299065828323, + "learning_rate": 9.709880097123834e-07, + "loss": 8.4844, + "step": 193780 + }, + { + "epoch": 0.9677644885015856, + "grad_norm": 0.09130155295133591, + "learning_rate": 9.694860947708328e-07, + "loss": 8.4935, + "step": 193790 + }, + { + "epoch": 0.967814427326525, + "grad_norm": 0.09506887197494507, + "learning_rate": 9.679841798292823e-07, + "loss": 8.5098, + "step": 193800 + }, + { + "epoch": 0.9678643661514644, + "grad_norm": 0.0893334150314331, + "learning_rate": 9.664822648877318e-07, + "loss": 8.4957, + "step": 193810 + }, + { + "epoch": 0.9679143049764038, + "grad_norm": 0.08439191430807114, + "learning_rate": 9.649803499461814e-07, + "loss": 8.5174, + "step": 193820 + }, + { + "epoch": 0.9679642438013434, + "grad_norm": 0.0962853729724884, + "learning_rate": 9.63478435004631e-07, + "loss": 8.5103, + "step": 193830 + }, + { + "epoch": 0.9680141826262828, + "grad_norm": 0.09273120760917664, + "learning_rate": 9.619765200630805e-07, + "loss": 8.4903, + "step": 193840 + }, + { + "epoch": 0.9680641214512222, + "grad_norm": 0.08646199852228165, + "learning_rate": 9.604746051215298e-07, + "loss": 8.4982, + "step": 193850 + }, + { + "epoch": 0.9681140602761616, + "grad_norm": 0.08911348879337311, + "learning_rate": 9.589726901799796e-07, + "loss": 8.4981, + "step": 193860 + }, + { + "epoch": 0.9681639991011012, + "grad_norm": 0.09012620151042938, + "learning_rate": 9.574707752384289e-07, + "loss": 8.4926, + "step": 193870 + }, + { + "epoch": 0.9682139379260406, + "grad_norm": 0.08890789747238159, + "learning_rate": 9.559688602968786e-07, + "loss": 8.4956, + "step": 193880 + }, + { + "epoch": 0.96826387675098, + "grad_norm": 0.0956655889749527, + "learning_rate": 9.54466945355328e-07, + "loss": 8.5063, + "step": 193890 + }, + { + "epoch": 0.9683138155759194, + "grad_norm": 0.0944046676158905, + "learning_rate": 9.529650304137776e-07, + "loss": 8.4913, + "step": 193900 + }, + { + "epoch": 0.968363754400859, + "grad_norm": 0.09098576009273529, + "learning_rate": 9.514631154722271e-07, + "loss": 8.4847, + "step": 193910 + }, + { + "epoch": 0.9684136932257984, + "grad_norm": 0.09032325446605682, + "learning_rate": 9.499612005306767e-07, + "loss": 8.5017, + "step": 193920 + }, + { + "epoch": 0.9684636320507378, + "grad_norm": 0.08898947387933731, + "learning_rate": 9.484592855891261e-07, + "loss": 8.4929, + "step": 193930 + }, + { + "epoch": 0.9685135708756772, + "grad_norm": 0.087337426841259, + "learning_rate": 9.469573706475757e-07, + "loss": 8.5065, + "step": 193940 + }, + { + "epoch": 0.9685635097006168, + "grad_norm": 0.089985191822052, + "learning_rate": 9.454554557060251e-07, + "loss": 8.4867, + "step": 193950 + }, + { + "epoch": 0.9686134485255562, + "grad_norm": 0.09129071980714798, + "learning_rate": 9.439535407644748e-07, + "loss": 8.5096, + "step": 193960 + }, + { + "epoch": 0.9686633873504956, + "grad_norm": 0.08964653313159943, + "learning_rate": 9.424516258229242e-07, + "loss": 8.5179, + "step": 193970 + }, + { + "epoch": 0.968713326175435, + "grad_norm": 0.09389611333608627, + "learning_rate": 9.409497108813739e-07, + "loss": 8.4891, + "step": 193980 + }, + { + "epoch": 0.9687632650003746, + "grad_norm": 0.0910143330693245, + "learning_rate": 9.394477959398232e-07, + "loss": 8.5042, + "step": 193990 + }, + { + "epoch": 0.968813203825314, + "grad_norm": 0.09279486536979675, + "learning_rate": 9.379458809982728e-07, + "loss": 8.5063, + "step": 194000 + }, + { + "epoch": 0.9688631426502534, + "grad_norm": 0.09352979063987732, + "learning_rate": 9.364439660567224e-07, + "loss": 8.5034, + "step": 194010 + }, + { + "epoch": 0.9689130814751928, + "grad_norm": 0.09566289931535721, + "learning_rate": 9.349420511151719e-07, + "loss": 8.4996, + "step": 194020 + }, + { + "epoch": 0.9689630203001324, + "grad_norm": 0.09447862952947617, + "learning_rate": 9.334401361736215e-07, + "loss": 8.4978, + "step": 194030 + }, + { + "epoch": 0.9690129591250718, + "grad_norm": 0.09513899683952332, + "learning_rate": 9.319382212320709e-07, + "loss": 8.5037, + "step": 194040 + }, + { + "epoch": 0.9690628979500112, + "grad_norm": 0.09640028327703476, + "learning_rate": 9.304363062905204e-07, + "loss": 8.5095, + "step": 194050 + }, + { + "epoch": 0.9691128367749506, + "grad_norm": 0.09304415434598923, + "learning_rate": 9.2893439134897e-07, + "loss": 8.4915, + "step": 194060 + }, + { + "epoch": 0.9691627755998902, + "grad_norm": 0.08626173436641693, + "learning_rate": 9.274324764074195e-07, + "loss": 8.5022, + "step": 194070 + }, + { + "epoch": 0.9692127144248296, + "grad_norm": 0.08531943708658218, + "learning_rate": 9.259305614658691e-07, + "loss": 8.5094, + "step": 194080 + }, + { + "epoch": 0.969262653249769, + "grad_norm": 0.08645452558994293, + "learning_rate": 9.244286465243186e-07, + "loss": 8.4834, + "step": 194090 + }, + { + "epoch": 0.9693125920747084, + "grad_norm": 0.08829811960458755, + "learning_rate": 9.229267315827681e-07, + "loss": 8.4885, + "step": 194100 + }, + { + "epoch": 0.969362530899648, + "grad_norm": 0.08838041871786118, + "learning_rate": 9.214248166412176e-07, + "loss": 8.4871, + "step": 194110 + }, + { + "epoch": 0.9694124697245874, + "grad_norm": 0.10451626777648926, + "learning_rate": 9.199229016996671e-07, + "loss": 8.5013, + "step": 194120 + }, + { + "epoch": 0.9694624085495268, + "grad_norm": 0.0896938219666481, + "learning_rate": 9.184209867581167e-07, + "loss": 8.4912, + "step": 194130 + }, + { + "epoch": 0.9695123473744662, + "grad_norm": 0.09267926216125488, + "learning_rate": 9.169190718165662e-07, + "loss": 8.5015, + "step": 194140 + }, + { + "epoch": 0.9695622861994058, + "grad_norm": 0.09529542177915573, + "learning_rate": 9.154171568750157e-07, + "loss": 8.5052, + "step": 194150 + }, + { + "epoch": 0.9696122250243452, + "grad_norm": 0.09126924723386765, + "learning_rate": 9.139152419334652e-07, + "loss": 8.4985, + "step": 194160 + }, + { + "epoch": 0.9696621638492846, + "grad_norm": 0.09080453962087631, + "learning_rate": 9.124133269919148e-07, + "loss": 8.4891, + "step": 194170 + }, + { + "epoch": 0.969712102674224, + "grad_norm": 0.09079496562480927, + "learning_rate": 9.109114120503643e-07, + "loss": 8.4894, + "step": 194180 + }, + { + "epoch": 0.9697620414991636, + "grad_norm": 0.09417524933815002, + "learning_rate": 9.094094971088138e-07, + "loss": 8.5045, + "step": 194190 + }, + { + "epoch": 0.969811980324103, + "grad_norm": 0.09117228537797928, + "learning_rate": 9.079075821672633e-07, + "loss": 8.5076, + "step": 194200 + }, + { + "epoch": 0.9698619191490424, + "grad_norm": 0.09321728348731995, + "learning_rate": 9.064056672257128e-07, + "loss": 8.4965, + "step": 194210 + }, + { + "epoch": 0.9699118579739818, + "grad_norm": 0.08753789961338043, + "learning_rate": 9.049037522841624e-07, + "loss": 8.4978, + "step": 194220 + }, + { + "epoch": 0.9699617967989214, + "grad_norm": 0.09341299533843994, + "learning_rate": 9.034018373426119e-07, + "loss": 8.4969, + "step": 194230 + }, + { + "epoch": 0.9700117356238608, + "grad_norm": 0.09108027070760727, + "learning_rate": 9.018999224010614e-07, + "loss": 8.4958, + "step": 194240 + }, + { + "epoch": 0.9700616744488002, + "grad_norm": 0.09051679074764252, + "learning_rate": 9.00398007459511e-07, + "loss": 8.5203, + "step": 194250 + }, + { + "epoch": 0.9701116132737396, + "grad_norm": 0.0874452218413353, + "learning_rate": 8.988960925179604e-07, + "loss": 8.4992, + "step": 194260 + }, + { + "epoch": 0.9701615520986792, + "grad_norm": 0.09082052111625671, + "learning_rate": 8.9739417757641e-07, + "loss": 8.5051, + "step": 194270 + }, + { + "epoch": 0.9702114909236186, + "grad_norm": 0.08856025338172913, + "learning_rate": 8.958922626348595e-07, + "loss": 8.5033, + "step": 194280 + }, + { + "epoch": 0.970261429748558, + "grad_norm": 0.09108460694551468, + "learning_rate": 8.943903476933091e-07, + "loss": 8.5058, + "step": 194290 + }, + { + "epoch": 0.9703113685734974, + "grad_norm": 0.09241397678852081, + "learning_rate": 8.928884327517586e-07, + "loss": 8.5089, + "step": 194300 + }, + { + "epoch": 0.970361307398437, + "grad_norm": 0.099297434091568, + "learning_rate": 8.91386517810208e-07, + "loss": 8.4976, + "step": 194310 + }, + { + "epoch": 0.9704112462233764, + "grad_norm": 0.08700823783874512, + "learning_rate": 8.898846028686576e-07, + "loss": 8.4843, + "step": 194320 + }, + { + "epoch": 0.9704611850483158, + "grad_norm": 0.09079594165086746, + "learning_rate": 8.883826879271071e-07, + "loss": 8.5041, + "step": 194330 + }, + { + "epoch": 0.9705111238732552, + "grad_norm": 0.08681687712669373, + "learning_rate": 8.868807729855567e-07, + "loss": 8.4892, + "step": 194340 + }, + { + "epoch": 0.9705610626981948, + "grad_norm": 0.09553930163383484, + "learning_rate": 8.853788580440062e-07, + "loss": 8.5019, + "step": 194350 + }, + { + "epoch": 0.9706110015231342, + "grad_norm": 0.08355355262756348, + "learning_rate": 8.838769431024558e-07, + "loss": 8.5108, + "step": 194360 + }, + { + "epoch": 0.9706609403480736, + "grad_norm": 0.0913529172539711, + "learning_rate": 8.823750281609052e-07, + "loss": 8.5125, + "step": 194370 + }, + { + "epoch": 0.970710879173013, + "grad_norm": 0.09661257266998291, + "learning_rate": 8.808731132193547e-07, + "loss": 8.5045, + "step": 194380 + }, + { + "epoch": 0.9707608179979526, + "grad_norm": 0.09083771705627441, + "learning_rate": 8.793711982778043e-07, + "loss": 8.4961, + "step": 194390 + }, + { + "epoch": 0.970810756822892, + "grad_norm": 0.08930204063653946, + "learning_rate": 8.778692833362538e-07, + "loss": 8.509, + "step": 194400 + }, + { + "epoch": 0.9708606956478314, + "grad_norm": 0.08711707592010498, + "learning_rate": 8.763673683947034e-07, + "loss": 8.5015, + "step": 194410 + }, + { + "epoch": 0.9709106344727708, + "grad_norm": 0.09438509494066238, + "learning_rate": 8.748654534531527e-07, + "loss": 8.4954, + "step": 194420 + }, + { + "epoch": 0.9709605732977103, + "grad_norm": 0.09597020596265793, + "learning_rate": 8.733635385116022e-07, + "loss": 8.4873, + "step": 194430 + }, + { + "epoch": 0.9710105121226498, + "grad_norm": 0.09720340371131897, + "learning_rate": 8.718616235700518e-07, + "loss": 8.5018, + "step": 194440 + }, + { + "epoch": 0.9710604509475892, + "grad_norm": 0.09392903745174408, + "learning_rate": 8.703597086285013e-07, + "loss": 8.498, + "step": 194450 + }, + { + "epoch": 0.9711103897725286, + "grad_norm": 0.08631488680839539, + "learning_rate": 8.688577936869509e-07, + "loss": 8.5162, + "step": 194460 + }, + { + "epoch": 0.9711603285974681, + "grad_norm": 0.0900026261806488, + "learning_rate": 8.673558787454003e-07, + "loss": 8.4875, + "step": 194470 + }, + { + "epoch": 0.9712102674224076, + "grad_norm": 0.09366903454065323, + "learning_rate": 8.658539638038498e-07, + "loss": 8.4926, + "step": 194480 + }, + { + "epoch": 0.971260206247347, + "grad_norm": 0.08986707031726837, + "learning_rate": 8.643520488622994e-07, + "loss": 8.5027, + "step": 194490 + }, + { + "epoch": 0.9713101450722864, + "grad_norm": 0.08996418118476868, + "learning_rate": 8.628501339207489e-07, + "loss": 8.4983, + "step": 194500 + }, + { + "epoch": 0.971360083897226, + "grad_norm": 0.09488117694854736, + "learning_rate": 8.613482189791985e-07, + "loss": 8.4869, + "step": 194510 + }, + { + "epoch": 0.9714100227221654, + "grad_norm": 0.0917017012834549, + "learning_rate": 8.59846304037648e-07, + "loss": 8.5092, + "step": 194520 + }, + { + "epoch": 0.9714599615471048, + "grad_norm": 0.0919492170214653, + "learning_rate": 8.583443890960975e-07, + "loss": 8.4992, + "step": 194530 + }, + { + "epoch": 0.9715099003720442, + "grad_norm": 0.08751703053712845, + "learning_rate": 8.56842474154547e-07, + "loss": 8.5088, + "step": 194540 + }, + { + "epoch": 0.9715598391969837, + "grad_norm": 0.09287269413471222, + "learning_rate": 8.553405592129965e-07, + "loss": 8.496, + "step": 194550 + }, + { + "epoch": 0.9716097780219232, + "grad_norm": 0.08905121684074402, + "learning_rate": 8.538386442714461e-07, + "loss": 8.5047, + "step": 194560 + }, + { + "epoch": 0.9716597168468626, + "grad_norm": 0.0883074626326561, + "learning_rate": 8.523367293298956e-07, + "loss": 8.4918, + "step": 194570 + }, + { + "epoch": 0.971709655671802, + "grad_norm": 0.08838310837745667, + "learning_rate": 8.508348143883451e-07, + "loss": 8.4834, + "step": 194580 + }, + { + "epoch": 0.9717595944967415, + "grad_norm": 0.08658456802368164, + "learning_rate": 8.493328994467946e-07, + "loss": 8.4993, + "step": 194590 + }, + { + "epoch": 0.971809533321681, + "grad_norm": 0.08917684108018875, + "learning_rate": 8.478309845052442e-07, + "loss": 8.5031, + "step": 194600 + }, + { + "epoch": 0.9718594721466204, + "grad_norm": 0.09120479226112366, + "learning_rate": 8.463290695636937e-07, + "loss": 8.495, + "step": 194610 + }, + { + "epoch": 0.9719094109715598, + "grad_norm": 0.09076827019453049, + "learning_rate": 8.448271546221432e-07, + "loss": 8.5126, + "step": 194620 + }, + { + "epoch": 0.9719593497964993, + "grad_norm": 0.09772755205631256, + "learning_rate": 8.433252396805928e-07, + "loss": 8.4906, + "step": 194630 + }, + { + "epoch": 0.9720092886214388, + "grad_norm": 0.08785592764616013, + "learning_rate": 8.418233247390422e-07, + "loss": 8.5035, + "step": 194640 + }, + { + "epoch": 0.9720592274463782, + "grad_norm": 0.09105689078569412, + "learning_rate": 8.403214097974918e-07, + "loss": 8.4977, + "step": 194650 + }, + { + "epoch": 0.9721091662713176, + "grad_norm": 0.09146515280008316, + "learning_rate": 8.388194948559413e-07, + "loss": 8.5028, + "step": 194660 + }, + { + "epoch": 0.9721591050962571, + "grad_norm": 0.09533209353685379, + "learning_rate": 8.373175799143908e-07, + "loss": 8.4955, + "step": 194670 + }, + { + "epoch": 0.9722090439211966, + "grad_norm": 0.08939139544963837, + "learning_rate": 8.358156649728404e-07, + "loss": 8.4918, + "step": 194680 + }, + { + "epoch": 0.972258982746136, + "grad_norm": 0.0908391997218132, + "learning_rate": 8.343137500312898e-07, + "loss": 8.5214, + "step": 194690 + }, + { + "epoch": 0.9723089215710754, + "grad_norm": 0.08793427795171738, + "learning_rate": 8.328118350897394e-07, + "loss": 8.5005, + "step": 194700 + }, + { + "epoch": 0.9723588603960149, + "grad_norm": 0.09640876948833466, + "learning_rate": 8.313099201481889e-07, + "loss": 8.5052, + "step": 194710 + }, + { + "epoch": 0.9724087992209544, + "grad_norm": 0.0828898623585701, + "learning_rate": 8.298080052066385e-07, + "loss": 8.4938, + "step": 194720 + }, + { + "epoch": 0.9724587380458938, + "grad_norm": 0.09573552757501602, + "learning_rate": 8.28306090265088e-07, + "loss": 8.5073, + "step": 194730 + }, + { + "epoch": 0.9725086768708332, + "grad_norm": 0.09013304859399796, + "learning_rate": 8.268041753235374e-07, + "loss": 8.4836, + "step": 194740 + }, + { + "epoch": 0.9725586156957727, + "grad_norm": 0.08847355097532272, + "learning_rate": 8.25302260381987e-07, + "loss": 8.4966, + "step": 194750 + }, + { + "epoch": 0.9726085545207122, + "grad_norm": 0.0923420786857605, + "learning_rate": 8.238003454404365e-07, + "loss": 8.4948, + "step": 194760 + }, + { + "epoch": 0.9726584933456516, + "grad_norm": 0.09254893660545349, + "learning_rate": 8.222984304988861e-07, + "loss": 8.5117, + "step": 194770 + }, + { + "epoch": 0.972708432170591, + "grad_norm": 0.09156614542007446, + "learning_rate": 8.207965155573356e-07, + "loss": 8.4967, + "step": 194780 + }, + { + "epoch": 0.9727583709955305, + "grad_norm": 0.09188225865364075, + "learning_rate": 8.192946006157852e-07, + "loss": 8.4973, + "step": 194790 + }, + { + "epoch": 0.97280830982047, + "grad_norm": 0.09390117973089218, + "learning_rate": 8.177926856742346e-07, + "loss": 8.4866, + "step": 194800 + }, + { + "epoch": 0.9728582486454094, + "grad_norm": 0.09112615883350372, + "learning_rate": 8.162907707326841e-07, + "loss": 8.5038, + "step": 194810 + }, + { + "epoch": 0.9729081874703488, + "grad_norm": 0.08958408236503601, + "learning_rate": 8.147888557911337e-07, + "loss": 8.5039, + "step": 194820 + }, + { + "epoch": 0.9729581262952882, + "grad_norm": 0.09056022018194199, + "learning_rate": 8.132869408495832e-07, + "loss": 8.5089, + "step": 194830 + }, + { + "epoch": 0.9730080651202277, + "grad_norm": 0.0902940034866333, + "learning_rate": 8.117850259080328e-07, + "loss": 8.4841, + "step": 194840 + }, + { + "epoch": 0.9730580039451672, + "grad_norm": 0.08953052759170532, + "learning_rate": 8.102831109664822e-07, + "loss": 8.5099, + "step": 194850 + }, + { + "epoch": 0.9731079427701066, + "grad_norm": 0.09145776927471161, + "learning_rate": 8.087811960249317e-07, + "loss": 8.4842, + "step": 194860 + }, + { + "epoch": 0.973157881595046, + "grad_norm": 0.09231266379356384, + "learning_rate": 8.072792810833813e-07, + "loss": 8.4961, + "step": 194870 + }, + { + "epoch": 0.9732078204199855, + "grad_norm": 0.09086192399263382, + "learning_rate": 8.057773661418308e-07, + "loss": 8.5026, + "step": 194880 + }, + { + "epoch": 0.973257759244925, + "grad_norm": 0.0860838070511818, + "learning_rate": 8.042754512002804e-07, + "loss": 8.4974, + "step": 194890 + }, + { + "epoch": 0.9733076980698644, + "grad_norm": 0.09107068926095963, + "learning_rate": 8.027735362587299e-07, + "loss": 8.509, + "step": 194900 + }, + { + "epoch": 0.9733576368948038, + "grad_norm": 0.09526333957910538, + "learning_rate": 8.012716213171794e-07, + "loss": 8.4865, + "step": 194910 + }, + { + "epoch": 0.9734075757197433, + "grad_norm": 0.09245917946100235, + "learning_rate": 7.997697063756289e-07, + "loss": 8.5036, + "step": 194920 + }, + { + "epoch": 0.9734575145446828, + "grad_norm": 0.09295765310525894, + "learning_rate": 7.982677914340784e-07, + "loss": 8.4988, + "step": 194930 + }, + { + "epoch": 0.9735074533696222, + "grad_norm": 0.08823594450950623, + "learning_rate": 7.96765876492528e-07, + "loss": 8.4804, + "step": 194940 + }, + { + "epoch": 0.9735573921945616, + "grad_norm": 0.08996216952800751, + "learning_rate": 7.952639615509775e-07, + "loss": 8.5019, + "step": 194950 + }, + { + "epoch": 0.9736073310195011, + "grad_norm": 0.08816206455230713, + "learning_rate": 7.93762046609427e-07, + "loss": 8.5015, + "step": 194960 + }, + { + "epoch": 0.9736572698444406, + "grad_norm": 0.09345098584890366, + "learning_rate": 7.922601316678765e-07, + "loss": 8.4941, + "step": 194970 + }, + { + "epoch": 0.97370720866938, + "grad_norm": 0.08701256662607193, + "learning_rate": 7.90758216726326e-07, + "loss": 8.4983, + "step": 194980 + }, + { + "epoch": 0.9737571474943194, + "grad_norm": 0.08897973597049713, + "learning_rate": 7.892563017847756e-07, + "loss": 8.5118, + "step": 194990 + }, + { + "epoch": 0.9738070863192589, + "grad_norm": 0.08834607154130936, + "learning_rate": 7.877543868432251e-07, + "loss": 8.5255, + "step": 195000 + }, + { + "epoch": 0.9738570251441984, + "grad_norm": 0.09231365472078323, + "learning_rate": 7.862524719016746e-07, + "loss": 8.5062, + "step": 195010 + }, + { + "epoch": 0.9739069639691378, + "grad_norm": 0.089638851583004, + "learning_rate": 7.847505569601241e-07, + "loss": 8.4869, + "step": 195020 + }, + { + "epoch": 0.9739569027940772, + "grad_norm": 0.09642373770475388, + "learning_rate": 7.832486420185737e-07, + "loss": 8.5094, + "step": 195030 + }, + { + "epoch": 0.9740068416190167, + "grad_norm": 0.09365835040807724, + "learning_rate": 7.817467270770232e-07, + "loss": 8.5179, + "step": 195040 + }, + { + "epoch": 0.9740567804439562, + "grad_norm": 0.09444354474544525, + "learning_rate": 7.802448121354727e-07, + "loss": 8.5114, + "step": 195050 + }, + { + "epoch": 0.9741067192688956, + "grad_norm": 0.09574456512928009, + "learning_rate": 7.787428971939223e-07, + "loss": 8.4882, + "step": 195060 + }, + { + "epoch": 0.974156658093835, + "grad_norm": 0.09401273727416992, + "learning_rate": 7.772409822523717e-07, + "loss": 8.4983, + "step": 195070 + }, + { + "epoch": 0.9742065969187745, + "grad_norm": 0.08458282053470612, + "learning_rate": 7.757390673108213e-07, + "loss": 8.4861, + "step": 195080 + }, + { + "epoch": 0.974256535743714, + "grad_norm": 0.09678689390420914, + "learning_rate": 7.742371523692708e-07, + "loss": 8.4971, + "step": 195090 + }, + { + "epoch": 0.9743064745686534, + "grad_norm": 0.08890896290540695, + "learning_rate": 7.727352374277204e-07, + "loss": 8.4881, + "step": 195100 + }, + { + "epoch": 0.9743564133935928, + "grad_norm": 0.0906500592827797, + "learning_rate": 7.712333224861699e-07, + "loss": 8.4991, + "step": 195110 + }, + { + "epoch": 0.9744063522185323, + "grad_norm": 0.09543705731630325, + "learning_rate": 7.697314075446193e-07, + "loss": 8.5054, + "step": 195120 + }, + { + "epoch": 0.9744562910434718, + "grad_norm": 0.0905163511633873, + "learning_rate": 7.682294926030689e-07, + "loss": 8.4979, + "step": 195130 + }, + { + "epoch": 0.9745062298684112, + "grad_norm": 0.09297658503055573, + "learning_rate": 7.667275776615184e-07, + "loss": 8.4858, + "step": 195140 + }, + { + "epoch": 0.9745561686933506, + "grad_norm": 0.08763201534748077, + "learning_rate": 7.65225662719968e-07, + "loss": 8.4979, + "step": 195150 + }, + { + "epoch": 0.9746061075182901, + "grad_norm": 0.0941351130604744, + "learning_rate": 7.637237477784175e-07, + "loss": 8.5004, + "step": 195160 + }, + { + "epoch": 0.9746560463432296, + "grad_norm": 0.09358199685811996, + "learning_rate": 7.62221832836867e-07, + "loss": 8.4789, + "step": 195170 + }, + { + "epoch": 0.974705985168169, + "grad_norm": 0.09558181464672089, + "learning_rate": 7.607199178953165e-07, + "loss": 8.5046, + "step": 195180 + }, + { + "epoch": 0.9747559239931084, + "grad_norm": 0.08955127000808716, + "learning_rate": 7.59218002953766e-07, + "loss": 8.5107, + "step": 195190 + }, + { + "epoch": 0.9748058628180479, + "grad_norm": 0.08842689543962479, + "learning_rate": 7.577160880122156e-07, + "loss": 8.4977, + "step": 195200 + }, + { + "epoch": 0.9748558016429874, + "grad_norm": 0.08837328106164932, + "learning_rate": 7.562141730706651e-07, + "loss": 8.4808, + "step": 195210 + }, + { + "epoch": 0.9749057404679268, + "grad_norm": 0.09056874364614487, + "learning_rate": 7.547122581291147e-07, + "loss": 8.5065, + "step": 195220 + }, + { + "epoch": 0.9749556792928662, + "grad_norm": 0.0923481434583664, + "learning_rate": 7.532103431875641e-07, + "loss": 8.4953, + "step": 195230 + }, + { + "epoch": 0.9750056181178057, + "grad_norm": 0.09200780093669891, + "learning_rate": 7.517084282460136e-07, + "loss": 8.5131, + "step": 195240 + }, + { + "epoch": 0.9750555569427451, + "grad_norm": 0.09253337979316711, + "learning_rate": 7.502065133044632e-07, + "loss": 8.503, + "step": 195250 + }, + { + "epoch": 0.9751054957676846, + "grad_norm": 0.09560063481330872, + "learning_rate": 7.487045983629127e-07, + "loss": 8.4932, + "step": 195260 + }, + { + "epoch": 0.975155434592624, + "grad_norm": 0.09416811168193817, + "learning_rate": 7.472026834213623e-07, + "loss": 8.5141, + "step": 195270 + }, + { + "epoch": 0.9752053734175635, + "grad_norm": 0.08733347803354263, + "learning_rate": 7.457007684798117e-07, + "loss": 8.5092, + "step": 195280 + }, + { + "epoch": 0.975255312242503, + "grad_norm": 0.08784154802560806, + "learning_rate": 7.441988535382612e-07, + "loss": 8.5029, + "step": 195290 + }, + { + "epoch": 0.9753052510674424, + "grad_norm": 0.09478521347045898, + "learning_rate": 7.426969385967108e-07, + "loss": 8.5035, + "step": 195300 + }, + { + "epoch": 0.9753551898923818, + "grad_norm": 0.09163037687540054, + "learning_rate": 7.411950236551603e-07, + "loss": 8.4888, + "step": 195310 + }, + { + "epoch": 0.9754051287173213, + "grad_norm": 0.10056457668542862, + "learning_rate": 7.396931087136099e-07, + "loss": 8.5046, + "step": 195320 + }, + { + "epoch": 0.9754550675422607, + "grad_norm": 0.09030482172966003, + "learning_rate": 7.381911937720594e-07, + "loss": 8.505, + "step": 195330 + }, + { + "epoch": 0.9755050063672002, + "grad_norm": 0.089601531624794, + "learning_rate": 7.366892788305089e-07, + "loss": 8.4949, + "step": 195340 + }, + { + "epoch": 0.9755549451921396, + "grad_norm": 0.09470442682504654, + "learning_rate": 7.351873638889584e-07, + "loss": 8.4826, + "step": 195350 + }, + { + "epoch": 0.9756048840170791, + "grad_norm": 0.0969916582107544, + "learning_rate": 7.336854489474079e-07, + "loss": 8.4949, + "step": 195360 + }, + { + "epoch": 0.9756548228420185, + "grad_norm": 0.09550881385803223, + "learning_rate": 7.321835340058575e-07, + "loss": 8.4897, + "step": 195370 + }, + { + "epoch": 0.975704761666958, + "grad_norm": 0.08882039040327072, + "learning_rate": 7.30681619064307e-07, + "loss": 8.4957, + "step": 195380 + }, + { + "epoch": 0.9757547004918974, + "grad_norm": 0.09256576001644135, + "learning_rate": 7.291797041227565e-07, + "loss": 8.5088, + "step": 195390 + }, + { + "epoch": 0.9758046393168369, + "grad_norm": 0.09013333171606064, + "learning_rate": 7.27677789181206e-07, + "loss": 8.5056, + "step": 195400 + }, + { + "epoch": 0.9758545781417763, + "grad_norm": 0.09338297694921494, + "learning_rate": 7.261758742396556e-07, + "loss": 8.4914, + "step": 195410 + }, + { + "epoch": 0.9759045169667158, + "grad_norm": 0.08930335938930511, + "learning_rate": 7.246739592981051e-07, + "loss": 8.4948, + "step": 195420 + }, + { + "epoch": 0.9759544557916552, + "grad_norm": 0.09636734426021576, + "learning_rate": 7.231720443565546e-07, + "loss": 8.4691, + "step": 195430 + }, + { + "epoch": 0.9760043946165947, + "grad_norm": 0.09596901386976242, + "learning_rate": 7.216701294150042e-07, + "loss": 8.5013, + "step": 195440 + }, + { + "epoch": 0.9760543334415341, + "grad_norm": 0.08806633204221725, + "learning_rate": 7.201682144734536e-07, + "loss": 8.4973, + "step": 195450 + }, + { + "epoch": 0.9761042722664736, + "grad_norm": 0.08893059194087982, + "learning_rate": 7.186662995319032e-07, + "loss": 8.5316, + "step": 195460 + }, + { + "epoch": 0.976154211091413, + "grad_norm": 0.09048335999250412, + "learning_rate": 7.171643845903527e-07, + "loss": 8.504, + "step": 195470 + }, + { + "epoch": 0.9762041499163525, + "grad_norm": 0.09120376408100128, + "learning_rate": 7.156624696488023e-07, + "loss": 8.4939, + "step": 195480 + }, + { + "epoch": 0.9762540887412919, + "grad_norm": 0.09385009855031967, + "learning_rate": 7.141605547072518e-07, + "loss": 8.4997, + "step": 195490 + }, + { + "epoch": 0.9763040275662314, + "grad_norm": 0.09150715172290802, + "learning_rate": 7.126586397657012e-07, + "loss": 8.4917, + "step": 195500 + }, + { + "epoch": 0.9763539663911708, + "grad_norm": 0.08368659019470215, + "learning_rate": 7.111567248241508e-07, + "loss": 8.5044, + "step": 195510 + }, + { + "epoch": 0.9764039052161103, + "grad_norm": 0.09331393986940384, + "learning_rate": 7.096548098826003e-07, + "loss": 8.5056, + "step": 195520 + }, + { + "epoch": 0.9764538440410497, + "grad_norm": 0.08423703908920288, + "learning_rate": 7.081528949410499e-07, + "loss": 8.4948, + "step": 195530 + }, + { + "epoch": 0.9765037828659892, + "grad_norm": 0.090935617685318, + "learning_rate": 7.066509799994994e-07, + "loss": 8.5093, + "step": 195540 + }, + { + "epoch": 0.9765537216909286, + "grad_norm": 0.089198037981987, + "learning_rate": 7.051490650579488e-07, + "loss": 8.5092, + "step": 195550 + }, + { + "epoch": 0.9766036605158681, + "grad_norm": 0.09370332956314087, + "learning_rate": 7.036471501163984e-07, + "loss": 8.4945, + "step": 195560 + }, + { + "epoch": 0.9766535993408075, + "grad_norm": 0.09572462737560272, + "learning_rate": 7.021452351748479e-07, + "loss": 8.4806, + "step": 195570 + }, + { + "epoch": 0.976703538165747, + "grad_norm": 0.09619718790054321, + "learning_rate": 7.006433202332975e-07, + "loss": 8.4985, + "step": 195580 + }, + { + "epoch": 0.9767534769906864, + "grad_norm": 0.08778340369462967, + "learning_rate": 6.99141405291747e-07, + "loss": 8.5015, + "step": 195590 + }, + { + "epoch": 0.9768034158156259, + "grad_norm": 0.09372592717409134, + "learning_rate": 6.976394903501966e-07, + "loss": 8.4923, + "step": 195600 + }, + { + "epoch": 0.9768533546405653, + "grad_norm": 0.08879277855157852, + "learning_rate": 6.96137575408646e-07, + "loss": 8.5049, + "step": 195610 + }, + { + "epoch": 0.9769032934655048, + "grad_norm": 0.09419248253107071, + "learning_rate": 6.946356604670955e-07, + "loss": 8.5186, + "step": 195620 + }, + { + "epoch": 0.9769532322904442, + "grad_norm": 0.09513996541500092, + "learning_rate": 6.931337455255451e-07, + "loss": 8.5032, + "step": 195630 + }, + { + "epoch": 0.9770031711153837, + "grad_norm": 0.09008798003196716, + "learning_rate": 6.916318305839946e-07, + "loss": 8.493, + "step": 195640 + }, + { + "epoch": 0.9770531099403231, + "grad_norm": 0.08629944920539856, + "learning_rate": 6.901299156424442e-07, + "loss": 8.4955, + "step": 195650 + }, + { + "epoch": 0.9771030487652625, + "grad_norm": 0.09061171859502792, + "learning_rate": 6.886280007008936e-07, + "loss": 8.5169, + "step": 195660 + }, + { + "epoch": 0.977152987590202, + "grad_norm": 0.0888964906334877, + "learning_rate": 6.871260857593431e-07, + "loss": 8.4928, + "step": 195670 + }, + { + "epoch": 0.9772029264151415, + "grad_norm": 0.08664719760417938, + "learning_rate": 6.856241708177927e-07, + "loss": 8.4964, + "step": 195680 + }, + { + "epoch": 0.9772528652400809, + "grad_norm": 0.08702714741230011, + "learning_rate": 6.841222558762422e-07, + "loss": 8.4972, + "step": 195690 + }, + { + "epoch": 0.9773028040650203, + "grad_norm": 0.0985323116183281, + "learning_rate": 6.826203409346918e-07, + "loss": 8.4806, + "step": 195700 + }, + { + "epoch": 0.9773527428899598, + "grad_norm": 0.08669869601726532, + "learning_rate": 6.811184259931413e-07, + "loss": 8.5084, + "step": 195710 + }, + { + "epoch": 0.9774026817148993, + "grad_norm": 0.09846388548612595, + "learning_rate": 6.796165110515908e-07, + "loss": 8.5034, + "step": 195720 + }, + { + "epoch": 0.9774526205398387, + "grad_norm": 0.08850651979446411, + "learning_rate": 6.781145961100403e-07, + "loss": 8.5063, + "step": 195730 + }, + { + "epoch": 0.9775025593647781, + "grad_norm": 0.09550187736749649, + "learning_rate": 6.766126811684898e-07, + "loss": 8.4943, + "step": 195740 + }, + { + "epoch": 0.9775524981897176, + "grad_norm": 0.08764312416315079, + "learning_rate": 6.751107662269394e-07, + "loss": 8.5015, + "step": 195750 + }, + { + "epoch": 0.9776024370146571, + "grad_norm": 0.09270459413528442, + "learning_rate": 6.736088512853889e-07, + "loss": 8.4993, + "step": 195760 + }, + { + "epoch": 0.9776523758395965, + "grad_norm": 0.09069382399320602, + "learning_rate": 6.721069363438384e-07, + "loss": 8.4991, + "step": 195770 + }, + { + "epoch": 0.9777023146645359, + "grad_norm": 0.09017602354288101, + "learning_rate": 6.706050214022879e-07, + "loss": 8.5079, + "step": 195780 + }, + { + "epoch": 0.9777522534894754, + "grad_norm": 0.0948113352060318, + "learning_rate": 6.691031064607375e-07, + "loss": 8.4913, + "step": 195790 + }, + { + "epoch": 0.9778021923144148, + "grad_norm": 0.09429457783699036, + "learning_rate": 6.67601191519187e-07, + "loss": 8.501, + "step": 195800 + }, + { + "epoch": 0.9778521311393543, + "grad_norm": 0.09153127670288086, + "learning_rate": 6.660992765776365e-07, + "loss": 8.4899, + "step": 195810 + }, + { + "epoch": 0.9779020699642937, + "grad_norm": 0.0950402021408081, + "learning_rate": 6.64597361636086e-07, + "loss": 8.5159, + "step": 195820 + }, + { + "epoch": 0.9779520087892332, + "grad_norm": 0.09019756317138672, + "learning_rate": 6.630954466945355e-07, + "loss": 8.494, + "step": 195830 + }, + { + "epoch": 0.9780019476141726, + "grad_norm": 0.08579851686954498, + "learning_rate": 6.615935317529851e-07, + "loss": 8.5005, + "step": 195840 + }, + { + "epoch": 0.9780518864391121, + "grad_norm": 0.09236463159322739, + "learning_rate": 6.600916168114346e-07, + "loss": 8.4911, + "step": 195850 + }, + { + "epoch": 0.9781018252640515, + "grad_norm": 0.08974046260118484, + "learning_rate": 6.585897018698841e-07, + "loss": 8.5048, + "step": 195860 + }, + { + "epoch": 0.978151764088991, + "grad_norm": 0.0937148705124855, + "learning_rate": 6.570877869283337e-07, + "loss": 8.5085, + "step": 195870 + }, + { + "epoch": 0.9782017029139304, + "grad_norm": 0.09331946820020676, + "learning_rate": 6.555858719867831e-07, + "loss": 8.4902, + "step": 195880 + }, + { + "epoch": 0.9782516417388699, + "grad_norm": 0.08849994838237762, + "learning_rate": 6.540839570452327e-07, + "loss": 8.513, + "step": 195890 + }, + { + "epoch": 0.9783015805638093, + "grad_norm": 0.09173839539289474, + "learning_rate": 6.525820421036822e-07, + "loss": 8.4979, + "step": 195900 + }, + { + "epoch": 0.9783515193887488, + "grad_norm": 0.09019637852907181, + "learning_rate": 6.510801271621318e-07, + "loss": 8.503, + "step": 195910 + }, + { + "epoch": 0.9784014582136882, + "grad_norm": 0.09567339718341827, + "learning_rate": 6.495782122205813e-07, + "loss": 8.5031, + "step": 195920 + }, + { + "epoch": 0.9784513970386277, + "grad_norm": 0.09023743122816086, + "learning_rate": 6.480762972790307e-07, + "loss": 8.4926, + "step": 195930 + }, + { + "epoch": 0.9785013358635671, + "grad_norm": 0.08731987327337265, + "learning_rate": 6.465743823374803e-07, + "loss": 8.4897, + "step": 195940 + }, + { + "epoch": 0.9785512746885066, + "grad_norm": 0.09049738943576813, + "learning_rate": 6.450724673959298e-07, + "loss": 8.514, + "step": 195950 + }, + { + "epoch": 0.978601213513446, + "grad_norm": 0.09187744557857513, + "learning_rate": 6.435705524543794e-07, + "loss": 8.4761, + "step": 195960 + }, + { + "epoch": 0.9786511523383855, + "grad_norm": 0.09112652391195297, + "learning_rate": 6.420686375128289e-07, + "loss": 8.5003, + "step": 195970 + }, + { + "epoch": 0.9787010911633249, + "grad_norm": 0.09285633265972137, + "learning_rate": 6.405667225712785e-07, + "loss": 8.4864, + "step": 195980 + }, + { + "epoch": 0.9787510299882644, + "grad_norm": 0.09360034763813019, + "learning_rate": 6.390648076297279e-07, + "loss": 8.497, + "step": 195990 + }, + { + "epoch": 0.9788009688132038, + "grad_norm": 0.09473513811826706, + "learning_rate": 6.375628926881774e-07, + "loss": 8.5115, + "step": 196000 + }, + { + "epoch": 0.9788509076381433, + "grad_norm": 0.09806603938341141, + "learning_rate": 6.36060977746627e-07, + "loss": 8.5038, + "step": 196010 + }, + { + "epoch": 0.9789008464630827, + "grad_norm": 0.0843457505106926, + "learning_rate": 6.345590628050765e-07, + "loss": 8.5104, + "step": 196020 + }, + { + "epoch": 0.9789507852880222, + "grad_norm": 0.0888645127415657, + "learning_rate": 6.330571478635261e-07, + "loss": 8.5043, + "step": 196030 + }, + { + "epoch": 0.9790007241129616, + "grad_norm": 0.09105794876813889, + "learning_rate": 6.315552329219755e-07, + "loss": 8.5055, + "step": 196040 + }, + { + "epoch": 0.9790506629379011, + "grad_norm": 0.08398020267486572, + "learning_rate": 6.30053317980425e-07, + "loss": 8.5022, + "step": 196050 + }, + { + "epoch": 0.9791006017628405, + "grad_norm": 0.09144193679094315, + "learning_rate": 6.285514030388746e-07, + "loss": 8.4931, + "step": 196060 + }, + { + "epoch": 0.97915054058778, + "grad_norm": 0.08951180428266525, + "learning_rate": 6.270494880973241e-07, + "loss": 8.4928, + "step": 196070 + }, + { + "epoch": 0.9792004794127194, + "grad_norm": 0.08442022651433945, + "learning_rate": 6.255475731557737e-07, + "loss": 8.495, + "step": 196080 + }, + { + "epoch": 0.9792504182376589, + "grad_norm": 0.0968695729970932, + "learning_rate": 6.240456582142231e-07, + "loss": 8.4993, + "step": 196090 + }, + { + "epoch": 0.9793003570625983, + "grad_norm": 0.08780843019485474, + "learning_rate": 6.225437432726727e-07, + "loss": 8.5002, + "step": 196100 + }, + { + "epoch": 0.9793502958875377, + "grad_norm": 0.08662664890289307, + "learning_rate": 6.210418283311222e-07, + "loss": 8.5069, + "step": 196110 + }, + { + "epoch": 0.9794002347124772, + "grad_norm": 0.09597373008728027, + "learning_rate": 6.195399133895717e-07, + "loss": 8.5132, + "step": 196120 + }, + { + "epoch": 0.9794501735374167, + "grad_norm": 0.09307907521724701, + "learning_rate": 6.180379984480213e-07, + "loss": 8.4828, + "step": 196130 + }, + { + "epoch": 0.9795001123623561, + "grad_norm": 0.0918552353978157, + "learning_rate": 6.165360835064708e-07, + "loss": 8.508, + "step": 196140 + }, + { + "epoch": 0.9795500511872955, + "grad_norm": 0.08918620645999908, + "learning_rate": 6.150341685649203e-07, + "loss": 8.5133, + "step": 196150 + }, + { + "epoch": 0.979599990012235, + "grad_norm": 0.09026898443698883, + "learning_rate": 6.135322536233698e-07, + "loss": 8.5028, + "step": 196160 + }, + { + "epoch": 0.9796499288371745, + "grad_norm": 0.08795063197612762, + "learning_rate": 6.120303386818193e-07, + "loss": 8.5184, + "step": 196170 + }, + { + "epoch": 0.9796998676621139, + "grad_norm": 0.09220097213983536, + "learning_rate": 6.105284237402689e-07, + "loss": 8.5082, + "step": 196180 + }, + { + "epoch": 0.9797498064870533, + "grad_norm": 0.08570639044046402, + "learning_rate": 6.090265087987184e-07, + "loss": 8.5005, + "step": 196190 + }, + { + "epoch": 0.9797997453119928, + "grad_norm": 0.08638404309749603, + "learning_rate": 6.075245938571679e-07, + "loss": 8.506, + "step": 196200 + }, + { + "epoch": 0.9798496841369323, + "grad_norm": 0.09257509559392929, + "learning_rate": 6.060226789156174e-07, + "loss": 8.4789, + "step": 196210 + }, + { + "epoch": 0.9798996229618717, + "grad_norm": 0.09392807632684708, + "learning_rate": 6.04520763974067e-07, + "loss": 8.4907, + "step": 196220 + }, + { + "epoch": 0.9799495617868111, + "grad_norm": 0.087054044008255, + "learning_rate": 6.030188490325165e-07, + "loss": 8.4988, + "step": 196230 + }, + { + "epoch": 0.9799995006117506, + "grad_norm": 0.09597992151975632, + "learning_rate": 6.01516934090966e-07, + "loss": 8.4917, + "step": 196240 + }, + { + "epoch": 0.9800494394366901, + "grad_norm": 0.08776523172855377, + "learning_rate": 6.000150191494156e-07, + "loss": 8.5092, + "step": 196250 + }, + { + "epoch": 0.9800993782616295, + "grad_norm": 0.08641576766967773, + "learning_rate": 5.98513104207865e-07, + "loss": 8.5092, + "step": 196260 + }, + { + "epoch": 0.9801493170865689, + "grad_norm": 0.08972152322530746, + "learning_rate": 5.970111892663146e-07, + "loss": 8.4981, + "step": 196270 + }, + { + "epoch": 0.9801992559115084, + "grad_norm": 0.09072574973106384, + "learning_rate": 5.955092743247641e-07, + "loss": 8.5155, + "step": 196280 + }, + { + "epoch": 0.9802491947364479, + "grad_norm": 0.09410280734300613, + "learning_rate": 5.940073593832137e-07, + "loss": 8.5123, + "step": 196290 + }, + { + "epoch": 0.9802991335613873, + "grad_norm": 0.09331624209880829, + "learning_rate": 5.925054444416632e-07, + "loss": 8.4814, + "step": 196300 + }, + { + "epoch": 0.9803490723863267, + "grad_norm": 0.09342946112155914, + "learning_rate": 5.910035295001126e-07, + "loss": 8.4965, + "step": 196310 + }, + { + "epoch": 0.9803990112112662, + "grad_norm": 0.08969230949878693, + "learning_rate": 5.895016145585622e-07, + "loss": 8.4857, + "step": 196320 + }, + { + "epoch": 0.9804489500362057, + "grad_norm": 0.08578766137361526, + "learning_rate": 5.879996996170117e-07, + "loss": 8.4913, + "step": 196330 + }, + { + "epoch": 0.9804988888611451, + "grad_norm": 0.0996427908539772, + "learning_rate": 5.864977846754613e-07, + "loss": 8.4963, + "step": 196340 + }, + { + "epoch": 0.9805488276860845, + "grad_norm": 0.09154878556728363, + "learning_rate": 5.849958697339108e-07, + "loss": 8.4779, + "step": 196350 + }, + { + "epoch": 0.980598766511024, + "grad_norm": 0.09681200236082077, + "learning_rate": 5.834939547923602e-07, + "loss": 8.5062, + "step": 196360 + }, + { + "epoch": 0.9806487053359635, + "grad_norm": 0.09222651273012161, + "learning_rate": 5.819920398508098e-07, + "loss": 8.4888, + "step": 196370 + }, + { + "epoch": 0.9806986441609029, + "grad_norm": 0.08910214900970459, + "learning_rate": 5.804901249092593e-07, + "loss": 8.5117, + "step": 196380 + }, + { + "epoch": 0.9807485829858423, + "grad_norm": 0.09133374691009521, + "learning_rate": 5.789882099677089e-07, + "loss": 8.4863, + "step": 196390 + }, + { + "epoch": 0.9807985218107818, + "grad_norm": 0.09324437379837036, + "learning_rate": 5.774862950261584e-07, + "loss": 8.4807, + "step": 196400 + }, + { + "epoch": 0.9808484606357213, + "grad_norm": 0.08947594463825226, + "learning_rate": 5.75984380084608e-07, + "loss": 8.4995, + "step": 196410 + }, + { + "epoch": 0.9808983994606607, + "grad_norm": 0.09630808234214783, + "learning_rate": 5.744824651430574e-07, + "loss": 8.4807, + "step": 196420 + }, + { + "epoch": 0.9809483382856001, + "grad_norm": 0.08872811496257782, + "learning_rate": 5.729805502015069e-07, + "loss": 8.5092, + "step": 196430 + }, + { + "epoch": 0.9809982771105396, + "grad_norm": 0.08594346791505814, + "learning_rate": 5.714786352599565e-07, + "loss": 8.5042, + "step": 196440 + }, + { + "epoch": 0.9810482159354791, + "grad_norm": 0.08785369992256165, + "learning_rate": 5.69976720318406e-07, + "loss": 8.4873, + "step": 196450 + }, + { + "epoch": 0.9810981547604185, + "grad_norm": 0.08623884618282318, + "learning_rate": 5.684748053768556e-07, + "loss": 8.5087, + "step": 196460 + }, + { + "epoch": 0.9811480935853579, + "grad_norm": 0.09336108714342117, + "learning_rate": 5.66972890435305e-07, + "loss": 8.4784, + "step": 196470 + }, + { + "epoch": 0.9811980324102973, + "grad_norm": 0.09492529183626175, + "learning_rate": 5.654709754937545e-07, + "loss": 8.5124, + "step": 196480 + }, + { + "epoch": 0.9812479712352369, + "grad_norm": 0.08852214366197586, + "learning_rate": 5.639690605522041e-07, + "loss": 8.5098, + "step": 196490 + }, + { + "epoch": 0.9812979100601763, + "grad_norm": 0.09364449232816696, + "learning_rate": 5.624671456106536e-07, + "loss": 8.4938, + "step": 196500 + }, + { + "epoch": 0.9813478488851157, + "grad_norm": 0.0880199745297432, + "learning_rate": 5.609652306691032e-07, + "loss": 8.5006, + "step": 196510 + }, + { + "epoch": 0.9813977877100551, + "grad_norm": 0.08908896148204803, + "learning_rate": 5.594633157275527e-07, + "loss": 8.5116, + "step": 196520 + }, + { + "epoch": 0.9814477265349947, + "grad_norm": 0.0934915542602539, + "learning_rate": 5.579614007860022e-07, + "loss": 8.4807, + "step": 196530 + }, + { + "epoch": 0.9814976653599341, + "grad_norm": 0.09108605235815048, + "learning_rate": 5.564594858444517e-07, + "loss": 8.4912, + "step": 196540 + }, + { + "epoch": 0.9815476041848735, + "grad_norm": 0.08718253672122955, + "learning_rate": 5.549575709029012e-07, + "loss": 8.5017, + "step": 196550 + }, + { + "epoch": 0.981597543009813, + "grad_norm": 0.09099813550710678, + "learning_rate": 5.534556559613508e-07, + "loss": 8.5023, + "step": 196560 + }, + { + "epoch": 0.9816474818347525, + "grad_norm": 0.08504286408424377, + "learning_rate": 5.519537410198003e-07, + "loss": 8.522, + "step": 196570 + }, + { + "epoch": 0.9816974206596919, + "grad_norm": 0.08254232257604599, + "learning_rate": 5.504518260782498e-07, + "loss": 8.4956, + "step": 196580 + }, + { + "epoch": 0.9817473594846313, + "grad_norm": 0.08998601138591766, + "learning_rate": 5.489499111366993e-07, + "loss": 8.5012, + "step": 196590 + }, + { + "epoch": 0.9817972983095707, + "grad_norm": 0.09334680438041687, + "learning_rate": 5.474479961951489e-07, + "loss": 8.5018, + "step": 196600 + }, + { + "epoch": 0.9818472371345103, + "grad_norm": 0.09622131288051605, + "learning_rate": 5.459460812535984e-07, + "loss": 8.5014, + "step": 196610 + }, + { + "epoch": 0.9818971759594497, + "grad_norm": 0.0919957160949707, + "learning_rate": 5.444441663120479e-07, + "loss": 8.5115, + "step": 196620 + }, + { + "epoch": 0.9819471147843891, + "grad_norm": 0.08943960070610046, + "learning_rate": 5.429422513704974e-07, + "loss": 8.5137, + "step": 196630 + }, + { + "epoch": 0.9819970536093285, + "grad_norm": 0.09023985266685486, + "learning_rate": 5.414403364289469e-07, + "loss": 8.5006, + "step": 196640 + }, + { + "epoch": 0.9820469924342681, + "grad_norm": 0.09034501016139984, + "learning_rate": 5.399384214873965e-07, + "loss": 8.4943, + "step": 196650 + }, + { + "epoch": 0.9820969312592075, + "grad_norm": 0.08900442719459534, + "learning_rate": 5.38436506545846e-07, + "loss": 8.5223, + "step": 196660 + }, + { + "epoch": 0.9821468700841469, + "grad_norm": 0.08898912370204926, + "learning_rate": 5.369345916042955e-07, + "loss": 8.51, + "step": 196670 + }, + { + "epoch": 0.9821968089090863, + "grad_norm": 0.09157400578260422, + "learning_rate": 5.354326766627451e-07, + "loss": 8.5056, + "step": 196680 + }, + { + "epoch": 0.9822467477340259, + "grad_norm": 0.09418471902608871, + "learning_rate": 5.339307617211945e-07, + "loss": 8.4981, + "step": 196690 + }, + { + "epoch": 0.9822966865589653, + "grad_norm": 0.0900435671210289, + "learning_rate": 5.324288467796441e-07, + "loss": 8.4888, + "step": 196700 + }, + { + "epoch": 0.9823466253839047, + "grad_norm": 0.0917317196726799, + "learning_rate": 5.309269318380936e-07, + "loss": 8.5087, + "step": 196710 + }, + { + "epoch": 0.9823965642088441, + "grad_norm": 0.09196818619966507, + "learning_rate": 5.294250168965432e-07, + "loss": 8.5133, + "step": 196720 + }, + { + "epoch": 0.9824465030337837, + "grad_norm": 0.09314832836389542, + "learning_rate": 5.279231019549927e-07, + "loss": 8.4901, + "step": 196730 + }, + { + "epoch": 0.9824964418587231, + "grad_norm": 0.09629698842763901, + "learning_rate": 5.264211870134421e-07, + "loss": 8.4865, + "step": 196740 + }, + { + "epoch": 0.9825463806836625, + "grad_norm": 0.09784742444753647, + "learning_rate": 5.249192720718917e-07, + "loss": 8.4937, + "step": 196750 + }, + { + "epoch": 0.9825963195086019, + "grad_norm": 0.08975919336080551, + "learning_rate": 5.234173571303412e-07, + "loss": 8.4921, + "step": 196760 + }, + { + "epoch": 0.9826462583335415, + "grad_norm": 0.09287573397159576, + "learning_rate": 5.219154421887908e-07, + "loss": 8.4988, + "step": 196770 + }, + { + "epoch": 0.9826961971584809, + "grad_norm": 0.09043823927640915, + "learning_rate": 5.204135272472403e-07, + "loss": 8.4797, + "step": 196780 + }, + { + "epoch": 0.9827461359834203, + "grad_norm": 0.0993688553571701, + "learning_rate": 5.189116123056899e-07, + "loss": 8.4846, + "step": 196790 + }, + { + "epoch": 0.9827960748083597, + "grad_norm": 0.0893450453877449, + "learning_rate": 5.174096973641393e-07, + "loss": 8.4981, + "step": 196800 + }, + { + "epoch": 0.9828460136332992, + "grad_norm": 0.08755376189947128, + "learning_rate": 5.159077824225888e-07, + "loss": 8.5012, + "step": 196810 + }, + { + "epoch": 0.9828959524582387, + "grad_norm": 0.09437739849090576, + "learning_rate": 5.144058674810384e-07, + "loss": 8.5008, + "step": 196820 + }, + { + "epoch": 0.9829458912831781, + "grad_norm": 0.08554568886756897, + "learning_rate": 5.129039525394879e-07, + "loss": 8.4992, + "step": 196830 + }, + { + "epoch": 0.9829958301081175, + "grad_norm": 0.09264194965362549, + "learning_rate": 5.114020375979375e-07, + "loss": 8.4875, + "step": 196840 + }, + { + "epoch": 0.983045768933057, + "grad_norm": 0.09195772558450699, + "learning_rate": 5.099001226563869e-07, + "loss": 8.5076, + "step": 196850 + }, + { + "epoch": 0.9830957077579965, + "grad_norm": 0.0927330031991005, + "learning_rate": 5.083982077148364e-07, + "loss": 8.5003, + "step": 196860 + }, + { + "epoch": 0.9831456465829359, + "grad_norm": 0.09278733283281326, + "learning_rate": 5.06896292773286e-07, + "loss": 8.4892, + "step": 196870 + }, + { + "epoch": 0.9831955854078753, + "grad_norm": 0.09159595519304276, + "learning_rate": 5.053943778317355e-07, + "loss": 8.501, + "step": 196880 + }, + { + "epoch": 0.9832455242328147, + "grad_norm": 0.08706831187009811, + "learning_rate": 5.038924628901851e-07, + "loss": 8.5019, + "step": 196890 + }, + { + "epoch": 0.9832954630577543, + "grad_norm": 0.08902629464864731, + "learning_rate": 5.023905479486345e-07, + "loss": 8.4914, + "step": 196900 + }, + { + "epoch": 0.9833454018826937, + "grad_norm": 0.09136991202831268, + "learning_rate": 5.00888633007084e-07, + "loss": 8.4933, + "step": 196910 + }, + { + "epoch": 0.9833953407076331, + "grad_norm": 0.09446898847818375, + "learning_rate": 4.993867180655336e-07, + "loss": 8.4835, + "step": 196920 + }, + { + "epoch": 0.9834452795325725, + "grad_norm": 0.09721554815769196, + "learning_rate": 4.978848031239831e-07, + "loss": 8.4986, + "step": 196930 + }, + { + "epoch": 0.9834952183575121, + "grad_norm": 0.09217740595340729, + "learning_rate": 4.963828881824327e-07, + "loss": 8.4987, + "step": 196940 + }, + { + "epoch": 0.9835451571824515, + "grad_norm": 0.09315560013055801, + "learning_rate": 4.948809732408822e-07, + "loss": 8.4985, + "step": 196950 + }, + { + "epoch": 0.9835950960073909, + "grad_norm": 0.08701568096876144, + "learning_rate": 4.933790582993317e-07, + "loss": 8.5176, + "step": 196960 + }, + { + "epoch": 0.9836450348323303, + "grad_norm": 0.09363686293363571, + "learning_rate": 4.918771433577812e-07, + "loss": 8.5186, + "step": 196970 + }, + { + "epoch": 0.9836949736572699, + "grad_norm": 0.0900358110666275, + "learning_rate": 4.903752284162307e-07, + "loss": 8.5071, + "step": 196980 + }, + { + "epoch": 0.9837449124822093, + "grad_norm": 0.08556856215000153, + "learning_rate": 4.888733134746803e-07, + "loss": 8.508, + "step": 196990 + }, + { + "epoch": 0.9837948513071487, + "grad_norm": 0.08887305855751038, + "learning_rate": 4.873713985331298e-07, + "loss": 8.4958, + "step": 197000 + }, + { + "epoch": 0.9838447901320881, + "grad_norm": 0.08783209323883057, + "learning_rate": 4.858694835915793e-07, + "loss": 8.5032, + "step": 197010 + }, + { + "epoch": 0.9838947289570277, + "grad_norm": 0.08912937343120575, + "learning_rate": 4.843675686500288e-07, + "loss": 8.5039, + "step": 197020 + }, + { + "epoch": 0.9839446677819671, + "grad_norm": 0.09269113093614578, + "learning_rate": 4.828656537084784e-07, + "loss": 8.4991, + "step": 197030 + }, + { + "epoch": 0.9839946066069065, + "grad_norm": 0.0884513109922409, + "learning_rate": 4.813637387669279e-07, + "loss": 8.4936, + "step": 197040 + }, + { + "epoch": 0.9840445454318459, + "grad_norm": 0.09053158015012741, + "learning_rate": 4.798618238253774e-07, + "loss": 8.5058, + "step": 197050 + }, + { + "epoch": 0.9840944842567855, + "grad_norm": 0.09161665290594101, + "learning_rate": 4.783599088838269e-07, + "loss": 8.5196, + "step": 197060 + }, + { + "epoch": 0.9841444230817249, + "grad_norm": 0.09395797550678253, + "learning_rate": 4.768579939422764e-07, + "loss": 8.5149, + "step": 197070 + }, + { + "epoch": 0.9841943619066643, + "grad_norm": 0.09031932055950165, + "learning_rate": 4.7535607900072597e-07, + "loss": 8.5077, + "step": 197080 + }, + { + "epoch": 0.9842443007316037, + "grad_norm": 0.08975038677453995, + "learning_rate": 4.738541640591755e-07, + "loss": 8.5292, + "step": 197090 + }, + { + "epoch": 0.9842942395565433, + "grad_norm": 0.09021060913801193, + "learning_rate": 4.7235224911762505e-07, + "loss": 8.4957, + "step": 197100 + }, + { + "epoch": 0.9843441783814827, + "grad_norm": 0.08840472251176834, + "learning_rate": 4.7085033417607454e-07, + "loss": 8.5036, + "step": 197110 + }, + { + "epoch": 0.9843941172064221, + "grad_norm": 0.08798038959503174, + "learning_rate": 4.693484192345241e-07, + "loss": 8.5017, + "step": 197120 + }, + { + "epoch": 0.9844440560313615, + "grad_norm": 0.08750328421592712, + "learning_rate": 4.678465042929735e-07, + "loss": 8.5018, + "step": 197130 + }, + { + "epoch": 0.9844939948563011, + "grad_norm": 0.0955323576927185, + "learning_rate": 4.6634458935142307e-07, + "loss": 8.4924, + "step": 197140 + }, + { + "epoch": 0.9845439336812405, + "grad_norm": 0.08959522098302841, + "learning_rate": 4.648426744098726e-07, + "loss": 8.5035, + "step": 197150 + }, + { + "epoch": 0.9845938725061799, + "grad_norm": 0.08650697767734528, + "learning_rate": 4.633407594683221e-07, + "loss": 8.4908, + "step": 197160 + }, + { + "epoch": 0.9846438113311193, + "grad_norm": 0.091778464615345, + "learning_rate": 4.6183884452677164e-07, + "loss": 8.4966, + "step": 197170 + }, + { + "epoch": 0.9846937501560589, + "grad_norm": 0.08719423413276672, + "learning_rate": 4.603369295852212e-07, + "loss": 8.5151, + "step": 197180 + }, + { + "epoch": 0.9847436889809983, + "grad_norm": 0.09176063537597656, + "learning_rate": 4.588350146436707e-07, + "loss": 8.4962, + "step": 197190 + }, + { + "epoch": 0.9847936278059377, + "grad_norm": 0.08916878700256348, + "learning_rate": 4.573330997021202e-07, + "loss": 8.4894, + "step": 197200 + }, + { + "epoch": 0.9848435666308771, + "grad_norm": 0.08697172999382019, + "learning_rate": 4.558311847605697e-07, + "loss": 8.5025, + "step": 197210 + }, + { + "epoch": 0.9848935054558167, + "grad_norm": 0.09053148329257965, + "learning_rate": 4.5432926981901925e-07, + "loss": 8.4972, + "step": 197220 + }, + { + "epoch": 0.9849434442807561, + "grad_norm": 0.0926218330860138, + "learning_rate": 4.528273548774688e-07, + "loss": 8.4953, + "step": 197230 + }, + { + "epoch": 0.9849933831056955, + "grad_norm": 0.0867324098944664, + "learning_rate": 4.513254399359183e-07, + "loss": 8.4862, + "step": 197240 + }, + { + "epoch": 0.9850433219306349, + "grad_norm": 0.0981505885720253, + "learning_rate": 4.4982352499436783e-07, + "loss": 8.4912, + "step": 197250 + }, + { + "epoch": 0.9850932607555745, + "grad_norm": 0.09056996554136276, + "learning_rate": 4.4832161005281737e-07, + "loss": 8.508, + "step": 197260 + }, + { + "epoch": 0.9851431995805139, + "grad_norm": 0.08942002803087234, + "learning_rate": 4.4681969511126686e-07, + "loss": 8.5016, + "step": 197270 + }, + { + "epoch": 0.9851931384054533, + "grad_norm": 0.09431149810552597, + "learning_rate": 4.453177801697164e-07, + "loss": 8.5033, + "step": 197280 + }, + { + "epoch": 0.9852430772303927, + "grad_norm": 0.08968444168567657, + "learning_rate": 4.438158652281659e-07, + "loss": 8.5009, + "step": 197290 + }, + { + "epoch": 0.9852930160553323, + "grad_norm": 0.09338608384132385, + "learning_rate": 4.4231395028661544e-07, + "loss": 8.4933, + "step": 197300 + }, + { + "epoch": 0.9853429548802717, + "grad_norm": 0.09319829195737839, + "learning_rate": 4.40812035345065e-07, + "loss": 8.5047, + "step": 197310 + }, + { + "epoch": 0.9853928937052111, + "grad_norm": 0.08871421962976456, + "learning_rate": 4.3931012040351447e-07, + "loss": 8.4965, + "step": 197320 + }, + { + "epoch": 0.9854428325301505, + "grad_norm": 0.08973289281129837, + "learning_rate": 4.37808205461964e-07, + "loss": 8.5169, + "step": 197330 + }, + { + "epoch": 0.9854927713550901, + "grad_norm": 0.0932137593626976, + "learning_rate": 4.3630629052041356e-07, + "loss": 8.5038, + "step": 197340 + }, + { + "epoch": 0.9855427101800295, + "grad_norm": 0.09041199833154678, + "learning_rate": 4.3480437557886305e-07, + "loss": 8.4874, + "step": 197350 + }, + { + "epoch": 0.9855926490049689, + "grad_norm": 0.08918718248605728, + "learning_rate": 4.333024606373126e-07, + "loss": 8.5049, + "step": 197360 + }, + { + "epoch": 0.9856425878299083, + "grad_norm": 0.0872543603181839, + "learning_rate": 4.3180054569576213e-07, + "loss": 8.4965, + "step": 197370 + }, + { + "epoch": 0.9856925266548479, + "grad_norm": 0.09441404789686203, + "learning_rate": 4.302986307542116e-07, + "loss": 8.5013, + "step": 197380 + }, + { + "epoch": 0.9857424654797873, + "grad_norm": 0.08817983418703079, + "learning_rate": 4.2879671581266117e-07, + "loss": 8.4965, + "step": 197390 + }, + { + "epoch": 0.9857924043047267, + "grad_norm": 0.09181421250104904, + "learning_rate": 4.2729480087111066e-07, + "loss": 8.4777, + "step": 197400 + }, + { + "epoch": 0.9858423431296661, + "grad_norm": 0.0921471118927002, + "learning_rate": 4.257928859295602e-07, + "loss": 8.4848, + "step": 197410 + }, + { + "epoch": 0.9858922819546057, + "grad_norm": 0.08574623614549637, + "learning_rate": 4.2429097098800974e-07, + "loss": 8.4989, + "step": 197420 + }, + { + "epoch": 0.9859422207795451, + "grad_norm": 0.08747486025094986, + "learning_rate": 4.2278905604645923e-07, + "loss": 8.4846, + "step": 197430 + }, + { + "epoch": 0.9859921596044845, + "grad_norm": 0.09855210781097412, + "learning_rate": 4.212871411049088e-07, + "loss": 8.505, + "step": 197440 + }, + { + "epoch": 0.9860420984294239, + "grad_norm": 0.09506363421678543, + "learning_rate": 4.197852261633583e-07, + "loss": 8.5137, + "step": 197450 + }, + { + "epoch": 0.9860920372543635, + "grad_norm": 0.09487252682447433, + "learning_rate": 4.182833112218078e-07, + "loss": 8.499, + "step": 197460 + }, + { + "epoch": 0.9861419760793029, + "grad_norm": 0.08848896622657776, + "learning_rate": 4.1678139628025735e-07, + "loss": 8.4939, + "step": 197470 + }, + { + "epoch": 0.9861919149042423, + "grad_norm": 0.09323852509260178, + "learning_rate": 4.1527948133870684e-07, + "loss": 8.5181, + "step": 197480 + }, + { + "epoch": 0.9862418537291817, + "grad_norm": 0.09195881336927414, + "learning_rate": 4.137775663971564e-07, + "loss": 8.5032, + "step": 197490 + }, + { + "epoch": 0.9862917925541212, + "grad_norm": 0.08648441731929779, + "learning_rate": 4.1227565145560593e-07, + "loss": 8.5266, + "step": 197500 + }, + { + "epoch": 0.9863417313790607, + "grad_norm": 0.08899568021297455, + "learning_rate": 4.107737365140554e-07, + "loss": 8.4906, + "step": 197510 + }, + { + "epoch": 0.9863916702040001, + "grad_norm": 0.09646333009004593, + "learning_rate": 4.0927182157250496e-07, + "loss": 8.481, + "step": 197520 + }, + { + "epoch": 0.9864416090289395, + "grad_norm": 0.09079481661319733, + "learning_rate": 4.077699066309545e-07, + "loss": 8.4866, + "step": 197530 + }, + { + "epoch": 0.986491547853879, + "grad_norm": 0.09302527457475662, + "learning_rate": 4.06267991689404e-07, + "loss": 8.4956, + "step": 197540 + }, + { + "epoch": 0.9865414866788185, + "grad_norm": 0.08830315619707108, + "learning_rate": 4.0476607674785354e-07, + "loss": 8.4984, + "step": 197550 + }, + { + "epoch": 0.9865914255037579, + "grad_norm": 0.085964635014534, + "learning_rate": 4.0326416180630303e-07, + "loss": 8.4988, + "step": 197560 + }, + { + "epoch": 0.9866413643286973, + "grad_norm": 0.0878470242023468, + "learning_rate": 4.0176224686475257e-07, + "loss": 8.4891, + "step": 197570 + }, + { + "epoch": 0.9866913031536368, + "grad_norm": 0.09524169564247131, + "learning_rate": 4.002603319232021e-07, + "loss": 8.4738, + "step": 197580 + }, + { + "epoch": 0.9867412419785763, + "grad_norm": 0.09403923898935318, + "learning_rate": 3.987584169816516e-07, + "loss": 8.4943, + "step": 197590 + }, + { + "epoch": 0.9867911808035157, + "grad_norm": 0.09067419916391373, + "learning_rate": 3.9725650204010115e-07, + "loss": 8.5038, + "step": 197600 + }, + { + "epoch": 0.9868411196284551, + "grad_norm": 0.09345284849405289, + "learning_rate": 3.957545870985507e-07, + "loss": 8.4813, + "step": 197610 + }, + { + "epoch": 0.9868910584533946, + "grad_norm": 0.0898512527346611, + "learning_rate": 3.942526721570002e-07, + "loss": 8.4934, + "step": 197620 + }, + { + "epoch": 0.9869409972783341, + "grad_norm": 0.08990932255983353, + "learning_rate": 3.927507572154497e-07, + "loss": 8.5102, + "step": 197630 + }, + { + "epoch": 0.9869909361032735, + "grad_norm": 0.08922252804040909, + "learning_rate": 3.9124884227389927e-07, + "loss": 8.5005, + "step": 197640 + }, + { + "epoch": 0.9870408749282129, + "grad_norm": 0.09017761051654816, + "learning_rate": 3.8974692733234876e-07, + "loss": 8.4891, + "step": 197650 + }, + { + "epoch": 0.9870908137531524, + "grad_norm": 0.09690330922603607, + "learning_rate": 3.882450123907983e-07, + "loss": 8.519, + "step": 197660 + }, + { + "epoch": 0.9871407525780919, + "grad_norm": 0.08771155774593353, + "learning_rate": 3.867430974492478e-07, + "loss": 8.5076, + "step": 197670 + }, + { + "epoch": 0.9871906914030313, + "grad_norm": 0.09097769856452942, + "learning_rate": 3.8524118250769733e-07, + "loss": 8.4906, + "step": 197680 + }, + { + "epoch": 0.9872406302279707, + "grad_norm": 0.0901932418346405, + "learning_rate": 3.837392675661469e-07, + "loss": 8.4956, + "step": 197690 + }, + { + "epoch": 0.9872905690529102, + "grad_norm": 0.08922629803419113, + "learning_rate": 3.8223735262459637e-07, + "loss": 8.5213, + "step": 197700 + }, + { + "epoch": 0.9873405078778497, + "grad_norm": 0.09865950047969818, + "learning_rate": 3.807354376830459e-07, + "loss": 8.5128, + "step": 197710 + }, + { + "epoch": 0.9873904467027891, + "grad_norm": 0.0916372612118721, + "learning_rate": 3.7923352274149545e-07, + "loss": 8.5038, + "step": 197720 + }, + { + "epoch": 0.9874403855277285, + "grad_norm": 0.09816200286149979, + "learning_rate": 3.7773160779994494e-07, + "loss": 8.483, + "step": 197730 + }, + { + "epoch": 0.987490324352668, + "grad_norm": 0.0914330706000328, + "learning_rate": 3.762296928583945e-07, + "loss": 8.5011, + "step": 197740 + }, + { + "epoch": 0.9875402631776075, + "grad_norm": 0.09769126027822495, + "learning_rate": 3.74727777916844e-07, + "loss": 8.5084, + "step": 197750 + }, + { + "epoch": 0.9875902020025469, + "grad_norm": 0.09237746149301529, + "learning_rate": 3.732258629752935e-07, + "loss": 8.4931, + "step": 197760 + }, + { + "epoch": 0.9876401408274863, + "grad_norm": 0.09044523537158966, + "learning_rate": 3.7172394803374306e-07, + "loss": 8.5058, + "step": 197770 + }, + { + "epoch": 0.9876900796524257, + "grad_norm": 0.09292086958885193, + "learning_rate": 3.7022203309219255e-07, + "loss": 8.486, + "step": 197780 + }, + { + "epoch": 0.9877400184773653, + "grad_norm": 0.08766113966703415, + "learning_rate": 3.687201181506421e-07, + "loss": 8.5007, + "step": 197790 + }, + { + "epoch": 0.9877899573023047, + "grad_norm": 0.09624643623828888, + "learning_rate": 3.6721820320909164e-07, + "loss": 8.4815, + "step": 197800 + }, + { + "epoch": 0.9878398961272441, + "grad_norm": 0.0919470340013504, + "learning_rate": 3.6571628826754113e-07, + "loss": 8.5096, + "step": 197810 + }, + { + "epoch": 0.9878898349521835, + "grad_norm": 0.09844647347927094, + "learning_rate": 3.6421437332599067e-07, + "loss": 8.4995, + "step": 197820 + }, + { + "epoch": 0.987939773777123, + "grad_norm": 0.0885077640414238, + "learning_rate": 3.6271245838444016e-07, + "loss": 8.4987, + "step": 197830 + }, + { + "epoch": 0.9879897126020625, + "grad_norm": 0.09276360273361206, + "learning_rate": 3.612105434428897e-07, + "loss": 8.496, + "step": 197840 + }, + { + "epoch": 0.9880396514270019, + "grad_norm": 0.09369642287492752, + "learning_rate": 3.5970862850133925e-07, + "loss": 8.4979, + "step": 197850 + }, + { + "epoch": 0.9880895902519413, + "grad_norm": 0.08917860686779022, + "learning_rate": 3.5820671355978874e-07, + "loss": 8.4985, + "step": 197860 + }, + { + "epoch": 0.9881395290768809, + "grad_norm": 0.09895788133144379, + "learning_rate": 3.567047986182383e-07, + "loss": 8.5084, + "step": 197870 + }, + { + "epoch": 0.9881894679018203, + "grad_norm": 0.09372813254594803, + "learning_rate": 3.552028836766878e-07, + "loss": 8.4956, + "step": 197880 + }, + { + "epoch": 0.9882394067267597, + "grad_norm": 0.08584040403366089, + "learning_rate": 3.537009687351373e-07, + "loss": 8.4953, + "step": 197890 + }, + { + "epoch": 0.9882893455516991, + "grad_norm": 0.09182565659284592, + "learning_rate": 3.5219905379358686e-07, + "loss": 8.4967, + "step": 197900 + }, + { + "epoch": 0.9883392843766386, + "grad_norm": 0.09269486367702484, + "learning_rate": 3.506971388520364e-07, + "loss": 8.486, + "step": 197910 + }, + { + "epoch": 0.9883892232015781, + "grad_norm": 0.09027913957834244, + "learning_rate": 3.491952239104859e-07, + "loss": 8.4925, + "step": 197920 + }, + { + "epoch": 0.9884391620265175, + "grad_norm": 0.09345810860395432, + "learning_rate": 3.4769330896893544e-07, + "loss": 8.5197, + "step": 197930 + }, + { + "epoch": 0.9884891008514569, + "grad_norm": 0.09321021288633347, + "learning_rate": 3.461913940273849e-07, + "loss": 8.5137, + "step": 197940 + }, + { + "epoch": 0.9885390396763964, + "grad_norm": 0.09090397506952286, + "learning_rate": 3.4468947908583447e-07, + "loss": 8.4701, + "step": 197950 + }, + { + "epoch": 0.9885889785013359, + "grad_norm": 0.08423801511526108, + "learning_rate": 3.43187564144284e-07, + "loss": 8.4907, + "step": 197960 + }, + { + "epoch": 0.9886389173262753, + "grad_norm": 0.08930182456970215, + "learning_rate": 3.416856492027335e-07, + "loss": 8.506, + "step": 197970 + }, + { + "epoch": 0.9886888561512147, + "grad_norm": 0.09574810415506363, + "learning_rate": 3.4018373426118304e-07, + "loss": 8.4963, + "step": 197980 + }, + { + "epoch": 0.9887387949761542, + "grad_norm": 0.09295687824487686, + "learning_rate": 3.386818193196326e-07, + "loss": 8.4867, + "step": 197990 + }, + { + "epoch": 0.9887887338010937, + "grad_norm": 0.09014062583446503, + "learning_rate": 3.371799043780821e-07, + "loss": 8.4981, + "step": 198000 + }, + { + "epoch": 0.9888386726260331, + "grad_norm": 0.08868231624364853, + "learning_rate": 3.356779894365316e-07, + "loss": 8.4993, + "step": 198010 + }, + { + "epoch": 0.9888886114509725, + "grad_norm": 0.09391395002603531, + "learning_rate": 3.341760744949811e-07, + "loss": 8.4724, + "step": 198020 + }, + { + "epoch": 0.988938550275912, + "grad_norm": 0.08955026417970657, + "learning_rate": 3.3267415955343065e-07, + "loss": 8.5013, + "step": 198030 + }, + { + "epoch": 0.9889884891008515, + "grad_norm": 0.09359218180179596, + "learning_rate": 3.311722446118802e-07, + "loss": 8.4972, + "step": 198040 + }, + { + "epoch": 0.9890384279257909, + "grad_norm": 0.09413956850767136, + "learning_rate": 3.296703296703297e-07, + "loss": 8.5147, + "step": 198050 + }, + { + "epoch": 0.9890883667507303, + "grad_norm": 0.09088245779275894, + "learning_rate": 3.281684147287792e-07, + "loss": 8.4853, + "step": 198060 + }, + { + "epoch": 0.9891383055756698, + "grad_norm": 0.09208165854215622, + "learning_rate": 3.266664997872287e-07, + "loss": 8.4913, + "step": 198070 + }, + { + "epoch": 0.9891882444006093, + "grad_norm": 0.09409578889608383, + "learning_rate": 3.251645848456782e-07, + "loss": 8.4958, + "step": 198080 + }, + { + "epoch": 0.9892381832255487, + "grad_norm": 0.08956706523895264, + "learning_rate": 3.2366266990412775e-07, + "loss": 8.4896, + "step": 198090 + }, + { + "epoch": 0.9892881220504881, + "grad_norm": 0.09481275826692581, + "learning_rate": 3.2216075496257724e-07, + "loss": 8.4854, + "step": 198100 + }, + { + "epoch": 0.9893380608754276, + "grad_norm": 0.09185443073511124, + "learning_rate": 3.206588400210268e-07, + "loss": 8.5019, + "step": 198110 + }, + { + "epoch": 0.9893879997003671, + "grad_norm": 0.09097184240818024, + "learning_rate": 3.1915692507947633e-07, + "loss": 8.5186, + "step": 198120 + }, + { + "epoch": 0.9894379385253065, + "grad_norm": 0.0905596986413002, + "learning_rate": 3.176550101379258e-07, + "loss": 8.5126, + "step": 198130 + }, + { + "epoch": 0.9894878773502459, + "grad_norm": 0.09233243763446808, + "learning_rate": 3.1615309519637536e-07, + "loss": 8.4988, + "step": 198140 + }, + { + "epoch": 0.9895378161751854, + "grad_norm": 0.0961671769618988, + "learning_rate": 3.146511802548249e-07, + "loss": 8.4913, + "step": 198150 + }, + { + "epoch": 0.9895877550001249, + "grad_norm": 0.09645285457372665, + "learning_rate": 3.131492653132744e-07, + "loss": 8.4986, + "step": 198160 + }, + { + "epoch": 0.9896376938250643, + "grad_norm": 0.09721177816390991, + "learning_rate": 3.1164735037172394e-07, + "loss": 8.5021, + "step": 198170 + }, + { + "epoch": 0.9896876326500037, + "grad_norm": 0.09604041278362274, + "learning_rate": 3.101454354301735e-07, + "loss": 8.5088, + "step": 198180 + }, + { + "epoch": 0.9897375714749432, + "grad_norm": 0.09055788815021515, + "learning_rate": 3.0864352048862297e-07, + "loss": 8.4992, + "step": 198190 + }, + { + "epoch": 0.9897875102998827, + "grad_norm": 0.08688069880008698, + "learning_rate": 3.071416055470725e-07, + "loss": 8.5091, + "step": 198200 + }, + { + "epoch": 0.9898374491248221, + "grad_norm": 0.0961107388138771, + "learning_rate": 3.05639690605522e-07, + "loss": 8.5026, + "step": 198210 + }, + { + "epoch": 0.9898873879497615, + "grad_norm": 0.0864131972193718, + "learning_rate": 3.0413777566397155e-07, + "loss": 8.5183, + "step": 198220 + }, + { + "epoch": 0.989937326774701, + "grad_norm": 0.09623812884092331, + "learning_rate": 3.026358607224211e-07, + "loss": 8.5025, + "step": 198230 + }, + { + "epoch": 0.9899872655996405, + "grad_norm": 0.0971948653459549, + "learning_rate": 3.011339457808706e-07, + "loss": 8.4985, + "step": 198240 + }, + { + "epoch": 0.9900372044245799, + "grad_norm": 0.09427471458911896, + "learning_rate": 2.996320308393201e-07, + "loss": 8.4906, + "step": 198250 + }, + { + "epoch": 0.9900871432495193, + "grad_norm": 0.09101586788892746, + "learning_rate": 2.9813011589776967e-07, + "loss": 8.5086, + "step": 198260 + }, + { + "epoch": 0.9901370820744588, + "grad_norm": 0.09006375819444656, + "learning_rate": 2.9662820095621916e-07, + "loss": 8.493, + "step": 198270 + }, + { + "epoch": 0.9901870208993983, + "grad_norm": 0.08943824470043182, + "learning_rate": 2.951262860146687e-07, + "loss": 8.5086, + "step": 198280 + }, + { + "epoch": 0.9902369597243377, + "grad_norm": 0.08912531286478043, + "learning_rate": 2.936243710731182e-07, + "loss": 8.4813, + "step": 198290 + }, + { + "epoch": 0.9902868985492771, + "grad_norm": 0.10092119127511978, + "learning_rate": 2.9212245613156774e-07, + "loss": 8.4847, + "step": 198300 + }, + { + "epoch": 0.9903368373742166, + "grad_norm": 0.093541719019413, + "learning_rate": 2.906205411900173e-07, + "loss": 8.5023, + "step": 198310 + }, + { + "epoch": 0.990386776199156, + "grad_norm": 0.09948543459177017, + "learning_rate": 2.8911862624846677e-07, + "loss": 8.4979, + "step": 198320 + }, + { + "epoch": 0.9904367150240955, + "grad_norm": 0.0884837880730629, + "learning_rate": 2.876167113069163e-07, + "loss": 8.5008, + "step": 198330 + }, + { + "epoch": 0.9904866538490349, + "grad_norm": 0.08703433722257614, + "learning_rate": 2.8611479636536585e-07, + "loss": 8.4779, + "step": 198340 + }, + { + "epoch": 0.9905365926739744, + "grad_norm": 0.09166325628757477, + "learning_rate": 2.8461288142381534e-07, + "loss": 8.4882, + "step": 198350 + }, + { + "epoch": 0.9905865314989138, + "grad_norm": 0.09063871204853058, + "learning_rate": 2.831109664822649e-07, + "loss": 8.5057, + "step": 198360 + }, + { + "epoch": 0.9906364703238533, + "grad_norm": 0.08705553412437439, + "learning_rate": 2.816090515407144e-07, + "loss": 8.5068, + "step": 198370 + }, + { + "epoch": 0.9906864091487927, + "grad_norm": 0.09029226005077362, + "learning_rate": 2.801071365991639e-07, + "loss": 8.4944, + "step": 198380 + }, + { + "epoch": 0.9907363479737322, + "grad_norm": 0.09434723854064941, + "learning_rate": 2.7860522165761346e-07, + "loss": 8.4833, + "step": 198390 + }, + { + "epoch": 0.9907862867986716, + "grad_norm": 0.0964699313044548, + "learning_rate": 2.7710330671606295e-07, + "loss": 8.4942, + "step": 198400 + }, + { + "epoch": 0.9908362256236111, + "grad_norm": 0.08891110122203827, + "learning_rate": 2.756013917745125e-07, + "loss": 8.4942, + "step": 198410 + }, + { + "epoch": 0.9908861644485505, + "grad_norm": 0.09020981937646866, + "learning_rate": 2.7409947683296204e-07, + "loss": 8.5053, + "step": 198420 + }, + { + "epoch": 0.99093610327349, + "grad_norm": 0.09375789016485214, + "learning_rate": 2.7259756189141153e-07, + "loss": 8.503, + "step": 198430 + }, + { + "epoch": 0.9909860420984294, + "grad_norm": 0.08944463729858398, + "learning_rate": 2.7109564694986107e-07, + "loss": 8.5068, + "step": 198440 + }, + { + "epoch": 0.9910359809233689, + "grad_norm": 0.0924299880862236, + "learning_rate": 2.695937320083106e-07, + "loss": 8.4966, + "step": 198450 + }, + { + "epoch": 0.9910859197483083, + "grad_norm": 0.09989719092845917, + "learning_rate": 2.680918170667601e-07, + "loss": 8.4982, + "step": 198460 + }, + { + "epoch": 0.9911358585732478, + "grad_norm": 0.09110445529222488, + "learning_rate": 2.6658990212520965e-07, + "loss": 8.4971, + "step": 198470 + }, + { + "epoch": 0.9911857973981872, + "grad_norm": 0.09000138938426971, + "learning_rate": 2.6508798718365914e-07, + "loss": 8.496, + "step": 198480 + }, + { + "epoch": 0.9912357362231267, + "grad_norm": 0.09816454350948334, + "learning_rate": 2.635860722421087e-07, + "loss": 8.4862, + "step": 198490 + }, + { + "epoch": 0.9912856750480661, + "grad_norm": 0.09222198277711868, + "learning_rate": 2.620841573005582e-07, + "loss": 8.495, + "step": 198500 + }, + { + "epoch": 0.9913356138730056, + "grad_norm": 0.09800387173891068, + "learning_rate": 2.605822423590077e-07, + "loss": 8.5051, + "step": 198510 + }, + { + "epoch": 0.991385552697945, + "grad_norm": 0.08874485641717911, + "learning_rate": 2.5908032741745726e-07, + "loss": 8.4951, + "step": 198520 + }, + { + "epoch": 0.9914354915228845, + "grad_norm": 0.08928778022527695, + "learning_rate": 2.575784124759068e-07, + "loss": 8.4947, + "step": 198530 + }, + { + "epoch": 0.9914854303478239, + "grad_norm": 0.08784019201993942, + "learning_rate": 2.560764975343563e-07, + "loss": 8.4991, + "step": 198540 + }, + { + "epoch": 0.9915353691727634, + "grad_norm": 0.09007895737886429, + "learning_rate": 2.5457458259280584e-07, + "loss": 8.4845, + "step": 198550 + }, + { + "epoch": 0.9915853079977028, + "grad_norm": 0.08894022554159164, + "learning_rate": 2.530726676512553e-07, + "loss": 8.5006, + "step": 198560 + }, + { + "epoch": 0.9916352468226423, + "grad_norm": 0.08828302472829819, + "learning_rate": 2.5157075270970487e-07, + "loss": 8.52, + "step": 198570 + }, + { + "epoch": 0.9916851856475817, + "grad_norm": 0.0931435376405716, + "learning_rate": 2.500688377681544e-07, + "loss": 8.5057, + "step": 198580 + }, + { + "epoch": 0.9917351244725212, + "grad_norm": 0.09200121462345123, + "learning_rate": 2.485669228266039e-07, + "loss": 8.4947, + "step": 198590 + }, + { + "epoch": 0.9917850632974606, + "grad_norm": 0.08936648070812225, + "learning_rate": 2.4706500788505345e-07, + "loss": 8.4976, + "step": 198600 + }, + { + "epoch": 0.9918350021224001, + "grad_norm": 0.08992834389209747, + "learning_rate": 2.45563092943503e-07, + "loss": 8.5161, + "step": 198610 + }, + { + "epoch": 0.9918849409473395, + "grad_norm": 0.0911891758441925, + "learning_rate": 2.440611780019525e-07, + "loss": 8.4884, + "step": 198620 + }, + { + "epoch": 0.991934879772279, + "grad_norm": 0.08730587363243103, + "learning_rate": 2.42559263060402e-07, + "loss": 8.513, + "step": 198630 + }, + { + "epoch": 0.9919848185972184, + "grad_norm": 0.09357264637947083, + "learning_rate": 2.410573481188515e-07, + "loss": 8.5056, + "step": 198640 + }, + { + "epoch": 0.9920347574221579, + "grad_norm": 0.08683697134256363, + "learning_rate": 2.3955543317730105e-07, + "loss": 8.4803, + "step": 198650 + }, + { + "epoch": 0.9920846962470973, + "grad_norm": 0.08779203146696091, + "learning_rate": 2.3805351823575057e-07, + "loss": 8.4917, + "step": 198660 + }, + { + "epoch": 0.9921346350720368, + "grad_norm": 0.096683070063591, + "learning_rate": 2.3655160329420011e-07, + "loss": 8.5142, + "step": 198670 + }, + { + "epoch": 0.9921845738969762, + "grad_norm": 0.08591101318597794, + "learning_rate": 2.3504968835264963e-07, + "loss": 8.5099, + "step": 198680 + }, + { + "epoch": 0.9922345127219157, + "grad_norm": 0.08974206447601318, + "learning_rate": 2.3354777341109915e-07, + "loss": 8.5142, + "step": 198690 + }, + { + "epoch": 0.9922844515468551, + "grad_norm": 0.09247114509344101, + "learning_rate": 2.3204585846954866e-07, + "loss": 8.4848, + "step": 198700 + }, + { + "epoch": 0.9923343903717946, + "grad_norm": 0.08844613283872604, + "learning_rate": 2.305439435279982e-07, + "loss": 8.4997, + "step": 198710 + }, + { + "epoch": 0.992384329196734, + "grad_norm": 0.08982079476118088, + "learning_rate": 2.2904202858644772e-07, + "loss": 8.4906, + "step": 198720 + }, + { + "epoch": 0.9924342680216734, + "grad_norm": 0.08924318850040436, + "learning_rate": 2.2754011364489724e-07, + "loss": 8.4932, + "step": 198730 + }, + { + "epoch": 0.9924842068466129, + "grad_norm": 0.08730906248092651, + "learning_rate": 2.2603819870334678e-07, + "loss": 8.5035, + "step": 198740 + }, + { + "epoch": 0.9925341456715524, + "grad_norm": 0.08822734653949738, + "learning_rate": 2.245362837617963e-07, + "loss": 8.5039, + "step": 198750 + }, + { + "epoch": 0.9925840844964918, + "grad_norm": 0.09212057292461395, + "learning_rate": 2.2303436882024582e-07, + "loss": 8.4992, + "step": 198760 + }, + { + "epoch": 0.9926340233214312, + "grad_norm": 0.08733692765235901, + "learning_rate": 2.2153245387869533e-07, + "loss": 8.479, + "step": 198770 + }, + { + "epoch": 0.9926839621463707, + "grad_norm": 0.08462866395711899, + "learning_rate": 2.2003053893714488e-07, + "loss": 8.4978, + "step": 198780 + }, + { + "epoch": 0.9927339009713101, + "grad_norm": 0.09213852882385254, + "learning_rate": 2.185286239955944e-07, + "loss": 8.5001, + "step": 198790 + }, + { + "epoch": 0.9927838397962496, + "grad_norm": 0.08344164490699768, + "learning_rate": 2.170267090540439e-07, + "loss": 8.5045, + "step": 198800 + }, + { + "epoch": 0.992833778621189, + "grad_norm": 0.08867906033992767, + "learning_rate": 2.1552479411249343e-07, + "loss": 8.5079, + "step": 198810 + }, + { + "epoch": 0.9928837174461285, + "grad_norm": 0.08968056738376617, + "learning_rate": 2.1402287917094297e-07, + "loss": 8.5275, + "step": 198820 + }, + { + "epoch": 0.9929336562710679, + "grad_norm": 0.0994110107421875, + "learning_rate": 2.1252096422939249e-07, + "loss": 8.506, + "step": 198830 + }, + { + "epoch": 0.9929835950960074, + "grad_norm": 0.09683678299188614, + "learning_rate": 2.11019049287842e-07, + "loss": 8.5073, + "step": 198840 + }, + { + "epoch": 0.9930335339209468, + "grad_norm": 0.0983363687992096, + "learning_rate": 2.0951713434629152e-07, + "loss": 8.4861, + "step": 198850 + }, + { + "epoch": 0.9930834727458863, + "grad_norm": 0.09034904837608337, + "learning_rate": 2.0801521940474106e-07, + "loss": 8.4945, + "step": 198860 + }, + { + "epoch": 0.9931334115708257, + "grad_norm": 0.09096697717905045, + "learning_rate": 2.0651330446319058e-07, + "loss": 8.5022, + "step": 198870 + }, + { + "epoch": 0.9931833503957652, + "grad_norm": 0.085932657122612, + "learning_rate": 2.050113895216401e-07, + "loss": 8.5177, + "step": 198880 + }, + { + "epoch": 0.9932332892207046, + "grad_norm": 0.08621320128440857, + "learning_rate": 2.035094745800896e-07, + "loss": 8.4877, + "step": 198890 + }, + { + "epoch": 0.9932832280456441, + "grad_norm": 0.0914321318268776, + "learning_rate": 2.0200755963853916e-07, + "loss": 8.4897, + "step": 198900 + }, + { + "epoch": 0.9933331668705835, + "grad_norm": 0.08765504509210587, + "learning_rate": 2.0050564469698867e-07, + "loss": 8.502, + "step": 198910 + }, + { + "epoch": 0.993383105695523, + "grad_norm": 0.09285347908735275, + "learning_rate": 1.990037297554382e-07, + "loss": 8.4892, + "step": 198920 + }, + { + "epoch": 0.9934330445204624, + "grad_norm": 0.08891350775957108, + "learning_rate": 1.975018148138877e-07, + "loss": 8.5031, + "step": 198930 + }, + { + "epoch": 0.9934829833454019, + "grad_norm": 0.09927456080913544, + "learning_rate": 1.9599989987233725e-07, + "loss": 8.5036, + "step": 198940 + }, + { + "epoch": 0.9935329221703413, + "grad_norm": 0.08990452438592911, + "learning_rate": 1.9449798493078676e-07, + "loss": 8.4938, + "step": 198950 + }, + { + "epoch": 0.9935828609952808, + "grad_norm": 0.08888816833496094, + "learning_rate": 1.9299606998923628e-07, + "loss": 8.4857, + "step": 198960 + }, + { + "epoch": 0.9936327998202202, + "grad_norm": 0.09310969710350037, + "learning_rate": 1.914941550476858e-07, + "loss": 8.4864, + "step": 198970 + }, + { + "epoch": 0.9936827386451597, + "grad_norm": 0.08977384865283966, + "learning_rate": 1.8999224010613534e-07, + "loss": 8.4948, + "step": 198980 + }, + { + "epoch": 0.9937326774700991, + "grad_norm": 0.08601712435483932, + "learning_rate": 1.8849032516458486e-07, + "loss": 8.5055, + "step": 198990 + }, + { + "epoch": 0.9937826162950386, + "grad_norm": 0.08776349574327469, + "learning_rate": 1.8698841022303437e-07, + "loss": 8.4916, + "step": 199000 + }, + { + "epoch": 0.993832555119978, + "grad_norm": 0.09913208335638046, + "learning_rate": 1.8548649528148392e-07, + "loss": 8.4838, + "step": 199010 + }, + { + "epoch": 0.9938824939449175, + "grad_norm": 0.09497472643852234, + "learning_rate": 1.8398458033993343e-07, + "loss": 8.4997, + "step": 199020 + }, + { + "epoch": 0.9939324327698569, + "grad_norm": 0.08809848874807358, + "learning_rate": 1.8248266539838295e-07, + "loss": 8.4952, + "step": 199030 + }, + { + "epoch": 0.9939823715947964, + "grad_norm": 0.09473247826099396, + "learning_rate": 1.8098075045683247e-07, + "loss": 8.4957, + "step": 199040 + }, + { + "epoch": 0.9940323104197358, + "grad_norm": 0.08685030788183212, + "learning_rate": 1.79478835515282e-07, + "loss": 8.5072, + "step": 199050 + }, + { + "epoch": 0.9940822492446753, + "grad_norm": 0.08791281282901764, + "learning_rate": 1.7797692057373153e-07, + "loss": 8.5036, + "step": 199060 + }, + { + "epoch": 0.9941321880696147, + "grad_norm": 0.09338774532079697, + "learning_rate": 1.7647500563218104e-07, + "loss": 8.4922, + "step": 199070 + }, + { + "epoch": 0.9941821268945542, + "grad_norm": 0.09999926388263702, + "learning_rate": 1.7497309069063056e-07, + "loss": 8.5116, + "step": 199080 + }, + { + "epoch": 0.9942320657194936, + "grad_norm": 0.09071855247020721, + "learning_rate": 1.734711757490801e-07, + "loss": 8.5158, + "step": 199090 + }, + { + "epoch": 0.994282004544433, + "grad_norm": 0.0905730351805687, + "learning_rate": 1.7196926080752962e-07, + "loss": 8.4812, + "step": 199100 + }, + { + "epoch": 0.9943319433693725, + "grad_norm": 0.08825069665908813, + "learning_rate": 1.7046734586597914e-07, + "loss": 8.5107, + "step": 199110 + }, + { + "epoch": 0.994381882194312, + "grad_norm": 0.08565869927406311, + "learning_rate": 1.6896543092442865e-07, + "loss": 8.5244, + "step": 199120 + }, + { + "epoch": 0.9944318210192514, + "grad_norm": 0.09292873740196228, + "learning_rate": 1.674635159828782e-07, + "loss": 8.501, + "step": 199130 + }, + { + "epoch": 0.9944817598441908, + "grad_norm": 0.09264521300792694, + "learning_rate": 1.659616010413277e-07, + "loss": 8.5132, + "step": 199140 + }, + { + "epoch": 0.9945316986691303, + "grad_norm": 0.09055811911821365, + "learning_rate": 1.6445968609977723e-07, + "loss": 8.502, + "step": 199150 + }, + { + "epoch": 0.9945816374940698, + "grad_norm": 0.09365162253379822, + "learning_rate": 1.6295777115822672e-07, + "loss": 8.5061, + "step": 199160 + }, + { + "epoch": 0.9946315763190092, + "grad_norm": 0.08964534848928452, + "learning_rate": 1.6145585621667626e-07, + "loss": 8.4963, + "step": 199170 + }, + { + "epoch": 0.9946815151439486, + "grad_norm": 0.08972986787557602, + "learning_rate": 1.5995394127512578e-07, + "loss": 8.501, + "step": 199180 + }, + { + "epoch": 0.9947314539688881, + "grad_norm": 0.09291598945856094, + "learning_rate": 1.584520263335753e-07, + "loss": 8.5065, + "step": 199190 + }, + { + "epoch": 0.9947813927938276, + "grad_norm": 0.09010519832372665, + "learning_rate": 1.569501113920248e-07, + "loss": 8.5005, + "step": 199200 + }, + { + "epoch": 0.994831331618767, + "grad_norm": 0.08901604264974594, + "learning_rate": 1.5544819645047436e-07, + "loss": 8.4986, + "step": 199210 + }, + { + "epoch": 0.9948812704437064, + "grad_norm": 0.09446748346090317, + "learning_rate": 1.5394628150892387e-07, + "loss": 8.491, + "step": 199220 + }, + { + "epoch": 0.9949312092686459, + "grad_norm": 0.08962590992450714, + "learning_rate": 1.524443665673734e-07, + "loss": 8.499, + "step": 199230 + }, + { + "epoch": 0.9949811480935854, + "grad_norm": 0.0894179418683052, + "learning_rate": 1.509424516258229e-07, + "loss": 8.5048, + "step": 199240 + }, + { + "epoch": 0.9950310869185248, + "grad_norm": 0.0883219912648201, + "learning_rate": 1.4944053668427245e-07, + "loss": 8.5145, + "step": 199250 + }, + { + "epoch": 0.9950810257434642, + "grad_norm": 0.09172813594341278, + "learning_rate": 1.4793862174272197e-07, + "loss": 8.5009, + "step": 199260 + }, + { + "epoch": 0.9951309645684037, + "grad_norm": 0.09289974719285965, + "learning_rate": 1.4643670680117148e-07, + "loss": 8.5079, + "step": 199270 + }, + { + "epoch": 0.9951809033933432, + "grad_norm": 0.09185164421796799, + "learning_rate": 1.4493479185962102e-07, + "loss": 8.4842, + "step": 199280 + }, + { + "epoch": 0.9952308422182826, + "grad_norm": 0.089945949614048, + "learning_rate": 1.4343287691807054e-07, + "loss": 8.4803, + "step": 199290 + }, + { + "epoch": 0.995280781043222, + "grad_norm": 0.09140554815530777, + "learning_rate": 1.4193096197652006e-07, + "loss": 8.5122, + "step": 199300 + }, + { + "epoch": 0.9953307198681615, + "grad_norm": 0.0931864008307457, + "learning_rate": 1.4042904703496957e-07, + "loss": 8.4922, + "step": 199310 + }, + { + "epoch": 0.995380658693101, + "grad_norm": 0.08751117438077927, + "learning_rate": 1.3892713209341912e-07, + "loss": 8.4979, + "step": 199320 + }, + { + "epoch": 0.9954305975180404, + "grad_norm": 0.08914308249950409, + "learning_rate": 1.3742521715186863e-07, + "loss": 8.4794, + "step": 199330 + }, + { + "epoch": 0.9954805363429798, + "grad_norm": 0.09185323864221573, + "learning_rate": 1.3592330221031815e-07, + "loss": 8.4852, + "step": 199340 + }, + { + "epoch": 0.9955304751679193, + "grad_norm": 0.0920838937163353, + "learning_rate": 1.3442138726876767e-07, + "loss": 8.4852, + "step": 199350 + }, + { + "epoch": 0.9955804139928588, + "grad_norm": 0.08701695501804352, + "learning_rate": 1.329194723272172e-07, + "loss": 8.4987, + "step": 199360 + }, + { + "epoch": 0.9956303528177982, + "grad_norm": 0.09538272023200989, + "learning_rate": 1.3141755738566673e-07, + "loss": 8.4892, + "step": 199370 + }, + { + "epoch": 0.9956802916427376, + "grad_norm": 0.08852861821651459, + "learning_rate": 1.2991564244411624e-07, + "loss": 8.5061, + "step": 199380 + }, + { + "epoch": 0.9957302304676771, + "grad_norm": 0.09177656471729279, + "learning_rate": 1.2841372750256576e-07, + "loss": 8.4837, + "step": 199390 + }, + { + "epoch": 0.9957801692926166, + "grad_norm": 0.09195193648338318, + "learning_rate": 1.269118125610153e-07, + "loss": 8.4985, + "step": 199400 + }, + { + "epoch": 0.995830108117556, + "grad_norm": 0.09105399250984192, + "learning_rate": 1.2540989761946482e-07, + "loss": 8.5016, + "step": 199410 + }, + { + "epoch": 0.9958800469424954, + "grad_norm": 0.09002146124839783, + "learning_rate": 1.2390798267791434e-07, + "loss": 8.4919, + "step": 199420 + }, + { + "epoch": 0.9959299857674349, + "grad_norm": 0.09366229176521301, + "learning_rate": 1.2240606773636385e-07, + "loss": 8.5062, + "step": 199430 + }, + { + "epoch": 0.9959799245923744, + "grad_norm": 0.09170850366353989, + "learning_rate": 1.209041527948134e-07, + "loss": 8.4988, + "step": 199440 + }, + { + "epoch": 0.9960298634173138, + "grad_norm": 0.0911903902888298, + "learning_rate": 1.194022378532629e-07, + "loss": 8.4892, + "step": 199450 + }, + { + "epoch": 0.9960798022422532, + "grad_norm": 0.09550126641988754, + "learning_rate": 1.1790032291171243e-07, + "loss": 8.4933, + "step": 199460 + }, + { + "epoch": 0.9961297410671927, + "grad_norm": 0.08738602697849274, + "learning_rate": 1.1639840797016196e-07, + "loss": 8.4957, + "step": 199470 + }, + { + "epoch": 0.9961796798921322, + "grad_norm": 0.09028680622577667, + "learning_rate": 1.1489649302861148e-07, + "loss": 8.5054, + "step": 199480 + }, + { + "epoch": 0.9962296187170716, + "grad_norm": 0.08849211782217026, + "learning_rate": 1.13394578087061e-07, + "loss": 8.4965, + "step": 199490 + }, + { + "epoch": 0.996279557542011, + "grad_norm": 0.09421442449092865, + "learning_rate": 1.1189266314551052e-07, + "loss": 8.5073, + "step": 199500 + }, + { + "epoch": 0.9963294963669505, + "grad_norm": 0.09024062752723694, + "learning_rate": 1.1039074820396005e-07, + "loss": 8.5054, + "step": 199510 + }, + { + "epoch": 0.99637943519189, + "grad_norm": 0.09820150583982468, + "learning_rate": 1.0888883326240957e-07, + "loss": 8.4772, + "step": 199520 + }, + { + "epoch": 0.9964293740168294, + "grad_norm": 0.10558462142944336, + "learning_rate": 1.073869183208591e-07, + "loss": 8.495, + "step": 199530 + }, + { + "epoch": 0.9964793128417688, + "grad_norm": 0.09310225397348404, + "learning_rate": 1.0588500337930862e-07, + "loss": 8.4867, + "step": 199540 + }, + { + "epoch": 0.9965292516667082, + "grad_norm": 0.09150728583335876, + "learning_rate": 1.0438308843775815e-07, + "loss": 8.4969, + "step": 199550 + }, + { + "epoch": 0.9965791904916478, + "grad_norm": 0.09338292479515076, + "learning_rate": 1.0288117349620768e-07, + "loss": 8.4907, + "step": 199560 + }, + { + "epoch": 0.9966291293165872, + "grad_norm": 0.09587588161230087, + "learning_rate": 1.0137925855465719e-07, + "loss": 8.4943, + "step": 199570 + }, + { + "epoch": 0.9966790681415266, + "grad_norm": 0.09336939454078674, + "learning_rate": 9.987734361310672e-08, + "loss": 8.5067, + "step": 199580 + }, + { + "epoch": 0.996729006966466, + "grad_norm": 0.08930207043886185, + "learning_rate": 9.837542867155624e-08, + "loss": 8.5041, + "step": 199590 + }, + { + "epoch": 0.9967789457914056, + "grad_norm": 0.08806464821100235, + "learning_rate": 9.687351373000577e-08, + "loss": 8.5115, + "step": 199600 + }, + { + "epoch": 0.996828884616345, + "grad_norm": 0.09173772484064102, + "learning_rate": 9.537159878845528e-08, + "loss": 8.5085, + "step": 199610 + }, + { + "epoch": 0.9968788234412844, + "grad_norm": 0.08926473557949066, + "learning_rate": 9.386968384690481e-08, + "loss": 8.5104, + "step": 199620 + }, + { + "epoch": 0.9969287622662238, + "grad_norm": 0.09517572820186615, + "learning_rate": 9.236776890535433e-08, + "loss": 8.5092, + "step": 199630 + }, + { + "epoch": 0.9969787010911634, + "grad_norm": 0.08836136758327484, + "learning_rate": 9.086585396380386e-08, + "loss": 8.5023, + "step": 199640 + }, + { + "epoch": 0.9970286399161028, + "grad_norm": 0.09061846137046814, + "learning_rate": 8.936393902225338e-08, + "loss": 8.5222, + "step": 199650 + }, + { + "epoch": 0.9970785787410422, + "grad_norm": 0.09391947835683823, + "learning_rate": 8.786202408070291e-08, + "loss": 8.5009, + "step": 199660 + }, + { + "epoch": 0.9971285175659816, + "grad_norm": 0.09239717572927475, + "learning_rate": 8.636010913915242e-08, + "loss": 8.4959, + "step": 199670 + }, + { + "epoch": 0.9971784563909212, + "grad_norm": 0.09715589135885239, + "learning_rate": 8.485819419760195e-08, + "loss": 8.5019, + "step": 199680 + }, + { + "epoch": 0.9972283952158606, + "grad_norm": 0.09166194498538971, + "learning_rate": 8.335627925605147e-08, + "loss": 8.4841, + "step": 199690 + }, + { + "epoch": 0.9972783340408, + "grad_norm": 0.0937894880771637, + "learning_rate": 8.185436431450099e-08, + "loss": 8.5135, + "step": 199700 + }, + { + "epoch": 0.9973282728657394, + "grad_norm": 0.08937085419893265, + "learning_rate": 8.03524493729505e-08, + "loss": 8.5079, + "step": 199710 + }, + { + "epoch": 0.997378211690679, + "grad_norm": 0.09397994726896286, + "learning_rate": 7.885053443140003e-08, + "loss": 8.4847, + "step": 199720 + }, + { + "epoch": 0.9974281505156184, + "grad_norm": 0.09294033795595169, + "learning_rate": 7.734861948984955e-08, + "loss": 8.498, + "step": 199730 + }, + { + "epoch": 0.9974780893405578, + "grad_norm": 0.08595110476016998, + "learning_rate": 7.584670454829908e-08, + "loss": 8.4964, + "step": 199740 + }, + { + "epoch": 0.9975280281654972, + "grad_norm": 0.09340130537748337, + "learning_rate": 7.43447896067486e-08, + "loss": 8.4942, + "step": 199750 + }, + { + "epoch": 0.9975779669904367, + "grad_norm": 0.08756370097398758, + "learning_rate": 7.284287466519813e-08, + "loss": 8.5186, + "step": 199760 + }, + { + "epoch": 0.9976279058153762, + "grad_norm": 0.09118250012397766, + "learning_rate": 7.134095972364764e-08, + "loss": 8.4982, + "step": 199770 + }, + { + "epoch": 0.9976778446403156, + "grad_norm": 0.09316319227218628, + "learning_rate": 6.983904478209717e-08, + "loss": 8.491, + "step": 199780 + }, + { + "epoch": 0.997727783465255, + "grad_norm": 0.08617562055587769, + "learning_rate": 6.833712984054669e-08, + "loss": 8.4977, + "step": 199790 + }, + { + "epoch": 0.9977777222901945, + "grad_norm": 0.09523026645183563, + "learning_rate": 6.683521489899622e-08, + "loss": 8.5039, + "step": 199800 + }, + { + "epoch": 0.997827661115134, + "grad_norm": 0.09021805971860886, + "learning_rate": 6.533329995744574e-08, + "loss": 8.4985, + "step": 199810 + }, + { + "epoch": 0.9978775999400734, + "grad_norm": 0.08965896815061569, + "learning_rate": 6.383138501589527e-08, + "loss": 8.4904, + "step": 199820 + }, + { + "epoch": 0.9979275387650128, + "grad_norm": 0.09753076732158661, + "learning_rate": 6.23294700743448e-08, + "loss": 8.5033, + "step": 199830 + }, + { + "epoch": 0.9979774775899523, + "grad_norm": 0.0896683931350708, + "learning_rate": 6.082755513279431e-08, + "loss": 8.503, + "step": 199840 + }, + { + "epoch": 0.9980274164148918, + "grad_norm": 0.09268392622470856, + "learning_rate": 5.9325640191243836e-08, + "loss": 8.4949, + "step": 199850 + }, + { + "epoch": 0.9980773552398312, + "grad_norm": 0.08605291694402695, + "learning_rate": 5.782372524969336e-08, + "loss": 8.4886, + "step": 199860 + }, + { + "epoch": 0.9981272940647706, + "grad_norm": 0.09285443276166916, + "learning_rate": 5.632181030814288e-08, + "loss": 8.4869, + "step": 199870 + }, + { + "epoch": 0.99817723288971, + "grad_norm": 0.08672482520341873, + "learning_rate": 5.4819895366592405e-08, + "loss": 8.501, + "step": 199880 + }, + { + "epoch": 0.9982271717146496, + "grad_norm": 0.0957999974489212, + "learning_rate": 5.331798042504193e-08, + "loss": 8.5033, + "step": 199890 + }, + { + "epoch": 0.998277110539589, + "grad_norm": 0.08614014834165573, + "learning_rate": 5.181606548349145e-08, + "loss": 8.5082, + "step": 199900 + }, + { + "epoch": 0.9983270493645284, + "grad_norm": 0.09075914323329926, + "learning_rate": 5.0314150541940975e-08, + "loss": 8.4991, + "step": 199910 + }, + { + "epoch": 0.9983769881894679, + "grad_norm": 0.09307090193033218, + "learning_rate": 4.88122356003905e-08, + "loss": 8.474, + "step": 199920 + }, + { + "epoch": 0.9984269270144074, + "grad_norm": 0.09387535601854324, + "learning_rate": 4.731032065884002e-08, + "loss": 8.4924, + "step": 199930 + }, + { + "epoch": 0.9984768658393468, + "grad_norm": 0.08672790229320526, + "learning_rate": 4.5808405717289545e-08, + "loss": 8.5078, + "step": 199940 + }, + { + "epoch": 0.9985268046642862, + "grad_norm": 0.09689149260520935, + "learning_rate": 4.430649077573907e-08, + "loss": 8.5156, + "step": 199950 + }, + { + "epoch": 0.9985767434892256, + "grad_norm": 0.08997116982936859, + "learning_rate": 4.280457583418859e-08, + "loss": 8.5033, + "step": 199960 + }, + { + "epoch": 0.9986266823141652, + "grad_norm": 0.09200499206781387, + "learning_rate": 4.130266089263812e-08, + "loss": 8.499, + "step": 199970 + }, + { + "epoch": 0.9986766211391046, + "grad_norm": 0.09014206379652023, + "learning_rate": 3.980074595108764e-08, + "loss": 8.4947, + "step": 199980 + }, + { + "epoch": 0.998726559964044, + "grad_norm": 0.09259384125471115, + "learning_rate": 3.829883100953716e-08, + "loss": 8.4803, + "step": 199990 + }, + { + "epoch": 0.9987764987889834, + "grad_norm": 0.09015609323978424, + "learning_rate": 3.6796916067986684e-08, + "loss": 8.4907, + "step": 200000 + }, + { + "epoch": 0.998826437613923, + "grad_norm": 0.08941512554883957, + "learning_rate": 3.529500112643621e-08, + "loss": 8.5041, + "step": 200010 + }, + { + "epoch": 0.9988763764388624, + "grad_norm": 0.09428403526544571, + "learning_rate": 3.379308618488573e-08, + "loss": 8.5041, + "step": 200020 + }, + { + "epoch": 0.9989263152638018, + "grad_norm": 0.0898728296160698, + "learning_rate": 3.2291171243335254e-08, + "loss": 8.5173, + "step": 200030 + }, + { + "epoch": 0.9989762540887412, + "grad_norm": 0.08989530801773071, + "learning_rate": 3.078925630178478e-08, + "loss": 8.5081, + "step": 200040 + }, + { + "epoch": 0.9990261929136808, + "grad_norm": 0.09039604663848877, + "learning_rate": 2.92873413602343e-08, + "loss": 8.5074, + "step": 200050 + }, + { + "epoch": 0.9990761317386202, + "grad_norm": 0.08606907725334167, + "learning_rate": 2.7785426418683824e-08, + "loss": 8.5027, + "step": 200060 + }, + { + "epoch": 0.9991260705635596, + "grad_norm": 0.09312465786933899, + "learning_rate": 2.6283511477133347e-08, + "loss": 8.499, + "step": 200070 + }, + { + "epoch": 0.999176009388499, + "grad_norm": 0.09412333369255066, + "learning_rate": 2.478159653558287e-08, + "loss": 8.4968, + "step": 200080 + }, + { + "epoch": 0.9992259482134386, + "grad_norm": 0.09396173804998398, + "learning_rate": 2.3279681594032393e-08, + "loss": 8.4779, + "step": 200090 + }, + { + "epoch": 0.999275887038378, + "grad_norm": 0.08685121685266495, + "learning_rate": 2.1777766652481916e-08, + "loss": 8.501, + "step": 200100 + }, + { + "epoch": 0.9993258258633174, + "grad_norm": 0.091061532497406, + "learning_rate": 2.0275851710931436e-08, + "loss": 8.5031, + "step": 200110 + }, + { + "epoch": 0.9993757646882568, + "grad_norm": 0.08946952223777771, + "learning_rate": 1.877393676938096e-08, + "loss": 8.5099, + "step": 200120 + }, + { + "epoch": 0.9994257035131964, + "grad_norm": 0.09155212342739105, + "learning_rate": 1.7272021827830483e-08, + "loss": 8.4807, + "step": 200130 + }, + { + "epoch": 0.9994756423381358, + "grad_norm": 0.09216777235269547, + "learning_rate": 1.5770106886280006e-08, + "loss": 8.5083, + "step": 200140 + }, + { + "epoch": 0.9995255811630752, + "grad_norm": 0.08744364976882935, + "learning_rate": 1.4268191944729531e-08, + "loss": 8.4995, + "step": 200150 + }, + { + "epoch": 0.9995755199880146, + "grad_norm": 0.09221003204584122, + "learning_rate": 1.2766277003179054e-08, + "loss": 8.493, + "step": 200160 + }, + { + "epoch": 0.9996254588129542, + "grad_norm": 0.09411770850419998, + "learning_rate": 1.1264362061628577e-08, + "loss": 8.4741, + "step": 200170 + }, + { + "epoch": 0.9996753976378936, + "grad_norm": 0.09217695146799088, + "learning_rate": 9.762447120078099e-09, + "loss": 8.4893, + "step": 200180 + }, + { + "epoch": 0.999725336462833, + "grad_norm": 0.0921444296836853, + "learning_rate": 8.260532178527622e-09, + "loss": 8.4998, + "step": 200190 + }, + { + "epoch": 0.9997752752877724, + "grad_norm": 0.0900796502828598, + "learning_rate": 6.758617236977146e-09, + "loss": 8.4963, + "step": 200200 + }, + { + "epoch": 0.999825214112712, + "grad_norm": 0.08964385092258453, + "learning_rate": 5.2567022954266695e-09, + "loss": 8.4971, + "step": 200210 + }, + { + "epoch": 0.9998751529376514, + "grad_norm": 0.08609479665756226, + "learning_rate": 3.754787353876192e-09, + "loss": 8.4869, + "step": 200220 + }, + { + "epoch": 0.9999250917625908, + "grad_norm": 0.08690956234931946, + "learning_rate": 2.252872412325715e-09, + "loss": 8.4845, + "step": 200230 + }, + { + "epoch": 0.9999750305875302, + "grad_norm": 0.09233241528272629, + "learning_rate": 7.509574707752385e-10, + "loss": 8.4693, + "step": 200240 + } + ], + "logging_steps": 10, + "max_steps": 200245, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9.875112543817114e+16, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}