|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 10000, |
|
"global_step": 12869, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007770611547128759, |
|
"grad_norm": 10.867119295233653, |
|
"learning_rate": 7.770007770007771e-08, |
|
"loss": 1.9304, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0015541223094257517, |
|
"grad_norm": 10.849708826912844, |
|
"learning_rate": 1.5540015540015542e-07, |
|
"loss": 1.7731, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002331183464138628, |
|
"grad_norm": 14.155064770562149, |
|
"learning_rate": 2.3310023310023313e-07, |
|
"loss": 1.8856, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0031082446188515035, |
|
"grad_norm": 7.8437428614229106, |
|
"learning_rate": 3.1080031080031084e-07, |
|
"loss": 1.7444, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0038853057735643796, |
|
"grad_norm": 6.831881710614827, |
|
"learning_rate": 3.885003885003885e-07, |
|
"loss": 1.8265, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004662366928277256, |
|
"grad_norm": 4.832603626709541, |
|
"learning_rate": 4.6620046620046626e-07, |
|
"loss": 1.6188, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.005439428082990132, |
|
"grad_norm": 4.682650839871238, |
|
"learning_rate": 5.43900543900544e-07, |
|
"loss": 1.4275, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.006216489237703007, |
|
"grad_norm": 3.4425826085380953, |
|
"learning_rate": 6.216006216006217e-07, |
|
"loss": 1.2748, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.006993550392415883, |
|
"grad_norm": 2.5177730139033967, |
|
"learning_rate": 6.993006993006994e-07, |
|
"loss": 1.3117, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.007770611547128759, |
|
"grad_norm": 3.7240853256217887, |
|
"learning_rate": 7.77000777000777e-07, |
|
"loss": 1.3679, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008547672701841634, |
|
"grad_norm": 3.326103554099626, |
|
"learning_rate": 8.547008547008548e-07, |
|
"loss": 1.2847, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.009324733856554511, |
|
"grad_norm": 3.06027479016113, |
|
"learning_rate": 9.324009324009325e-07, |
|
"loss": 1.4199, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.010101795011267387, |
|
"grad_norm": 3.3944711373731336, |
|
"learning_rate": 1.01010101010101e-06, |
|
"loss": 1.2416, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.010878856165980264, |
|
"grad_norm": 2.8783458689024943, |
|
"learning_rate": 1.087801087801088e-06, |
|
"loss": 1.2386, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.011655917320693139, |
|
"grad_norm": 4.560753080777367, |
|
"learning_rate": 1.1655011655011655e-06, |
|
"loss": 1.1683, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.012432978475406014, |
|
"grad_norm": 3.2610287866769823, |
|
"learning_rate": 1.2432012432012434e-06, |
|
"loss": 1.3331, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.013210039630118891, |
|
"grad_norm": 3.5630097050518494, |
|
"learning_rate": 1.320901320901321e-06, |
|
"loss": 1.3022, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.013987100784831766, |
|
"grad_norm": 3.190644560112282, |
|
"learning_rate": 1.3986013986013987e-06, |
|
"loss": 1.2078, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.014764161939544641, |
|
"grad_norm": 2.7424883315006667, |
|
"learning_rate": 1.4763014763014764e-06, |
|
"loss": 1.2883, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.015541223094257518, |
|
"grad_norm": 3.578715680372139, |
|
"learning_rate": 1.554001554001554e-06, |
|
"loss": 1.2041, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.016318284248970395, |
|
"grad_norm": 3.2571998176648207, |
|
"learning_rate": 1.6317016317016318e-06, |
|
"loss": 1.2505, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01709534540368327, |
|
"grad_norm": 3.4399528179608327, |
|
"learning_rate": 1.7094017094017097e-06, |
|
"loss": 1.2012, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.017872406558396146, |
|
"grad_norm": 3.8729257141905116, |
|
"learning_rate": 1.7871017871017873e-06, |
|
"loss": 1.3179, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.018649467713109023, |
|
"grad_norm": 3.6027496697475616, |
|
"learning_rate": 1.864801864801865e-06, |
|
"loss": 1.2437, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.019426528867821896, |
|
"grad_norm": 3.6431878968740072, |
|
"learning_rate": 1.9425019425019425e-06, |
|
"loss": 1.1645, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.020203590022534773, |
|
"grad_norm": 2.857881707560637, |
|
"learning_rate": 2.02020202020202e-06, |
|
"loss": 1.1369, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02098065117724765, |
|
"grad_norm": 2.8739221855243042, |
|
"learning_rate": 2.0979020979020983e-06, |
|
"loss": 1.1846, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.021757712331960527, |
|
"grad_norm": 3.5028112168977557, |
|
"learning_rate": 2.175602175602176e-06, |
|
"loss": 1.2122, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0225347734866734, |
|
"grad_norm": 3.4640995610274445, |
|
"learning_rate": 2.2533022533022537e-06, |
|
"loss": 1.1927, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.023311834641386277, |
|
"grad_norm": 3.379264646936701, |
|
"learning_rate": 2.331002331002331e-06, |
|
"loss": 1.258, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.024088895796099154, |
|
"grad_norm": 2.4371515340367385, |
|
"learning_rate": 2.408702408702409e-06, |
|
"loss": 1.1477, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.024865956950812028, |
|
"grad_norm": 3.014613121507287, |
|
"learning_rate": 2.4864024864024867e-06, |
|
"loss": 1.1715, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.025643018105524905, |
|
"grad_norm": 3.0458793192067715, |
|
"learning_rate": 2.564102564102564e-06, |
|
"loss": 1.1353, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.026420079260237782, |
|
"grad_norm": 2.9917200999353906, |
|
"learning_rate": 2.641802641802642e-06, |
|
"loss": 1.1992, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.027197140414950655, |
|
"grad_norm": 2.6599563280716985, |
|
"learning_rate": 2.7195027195027198e-06, |
|
"loss": 1.1782, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.027974201569663532, |
|
"grad_norm": 3.0009575544324454, |
|
"learning_rate": 2.7972027972027974e-06, |
|
"loss": 1.2762, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.02875126272437641, |
|
"grad_norm": 2.8774035033800343, |
|
"learning_rate": 2.874902874902875e-06, |
|
"loss": 1.2687, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.029528323879089283, |
|
"grad_norm": 3.11771455020667, |
|
"learning_rate": 2.952602952602953e-06, |
|
"loss": 1.207, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03030538503380216, |
|
"grad_norm": 3.6810769724431345, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"loss": 1.2037, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.031082446188515037, |
|
"grad_norm": 2.5507766565385084, |
|
"learning_rate": 3.108003108003108e-06, |
|
"loss": 1.163, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.031859507343227914, |
|
"grad_norm": 2.9816770527812686, |
|
"learning_rate": 3.1857031857031863e-06, |
|
"loss": 1.1592, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03263656849794079, |
|
"grad_norm": 2.591410551140759, |
|
"learning_rate": 3.2634032634032635e-06, |
|
"loss": 1.0411, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03341362965265366, |
|
"grad_norm": 3.1328334298888345, |
|
"learning_rate": 3.3411033411033412e-06, |
|
"loss": 1.1438, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.03419069080736654, |
|
"grad_norm": 2.9537075236771675, |
|
"learning_rate": 3.4188034188034193e-06, |
|
"loss": 1.1713, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.034967751962079414, |
|
"grad_norm": 4.35570272552757, |
|
"learning_rate": 3.4965034965034966e-06, |
|
"loss": 1.2358, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03574481311679229, |
|
"grad_norm": 2.4749714488159613, |
|
"learning_rate": 3.5742035742035747e-06, |
|
"loss": 1.1325, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.03652187427150517, |
|
"grad_norm": 2.770830578293701, |
|
"learning_rate": 3.651903651903652e-06, |
|
"loss": 1.1979, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.037298935426218045, |
|
"grad_norm": 3.2166027135563793, |
|
"learning_rate": 3.72960372960373e-06, |
|
"loss": 1.1605, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.03807599658093092, |
|
"grad_norm": 2.843605809275243, |
|
"learning_rate": 3.8073038073038077e-06, |
|
"loss": 1.2299, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.03885305773564379, |
|
"grad_norm": 2.959678568881321, |
|
"learning_rate": 3.885003885003885e-06, |
|
"loss": 1.2634, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03963011889035667, |
|
"grad_norm": 2.5622873834599367, |
|
"learning_rate": 3.962703962703963e-06, |
|
"loss": 1.1137, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.040407180045069546, |
|
"grad_norm": 3.086457018563733, |
|
"learning_rate": 4.04040404040404e-06, |
|
"loss": 1.2407, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04118424119978242, |
|
"grad_norm": 4.106519986211115, |
|
"learning_rate": 4.1181041181041185e-06, |
|
"loss": 1.1239, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.0419613023544953, |
|
"grad_norm": 2.7183745936305312, |
|
"learning_rate": 4.195804195804197e-06, |
|
"loss": 1.1746, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04273836350920818, |
|
"grad_norm": 2.703894165918197, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 1.1105, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.043515424663921054, |
|
"grad_norm": 2.4867862713355686, |
|
"learning_rate": 4.351204351204352e-06, |
|
"loss": 1.1258, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.044292485818633924, |
|
"grad_norm": 2.838440814840756, |
|
"learning_rate": 4.428904428904429e-06, |
|
"loss": 1.0962, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.0450695469733468, |
|
"grad_norm": 2.1466654271023162, |
|
"learning_rate": 4.506604506604507e-06, |
|
"loss": 1.1085, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.04584660812805968, |
|
"grad_norm": 2.5468209985419477, |
|
"learning_rate": 4.5843045843045846e-06, |
|
"loss": 1.1391, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.046623669282772555, |
|
"grad_norm": 2.7865905520731493, |
|
"learning_rate": 4.662004662004662e-06, |
|
"loss": 1.1387, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04740073043748543, |
|
"grad_norm": 2.1644371566582827, |
|
"learning_rate": 4.73970473970474e-06, |
|
"loss": 1.1499, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.04817779159219831, |
|
"grad_norm": 2.4913053508847667, |
|
"learning_rate": 4.817404817404818e-06, |
|
"loss": 1.1007, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.04895485274691118, |
|
"grad_norm": 3.001782928715834, |
|
"learning_rate": 4.895104895104895e-06, |
|
"loss": 1.2201, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.049731913901624056, |
|
"grad_norm": 2.2758391555971835, |
|
"learning_rate": 4.972804972804973e-06, |
|
"loss": 1.2219, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05050897505633693, |
|
"grad_norm": 2.2066322284819155, |
|
"learning_rate": 5.0505050505050515e-06, |
|
"loss": 1.1288, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05128603621104981, |
|
"grad_norm": 2.5894704735788263, |
|
"learning_rate": 5.128205128205128e-06, |
|
"loss": 1.1989, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05206309736576269, |
|
"grad_norm": 2.952941171933437, |
|
"learning_rate": 5.205905205905206e-06, |
|
"loss": 1.1165, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.052840158520475564, |
|
"grad_norm": 2.7070115957706946, |
|
"learning_rate": 5.283605283605284e-06, |
|
"loss": 1.1954, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05361721967518844, |
|
"grad_norm": 2.2390053746810668, |
|
"learning_rate": 5.361305361305362e-06, |
|
"loss": 1.1219, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.05439428082990131, |
|
"grad_norm": 2.5396421668929774, |
|
"learning_rate": 5.4390054390054395e-06, |
|
"loss": 1.2285, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05517134198461419, |
|
"grad_norm": 1.881059123798051, |
|
"learning_rate": 5.516705516705518e-06, |
|
"loss": 1.074, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.055948403139327064, |
|
"grad_norm": 2.4027627997044285, |
|
"learning_rate": 5.594405594405595e-06, |
|
"loss": 1.1395, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.05672546429403994, |
|
"grad_norm": 2.135346477937923, |
|
"learning_rate": 5.672105672105672e-06, |
|
"loss": 1.0813, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.05750252544875282, |
|
"grad_norm": 2.673768837075326, |
|
"learning_rate": 5.74980574980575e-06, |
|
"loss": 1.117, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.058279586603465695, |
|
"grad_norm": 2.1561152898986924, |
|
"learning_rate": 5.827505827505828e-06, |
|
"loss": 1.0868, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.059056647758178565, |
|
"grad_norm": 2.198404569968455, |
|
"learning_rate": 5.905205905205906e-06, |
|
"loss": 1.0985, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.05983370891289144, |
|
"grad_norm": 1.9218754267874707, |
|
"learning_rate": 5.982905982905983e-06, |
|
"loss": 1.1303, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06061077006760432, |
|
"grad_norm": 2.070293097589863, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 1.056, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.061387831222317196, |
|
"grad_norm": 2.2102128154144833, |
|
"learning_rate": 6.138306138306139e-06, |
|
"loss": 1.1511, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.06216489237703007, |
|
"grad_norm": 2.9020079791880438, |
|
"learning_rate": 6.216006216006216e-06, |
|
"loss": 1.0976, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06294195353174295, |
|
"grad_norm": 3.1669273668699733, |
|
"learning_rate": 6.2937062937062944e-06, |
|
"loss": 1.1263, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.06371901468645583, |
|
"grad_norm": 1.9847375481750156, |
|
"learning_rate": 6.3714063714063726e-06, |
|
"loss": 1.0923, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.0644960758411687, |
|
"grad_norm": 2.09531371322368, |
|
"learning_rate": 6.449106449106449e-06, |
|
"loss": 1.0821, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.06527313699588158, |
|
"grad_norm": 3.287845612968483, |
|
"learning_rate": 6.526806526806527e-06, |
|
"loss": 1.0614, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.06605019815059446, |
|
"grad_norm": 2.2662925493592083, |
|
"learning_rate": 6.604506604506605e-06, |
|
"loss": 1.1021, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06682725930530732, |
|
"grad_norm": 2.4839925554425717, |
|
"learning_rate": 6.6822066822066824e-06, |
|
"loss": 1.1501, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.0676043204600202, |
|
"grad_norm": 2.331604369524609, |
|
"learning_rate": 6.7599067599067605e-06, |
|
"loss": 1.1742, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.06838138161473307, |
|
"grad_norm": 2.3590829029315583, |
|
"learning_rate": 6.837606837606839e-06, |
|
"loss": 1.1163, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.06915844276944595, |
|
"grad_norm": 2.892618110874262, |
|
"learning_rate": 6.915306915306917e-06, |
|
"loss": 1.1725, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.06993550392415883, |
|
"grad_norm": 2.384306036165181, |
|
"learning_rate": 6.993006993006993e-06, |
|
"loss": 1.1418, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0707125650788717, |
|
"grad_norm": 2.614333186214158, |
|
"learning_rate": 7.070707070707071e-06, |
|
"loss": 1.1369, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07148962623358458, |
|
"grad_norm": 2.7443403027281428, |
|
"learning_rate": 7.148407148407149e-06, |
|
"loss": 1.1719, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07226668738829746, |
|
"grad_norm": 2.115919721191313, |
|
"learning_rate": 7.226107226107227e-06, |
|
"loss": 1.1642, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.07304374854301034, |
|
"grad_norm": 2.4226282288227052, |
|
"learning_rate": 7.303807303807304e-06, |
|
"loss": 1.1609, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07382080969772321, |
|
"grad_norm": 2.3322158120159657, |
|
"learning_rate": 7.381507381507382e-06, |
|
"loss": 1.0715, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07459787085243609, |
|
"grad_norm": 2.216105414113714, |
|
"learning_rate": 7.45920745920746e-06, |
|
"loss": 1.0836, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07537493200714897, |
|
"grad_norm": 2.080845937826623, |
|
"learning_rate": 7.536907536907537e-06, |
|
"loss": 1.1298, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.07615199316186184, |
|
"grad_norm": 1.9845987743197342, |
|
"learning_rate": 7.6146076146076155e-06, |
|
"loss": 1.1375, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.07692905431657471, |
|
"grad_norm": 2.052691449449282, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 1.1501, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.07770611547128758, |
|
"grad_norm": 1.9263855972921253, |
|
"learning_rate": 7.77000777000777e-06, |
|
"loss": 1.1237, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07848317662600046, |
|
"grad_norm": 2.9671775698526934, |
|
"learning_rate": 7.847707847707848e-06, |
|
"loss": 1.0918, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.07926023778071334, |
|
"grad_norm": 2.2929478440651394, |
|
"learning_rate": 7.925407925407926e-06, |
|
"loss": 1.0848, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08003729893542622, |
|
"grad_norm": 1.6757069320789237, |
|
"learning_rate": 8.003108003108003e-06, |
|
"loss": 1.1209, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.08081436009013909, |
|
"grad_norm": 2.093200645109728, |
|
"learning_rate": 8.08080808080808e-06, |
|
"loss": 1.0866, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08159142124485197, |
|
"grad_norm": 2.408927649486391, |
|
"learning_rate": 8.158508158508159e-06, |
|
"loss": 1.0934, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08236848239956485, |
|
"grad_norm": 2.2763929710773643, |
|
"learning_rate": 8.236208236208237e-06, |
|
"loss": 1.1081, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08314554355427772, |
|
"grad_norm": 2.329064562776198, |
|
"learning_rate": 8.313908313908315e-06, |
|
"loss": 1.1366, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.0839226047089906, |
|
"grad_norm": 1.9093884379628574, |
|
"learning_rate": 8.391608391608393e-06, |
|
"loss": 1.0907, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08469966586370348, |
|
"grad_norm": 2.0666971265552694, |
|
"learning_rate": 8.46930846930847e-06, |
|
"loss": 1.1396, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.08547672701841635, |
|
"grad_norm": 2.6618881870416833, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 1.1204, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08625378817312923, |
|
"grad_norm": 2.5811056119151115, |
|
"learning_rate": 8.624708624708626e-06, |
|
"loss": 1.1067, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.08703084932784211, |
|
"grad_norm": 2.4891841510360697, |
|
"learning_rate": 8.702408702408704e-06, |
|
"loss": 1.0186, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.08780791048255497, |
|
"grad_norm": 1.9964291348885184, |
|
"learning_rate": 8.78010878010878e-06, |
|
"loss": 1.0534, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.08858497163726785, |
|
"grad_norm": 1.8380639056753707, |
|
"learning_rate": 8.857808857808858e-06, |
|
"loss": 1.1259, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.08936203279198073, |
|
"grad_norm": 2.026492546755725, |
|
"learning_rate": 8.935508935508937e-06, |
|
"loss": 1.1357, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.0901390939466936, |
|
"grad_norm": 2.3881102752793546, |
|
"learning_rate": 9.013209013209015e-06, |
|
"loss": 1.1451, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09091615510140648, |
|
"grad_norm": 2.3516814013111578, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 1.1304, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.09169321625611936, |
|
"grad_norm": 2.2625675458737255, |
|
"learning_rate": 9.168609168609169e-06, |
|
"loss": 1.1289, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09247027741083223, |
|
"grad_norm": 1.8601808712202859, |
|
"learning_rate": 9.246309246309247e-06, |
|
"loss": 1.0705, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.09324733856554511, |
|
"grad_norm": 2.45830074558663, |
|
"learning_rate": 9.324009324009324e-06, |
|
"loss": 1.0006, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09402439972025799, |
|
"grad_norm": 2.2208723337033747, |
|
"learning_rate": 9.401709401709402e-06, |
|
"loss": 1.0642, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.09480146087497086, |
|
"grad_norm": 2.992927987309589, |
|
"learning_rate": 9.47940947940948e-06, |
|
"loss": 1.1099, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.09557852202968374, |
|
"grad_norm": 2.2404447843072526, |
|
"learning_rate": 9.557109557109558e-06, |
|
"loss": 1.1046, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.09635558318439662, |
|
"grad_norm": 2.027188334095754, |
|
"learning_rate": 9.634809634809636e-06, |
|
"loss": 1.1388, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.0971326443391095, |
|
"grad_norm": 1.9884931664591046, |
|
"learning_rate": 9.712509712509714e-06, |
|
"loss": 1.1093, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.09790970549382236, |
|
"grad_norm": 2.2780803616241, |
|
"learning_rate": 9.79020979020979e-06, |
|
"loss": 1.0973, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.09868676664853523, |
|
"grad_norm": 2.482851610024637, |
|
"learning_rate": 9.867909867909869e-06, |
|
"loss": 1.1074, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.09946382780324811, |
|
"grad_norm": 2.1809979058393547, |
|
"learning_rate": 9.945609945609947e-06, |
|
"loss": 1.1111, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.10024088895796099, |
|
"grad_norm": 2.653036244084716, |
|
"learning_rate": 9.999998344553621e-06, |
|
"loss": 1.0539, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.10101795011267387, |
|
"grad_norm": 2.1782834112618144, |
|
"learning_rate": 9.99996891442626e-06, |
|
"loss": 1.1277, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.10179501126738674, |
|
"grad_norm": 2.0794830642914532, |
|
"learning_rate": 9.999902696850819e-06, |
|
"loss": 1.1028, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.10257207242209962, |
|
"grad_norm": 1.9588777456228414, |
|
"learning_rate": 9.999799692314491e-06, |
|
"loss": 1.0799, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.1033491335768125, |
|
"grad_norm": 1.8109731584724105, |
|
"learning_rate": 9.999659901575142e-06, |
|
"loss": 1.0387, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.10412619473152537, |
|
"grad_norm": 1.496513992331799, |
|
"learning_rate": 9.999483325661283e-06, |
|
"loss": 1.0982, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.10490325588623825, |
|
"grad_norm": 1.9418465016002184, |
|
"learning_rate": 9.999269965872081e-06, |
|
"loss": 1.1873, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.10568031704095113, |
|
"grad_norm": 1.8814020449439044, |
|
"learning_rate": 9.999019823777335e-06, |
|
"loss": 1.1121, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.106457378195664, |
|
"grad_norm": 2.5624116813963083, |
|
"learning_rate": 9.998732901217474e-06, |
|
"loss": 1.1057, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.10723443935037688, |
|
"grad_norm": 2.8084481900607767, |
|
"learning_rate": 9.998409200303543e-06, |
|
"loss": 1.0796, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.10801150050508974, |
|
"grad_norm": 2.5585637275706827, |
|
"learning_rate": 9.998048723417184e-06, |
|
"loss": 1.0911, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.10878856165980262, |
|
"grad_norm": 1.8486528676878824, |
|
"learning_rate": 9.997651473210614e-06, |
|
"loss": 1.1027, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1095656228145155, |
|
"grad_norm": 1.6756625698252106, |
|
"learning_rate": 9.99721745260662e-06, |
|
"loss": 0.9892, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.11034268396922838, |
|
"grad_norm": 1.7980527241240165, |
|
"learning_rate": 9.996746664798523e-06, |
|
"loss": 1.0714, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.11111974512394125, |
|
"grad_norm": 2.965648407184345, |
|
"learning_rate": 9.996239113250158e-06, |
|
"loss": 1.1627, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.11189680627865413, |
|
"grad_norm": 2.58378967500062, |
|
"learning_rate": 9.995694801695856e-06, |
|
"loss": 1.1338, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.112673867433367, |
|
"grad_norm": 2.3312493063488104, |
|
"learning_rate": 9.995113734140409e-06, |
|
"loss": 1.0527, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.11345092858807988, |
|
"grad_norm": 1.7987672632076395, |
|
"learning_rate": 9.99449591485904e-06, |
|
"loss": 1.1463, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.11422798974279276, |
|
"grad_norm": 2.2447963047423674, |
|
"learning_rate": 9.993841348397377e-06, |
|
"loss": 1.0993, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.11500505089750564, |
|
"grad_norm": 2.3307589401248983, |
|
"learning_rate": 9.993150039571417e-06, |
|
"loss": 1.1, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.11578211205221851, |
|
"grad_norm": 2.4461716652591377, |
|
"learning_rate": 9.992421993467488e-06, |
|
"loss": 1.1223, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.11655917320693139, |
|
"grad_norm": 2.325560003259248, |
|
"learning_rate": 9.991657215442215e-06, |
|
"loss": 1.1016, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11733623436164427, |
|
"grad_norm": 2.324019330722723, |
|
"learning_rate": 9.99085571112248e-06, |
|
"loss": 1.102, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.11811329551635713, |
|
"grad_norm": 2.184804872790777, |
|
"learning_rate": 9.990017486405379e-06, |
|
"loss": 1.0691, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.11889035667107001, |
|
"grad_norm": 2.3778750559007946, |
|
"learning_rate": 9.989142547458182e-06, |
|
"loss": 1.0902, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.11966741782578288, |
|
"grad_norm": 1.9170168154911298, |
|
"learning_rate": 9.988230900718279e-06, |
|
"loss": 1.0755, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.12044447898049576, |
|
"grad_norm": 2.242423744369333, |
|
"learning_rate": 9.987282552893146e-06, |
|
"loss": 1.0557, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.12122154013520864, |
|
"grad_norm": 2.4290588197619574, |
|
"learning_rate": 9.986297510960284e-06, |
|
"loss": 1.0472, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.12199860128992152, |
|
"grad_norm": 2.4366241079551596, |
|
"learning_rate": 9.985275782167175e-06, |
|
"loss": 1.0249, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.12277566244463439, |
|
"grad_norm": 2.6491566316518673, |
|
"learning_rate": 9.984217374031225e-06, |
|
"loss": 1.0816, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.12355272359934727, |
|
"grad_norm": 2.159316756547971, |
|
"learning_rate": 9.983122294339708e-06, |
|
"loss": 1.078, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.12432978475406015, |
|
"grad_norm": 2.0761579284967944, |
|
"learning_rate": 9.981990551149714e-06, |
|
"loss": 1.0913, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12510684590877302, |
|
"grad_norm": 2.528857689821478, |
|
"learning_rate": 9.980822152788082e-06, |
|
"loss": 1.1034, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.1258839070634859, |
|
"grad_norm": 1.5046304989897192, |
|
"learning_rate": 9.979617107851343e-06, |
|
"loss": 1.114, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.12666096821819878, |
|
"grad_norm": 2.2475747257064707, |
|
"learning_rate": 9.97837542520566e-06, |
|
"loss": 1.0558, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.12743802937291165, |
|
"grad_norm": 2.016387639571554, |
|
"learning_rate": 9.977097113986755e-06, |
|
"loss": 1.1429, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.12821509052762453, |
|
"grad_norm": 2.246062301174424, |
|
"learning_rate": 9.97578218359985e-06, |
|
"loss": 1.0643, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.1289921516823374, |
|
"grad_norm": 2.7312095064634323, |
|
"learning_rate": 9.974430643719591e-06, |
|
"loss": 1.0671, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.12976921283705029, |
|
"grad_norm": 1.813294617554991, |
|
"learning_rate": 9.973042504289978e-06, |
|
"loss": 0.9926, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.13054627399176316, |
|
"grad_norm": 2.2812471968380095, |
|
"learning_rate": 9.971617775524301e-06, |
|
"loss": 1.0825, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.13132333514647604, |
|
"grad_norm": 1.756937891360179, |
|
"learning_rate": 9.970156467905048e-06, |
|
"loss": 1.0673, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.13210039630118892, |
|
"grad_norm": 2.082158585539177, |
|
"learning_rate": 9.968658592183842e-06, |
|
"loss": 1.1994, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.1328774574559018, |
|
"grad_norm": 1.9267534200786023, |
|
"learning_rate": 9.967124159381359e-06, |
|
"loss": 1.1162, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.13365451861061464, |
|
"grad_norm": 3.0547406918856748, |
|
"learning_rate": 9.965553180787239e-06, |
|
"loss": 1.0263, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.13443157976532752, |
|
"grad_norm": 1.7665942406417015, |
|
"learning_rate": 9.963945667960017e-06, |
|
"loss": 0.9662, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.1352086409200404, |
|
"grad_norm": 1.8418454319389166, |
|
"learning_rate": 9.962301632727022e-06, |
|
"loss": 1.0806, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.13598570207475327, |
|
"grad_norm": 1.7673330680317212, |
|
"learning_rate": 9.960621087184303e-06, |
|
"loss": 1.0801, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.13676276322946615, |
|
"grad_norm": 2.206590428660935, |
|
"learning_rate": 9.95890404369653e-06, |
|
"loss": 1.1432, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.13753982438417903, |
|
"grad_norm": 2.2302577958801195, |
|
"learning_rate": 9.957150514896919e-06, |
|
"loss": 1.152, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.1383168855388919, |
|
"grad_norm": 2.0260327381346794, |
|
"learning_rate": 9.95536051368711e-06, |
|
"loss": 1.0658, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.13909394669360478, |
|
"grad_norm": 1.5644692783168082, |
|
"learning_rate": 9.953534053237108e-06, |
|
"loss": 1.0604, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.13987100784831766, |
|
"grad_norm": 1.738578328297917, |
|
"learning_rate": 9.951671146985159e-06, |
|
"loss": 0.9911, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14064806900303053, |
|
"grad_norm": 1.6603612609497798, |
|
"learning_rate": 9.949771808637657e-06, |
|
"loss": 1.0849, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.1414251301577434, |
|
"grad_norm": 2.031511681498179, |
|
"learning_rate": 9.947836052169056e-06, |
|
"loss": 0.9919, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.1422021913124563, |
|
"grad_norm": 1.5044981498939936, |
|
"learning_rate": 9.945863891821749e-06, |
|
"loss": 0.9996, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.14297925246716917, |
|
"grad_norm": 2.293059765739188, |
|
"learning_rate": 9.943855342105979e-06, |
|
"loss": 1.0394, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.14375631362188204, |
|
"grad_norm": 1.9478707992466775, |
|
"learning_rate": 9.941810417799719e-06, |
|
"loss": 0.9964, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.14453337477659492, |
|
"grad_norm": 1.5149400216960562, |
|
"learning_rate": 9.939729133948572e-06, |
|
"loss": 1.0521, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.1453104359313078, |
|
"grad_norm": 2.2351667693118524, |
|
"learning_rate": 9.93761150586566e-06, |
|
"loss": 1.1685, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.14608749708602067, |
|
"grad_norm": 3.4005405751624087, |
|
"learning_rate": 9.935457549131504e-06, |
|
"loss": 1.0859, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.14686455824073355, |
|
"grad_norm": 2.1781460644900257, |
|
"learning_rate": 9.933267279593919e-06, |
|
"loss": 1.037, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.14764161939544643, |
|
"grad_norm": 2.432585604447532, |
|
"learning_rate": 9.931040713367888e-06, |
|
"loss": 1.0816, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.1484186805501593, |
|
"grad_norm": 1.834847415817245, |
|
"learning_rate": 9.928777866835454e-06, |
|
"loss": 1.0843, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.14919574170487218, |
|
"grad_norm": 1.7231188780918039, |
|
"learning_rate": 9.926478756645586e-06, |
|
"loss": 1.0286, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.14997280285958506, |
|
"grad_norm": 2.113770754133767, |
|
"learning_rate": 9.924143399714072e-06, |
|
"loss": 1.0627, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.15074986401429794, |
|
"grad_norm": 2.3994884363588036, |
|
"learning_rate": 9.92177181322338e-06, |
|
"loss": 1.0116, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.1515269251690108, |
|
"grad_norm": 2.0230342364705454, |
|
"learning_rate": 9.919364014622545e-06, |
|
"loss": 1.0606, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.1523039863237237, |
|
"grad_norm": 2.1208192115487816, |
|
"learning_rate": 9.91692002162703e-06, |
|
"loss": 1.0623, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.15308104747843657, |
|
"grad_norm": 1.954692914861481, |
|
"learning_rate": 9.914439852218598e-06, |
|
"loss": 1.036, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.15385810863314942, |
|
"grad_norm": 2.4424599661840394, |
|
"learning_rate": 9.911923524645184e-06, |
|
"loss": 1.0592, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.1546351697878623, |
|
"grad_norm": 1.7002048061692303, |
|
"learning_rate": 9.909371057420756e-06, |
|
"loss": 1.1009, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.15541223094257517, |
|
"grad_norm": 1.6400522184059512, |
|
"learning_rate": 9.906782469325183e-06, |
|
"loss": 1.0584, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.15618929209728805, |
|
"grad_norm": 1.9086125071696802, |
|
"learning_rate": 9.904157779404095e-06, |
|
"loss": 1.027, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.15696635325200092, |
|
"grad_norm": 2.0429187558374284, |
|
"learning_rate": 9.901497006968737e-06, |
|
"loss": 1.0366, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.1577434144067138, |
|
"grad_norm": 1.9839452672457782, |
|
"learning_rate": 9.89880017159584e-06, |
|
"loss": 1.0253, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.15852047556142668, |
|
"grad_norm": 1.9239243059085187, |
|
"learning_rate": 9.896067293127462e-06, |
|
"loss": 1.0809, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.15929753671613955, |
|
"grad_norm": 2.116977455932609, |
|
"learning_rate": 9.893298391670857e-06, |
|
"loss": 1.0288, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.16007459787085243, |
|
"grad_norm": 1.9256786973087672, |
|
"learning_rate": 9.890493487598315e-06, |
|
"loss": 1.062, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.1608516590255653, |
|
"grad_norm": 1.770000631025023, |
|
"learning_rate": 9.887652601547011e-06, |
|
"loss": 1.029, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.16162872018027818, |
|
"grad_norm": 2.0460739758835715, |
|
"learning_rate": 9.884775754418872e-06, |
|
"loss": 1.0978, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.16240578133499106, |
|
"grad_norm": 1.8387960887988681, |
|
"learning_rate": 9.881862967380398e-06, |
|
"loss": 1.0499, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.16318284248970394, |
|
"grad_norm": 2.0055836577178145, |
|
"learning_rate": 9.878914261862524e-06, |
|
"loss": 1.0964, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.16395990364441682, |
|
"grad_norm": 1.7868218097590607, |
|
"learning_rate": 9.875929659560455e-06, |
|
"loss": 1.0277, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.1647369647991297, |
|
"grad_norm": 2.1063589192373424, |
|
"learning_rate": 9.872909182433509e-06, |
|
"loss": 1.1237, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.16551402595384257, |
|
"grad_norm": 2.2482455806975365, |
|
"learning_rate": 9.869852852704951e-06, |
|
"loss": 1.069, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.16629108710855545, |
|
"grad_norm": 1.7191931035624053, |
|
"learning_rate": 9.866760692861837e-06, |
|
"loss": 1.0432, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.16706814826326832, |
|
"grad_norm": 1.9822067032337325, |
|
"learning_rate": 9.863632725654841e-06, |
|
"loss": 1.0966, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.1678452094179812, |
|
"grad_norm": 1.5154087879613518, |
|
"learning_rate": 9.860468974098093e-06, |
|
"loss": 0.9731, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.16862227057269408, |
|
"grad_norm": 2.109259264636941, |
|
"learning_rate": 9.85726946146901e-06, |
|
"loss": 1.075, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.16939933172740695, |
|
"grad_norm": 2.264076822727728, |
|
"learning_rate": 9.854034211308114e-06, |
|
"loss": 1.0237, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.17017639288211983, |
|
"grad_norm": 1.892118264625731, |
|
"learning_rate": 9.850763247418876e-06, |
|
"loss": 1.0245, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.1709534540368327, |
|
"grad_norm": 2.0853632303159535, |
|
"learning_rate": 9.847456593867525e-06, |
|
"loss": 1.0026, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.17173051519154559, |
|
"grad_norm": 1.9677334934726516, |
|
"learning_rate": 9.844114274982885e-06, |
|
"loss": 1.0431, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.17250757634625846, |
|
"grad_norm": 2.2830817893790103, |
|
"learning_rate": 9.840736315356183e-06, |
|
"loss": 1.0943, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.17328463750097134, |
|
"grad_norm": 1.575442825346659, |
|
"learning_rate": 9.837322739840877e-06, |
|
"loss": 1.0007, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.17406169865568422, |
|
"grad_norm": 2.2367315093018134, |
|
"learning_rate": 9.833873573552472e-06, |
|
"loss": 1.0301, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.17483875981039707, |
|
"grad_norm": 2.24222375291448, |
|
"learning_rate": 9.830388841868329e-06, |
|
"loss": 1.0919, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.17561582096510994, |
|
"grad_norm": 1.872156214913949, |
|
"learning_rate": 9.826868570427484e-06, |
|
"loss": 1.0933, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.17639288211982282, |
|
"grad_norm": 2.200623982755955, |
|
"learning_rate": 9.823312785130457e-06, |
|
"loss": 1.0556, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.1771699432745357, |
|
"grad_norm": 2.0166726180309547, |
|
"learning_rate": 9.819721512139069e-06, |
|
"loss": 1.0136, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.17794700442924857, |
|
"grad_norm": 2.3268106459403155, |
|
"learning_rate": 9.816094777876233e-06, |
|
"loss": 1.0609, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.17872406558396145, |
|
"grad_norm": 2.5483756559425097, |
|
"learning_rate": 9.812432609025778e-06, |
|
"loss": 1.1066, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.17950112673867433, |
|
"grad_norm": 1.5050242159549674, |
|
"learning_rate": 9.808735032532239e-06, |
|
"loss": 1.0461, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.1802781878933872, |
|
"grad_norm": 1.7444888511627248, |
|
"learning_rate": 9.805002075600668e-06, |
|
"loss": 0.9875, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.18105524904810008, |
|
"grad_norm": 2.1359724957586295, |
|
"learning_rate": 9.801233765696423e-06, |
|
"loss": 1.0032, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.18183231020281296, |
|
"grad_norm": 2.0933731292318214, |
|
"learning_rate": 9.797430130544983e-06, |
|
"loss": 1.0092, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.18260937135752583, |
|
"grad_norm": 1.7774756159015281, |
|
"learning_rate": 9.793591198131724e-06, |
|
"loss": 0.9708, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.1833864325122387, |
|
"grad_norm": 1.9057742144891412, |
|
"learning_rate": 9.789716996701729e-06, |
|
"loss": 1.0716, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.1841634936669516, |
|
"grad_norm": 1.6679562880223004, |
|
"learning_rate": 9.78580755475957e-06, |
|
"loss": 1.0184, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.18494055482166447, |
|
"grad_norm": 2.036953279006188, |
|
"learning_rate": 9.781862901069105e-06, |
|
"loss": 0.988, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.18571761597637734, |
|
"grad_norm": 2.0964552627447777, |
|
"learning_rate": 9.777883064653266e-06, |
|
"loss": 1.0113, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.18649467713109022, |
|
"grad_norm": 1.6106495155390417, |
|
"learning_rate": 9.773868074793838e-06, |
|
"loss": 1.0423, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.1872717382858031, |
|
"grad_norm": 2.8770640128408456, |
|
"learning_rate": 9.76981796103125e-06, |
|
"loss": 1.0398, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.18804879944051597, |
|
"grad_norm": 2.0693212678122843, |
|
"learning_rate": 9.76573275316436e-06, |
|
"loss": 1.0045, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.18882586059522885, |
|
"grad_norm": 2.0119207294765213, |
|
"learning_rate": 9.761612481250225e-06, |
|
"loss": 1.0224, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.18960292174994173, |
|
"grad_norm": 2.223557066379335, |
|
"learning_rate": 9.757457175603893e-06, |
|
"loss": 1.0773, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.1903799829046546, |
|
"grad_norm": 1.9108372181328375, |
|
"learning_rate": 9.753266866798174e-06, |
|
"loss": 1.0526, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.19115704405936748, |
|
"grad_norm": 2.1959378359625177, |
|
"learning_rate": 9.749041585663411e-06, |
|
"loss": 1.1138, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.19193410521408036, |
|
"grad_norm": 2.0485148481555218, |
|
"learning_rate": 9.74478136328726e-06, |
|
"loss": 1.0884, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.19271116636879324, |
|
"grad_norm": 1.8565352764102319, |
|
"learning_rate": 9.740486231014461e-06, |
|
"loss": 1.0099, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.1934882275235061, |
|
"grad_norm": 1.5302635825343132, |
|
"learning_rate": 9.736156220446597e-06, |
|
"loss": 1.0461, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.194265288678219, |
|
"grad_norm": 1.5954264600641812, |
|
"learning_rate": 9.731791363441876e-06, |
|
"loss": 0.9655, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19504234983293184, |
|
"grad_norm": 1.7822694013944302, |
|
"learning_rate": 9.727391692114887e-06, |
|
"loss": 1.0542, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.19581941098764472, |
|
"grad_norm": 2.424667963877112, |
|
"learning_rate": 9.722957238836366e-06, |
|
"loss": 1.0331, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.1965964721423576, |
|
"grad_norm": 2.3703044008316487, |
|
"learning_rate": 9.718488036232963e-06, |
|
"loss": 1.0926, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.19737353329707047, |
|
"grad_norm": 2.1530662223107955, |
|
"learning_rate": 9.713984117186993e-06, |
|
"loss": 1.0121, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.19815059445178335, |
|
"grad_norm": 1.9314173573162179, |
|
"learning_rate": 9.7094455148362e-06, |
|
"loss": 1.0475, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.19892765560649622, |
|
"grad_norm": 1.9777777372602399, |
|
"learning_rate": 9.704872262573508e-06, |
|
"loss": 1.0105, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.1997047167612091, |
|
"grad_norm": 2.2819001107312546, |
|
"learning_rate": 9.700264394046787e-06, |
|
"loss": 0.948, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.20048177791592198, |
|
"grad_norm": 1.7288416994808482, |
|
"learning_rate": 9.69562194315859e-06, |
|
"loss": 1.0458, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.20125883907063485, |
|
"grad_norm": 1.7457323208199687, |
|
"learning_rate": 9.690944944065914e-06, |
|
"loss": 1.0476, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.20203590022534773, |
|
"grad_norm": 2.47172385268511, |
|
"learning_rate": 9.686233431179944e-06, |
|
"loss": 1.0115, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.2028129613800606, |
|
"grad_norm": 2.38182568324136, |
|
"learning_rate": 9.681487439165804e-06, |
|
"loss": 1.0733, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.20359002253477348, |
|
"grad_norm": 2.1251613678643153, |
|
"learning_rate": 9.676707002942299e-06, |
|
"loss": 1.1202, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.20436708368948636, |
|
"grad_norm": 2.3331174035594158, |
|
"learning_rate": 9.671892157681656e-06, |
|
"loss": 0.9892, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.20514414484419924, |
|
"grad_norm": 2.5297296744464597, |
|
"learning_rate": 9.66704293880927e-06, |
|
"loss": 1.0913, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.20592120599891212, |
|
"grad_norm": 1.9953398885425944, |
|
"learning_rate": 9.662159382003438e-06, |
|
"loss": 0.9739, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.206698267153625, |
|
"grad_norm": 1.9554157695142245, |
|
"learning_rate": 9.657241523195106e-06, |
|
"loss": 1.0062, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.20747532830833787, |
|
"grad_norm": 1.9681771655746416, |
|
"learning_rate": 9.652289398567591e-06, |
|
"loss": 0.9645, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.20825238946305075, |
|
"grad_norm": 1.6398427617567763, |
|
"learning_rate": 9.647303044556327e-06, |
|
"loss": 1.0691, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.20902945061776362, |
|
"grad_norm": 2.191033664996454, |
|
"learning_rate": 9.642282497848587e-06, |
|
"loss": 1.0046, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.2098065117724765, |
|
"grad_norm": 2.422573387512772, |
|
"learning_rate": 9.637227795383223e-06, |
|
"loss": 1.0334, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.21058357292718938, |
|
"grad_norm": 2.2231115952498817, |
|
"learning_rate": 9.63213897435039e-06, |
|
"loss": 1.0092, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.21136063408190225, |
|
"grad_norm": 1.5887335858791765, |
|
"learning_rate": 9.627016072191263e-06, |
|
"loss": 1.0601, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.21213769523661513, |
|
"grad_norm": 2.079071610960163, |
|
"learning_rate": 9.62185912659778e-06, |
|
"loss": 1.0089, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.212914756391328, |
|
"grad_norm": 2.6538511902261672, |
|
"learning_rate": 9.616668175512347e-06, |
|
"loss": 1.0996, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.21369181754604089, |
|
"grad_norm": 2.2366602617889675, |
|
"learning_rate": 9.611443257127573e-06, |
|
"loss": 0.995, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.21446887870075376, |
|
"grad_norm": 1.9923272374726597, |
|
"learning_rate": 9.60618440988598e-06, |
|
"loss": 1.0588, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.2152459398554666, |
|
"grad_norm": 1.933851579802707, |
|
"learning_rate": 9.60089167247972e-06, |
|
"loss": 1.0677, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.2160230010101795, |
|
"grad_norm": 1.7051761690927782, |
|
"learning_rate": 9.595565083850298e-06, |
|
"loss": 0.9761, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.21680006216489237, |
|
"grad_norm": 2.760621047319595, |
|
"learning_rate": 9.590204683188275e-06, |
|
"loss": 1.0485, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.21757712331960524, |
|
"grad_norm": 2.164361791637637, |
|
"learning_rate": 9.584810509932993e-06, |
|
"loss": 1.0935, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.21835418447431812, |
|
"grad_norm": 2.1290187047633387, |
|
"learning_rate": 9.579382603772269e-06, |
|
"loss": 1.0242, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.219131245629031, |
|
"grad_norm": 2.4594545836748796, |
|
"learning_rate": 9.573921004642117e-06, |
|
"loss": 1.0066, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.21990830678374387, |
|
"grad_norm": 2.211316974662037, |
|
"learning_rate": 9.568425752726442e-06, |
|
"loss": 0.9617, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.22068536793845675, |
|
"grad_norm": 2.914326191682928, |
|
"learning_rate": 9.562896888456758e-06, |
|
"loss": 1.0298, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.22146242909316963, |
|
"grad_norm": 1.8033463375470347, |
|
"learning_rate": 9.557334452511879e-06, |
|
"loss": 0.9536, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.2222394902478825, |
|
"grad_norm": 2.1801243317191856, |
|
"learning_rate": 9.551738485817622e-06, |
|
"loss": 0.951, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.22301655140259538, |
|
"grad_norm": 2.1629577942104183, |
|
"learning_rate": 9.546109029546511e-06, |
|
"loss": 0.9987, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.22379361255730826, |
|
"grad_norm": 1.3716114805711197, |
|
"learning_rate": 9.540446125117468e-06, |
|
"loss": 0.969, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.22457067371202113, |
|
"grad_norm": 1.9483284357069952, |
|
"learning_rate": 9.534749814195516e-06, |
|
"loss": 1.0039, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.225347734866734, |
|
"grad_norm": 2.0793028495715697, |
|
"learning_rate": 9.529020138691463e-06, |
|
"loss": 0.9743, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.2261247960214469, |
|
"grad_norm": 2.3579800092596646, |
|
"learning_rate": 9.523257140761595e-06, |
|
"loss": 0.9396, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.22690185717615977, |
|
"grad_norm": 1.9666592282727686, |
|
"learning_rate": 9.517460862807378e-06, |
|
"loss": 1.0413, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.22767891833087264, |
|
"grad_norm": 2.332398520531907, |
|
"learning_rate": 9.51163134747513e-06, |
|
"loss": 0.9895, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.22845597948558552, |
|
"grad_norm": 2.0112812087397853, |
|
"learning_rate": 9.505768637655717e-06, |
|
"loss": 1.026, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.2292330406402984, |
|
"grad_norm": 1.30588230567386, |
|
"learning_rate": 9.499872776484234e-06, |
|
"loss": 0.9389, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.23001010179501127, |
|
"grad_norm": 2.4882043492951107, |
|
"learning_rate": 9.493943807339686e-06, |
|
"loss": 1.0177, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.23078716294972415, |
|
"grad_norm": 2.472037249258304, |
|
"learning_rate": 9.487981773844673e-06, |
|
"loss": 1.0865, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.23156422410443703, |
|
"grad_norm": 2.3974288694298864, |
|
"learning_rate": 9.48198671986507e-06, |
|
"loss": 1.1025, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.2323412852591499, |
|
"grad_norm": 1.8931995855209747, |
|
"learning_rate": 9.475958689509697e-06, |
|
"loss": 1.0401, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.23311834641386278, |
|
"grad_norm": 1.7588453721284736, |
|
"learning_rate": 9.469897727130001e-06, |
|
"loss": 1.026, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23389540756857566, |
|
"grad_norm": 2.25782280175551, |
|
"learning_rate": 9.463803877319727e-06, |
|
"loss": 1.045, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.23467246872328854, |
|
"grad_norm": 2.062470298217632, |
|
"learning_rate": 9.45767718491459e-06, |
|
"loss": 0.9873, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.2354495298780014, |
|
"grad_norm": 2.235317636179408, |
|
"learning_rate": 9.451517694991947e-06, |
|
"loss": 0.9935, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.23622659103271426, |
|
"grad_norm": 1.8159214167836841, |
|
"learning_rate": 9.445325452870459e-06, |
|
"loss": 0.9837, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.23700365218742714, |
|
"grad_norm": 2.530492729153044, |
|
"learning_rate": 9.439100504109772e-06, |
|
"loss": 1.0975, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.23778071334214002, |
|
"grad_norm": 1.9008032910522048, |
|
"learning_rate": 9.432842894510164e-06, |
|
"loss": 0.975, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.2385577744968529, |
|
"grad_norm": 1.340909447158594, |
|
"learning_rate": 9.42655267011222e-06, |
|
"loss": 0.8966, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.23933483565156577, |
|
"grad_norm": 2.3032534649906053, |
|
"learning_rate": 9.420229877196484e-06, |
|
"loss": 0.899, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.24011189680627865, |
|
"grad_norm": 3.3083719276637815, |
|
"learning_rate": 9.413874562283136e-06, |
|
"loss": 1.0154, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.24088895796099152, |
|
"grad_norm": 1.7584921998647791, |
|
"learning_rate": 9.407486772131624e-06, |
|
"loss": 0.9767, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.2416660191157044, |
|
"grad_norm": 2.9427356878313686, |
|
"learning_rate": 9.401066553740343e-06, |
|
"loss": 0.9662, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.24244308027041728, |
|
"grad_norm": 2.1699016387323233, |
|
"learning_rate": 9.394613954346274e-06, |
|
"loss": 0.9713, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.24322014142513015, |
|
"grad_norm": 2.1813371019451653, |
|
"learning_rate": 9.388129021424648e-06, |
|
"loss": 0.9555, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.24399720257984303, |
|
"grad_norm": 1.9891788565996813, |
|
"learning_rate": 9.381611802688586e-06, |
|
"loss": 1.0036, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.2447742637345559, |
|
"grad_norm": 2.3143675049942014, |
|
"learning_rate": 9.375062346088759e-06, |
|
"loss": 0.971, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.24555132488926878, |
|
"grad_norm": 2.6629770871009155, |
|
"learning_rate": 9.368480699813021e-06, |
|
"loss": 0.9176, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.24632838604398166, |
|
"grad_norm": 2.1132958055867808, |
|
"learning_rate": 9.36186691228607e-06, |
|
"loss": 0.8972, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.24710544719869454, |
|
"grad_norm": 2.029313412599108, |
|
"learning_rate": 9.35522103216908e-06, |
|
"loss": 0.9154, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.24788250835340742, |
|
"grad_norm": 1.4578868082629726, |
|
"learning_rate": 9.34854310835935e-06, |
|
"loss": 1.036, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.2486595695081203, |
|
"grad_norm": 2.1062999698802503, |
|
"learning_rate": 9.341833189989942e-06, |
|
"loss": 0.8603, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.24943663066283317, |
|
"grad_norm": 2.6614219310606892, |
|
"learning_rate": 9.335091326429313e-06, |
|
"loss": 0.9924, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.25021369181754605, |
|
"grad_norm": 2.0301151705921665, |
|
"learning_rate": 9.328317567280968e-06, |
|
"loss": 0.953, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.2509907529722589, |
|
"grad_norm": 1.9699445720729638, |
|
"learning_rate": 9.321511962383077e-06, |
|
"loss": 0.9379, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.2517678141269718, |
|
"grad_norm": 2.2607361825721854, |
|
"learning_rate": 9.314674561808117e-06, |
|
"loss": 0.986, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.2525448752816847, |
|
"grad_norm": 1.839113470172114, |
|
"learning_rate": 9.307805415862507e-06, |
|
"loss": 0.9541, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.25332193643639755, |
|
"grad_norm": 1.8823362594556383, |
|
"learning_rate": 9.300904575086232e-06, |
|
"loss": 0.9203, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.25409899759111043, |
|
"grad_norm": 2.259964303887286, |
|
"learning_rate": 9.293972090252468e-06, |
|
"loss": 0.9679, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.2548760587458233, |
|
"grad_norm": 2.058151781656702, |
|
"learning_rate": 9.287008012367221e-06, |
|
"loss": 1.0023, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.2556531199005362, |
|
"grad_norm": 2.306218040399529, |
|
"learning_rate": 9.280012392668938e-06, |
|
"loss": 1.0326, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.25643018105524906, |
|
"grad_norm": 2.218261287466935, |
|
"learning_rate": 9.272985282628138e-06, |
|
"loss": 0.988, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.25720724220996194, |
|
"grad_norm": 2.8185774692963146, |
|
"learning_rate": 9.265926733947035e-06, |
|
"loss": 0.9237, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.2579843033646748, |
|
"grad_norm": 1.966754798605311, |
|
"learning_rate": 9.258836798559148e-06, |
|
"loss": 0.8764, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.2587613645193877, |
|
"grad_norm": 2.907713378609492, |
|
"learning_rate": 9.251715528628926e-06, |
|
"loss": 0.9781, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.25953842567410057, |
|
"grad_norm": 2.5867766624212107, |
|
"learning_rate": 9.244562976551368e-06, |
|
"loss": 0.9835, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.26031548682881345, |
|
"grad_norm": 2.659891863331392, |
|
"learning_rate": 9.237379194951626e-06, |
|
"loss": 0.9438, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.2610925479835263, |
|
"grad_norm": 1.8970250029232214, |
|
"learning_rate": 9.230164236684628e-06, |
|
"loss": 0.9617, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.2618696091382392, |
|
"grad_norm": 1.4823476343052233, |
|
"learning_rate": 9.222918154834684e-06, |
|
"loss": 1.0756, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.2626466702929521, |
|
"grad_norm": 2.1930418016202577, |
|
"learning_rate": 9.215641002715097e-06, |
|
"loss": 1.0523, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.26342373144766495, |
|
"grad_norm": 1.8533472991342042, |
|
"learning_rate": 9.208332833867772e-06, |
|
"loss": 0.8869, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.26420079260237783, |
|
"grad_norm": 2.184383922916281, |
|
"learning_rate": 9.200993702062821e-06, |
|
"loss": 0.9808, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.2649778537570907, |
|
"grad_norm": 2.510050570387309, |
|
"learning_rate": 9.193623661298164e-06, |
|
"loss": 0.9156, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.2657549149118036, |
|
"grad_norm": 2.546343372247806, |
|
"learning_rate": 9.186222765799137e-06, |
|
"loss": 0.9764, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.2665319760665164, |
|
"grad_norm": 1.5693684379771662, |
|
"learning_rate": 9.17879107001809e-06, |
|
"loss": 0.9491, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.2673090372212293, |
|
"grad_norm": 2.2264963076350544, |
|
"learning_rate": 9.171328628633987e-06, |
|
"loss": 0.9796, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.26808609837594216, |
|
"grad_norm": 1.8513099710874061, |
|
"learning_rate": 9.163835496552006e-06, |
|
"loss": 0.9294, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.26886315953065504, |
|
"grad_norm": 2.1369479039679913, |
|
"learning_rate": 9.15631172890313e-06, |
|
"loss": 0.9428, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.2696402206853679, |
|
"grad_norm": 2.1701410069417806, |
|
"learning_rate": 9.148757381043745e-06, |
|
"loss": 0.9497, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.2704172818400808, |
|
"grad_norm": 2.4018785001267102, |
|
"learning_rate": 9.141172508555234e-06, |
|
"loss": 0.9611, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.27119434299479367, |
|
"grad_norm": 2.5173991790204346, |
|
"learning_rate": 9.133557167243565e-06, |
|
"loss": 0.9233, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.27197140414950655, |
|
"grad_norm": 2.5138075382856497, |
|
"learning_rate": 9.125911413138877e-06, |
|
"loss": 0.9203, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2727484653042194, |
|
"grad_norm": 2.898893363605526, |
|
"learning_rate": 9.11823530249508e-06, |
|
"loss": 0.8849, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.2735255264589323, |
|
"grad_norm": 1.6850916480287021, |
|
"learning_rate": 9.11052889178943e-06, |
|
"loss": 0.875, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.2743025876136452, |
|
"grad_norm": 2.3316883827873447, |
|
"learning_rate": 9.102792237722114e-06, |
|
"loss": 1.0095, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.27507964876835805, |
|
"grad_norm": 2.1632200172689298, |
|
"learning_rate": 9.095025397215838e-06, |
|
"loss": 0.9276, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.27585670992307093, |
|
"grad_norm": 2.8796310855009795, |
|
"learning_rate": 9.087228427415405e-06, |
|
"loss": 0.9235, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.2766337710777838, |
|
"grad_norm": 2.0564562085035023, |
|
"learning_rate": 9.079401385687299e-06, |
|
"loss": 0.9491, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.2774108322324967, |
|
"grad_norm": 2.608162831191934, |
|
"learning_rate": 9.071544329619253e-06, |
|
"loss": 0.9458, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.27818789338720956, |
|
"grad_norm": 2.521963823842101, |
|
"learning_rate": 9.063657317019838e-06, |
|
"loss": 0.9137, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.27896495454192244, |
|
"grad_norm": 2.535651222771701, |
|
"learning_rate": 9.055740405918026e-06, |
|
"loss": 0.9567, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.2797420156966353, |
|
"grad_norm": 2.2389260303888476, |
|
"learning_rate": 9.04779365456277e-06, |
|
"loss": 0.9689, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.2805190768513482, |
|
"grad_norm": 1.7592398575015094, |
|
"learning_rate": 9.039817121422575e-06, |
|
"loss": 0.9177, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.28129613800606107, |
|
"grad_norm": 1.8002755024191208, |
|
"learning_rate": 9.031810865185066e-06, |
|
"loss": 0.9407, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.28207319916077395, |
|
"grad_norm": 2.3928408034774082, |
|
"learning_rate": 9.023774944756555e-06, |
|
"loss": 0.9863, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.2828502603154868, |
|
"grad_norm": 2.395034750902151, |
|
"learning_rate": 9.015709419261612e-06, |
|
"loss": 0.9869, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.2836273214701997, |
|
"grad_norm": 2.3890411242782466, |
|
"learning_rate": 9.007614348042626e-06, |
|
"loss": 0.909, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.2844043826249126, |
|
"grad_norm": 2.2350831565472107, |
|
"learning_rate": 8.999489790659368e-06, |
|
"loss": 0.8966, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.28518144377962545, |
|
"grad_norm": 3.694934035517618, |
|
"learning_rate": 8.991335806888558e-06, |
|
"loss": 0.9765, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.28595850493433833, |
|
"grad_norm": 3.0768679656946794, |
|
"learning_rate": 8.983152456723419e-06, |
|
"loss": 0.9859, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.2867355660890512, |
|
"grad_norm": 2.4664124428796548, |
|
"learning_rate": 8.97493980037324e-06, |
|
"loss": 0.9534, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.2875126272437641, |
|
"grad_norm": 2.245723206050526, |
|
"learning_rate": 8.96669789826293e-06, |
|
"loss": 0.9482, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.28828968839847696, |
|
"grad_norm": 2.355965037185437, |
|
"learning_rate": 8.958426811032576e-06, |
|
"loss": 0.8993, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.28906674955318984, |
|
"grad_norm": 2.43480515736849, |
|
"learning_rate": 8.950126599536993e-06, |
|
"loss": 0.9597, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.2898438107079027, |
|
"grad_norm": 2.5741426103315304, |
|
"learning_rate": 8.941797324845284e-06, |
|
"loss": 0.9499, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.2906208718626156, |
|
"grad_norm": 2.490107440300966, |
|
"learning_rate": 8.933439048240376e-06, |
|
"loss": 0.8834, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.29139793301732847, |
|
"grad_norm": 2.079854760599078, |
|
"learning_rate": 8.92505183121859e-06, |
|
"loss": 0.9257, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.29217499417204135, |
|
"grad_norm": 2.520480318994419, |
|
"learning_rate": 8.91663573548917e-06, |
|
"loss": 0.9679, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.2929520553267542, |
|
"grad_norm": 1.8583413033492335, |
|
"learning_rate": 8.908190822973838e-06, |
|
"loss": 0.8838, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.2937291164814671, |
|
"grad_norm": 2.3837910942670177, |
|
"learning_rate": 8.899717155806337e-06, |
|
"loss": 0.8847, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.29450617763618, |
|
"grad_norm": 2.711696676240023, |
|
"learning_rate": 8.891214796331973e-06, |
|
"loss": 0.9878, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.29528323879089285, |
|
"grad_norm": 2.502641692502333, |
|
"learning_rate": 8.882683807107154e-06, |
|
"loss": 0.9536, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.29606029994560573, |
|
"grad_norm": 2.3453784276871708, |
|
"learning_rate": 8.874124250898937e-06, |
|
"loss": 0.8787, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.2968373611003186, |
|
"grad_norm": 1.8832906440195756, |
|
"learning_rate": 8.865536190684559e-06, |
|
"loss": 0.9384, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.2976144222550315, |
|
"grad_norm": 2.1009680565481514, |
|
"learning_rate": 8.856919689650977e-06, |
|
"loss": 0.8934, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.29839148340974436, |
|
"grad_norm": 1.9232637840358615, |
|
"learning_rate": 8.848274811194402e-06, |
|
"loss": 0.9733, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.29916854456445724, |
|
"grad_norm": 2.807204409009, |
|
"learning_rate": 8.839601618919833e-06, |
|
"loss": 0.9018, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.2999456057191701, |
|
"grad_norm": 2.0589460869005065, |
|
"learning_rate": 8.830900176640587e-06, |
|
"loss": 0.9858, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.300722666873883, |
|
"grad_norm": 2.454773689152951, |
|
"learning_rate": 8.822170548377835e-06, |
|
"loss": 0.9769, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.30149972802859587, |
|
"grad_norm": 2.128683839495848, |
|
"learning_rate": 8.813412798360126e-06, |
|
"loss": 0.8856, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.30227678918330875, |
|
"grad_norm": 2.4279634048337213, |
|
"learning_rate": 8.804626991022915e-06, |
|
"loss": 0.9671, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.3030538503380216, |
|
"grad_norm": 3.6045788043321894, |
|
"learning_rate": 8.79581319100809e-06, |
|
"loss": 0.8933, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.3038309114927345, |
|
"grad_norm": 2.1672482233441084, |
|
"learning_rate": 8.786971463163495e-06, |
|
"loss": 0.9564, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.3046079726474474, |
|
"grad_norm": 2.1636428752933328, |
|
"learning_rate": 8.778101872542458e-06, |
|
"loss": 0.9913, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.30538503380216026, |
|
"grad_norm": 2.871516588464275, |
|
"learning_rate": 8.769204484403304e-06, |
|
"loss": 0.8939, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.30616209495687313, |
|
"grad_norm": 2.2048100149121814, |
|
"learning_rate": 8.760279364208879e-06, |
|
"loss": 0.8993, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.306939156111586, |
|
"grad_norm": 2.0054550377532343, |
|
"learning_rate": 8.751326577626075e-06, |
|
"loss": 0.9712, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.30771621726629883, |
|
"grad_norm": 1.941321214144556, |
|
"learning_rate": 8.742346190525332e-06, |
|
"loss": 0.9545, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.3084932784210117, |
|
"grad_norm": 2.3634949614963743, |
|
"learning_rate": 8.733338268980166e-06, |
|
"loss": 0.887, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.3092703395757246, |
|
"grad_norm": 3.5243533187865403, |
|
"learning_rate": 8.72430287926668e-06, |
|
"loss": 0.8955, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.31004740073043746, |
|
"grad_norm": 2.3622243989894747, |
|
"learning_rate": 8.715240087863072e-06, |
|
"loss": 0.8944, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.31082446188515034, |
|
"grad_norm": 1.946906851098621, |
|
"learning_rate": 8.70614996144915e-06, |
|
"loss": 0.8534, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.3116015230398632, |
|
"grad_norm": 2.268588081924812, |
|
"learning_rate": 8.697032566905842e-06, |
|
"loss": 0.8884, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.3123785841945761, |
|
"grad_norm": 2.183711381325099, |
|
"learning_rate": 8.6878879713147e-06, |
|
"loss": 0.9143, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.31315564534928897, |
|
"grad_norm": 2.627681687760923, |
|
"learning_rate": 8.678716241957408e-06, |
|
"loss": 0.8835, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.31393270650400185, |
|
"grad_norm": 1.443133233680791, |
|
"learning_rate": 8.669517446315292e-06, |
|
"loss": 0.9273, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.3147097676587147, |
|
"grad_norm": 2.393245491803305, |
|
"learning_rate": 8.660291652068813e-06, |
|
"loss": 0.9162, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.3154868288134276, |
|
"grad_norm": 2.2137742145203987, |
|
"learning_rate": 8.65103892709708e-06, |
|
"loss": 0.9558, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.3162638899681405, |
|
"grad_norm": 1.8575771555594642, |
|
"learning_rate": 8.641759339477345e-06, |
|
"loss": 0.9469, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.31704095112285335, |
|
"grad_norm": 2.3987640931014496, |
|
"learning_rate": 8.632452957484498e-06, |
|
"loss": 0.8976, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.31781801227756623, |
|
"grad_norm": 2.6592688199749612, |
|
"learning_rate": 8.62311984959058e-06, |
|
"loss": 0.8577, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.3185950734322791, |
|
"grad_norm": 2.6015155100334226, |
|
"learning_rate": 8.613760084464258e-06, |
|
"loss": 0.8989, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.319372134586992, |
|
"grad_norm": 3.2861649632260903, |
|
"learning_rate": 8.604373730970334e-06, |
|
"loss": 0.9379, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.32014919574170486, |
|
"grad_norm": 2.2805290644540315, |
|
"learning_rate": 8.59496085816924e-06, |
|
"loss": 0.9307, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.32092625689641774, |
|
"grad_norm": 1.9526498942261281, |
|
"learning_rate": 8.585521535316517e-06, |
|
"loss": 0.9789, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.3217033180511306, |
|
"grad_norm": 2.1513380917456923, |
|
"learning_rate": 8.576055831862317e-06, |
|
"loss": 0.9632, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.3224803792058435, |
|
"grad_norm": 2.9117768462597273, |
|
"learning_rate": 8.56656381745089e-06, |
|
"loss": 0.8607, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.32325744036055637, |
|
"grad_norm": 1.6037295849873296, |
|
"learning_rate": 8.557045561920066e-06, |
|
"loss": 0.9062, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.32403450151526925, |
|
"grad_norm": 2.3047029595748745, |
|
"learning_rate": 8.547501135300747e-06, |
|
"loss": 0.8982, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.3248115626699821, |
|
"grad_norm": 2.414214418200032, |
|
"learning_rate": 8.537930607816386e-06, |
|
"loss": 0.952, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.325588623824695, |
|
"grad_norm": 2.6048634749383037, |
|
"learning_rate": 8.528334049882482e-06, |
|
"loss": 0.9004, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.3263656849794079, |
|
"grad_norm": 2.090591332073441, |
|
"learning_rate": 8.51871153210605e-06, |
|
"loss": 0.9109, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.32714274613412075, |
|
"grad_norm": 2.039137230473015, |
|
"learning_rate": 8.5090631252851e-06, |
|
"loss": 0.8622, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.32791980728883363, |
|
"grad_norm": 1.3644794656877728, |
|
"learning_rate": 8.499388900408131e-06, |
|
"loss": 0.8932, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.3286968684435465, |
|
"grad_norm": 1.9869041419127695, |
|
"learning_rate": 8.489688928653593e-06, |
|
"loss": 0.8921, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.3294739295982594, |
|
"grad_norm": 2.1198129652125908, |
|
"learning_rate": 8.479963281389369e-06, |
|
"loss": 0.9178, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.33025099075297226, |
|
"grad_norm": 2.922298668933732, |
|
"learning_rate": 8.470212030172254e-06, |
|
"loss": 0.8541, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.33102805190768514, |
|
"grad_norm": 2.862204782837741, |
|
"learning_rate": 8.460435246747425e-06, |
|
"loss": 0.9081, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.331805113062398, |
|
"grad_norm": 2.4866367731953103, |
|
"learning_rate": 8.45063300304791e-06, |
|
"loss": 0.9563, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.3325821742171109, |
|
"grad_norm": 3.6054620715626298, |
|
"learning_rate": 8.440805371194064e-06, |
|
"loss": 0.8762, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.33335923537182377, |
|
"grad_norm": 1.357274089384285, |
|
"learning_rate": 8.430952423493038e-06, |
|
"loss": 0.89, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.33413629652653665, |
|
"grad_norm": 2.462550588436075, |
|
"learning_rate": 8.42107423243824e-06, |
|
"loss": 0.8998, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.3349133576812495, |
|
"grad_norm": 2.4758376060526337, |
|
"learning_rate": 8.41117087070881e-06, |
|
"loss": 0.8602, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.3356904188359624, |
|
"grad_norm": 2.670924674405534, |
|
"learning_rate": 8.401242411169085e-06, |
|
"loss": 0.9091, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.3364674799906753, |
|
"grad_norm": 2.4965212229622855, |
|
"learning_rate": 8.391288926868055e-06, |
|
"loss": 0.905, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.33724454114538815, |
|
"grad_norm": 2.6193244431141105, |
|
"learning_rate": 8.381310491038835e-06, |
|
"loss": 0.8834, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.33802160230010103, |
|
"grad_norm": 2.639094468488719, |
|
"learning_rate": 8.371307177098114e-06, |
|
"loss": 0.9659, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.3387986634548139, |
|
"grad_norm": 1.844532803490863, |
|
"learning_rate": 8.361279058645634e-06, |
|
"loss": 0.8736, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.3395757246095268, |
|
"grad_norm": 3.4447047963873647, |
|
"learning_rate": 8.351226209463628e-06, |
|
"loss": 0.8564, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.34035278576423966, |
|
"grad_norm": 2.0546081486698773, |
|
"learning_rate": 8.341148703516291e-06, |
|
"loss": 0.929, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.34112984691895254, |
|
"grad_norm": 2.498839246884663, |
|
"learning_rate": 8.331046614949228e-06, |
|
"loss": 0.8663, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.3419069080736654, |
|
"grad_norm": 2.574109259388575, |
|
"learning_rate": 8.320920018088912e-06, |
|
"loss": 0.9137, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.3426839692283783, |
|
"grad_norm": 3.1393397756280206, |
|
"learning_rate": 8.310768987442139e-06, |
|
"loss": 0.9368, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.34346103038309117, |
|
"grad_norm": 3.20210731977578, |
|
"learning_rate": 8.300593597695476e-06, |
|
"loss": 0.9299, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.34423809153780405, |
|
"grad_norm": 3.5589792979708994, |
|
"learning_rate": 8.290393923714713e-06, |
|
"loss": 0.9587, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.3450151526925169, |
|
"grad_norm": 2.8541415351108825, |
|
"learning_rate": 8.280170040544312e-06, |
|
"loss": 0.8605, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.3457922138472298, |
|
"grad_norm": 2.0518411713546554, |
|
"learning_rate": 8.269922023406851e-06, |
|
"loss": 0.7918, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.3465692750019427, |
|
"grad_norm": 2.699406909968831, |
|
"learning_rate": 8.259649947702485e-06, |
|
"loss": 0.873, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.34734633615665556, |
|
"grad_norm": 3.0919334403019425, |
|
"learning_rate": 8.24935388900837e-06, |
|
"loss": 0.8373, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.34812339731136843, |
|
"grad_norm": 2.9019624759746305, |
|
"learning_rate": 8.239033923078124e-06, |
|
"loss": 0.9174, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.34890045846608125, |
|
"grad_norm": 2.1140460699445764, |
|
"learning_rate": 8.228690125841258e-06, |
|
"loss": 0.8672, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.34967751962079413, |
|
"grad_norm": 3.0197408308584146, |
|
"learning_rate": 8.218322573402629e-06, |
|
"loss": 0.8523, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.350454580775507, |
|
"grad_norm": 2.657040743922122, |
|
"learning_rate": 8.20793134204187e-06, |
|
"loss": 0.8497, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.3512316419302199, |
|
"grad_norm": 3.4478785002624903, |
|
"learning_rate": 8.197516508212832e-06, |
|
"loss": 0.9144, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.35200870308493276, |
|
"grad_norm": 2.615501805261325, |
|
"learning_rate": 8.187078148543026e-06, |
|
"loss": 0.8521, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.35278576423964564, |
|
"grad_norm": 2.7673910964569566, |
|
"learning_rate": 8.176616339833048e-06, |
|
"loss": 0.9834, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.3535628253943585, |
|
"grad_norm": 3.110704979833664, |
|
"learning_rate": 8.166131159056028e-06, |
|
"loss": 0.9291, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.3543398865490714, |
|
"grad_norm": 2.382239717418457, |
|
"learning_rate": 8.155622683357056e-06, |
|
"loss": 0.962, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.35511694770378427, |
|
"grad_norm": 2.974819074830629, |
|
"learning_rate": 8.14509099005261e-06, |
|
"loss": 0.9076, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.35589400885849715, |
|
"grad_norm": 2.025484177379498, |
|
"learning_rate": 8.13453615663e-06, |
|
"loss": 0.9316, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.35667107001321, |
|
"grad_norm": 2.490523269053249, |
|
"learning_rate": 8.123958260746781e-06, |
|
"loss": 0.9202, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.3574481311679229, |
|
"grad_norm": 2.4151860798523566, |
|
"learning_rate": 8.113357380230198e-06, |
|
"loss": 0.8332, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.3582251923226358, |
|
"grad_norm": 2.994576094392819, |
|
"learning_rate": 8.102733593076608e-06, |
|
"loss": 0.907, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.35900225347734865, |
|
"grad_norm": 2.2570861805827898, |
|
"learning_rate": 8.092086977450896e-06, |
|
"loss": 0.892, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.35977931463206153, |
|
"grad_norm": 1.9441465953568793, |
|
"learning_rate": 8.081417611685914e-06, |
|
"loss": 0.8221, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.3605563757867744, |
|
"grad_norm": 2.9229560639134, |
|
"learning_rate": 8.0707255742819e-06, |
|
"loss": 0.8765, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.3613334369414873, |
|
"grad_norm": 3.3085405723587216, |
|
"learning_rate": 8.060010943905894e-06, |
|
"loss": 0.8406, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.36211049809620016, |
|
"grad_norm": 2.7364277865283624, |
|
"learning_rate": 8.049273799391171e-06, |
|
"loss": 0.8282, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.36288755925091304, |
|
"grad_norm": 2.483155933386303, |
|
"learning_rate": 8.038514219736648e-06, |
|
"loss": 0.9325, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.3636646204056259, |
|
"grad_norm": 3.132743137231315, |
|
"learning_rate": 8.027732284106316e-06, |
|
"loss": 0.8662, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.3644416815603388, |
|
"grad_norm": 2.9308723735400233, |
|
"learning_rate": 8.016928071828644e-06, |
|
"loss": 0.876, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.36521874271505167, |
|
"grad_norm": 2.4289372656874058, |
|
"learning_rate": 8.006101662396011e-06, |
|
"loss": 0.8752, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.36599580386976455, |
|
"grad_norm": 3.5005034837842794, |
|
"learning_rate": 7.995253135464103e-06, |
|
"loss": 0.8211, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.3667728650244774, |
|
"grad_norm": 2.6219168824993897, |
|
"learning_rate": 7.984382570851341e-06, |
|
"loss": 0.8963, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.3675499261791903, |
|
"grad_norm": 2.6913591077446544, |
|
"learning_rate": 7.973490048538291e-06, |
|
"loss": 0.8135, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.3683269873339032, |
|
"grad_norm": 3.323688764018341, |
|
"learning_rate": 7.962575648667068e-06, |
|
"loss": 0.8394, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.36910404848861605, |
|
"grad_norm": 1.9160655382592797, |
|
"learning_rate": 7.951639451540759e-06, |
|
"loss": 0.8373, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.36988110964332893, |
|
"grad_norm": 2.2592953806408977, |
|
"learning_rate": 7.940681537622816e-06, |
|
"loss": 0.8717, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.3706581707980418, |
|
"grad_norm": 2.4625597781213933, |
|
"learning_rate": 7.92970198753648e-06, |
|
"loss": 0.8353, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.3714352319527547, |
|
"grad_norm": 2.547595160954955, |
|
"learning_rate": 7.918700882064181e-06, |
|
"loss": 0.8747, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.37221229310746756, |
|
"grad_norm": 3.276135067674202, |
|
"learning_rate": 7.907678302146939e-06, |
|
"loss": 0.8997, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.37298935426218044, |
|
"grad_norm": 3.036723238718559, |
|
"learning_rate": 7.896634328883777e-06, |
|
"loss": 0.8189, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.3737664154168933, |
|
"grad_norm": 2.0650698930773093, |
|
"learning_rate": 7.885569043531118e-06, |
|
"loss": 0.8454, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.3745434765716062, |
|
"grad_norm": 3.760117109301269, |
|
"learning_rate": 7.874482527502192e-06, |
|
"loss": 0.8213, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.37532053772631907, |
|
"grad_norm": 3.531426821109854, |
|
"learning_rate": 7.863374862366428e-06, |
|
"loss": 0.8113, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.37609759888103195, |
|
"grad_norm": 2.3515365517581164, |
|
"learning_rate": 7.85224612984887e-06, |
|
"loss": 0.8064, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.3768746600357448, |
|
"grad_norm": 1.8840341910034588, |
|
"learning_rate": 7.841096411829561e-06, |
|
"loss": 0.8683, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.3776517211904577, |
|
"grad_norm": 2.38418725628485, |
|
"learning_rate": 7.829925790342942e-06, |
|
"loss": 0.7812, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.3784287823451706, |
|
"grad_norm": 2.4785026498656615, |
|
"learning_rate": 7.818734347577258e-06, |
|
"loss": 0.8119, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.37920584349988345, |
|
"grad_norm": 3.137259786348735, |
|
"learning_rate": 7.807522165873945e-06, |
|
"loss": 0.8764, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.37998290465459633, |
|
"grad_norm": 2.8359325177369845, |
|
"learning_rate": 7.796289327727022e-06, |
|
"loss": 0.7978, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.3807599658093092, |
|
"grad_norm": 3.158128777649866, |
|
"learning_rate": 7.7850359157825e-06, |
|
"loss": 0.8412, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.3815370269640221, |
|
"grad_norm": 3.501006126578136, |
|
"learning_rate": 7.773762012837751e-06, |
|
"loss": 0.8779, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.38231408811873496, |
|
"grad_norm": 2.468978859483751, |
|
"learning_rate": 7.762467701840914e-06, |
|
"loss": 0.8813, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.38309114927344784, |
|
"grad_norm": 3.0067259204153634, |
|
"learning_rate": 7.751153065890284e-06, |
|
"loss": 0.7915, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.3838682104281607, |
|
"grad_norm": 3.9988455962849865, |
|
"learning_rate": 7.739818188233693e-06, |
|
"loss": 0.8698, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.3846452715828736, |
|
"grad_norm": 2.8749069871202746, |
|
"learning_rate": 7.728463152267905e-06, |
|
"loss": 0.8986, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.38542233273758647, |
|
"grad_norm": 1.8557781579247277, |
|
"learning_rate": 7.717088041538e-06, |
|
"loss": 0.836, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.38619939389229935, |
|
"grad_norm": 2.554552315654769, |
|
"learning_rate": 7.705692939736754e-06, |
|
"loss": 0.905, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.3869764550470122, |
|
"grad_norm": 3.253478052848826, |
|
"learning_rate": 7.694277930704035e-06, |
|
"loss": 0.8877, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.3877535162017251, |
|
"grad_norm": 2.8816016322900095, |
|
"learning_rate": 7.682843098426173e-06, |
|
"loss": 0.9017, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.388530577356438, |
|
"grad_norm": 3.6095277498188465, |
|
"learning_rate": 7.671388527035353e-06, |
|
"loss": 0.839, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.38930763851115086, |
|
"grad_norm": 2.872689759467288, |
|
"learning_rate": 7.659914300808987e-06, |
|
"loss": 0.8551, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.3900846996658637, |
|
"grad_norm": 3.57677819644193, |
|
"learning_rate": 7.6484205041691e-06, |
|
"loss": 0.9367, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.39086176082057655, |
|
"grad_norm": 2.516301941871412, |
|
"learning_rate": 7.63690722168171e-06, |
|
"loss": 0.8439, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.39163882197528943, |
|
"grad_norm": 3.6976446460324985, |
|
"learning_rate": 7.625374538056196e-06, |
|
"loss": 0.9143, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.3924158831300023, |
|
"grad_norm": 2.4108959760850976, |
|
"learning_rate": 7.61382253814469e-06, |
|
"loss": 0.8488, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.3931929442847152, |
|
"grad_norm": 3.575102830931404, |
|
"learning_rate": 7.6022513069414375e-06, |
|
"loss": 0.9244, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.39397000543942806, |
|
"grad_norm": 2.5214806607432156, |
|
"learning_rate": 7.5906609295821785e-06, |
|
"loss": 0.7828, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.39474706659414094, |
|
"grad_norm": 2.2256882514741267, |
|
"learning_rate": 7.57905149134353e-06, |
|
"loss": 0.8343, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.3955241277488538, |
|
"grad_norm": 2.9737799015299915, |
|
"learning_rate": 7.567423077642342e-06, |
|
"loss": 0.8029, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.3963011889035667, |
|
"grad_norm": 2.1814937586449474, |
|
"learning_rate": 7.555775774035077e-06, |
|
"loss": 0.8595, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.39707825005827957, |
|
"grad_norm": 3.756192351660152, |
|
"learning_rate": 7.544109666217186e-06, |
|
"loss": 0.8058, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.39785531121299245, |
|
"grad_norm": 2.3316584523565544, |
|
"learning_rate": 7.532424840022468e-06, |
|
"loss": 0.8203, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.3986323723677053, |
|
"grad_norm": 3.3303069401649195, |
|
"learning_rate": 7.520721381422444e-06, |
|
"loss": 0.8766, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.3994094335224182, |
|
"grad_norm": 2.7096079276885203, |
|
"learning_rate": 7.5089993765257295e-06, |
|
"loss": 0.8252, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.4001864946771311, |
|
"grad_norm": 2.7989889775088987, |
|
"learning_rate": 7.497258911577385e-06, |
|
"loss": 0.8241, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.40096355583184395, |
|
"grad_norm": 2.8348088908838833, |
|
"learning_rate": 7.485500072958298e-06, |
|
"loss": 0.8047, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.40174061698655683, |
|
"grad_norm": 2.7178413634018206, |
|
"learning_rate": 7.4737229471845384e-06, |
|
"loss": 0.8469, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.4025176781412697, |
|
"grad_norm": 2.653716140680188, |
|
"learning_rate": 7.46192762090673e-06, |
|
"loss": 0.8986, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.4032947392959826, |
|
"grad_norm": 3.2114074118987097, |
|
"learning_rate": 7.450114180909396e-06, |
|
"loss": 0.8572, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.40407180045069546, |
|
"grad_norm": 2.5594692675174904, |
|
"learning_rate": 7.438282714110346e-06, |
|
"loss": 0.8348, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.40484886160540834, |
|
"grad_norm": 2.570719975580699, |
|
"learning_rate": 7.4264333075600094e-06, |
|
"loss": 0.817, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.4056259227601212, |
|
"grad_norm": 1.7904273430264364, |
|
"learning_rate": 7.414566048440815e-06, |
|
"loss": 0.773, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.4064029839148341, |
|
"grad_norm": 3.1160992335315836, |
|
"learning_rate": 7.4026810240665455e-06, |
|
"loss": 0.8406, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.40718004506954697, |
|
"grad_norm": 2.879056289166062, |
|
"learning_rate": 7.390778321881684e-06, |
|
"loss": 0.8299, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.40795710622425985, |
|
"grad_norm": 3.4705886843955134, |
|
"learning_rate": 7.378858029460785e-06, |
|
"loss": 0.8443, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.4087341673789727, |
|
"grad_norm": 3.0683501999483203, |
|
"learning_rate": 7.366920234507819e-06, |
|
"loss": 0.8563, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.4095112285336856, |
|
"grad_norm": 3.0155201359764248, |
|
"learning_rate": 7.354965024855536e-06, |
|
"loss": 0.7995, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.4102882896883985, |
|
"grad_norm": 3.7649992863447594, |
|
"learning_rate": 7.342992488464813e-06, |
|
"loss": 0.8513, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.41106535084311135, |
|
"grad_norm": 2.766804831311677, |
|
"learning_rate": 7.331002713424012e-06, |
|
"loss": 0.818, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.41184241199782423, |
|
"grad_norm": 3.759592880394652, |
|
"learning_rate": 7.3189957879483235e-06, |
|
"loss": 0.8724, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.4126194731525371, |
|
"grad_norm": 3.069207342018398, |
|
"learning_rate": 7.3069718003791276e-06, |
|
"loss": 0.8836, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.41339653430725, |
|
"grad_norm": 3.3679689130107433, |
|
"learning_rate": 7.29493083918334e-06, |
|
"loss": 0.8408, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.41417359546196286, |
|
"grad_norm": 3.1614295846456244, |
|
"learning_rate": 7.282872992952757e-06, |
|
"loss": 0.796, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.41495065661667574, |
|
"grad_norm": 2.3615270875399905, |
|
"learning_rate": 7.270798350403407e-06, |
|
"loss": 0.7622, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.4157277177713886, |
|
"grad_norm": 4.796953025378249, |
|
"learning_rate": 7.2587070003749015e-06, |
|
"loss": 0.8264, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.4165047789261015, |
|
"grad_norm": 3.137452467564146, |
|
"learning_rate": 7.246599031829775e-06, |
|
"loss": 0.7943, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.41728184008081437, |
|
"grad_norm": 3.0340412586302064, |
|
"learning_rate": 7.234474533852834e-06, |
|
"loss": 0.8368, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.41805890123552725, |
|
"grad_norm": 3.5888770778936627, |
|
"learning_rate": 7.222333595650502e-06, |
|
"loss": 0.8416, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.4188359623902401, |
|
"grad_norm": 1.602353309028904, |
|
"learning_rate": 7.210176306550161e-06, |
|
"loss": 0.8347, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.419613023544953, |
|
"grad_norm": 4.051100900717811, |
|
"learning_rate": 7.198002755999495e-06, |
|
"loss": 0.8079, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.4203900846996659, |
|
"grad_norm": 2.6685524323790215, |
|
"learning_rate": 7.185813033565832e-06, |
|
"loss": 0.8434, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.42116714585437875, |
|
"grad_norm": 2.729322434976506, |
|
"learning_rate": 7.1736072289354875e-06, |
|
"loss": 0.8578, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.42194420700909163, |
|
"grad_norm": 2.906073044503289, |
|
"learning_rate": 7.161385431913098e-06, |
|
"loss": 0.7804, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.4227212681638045, |
|
"grad_norm": 2.290496693909145, |
|
"learning_rate": 7.149147732420971e-06, |
|
"loss": 0.8248, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.4234983293185174, |
|
"grad_norm": 5.010159443056758, |
|
"learning_rate": 7.1368942204984094e-06, |
|
"loss": 0.8057, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.42427539047323026, |
|
"grad_norm": 2.536646021262698, |
|
"learning_rate": 7.124624986301062e-06, |
|
"loss": 0.8439, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.42505245162794314, |
|
"grad_norm": 2.9421994943957364, |
|
"learning_rate": 7.112340120100255e-06, |
|
"loss": 0.8744, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.425829512782656, |
|
"grad_norm": 3.3641741595063888, |
|
"learning_rate": 7.100039712282323e-06, |
|
"loss": 0.8211, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.4266065739373689, |
|
"grad_norm": 4.096933321696819, |
|
"learning_rate": 7.0877238533479535e-06, |
|
"loss": 0.838, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.42738363509208177, |
|
"grad_norm": 3.4094346710709678, |
|
"learning_rate": 7.075392633911513e-06, |
|
"loss": 0.8409, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.42816069624679465, |
|
"grad_norm": 1.993447683519007, |
|
"learning_rate": 7.063046144700383e-06, |
|
"loss": 0.8555, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.4289377574015075, |
|
"grad_norm": 2.5909937579577256, |
|
"learning_rate": 7.050684476554299e-06, |
|
"loss": 0.822, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.4297148185562204, |
|
"grad_norm": 3.3258757291630716, |
|
"learning_rate": 7.038307720424668e-06, |
|
"loss": 0.8538, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.4304918797109332, |
|
"grad_norm": 3.6044299100524535, |
|
"learning_rate": 7.025915967373911e-06, |
|
"loss": 0.7909, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.4312689408656461, |
|
"grad_norm": 2.945760411127075, |
|
"learning_rate": 7.013509308574788e-06, |
|
"loss": 0.7084, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.432046002020359, |
|
"grad_norm": 3.9943856557515405, |
|
"learning_rate": 7.001087835309734e-06, |
|
"loss": 0.8192, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.43282306317507185, |
|
"grad_norm": 3.9363696932078094, |
|
"learning_rate": 6.988651638970175e-06, |
|
"loss": 0.7937, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.43360012432978473, |
|
"grad_norm": 2.7961832443632697, |
|
"learning_rate": 6.976200811055867e-06, |
|
"loss": 0.8409, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.4343771854844976, |
|
"grad_norm": 3.573733698773883, |
|
"learning_rate": 6.963735443174213e-06, |
|
"loss": 0.8, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.4351542466392105, |
|
"grad_norm": 1.861238869282892, |
|
"learning_rate": 6.9512556270395996e-06, |
|
"loss": 0.8202, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.43593130779392336, |
|
"grad_norm": 3.435004374927387, |
|
"learning_rate": 6.938761454472718e-06, |
|
"loss": 0.7907, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.43670836894863624, |
|
"grad_norm": 2.169031421644934, |
|
"learning_rate": 6.926253017399882e-06, |
|
"loss": 0.7455, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.4374854301033491, |
|
"grad_norm": 2.639119266804599, |
|
"learning_rate": 6.913730407852359e-06, |
|
"loss": 0.7798, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.438262491258062, |
|
"grad_norm": 3.0923108923433653, |
|
"learning_rate": 6.9011937179656956e-06, |
|
"loss": 0.86, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.43903955241277487, |
|
"grad_norm": 3.4778690753111974, |
|
"learning_rate": 6.888643039979025e-06, |
|
"loss": 0.8565, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.43981661356748775, |
|
"grad_norm": 2.019550042044677, |
|
"learning_rate": 6.8760784662344085e-06, |
|
"loss": 0.8222, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.4405936747222006, |
|
"grad_norm": 2.531115492821316, |
|
"learning_rate": 6.863500089176141e-06, |
|
"loss": 0.7994, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.4413707358769135, |
|
"grad_norm": 3.623980012450744, |
|
"learning_rate": 6.850908001350076e-06, |
|
"loss": 0.8085, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.4421477970316264, |
|
"grad_norm": 2.874269072854778, |
|
"learning_rate": 6.838302295402944e-06, |
|
"loss": 0.8206, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.44292485818633925, |
|
"grad_norm": 3.3046693857663767, |
|
"learning_rate": 6.825683064081673e-06, |
|
"loss": 0.7733, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.44370191934105213, |
|
"grad_norm": 2.820815832528071, |
|
"learning_rate": 6.813050400232705e-06, |
|
"loss": 0.7684, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.444478980495765, |
|
"grad_norm": 3.2657952823703513, |
|
"learning_rate": 6.800404396801309e-06, |
|
"loss": 0.8003, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.4452560416504779, |
|
"grad_norm": 3.316944889654959, |
|
"learning_rate": 6.787745146830903e-06, |
|
"loss": 0.8037, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.44603310280519076, |
|
"grad_norm": 3.850090302427542, |
|
"learning_rate": 6.775072743462368e-06, |
|
"loss": 0.7318, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.44681016395990364, |
|
"grad_norm": 2.488942618483238, |
|
"learning_rate": 6.762387279933355e-06, |
|
"loss": 0.7842, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.4475872251146165, |
|
"grad_norm": 3.9986923312061537, |
|
"learning_rate": 6.749688849577616e-06, |
|
"loss": 0.7452, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.4483642862693294, |
|
"grad_norm": 3.174677745330878, |
|
"learning_rate": 6.736977545824299e-06, |
|
"loss": 0.7755, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.44914134742404227, |
|
"grad_norm": 3.008290639491103, |
|
"learning_rate": 6.72425346219727e-06, |
|
"loss": 0.7483, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.44991840857875515, |
|
"grad_norm": 3.7842544499599335, |
|
"learning_rate": 6.711516692314426e-06, |
|
"loss": 0.8714, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.450695469733468, |
|
"grad_norm": 3.595279361244756, |
|
"learning_rate": 6.698767329887001e-06, |
|
"loss": 0.8087, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.4514725308881809, |
|
"grad_norm": 3.2985766841264974, |
|
"learning_rate": 6.686005468718879e-06, |
|
"loss": 0.7593, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.4522495920428938, |
|
"grad_norm": 3.3364617948252855, |
|
"learning_rate": 6.673231202705906e-06, |
|
"loss": 0.744, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.45302665319760665, |
|
"grad_norm": 1.6739208971136896, |
|
"learning_rate": 6.660444625835194e-06, |
|
"loss": 0.7233, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.45380371435231953, |
|
"grad_norm": 2.908524261261958, |
|
"learning_rate": 6.647645832184437e-06, |
|
"loss": 0.7726, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.4545807755070324, |
|
"grad_norm": 3.741049911001574, |
|
"learning_rate": 6.634834915921211e-06, |
|
"loss": 0.7414, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.4553578366617453, |
|
"grad_norm": 3.525582515759396, |
|
"learning_rate": 6.6220119713022855e-06, |
|
"loss": 0.7431, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.45613489781645816, |
|
"grad_norm": 3.6441156387339446, |
|
"learning_rate": 6.609177092672927e-06, |
|
"loss": 0.8191, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.45691195897117104, |
|
"grad_norm": 3.235190279824699, |
|
"learning_rate": 6.596330374466212e-06, |
|
"loss": 0.7609, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.4576890201258839, |
|
"grad_norm": 2.6003682513249555, |
|
"learning_rate": 6.5834719112023215e-06, |
|
"loss": 0.7252, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.4584660812805968, |
|
"grad_norm": 4.03595764942659, |
|
"learning_rate": 6.570601797487854e-06, |
|
"loss": 0.8437, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.45924314243530967, |
|
"grad_norm": 2.7068297821785943, |
|
"learning_rate": 6.557720128015127e-06, |
|
"loss": 0.8236, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.46002020359002255, |
|
"grad_norm": 3.4599815225643495, |
|
"learning_rate": 6.544826997561479e-06, |
|
"loss": 0.7797, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.4607972647447354, |
|
"grad_norm": 3.773628994151356, |
|
"learning_rate": 6.531922500988572e-06, |
|
"loss": 0.751, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.4615743258994483, |
|
"grad_norm": 2.2173873623143563, |
|
"learning_rate": 6.519006733241697e-06, |
|
"loss": 0.7701, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.4623513870541612, |
|
"grad_norm": 3.033174067089371, |
|
"learning_rate": 6.506079789349074e-06, |
|
"loss": 0.7682, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.46312844820887406, |
|
"grad_norm": 4.1166433622525584, |
|
"learning_rate": 6.493141764421145e-06, |
|
"loss": 0.8537, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.46390550936358693, |
|
"grad_norm": 3.131603304402972, |
|
"learning_rate": 6.48019275364989e-06, |
|
"loss": 0.7729, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.4646825705182998, |
|
"grad_norm": 3.0925113977774674, |
|
"learning_rate": 6.46723285230811e-06, |
|
"loss": 0.7959, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.4654596316730127, |
|
"grad_norm": 4.214785149959189, |
|
"learning_rate": 6.454262155748741e-06, |
|
"loss": 0.771, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.46623669282772556, |
|
"grad_norm": 4.231644528802966, |
|
"learning_rate": 6.4412807594041396e-06, |
|
"loss": 0.8038, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.46701375398243844, |
|
"grad_norm": 3.077252834668561, |
|
"learning_rate": 6.428288758785387e-06, |
|
"loss": 0.7784, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.4677908151371513, |
|
"grad_norm": 3.710905060380187, |
|
"learning_rate": 6.415286249481591e-06, |
|
"loss": 0.7705, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.4685678762918642, |
|
"grad_norm": 4.489857568139187, |
|
"learning_rate": 6.402273327159169e-06, |
|
"loss": 0.7182, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.46934493744657707, |
|
"grad_norm": 3.852955528938296, |
|
"learning_rate": 6.389250087561162e-06, |
|
"loss": 0.7736, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.47012199860128995, |
|
"grad_norm": 3.9025918987862878, |
|
"learning_rate": 6.376216626506513e-06, |
|
"loss": 0.7431, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.4708990597560028, |
|
"grad_norm": 3.4097364478378203, |
|
"learning_rate": 6.363173039889373e-06, |
|
"loss": 0.7973, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.47167612091071565, |
|
"grad_norm": 4.33473272302523, |
|
"learning_rate": 6.350119423678391e-06, |
|
"loss": 0.7898, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.4724531820654285, |
|
"grad_norm": 3.679757021095654, |
|
"learning_rate": 6.3370558739160096e-06, |
|
"loss": 0.7576, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.4732302432201414, |
|
"grad_norm": 3.9057618817922033, |
|
"learning_rate": 6.32398248671776e-06, |
|
"loss": 0.7725, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.4740073043748543, |
|
"grad_norm": 3.403797504220692, |
|
"learning_rate": 6.310899358271549e-06, |
|
"loss": 0.8273, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.47478436552956715, |
|
"grad_norm": 2.2498527490634936, |
|
"learning_rate": 6.2978065848369594e-06, |
|
"loss": 0.7365, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.47556142668428003, |
|
"grad_norm": 3.5041131745023777, |
|
"learning_rate": 6.284704262744532e-06, |
|
"loss": 0.7739, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.4763384878389929, |
|
"grad_norm": 3.236195246500179, |
|
"learning_rate": 6.271592488395064e-06, |
|
"loss": 0.769, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.4771155489937058, |
|
"grad_norm": 4.227426671695652, |
|
"learning_rate": 6.2584713582589015e-06, |
|
"loss": 0.801, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.47789261014841866, |
|
"grad_norm": 2.395986835968045, |
|
"learning_rate": 6.2453409688752244e-06, |
|
"loss": 0.7343, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.47866967130313154, |
|
"grad_norm": 3.050933140103267, |
|
"learning_rate": 6.232201416851332e-06, |
|
"loss": 0.7774, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.4794467324578444, |
|
"grad_norm": 3.680174317755052, |
|
"learning_rate": 6.219052798861948e-06, |
|
"loss": 0.8151, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.4802237936125573, |
|
"grad_norm": 3.282669805242103, |
|
"learning_rate": 6.205895211648489e-06, |
|
"loss": 0.7851, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.48100085476727017, |
|
"grad_norm": 3.0746449279394454, |
|
"learning_rate": 6.192728752018373e-06, |
|
"loss": 0.8465, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.48177791592198305, |
|
"grad_norm": 3.6239050452367345, |
|
"learning_rate": 6.179553516844291e-06, |
|
"loss": 0.7675, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.4825549770766959, |
|
"grad_norm": 2.4293135613154706, |
|
"learning_rate": 6.1663696030635e-06, |
|
"loss": 0.7459, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.4833320382314088, |
|
"grad_norm": 2.4717149655776716, |
|
"learning_rate": 6.153177107677112e-06, |
|
"loss": 0.7385, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.4841090993861217, |
|
"grad_norm": 3.7011954863420424, |
|
"learning_rate": 6.139976127749381e-06, |
|
"loss": 0.7594, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.48488616054083455, |
|
"grad_norm": 3.580923341493924, |
|
"learning_rate": 6.126766760406982e-06, |
|
"loss": 0.7504, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.48566322169554743, |
|
"grad_norm": 3.7474824398696054, |
|
"learning_rate": 6.1135491028383e-06, |
|
"loss": 0.8189, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.4864402828502603, |
|
"grad_norm": 4.008525494927905, |
|
"learning_rate": 6.100323252292721e-06, |
|
"loss": 0.8037, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.4872173440049732, |
|
"grad_norm": 4.533137670554457, |
|
"learning_rate": 6.087089306079907e-06, |
|
"loss": 0.7396, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.48799440515968606, |
|
"grad_norm": 3.577325942559521, |
|
"learning_rate": 6.073847361569085e-06, |
|
"loss": 0.7712, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.48877146631439894, |
|
"grad_norm": 3.4785892916574226, |
|
"learning_rate": 6.06059751618833e-06, |
|
"loss": 0.7744, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.4895485274691118, |
|
"grad_norm": 2.726294641729152, |
|
"learning_rate": 6.047339867423849e-06, |
|
"loss": 0.739, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.4903255886238247, |
|
"grad_norm": 3.2923367667657244, |
|
"learning_rate": 6.034074512819259e-06, |
|
"loss": 0.7921, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.49110264977853757, |
|
"grad_norm": 2.5138919730315163, |
|
"learning_rate": 6.020801549974879e-06, |
|
"loss": 0.7627, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.49187971093325045, |
|
"grad_norm": 3.0639205838133923, |
|
"learning_rate": 6.007521076546999e-06, |
|
"loss": 0.6908, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.4926567720879633, |
|
"grad_norm": 5.28489991162866, |
|
"learning_rate": 5.994233190247174e-06, |
|
"loss": 0.6984, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.4934338332426762, |
|
"grad_norm": 3.1930218466849665, |
|
"learning_rate": 5.9809379888414975e-06, |
|
"loss": 0.7312, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.4942108943973891, |
|
"grad_norm": 2.140853783592497, |
|
"learning_rate": 5.967635570149881e-06, |
|
"loss": 0.739, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.49498795555210195, |
|
"grad_norm": 2.6520877753384706, |
|
"learning_rate": 5.9543260320453445e-06, |
|
"loss": 0.7115, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.49576501670681483, |
|
"grad_norm": 3.5362571286933693, |
|
"learning_rate": 5.941009472453283e-06, |
|
"loss": 0.7313, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.4965420778615277, |
|
"grad_norm": 3.1479357916202173, |
|
"learning_rate": 5.927685989350755e-06, |
|
"loss": 0.7689, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.4973191390162406, |
|
"grad_norm": 4.239286662147043, |
|
"learning_rate": 5.914355680765757e-06, |
|
"loss": 0.7209, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.49809620017095346, |
|
"grad_norm": 4.168222516693175, |
|
"learning_rate": 5.901018644776509e-06, |
|
"loss": 0.7151, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.49887326132566634, |
|
"grad_norm": 2.857843662958384, |
|
"learning_rate": 5.8876749795107214e-06, |
|
"loss": 0.768, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.4996503224803792, |
|
"grad_norm": 3.52360411131157, |
|
"learning_rate": 5.874324783144885e-06, |
|
"loss": 0.8139, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.5004273836350921, |
|
"grad_norm": 3.657211308302993, |
|
"learning_rate": 5.860968153903542e-06, |
|
"loss": 0.6869, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.501204444789805, |
|
"grad_norm": 2.331407753002653, |
|
"learning_rate": 5.847605190058563e-06, |
|
"loss": 0.747, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.5019815059445178, |
|
"grad_norm": 3.7182364487724713, |
|
"learning_rate": 5.8342359899284286e-06, |
|
"loss": 0.7425, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.5027585670992307, |
|
"grad_norm": 3.5617096002819926, |
|
"learning_rate": 5.8208606518775e-06, |
|
"loss": 0.7474, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.5035356282539436, |
|
"grad_norm": 3.1283143308974477, |
|
"learning_rate": 5.807479274315302e-06, |
|
"loss": 0.7354, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.5043126894086565, |
|
"grad_norm": 3.183649544594623, |
|
"learning_rate": 5.79409195569579e-06, |
|
"loss": 0.7693, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.5050897505633694, |
|
"grad_norm": 4.183143639793591, |
|
"learning_rate": 5.780698794516636e-06, |
|
"loss": 0.7159, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5058668117180822, |
|
"grad_norm": 3.3530863093489613, |
|
"learning_rate": 5.767299889318496e-06, |
|
"loss": 0.7258, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.5066438728727951, |
|
"grad_norm": 3.4594325919428703, |
|
"learning_rate": 5.75389533868429e-06, |
|
"loss": 0.831, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.507420934027508, |
|
"grad_norm": 2.9431596981070642, |
|
"learning_rate": 5.7404852412384725e-06, |
|
"loss": 0.6962, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.5081979951822209, |
|
"grad_norm": 3.0367905793947894, |
|
"learning_rate": 5.72706969564631e-06, |
|
"loss": 0.7612, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.5089750563369337, |
|
"grad_norm": 2.429198874828814, |
|
"learning_rate": 5.713648800613154e-06, |
|
"loss": 0.7464, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.5097521174916466, |
|
"grad_norm": 3.4346659673155964, |
|
"learning_rate": 5.700222654883712e-06, |
|
"loss": 0.784, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.5105291786463595, |
|
"grad_norm": 3.412520275752024, |
|
"learning_rate": 5.686791357241329e-06, |
|
"loss": 0.7418, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.5113062398010724, |
|
"grad_norm": 3.5500533489754957, |
|
"learning_rate": 5.673355006507251e-06, |
|
"loss": 0.7931, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.5120833009557852, |
|
"grad_norm": 3.3785219578924073, |
|
"learning_rate": 5.659913701539903e-06, |
|
"loss": 0.7255, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.5128603621104981, |
|
"grad_norm": 2.8478099507815493, |
|
"learning_rate": 5.646467541234162e-06, |
|
"loss": 0.6869, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.513637423265211, |
|
"grad_norm": 4.116946216809252, |
|
"learning_rate": 5.633016624520627e-06, |
|
"loss": 0.723, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.5144144844199239, |
|
"grad_norm": 4.278208268527751, |
|
"learning_rate": 5.619561050364897e-06, |
|
"loss": 0.7021, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.5151915455746368, |
|
"grad_norm": 3.9380435048254068, |
|
"learning_rate": 5.606100917766829e-06, |
|
"loss": 0.7289, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.5159686067293496, |
|
"grad_norm": 3.035312643544745, |
|
"learning_rate": 5.592636325759829e-06, |
|
"loss": 0.6616, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.5167456678840625, |
|
"grad_norm": 4.67293135855067, |
|
"learning_rate": 5.579167373410108e-06, |
|
"loss": 0.6983, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.5175227290387754, |
|
"grad_norm": 4.655170532587341, |
|
"learning_rate": 5.565694159815955e-06, |
|
"loss": 0.7799, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.5182997901934883, |
|
"grad_norm": 3.3764468867138193, |
|
"learning_rate": 5.552216784107022e-06, |
|
"loss": 0.7443, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.5190768513482011, |
|
"grad_norm": 3.441315238146844, |
|
"learning_rate": 5.538735345443573e-06, |
|
"loss": 0.7195, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.519853912502914, |
|
"grad_norm": 4.575454800944016, |
|
"learning_rate": 5.525249943015771e-06, |
|
"loss": 0.7499, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.5206309736576269, |
|
"grad_norm": 5.206336978319692, |
|
"learning_rate": 5.511760676042941e-06, |
|
"loss": 0.7462, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.5214080348123398, |
|
"grad_norm": 2.782422183265534, |
|
"learning_rate": 5.498267643772842e-06, |
|
"loss": 0.6735, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.5221850959670526, |
|
"grad_norm": 4.799976665563157, |
|
"learning_rate": 5.484770945480935e-06, |
|
"loss": 0.7432, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.5229621571217655, |
|
"grad_norm": 3.68056618328099, |
|
"learning_rate": 5.471270680469656e-06, |
|
"loss": 0.7086, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.5237392182764784, |
|
"grad_norm": 4.337600776833273, |
|
"learning_rate": 5.457766948067682e-06, |
|
"loss": 0.6972, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.5245162794311913, |
|
"grad_norm": 2.9170786823925754, |
|
"learning_rate": 5.4442598476292e-06, |
|
"loss": 0.697, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.5252933405859042, |
|
"grad_norm": 3.389813065457727, |
|
"learning_rate": 5.430749478533182e-06, |
|
"loss": 0.6823, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.526070401740617, |
|
"grad_norm": 4.405810375053449, |
|
"learning_rate": 5.417235940182646e-06, |
|
"loss": 0.6954, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.5268474628953299, |
|
"grad_norm": 3.745948791175591, |
|
"learning_rate": 5.403719332003925e-06, |
|
"loss": 0.7129, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.5276245240500428, |
|
"grad_norm": 3.5759861354998095, |
|
"learning_rate": 5.390199753445945e-06, |
|
"loss": 0.7457, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.5284015852047557, |
|
"grad_norm": 3.133292740862389, |
|
"learning_rate": 5.376677303979481e-06, |
|
"loss": 0.716, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.5291786463594685, |
|
"grad_norm": 3.6994792177101536, |
|
"learning_rate": 5.3631520830964335e-06, |
|
"loss": 0.7075, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.5299557075141814, |
|
"grad_norm": 4.2709254391755875, |
|
"learning_rate": 5.349624190309095e-06, |
|
"loss": 0.6646, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.5307327686688943, |
|
"grad_norm": 4.757235420288998, |
|
"learning_rate": 5.3360937251494145e-06, |
|
"loss": 0.7197, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.5315098298236072, |
|
"grad_norm": 3.970395562121448, |
|
"learning_rate": 5.322560787168266e-06, |
|
"loss": 0.7113, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.53228689097832, |
|
"grad_norm": 3.4076129510381636, |
|
"learning_rate": 5.30902547593472e-06, |
|
"loss": 0.7051, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.5330639521330328, |
|
"grad_norm": 4.69605182138137, |
|
"learning_rate": 5.29548789103531e-06, |
|
"loss": 0.7044, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.5338410132877457, |
|
"grad_norm": 3.804895971708535, |
|
"learning_rate": 5.281948132073293e-06, |
|
"loss": 0.7119, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.5346180744424586, |
|
"grad_norm": 3.6916149040278596, |
|
"learning_rate": 5.2684062986679245e-06, |
|
"loss": 0.7208, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.5353951355971714, |
|
"grad_norm": 3.0845852262650775, |
|
"learning_rate": 5.254862490453723e-06, |
|
"loss": 0.6855, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.5361721967518843, |
|
"grad_norm": 4.685912874705627, |
|
"learning_rate": 5.241316807079735e-06, |
|
"loss": 0.7176, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5369492579065972, |
|
"grad_norm": 2.9240144110586157, |
|
"learning_rate": 5.227769348208808e-06, |
|
"loss": 0.7158, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.5377263190613101, |
|
"grad_norm": 3.258492056259544, |
|
"learning_rate": 5.214220213516849e-06, |
|
"loss": 0.6492, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.538503380216023, |
|
"grad_norm": 4.273950085839226, |
|
"learning_rate": 5.200669502692092e-06, |
|
"loss": 0.6784, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.5392804413707358, |
|
"grad_norm": 2.6079076529513503, |
|
"learning_rate": 5.187117315434374e-06, |
|
"loss": 0.6969, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.5400575025254487, |
|
"grad_norm": 3.4737447133789847, |
|
"learning_rate": 5.173563751454393e-06, |
|
"loss": 0.7804, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.5408345636801616, |
|
"grad_norm": 4.786817720128349, |
|
"learning_rate": 5.160008910472971e-06, |
|
"loss": 0.6805, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.5416116248348745, |
|
"grad_norm": 3.7701770083150197, |
|
"learning_rate": 5.146452892220334e-06, |
|
"loss": 0.7214, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.5423886859895873, |
|
"grad_norm": 3.7554811031983344, |
|
"learning_rate": 5.132895796435363e-06, |
|
"loss": 0.6417, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.5431657471443002, |
|
"grad_norm": 3.5547381426364097, |
|
"learning_rate": 5.119337722864871e-06, |
|
"loss": 0.6636, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.5439428082990131, |
|
"grad_norm": 5.011611632534712, |
|
"learning_rate": 5.1057787712628645e-06, |
|
"loss": 0.6869, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.544719869453726, |
|
"grad_norm": 3.833252076719035, |
|
"learning_rate": 5.092219041389809e-06, |
|
"loss": 0.698, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.5454969306084388, |
|
"grad_norm": 3.94968001273636, |
|
"learning_rate": 5.0786586330118936e-06, |
|
"loss": 0.6499, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.5462739917631517, |
|
"grad_norm": 4.652418519560147, |
|
"learning_rate": 5.065097645900305e-06, |
|
"loss": 0.7365, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.5470510529178646, |
|
"grad_norm": 3.4688260249453333, |
|
"learning_rate": 5.051536179830485e-06, |
|
"loss": 0.7244, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.5478281140725775, |
|
"grad_norm": 3.507980085656876, |
|
"learning_rate": 5.0379743345814e-06, |
|
"loss": 0.6463, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.5486051752272904, |
|
"grad_norm": 4.08415517826481, |
|
"learning_rate": 5.024412209934806e-06, |
|
"loss": 0.7134, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.5493822363820032, |
|
"grad_norm": 3.1430434027718848, |
|
"learning_rate": 5.010849905674513e-06, |
|
"loss": 0.6646, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.5501592975367161, |
|
"grad_norm": 1.7398353080625177, |
|
"learning_rate": 4.997287521585657e-06, |
|
"loss": 0.6604, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.550936358691429, |
|
"grad_norm": 3.6616218145390356, |
|
"learning_rate": 4.983725157453956e-06, |
|
"loss": 0.6713, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.5517134198461419, |
|
"grad_norm": 3.811153246818418, |
|
"learning_rate": 4.9701629130649834e-06, |
|
"loss": 0.7095, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.5524904810008547, |
|
"grad_norm": 4.929016419712588, |
|
"learning_rate": 4.956600888203433e-06, |
|
"loss": 0.6714, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.5532675421555676, |
|
"grad_norm": 3.4541756616239927, |
|
"learning_rate": 4.943039182652383e-06, |
|
"loss": 0.7235, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.5540446033102805, |
|
"grad_norm": 4.095722371398238, |
|
"learning_rate": 4.929477896192561e-06, |
|
"loss": 0.8093, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.5548216644649934, |
|
"grad_norm": 4.870666395156222, |
|
"learning_rate": 4.915917128601611e-06, |
|
"loss": 0.7031, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.5555987256197062, |
|
"grad_norm": 3.448418758510041, |
|
"learning_rate": 4.902356979653361e-06, |
|
"loss": 0.7084, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.5563757867744191, |
|
"grad_norm": 3.829159584215915, |
|
"learning_rate": 4.8887975491170845e-06, |
|
"loss": 0.7181, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.557152847929132, |
|
"grad_norm": 3.555777208653401, |
|
"learning_rate": 4.875238936756774e-06, |
|
"loss": 0.6763, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.5579299090838449, |
|
"grad_norm": 2.5493937496001187, |
|
"learning_rate": 4.861681242330397e-06, |
|
"loss": 0.6756, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.5587069702385578, |
|
"grad_norm": 3.3198532718689813, |
|
"learning_rate": 4.84812456558917e-06, |
|
"loss": 0.6644, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.5594840313932706, |
|
"grad_norm": 3.829290955616477, |
|
"learning_rate": 4.834569006276823e-06, |
|
"loss": 0.6786, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5602610925479835, |
|
"grad_norm": 2.592783541640363, |
|
"learning_rate": 4.821014664128859e-06, |
|
"loss": 0.7156, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.5610381537026964, |
|
"grad_norm": 4.188978510013467, |
|
"learning_rate": 4.807461638871835e-06, |
|
"loss": 0.7262, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.5618152148574093, |
|
"grad_norm": 3.069522579226053, |
|
"learning_rate": 4.79391003022261e-06, |
|
"loss": 0.6989, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.5625922760121221, |
|
"grad_norm": 4.039799899118001, |
|
"learning_rate": 4.780359937887625e-06, |
|
"loss": 0.6682, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.563369337166835, |
|
"grad_norm": 4.6623197649536126, |
|
"learning_rate": 4.766811461562163e-06, |
|
"loss": 0.6464, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.5641463983215479, |
|
"grad_norm": 5.438968217638661, |
|
"learning_rate": 4.753264700929619e-06, |
|
"loss": 0.6507, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.5649234594762608, |
|
"grad_norm": 4.0222533809812, |
|
"learning_rate": 4.739719755660761e-06, |
|
"loss": 0.7014, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.5657005206309736, |
|
"grad_norm": 4.058570524163514, |
|
"learning_rate": 4.726176725413004e-06, |
|
"loss": 0.693, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.5664775817856865, |
|
"grad_norm": 3.3787013409423445, |
|
"learning_rate": 4.712635709829672e-06, |
|
"loss": 0.6591, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.5672546429403994, |
|
"grad_norm": 3.3640659595948708, |
|
"learning_rate": 4.699096808539264e-06, |
|
"loss": 0.7431, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.5680317040951123, |
|
"grad_norm": 3.1238662551833616, |
|
"learning_rate": 4.685560121154729e-06, |
|
"loss": 0.6474, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.5688087652498252, |
|
"grad_norm": 2.452949406434516, |
|
"learning_rate": 4.672025747272721e-06, |
|
"loss": 0.6816, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.569585826404538, |
|
"grad_norm": 3.127308776747053, |
|
"learning_rate": 4.658493786472874e-06, |
|
"loss": 0.6741, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.5703628875592509, |
|
"grad_norm": 3.9891903397041455, |
|
"learning_rate": 4.644964338317069e-06, |
|
"loss": 0.7111, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.5711399487139638, |
|
"grad_norm": 3.495751965003335, |
|
"learning_rate": 4.631437502348697e-06, |
|
"loss": 0.6552, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.5719170098686767, |
|
"grad_norm": 3.436449484433345, |
|
"learning_rate": 4.617913378091935e-06, |
|
"loss": 0.6893, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.5726940710233895, |
|
"grad_norm": 3.0865849237950784, |
|
"learning_rate": 4.604392065051003e-06, |
|
"loss": 0.7376, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.5734711321781024, |
|
"grad_norm": 4.474788471571803, |
|
"learning_rate": 4.590873662709441e-06, |
|
"loss": 0.6914, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.5742481933328153, |
|
"grad_norm": 2.91533419260106, |
|
"learning_rate": 4.577358270529371e-06, |
|
"loss": 0.6414, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.5750252544875282, |
|
"grad_norm": 4.0797704361429785, |
|
"learning_rate": 4.5638459879507685e-06, |
|
"loss": 0.6661, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.575802315642241, |
|
"grad_norm": 4.709772893333078, |
|
"learning_rate": 4.550336914390734e-06, |
|
"loss": 0.6594, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.5765793767969539, |
|
"grad_norm": 4.564968479413114, |
|
"learning_rate": 4.536831149242752e-06, |
|
"loss": 0.6672, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.5773564379516668, |
|
"grad_norm": 4.056479158493849, |
|
"learning_rate": 4.5233287918759645e-06, |
|
"loss": 0.708, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.5781334991063797, |
|
"grad_norm": 3.645071188138108, |
|
"learning_rate": 4.509829941634447e-06, |
|
"loss": 0.686, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.5789105602610926, |
|
"grad_norm": 3.7318479118380044, |
|
"learning_rate": 4.496334697836466e-06, |
|
"loss": 0.6866, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.5796876214158054, |
|
"grad_norm": 3.6748150242674384, |
|
"learning_rate": 4.482843159773753e-06, |
|
"loss": 0.701, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.5804646825705183, |
|
"grad_norm": 3.532495775566941, |
|
"learning_rate": 4.46935542671078e-06, |
|
"loss": 0.6266, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.5812417437252312, |
|
"grad_norm": 3.917282093097207, |
|
"learning_rate": 4.455871597884016e-06, |
|
"loss": 0.6965, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.5820188048799441, |
|
"grad_norm": 3.541326700374132, |
|
"learning_rate": 4.4423917725012125e-06, |
|
"loss": 0.6256, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.5827958660346569, |
|
"grad_norm": 2.8073311337818088, |
|
"learning_rate": 4.428916049740657e-06, |
|
"loss": 0.5885, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5835729271893698, |
|
"grad_norm": 3.374101386732686, |
|
"learning_rate": 4.41544452875046e-06, |
|
"loss": 0.6549, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.5843499883440827, |
|
"grad_norm": 4.325578617573067, |
|
"learning_rate": 4.401977308647811e-06, |
|
"loss": 0.6566, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.5851270494987956, |
|
"grad_norm": 4.915536833619769, |
|
"learning_rate": 4.38851448851826e-06, |
|
"loss": 0.6687, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.5859041106535084, |
|
"grad_norm": 3.6537787425693544, |
|
"learning_rate": 4.3750561674149815e-06, |
|
"loss": 0.6292, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.5866811718082213, |
|
"grad_norm": 2.9777148243481335, |
|
"learning_rate": 4.3616024443580475e-06, |
|
"loss": 0.6541, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.5874582329629342, |
|
"grad_norm": 3.5260018889623455, |
|
"learning_rate": 4.348153418333703e-06, |
|
"loss": 0.667, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 3.7174490457010654, |
|
"learning_rate": 4.334709188293631e-06, |
|
"loss": 0.6419, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.58901235527236, |
|
"grad_norm": 3.4684662206499355, |
|
"learning_rate": 4.321269853154231e-06, |
|
"loss": 0.65, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.5897894164270728, |
|
"grad_norm": 3.1882054970304083, |
|
"learning_rate": 4.307835511795883e-06, |
|
"loss": 0.622, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.5905664775817857, |
|
"grad_norm": 4.381319562804776, |
|
"learning_rate": 4.294406263062235e-06, |
|
"loss": 0.6422, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.5913435387364986, |
|
"grad_norm": 3.724730362444138, |
|
"learning_rate": 4.280982205759453e-06, |
|
"loss": 0.664, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.5921205998912115, |
|
"grad_norm": 3.2942646676430027, |
|
"learning_rate": 4.267563438655517e-06, |
|
"loss": 0.6834, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.5928976610459243, |
|
"grad_norm": 3.9059709080382445, |
|
"learning_rate": 4.254150060479479e-06, |
|
"loss": 0.6773, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.5936747222006372, |
|
"grad_norm": 3.2926775490538867, |
|
"learning_rate": 4.240742169920744e-06, |
|
"loss": 0.6612, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.5944517833553501, |
|
"grad_norm": 3.721480675397905, |
|
"learning_rate": 4.22733986562834e-06, |
|
"loss": 0.5946, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.595228844510063, |
|
"grad_norm": 3.6657313410284282, |
|
"learning_rate": 4.213943246210195e-06, |
|
"loss": 0.6839, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.5960059056647758, |
|
"grad_norm": 3.555216109953286, |
|
"learning_rate": 4.200552410232411e-06, |
|
"loss": 0.6839, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.5967829668194887, |
|
"grad_norm": 4.24437071856819, |
|
"learning_rate": 4.187167456218536e-06, |
|
"loss": 0.7096, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.5975600279742016, |
|
"grad_norm": 3.760444842640791, |
|
"learning_rate": 4.173788482648841e-06, |
|
"loss": 0.6495, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.5983370891289145, |
|
"grad_norm": 3.2749111360276086, |
|
"learning_rate": 4.1604155879595985e-06, |
|
"loss": 0.6266, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.5991141502836274, |
|
"grad_norm": 4.05061726263054, |
|
"learning_rate": 4.147048870542358e-06, |
|
"loss": 0.6682, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.5998912114383402, |
|
"grad_norm": 4.177296915658458, |
|
"learning_rate": 4.133688428743209e-06, |
|
"loss": 0.6504, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.6006682725930531, |
|
"grad_norm": 3.4374499956078997, |
|
"learning_rate": 4.120334360862078e-06, |
|
"loss": 0.6068, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.601445333747766, |
|
"grad_norm": 3.7771571359160374, |
|
"learning_rate": 4.106986765151992e-06, |
|
"loss": 0.6811, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.6022223949024789, |
|
"grad_norm": 2.755405096701383, |
|
"learning_rate": 4.093645739818357e-06, |
|
"loss": 0.6374, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.6029994560571917, |
|
"grad_norm": 4.718012688255332, |
|
"learning_rate": 4.080311383018239e-06, |
|
"loss": 0.7078, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.6037765172119046, |
|
"grad_norm": 2.894912540809299, |
|
"learning_rate": 4.06698379285964e-06, |
|
"loss": 0.6759, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.6045535783666175, |
|
"grad_norm": 3.025336800067562, |
|
"learning_rate": 4.0536630674007734e-06, |
|
"loss": 0.6109, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.6053306395213304, |
|
"grad_norm": 3.2614510795042126, |
|
"learning_rate": 4.040349304649351e-06, |
|
"loss": 0.685, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.6061077006760432, |
|
"grad_norm": 2.800252117497351, |
|
"learning_rate": 4.027042602561853e-06, |
|
"loss": 0.6498, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.6068847618307561, |
|
"grad_norm": 3.8460226274586122, |
|
"learning_rate": 4.013743059042808e-06, |
|
"loss": 0.6977, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.607661822985469, |
|
"grad_norm": 3.771896387641876, |
|
"learning_rate": 4.0004507719440795e-06, |
|
"loss": 0.6635, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.6084388841401819, |
|
"grad_norm": 3.1786304501140092, |
|
"learning_rate": 3.987165839064141e-06, |
|
"loss": 0.6758, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.6092159452948948, |
|
"grad_norm": 5.015425132509244, |
|
"learning_rate": 3.973888358147353e-06, |
|
"loss": 0.623, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.6099930064496076, |
|
"grad_norm": 4.27847425835873, |
|
"learning_rate": 3.9606184268832525e-06, |
|
"loss": 0.6758, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.6107700676043205, |
|
"grad_norm": 3.3936214832633507, |
|
"learning_rate": 3.947356142905827e-06, |
|
"loss": 0.6132, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.6115471287590334, |
|
"grad_norm": 2.5020153230654896, |
|
"learning_rate": 3.934101603792802e-06, |
|
"loss": 0.6084, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.6123241899137463, |
|
"grad_norm": 3.0348186320695936, |
|
"learning_rate": 3.920854907064912e-06, |
|
"loss": 0.6277, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.6131012510684591, |
|
"grad_norm": 4.926182627828219, |
|
"learning_rate": 3.907616150185205e-06, |
|
"loss": 0.6746, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.613878312223172, |
|
"grad_norm": 4.0423507052637735, |
|
"learning_rate": 3.894385430558297e-06, |
|
"loss": 0.6112, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.6146553733778849, |
|
"grad_norm": 3.549727749823181, |
|
"learning_rate": 3.881162845529678e-06, |
|
"loss": 0.6219, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.6154324345325977, |
|
"grad_norm": 4.713227361162499, |
|
"learning_rate": 3.867948492384983e-06, |
|
"loss": 0.6693, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.6162094956873105, |
|
"grad_norm": 3.471848373352376, |
|
"learning_rate": 3.854742468349283e-06, |
|
"loss": 0.6833, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.6169865568420234, |
|
"grad_norm": 7.217595191023394, |
|
"learning_rate": 3.841544870586369e-06, |
|
"loss": 0.6947, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.6177636179967363, |
|
"grad_norm": 2.9040989631629976, |
|
"learning_rate": 3.828355796198029e-06, |
|
"loss": 0.6342, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.6185406791514492, |
|
"grad_norm": 3.7080878359935268, |
|
"learning_rate": 3.815175342223349e-06, |
|
"loss": 0.6267, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.619317740306162, |
|
"grad_norm": 4.731993499154974, |
|
"learning_rate": 3.80200360563798e-06, |
|
"loss": 0.6319, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.6200948014608749, |
|
"grad_norm": 3.2422107203395267, |
|
"learning_rate": 3.7888406833534447e-06, |
|
"loss": 0.6219, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.6208718626155878, |
|
"grad_norm": 2.7384103955014565, |
|
"learning_rate": 3.7756866722164055e-06, |
|
"loss": 0.6304, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.6216489237703007, |
|
"grad_norm": 4.934854236839532, |
|
"learning_rate": 3.7625416690079674e-06, |
|
"loss": 0.5913, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.6224259849250136, |
|
"grad_norm": 5.278185394532136, |
|
"learning_rate": 3.749405770442954e-06, |
|
"loss": 0.6062, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.6232030460797264, |
|
"grad_norm": 3.745775463675437, |
|
"learning_rate": 3.7362790731692045e-06, |
|
"loss": 0.5785, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.6239801072344393, |
|
"grad_norm": 3.0793776700444893, |
|
"learning_rate": 3.7231616737668587e-06, |
|
"loss": 0.6212, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.6247571683891522, |
|
"grad_norm": 4.616140309647705, |
|
"learning_rate": 3.710053668747644e-06, |
|
"loss": 0.6978, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.6255342295438651, |
|
"grad_norm": 2.266055763696263, |
|
"learning_rate": 3.696955154554174e-06, |
|
"loss": 0.6677, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.6263112906985779, |
|
"grad_norm": 3.167710349649831, |
|
"learning_rate": 3.6838662275592285e-06, |
|
"loss": 0.5961, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.6270883518532908, |
|
"grad_norm": 3.6679021169417583, |
|
"learning_rate": 3.670786984065049e-06, |
|
"loss": 0.5932, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.6278654130080037, |
|
"grad_norm": 4.807394417840595, |
|
"learning_rate": 3.657717520302635e-06, |
|
"loss": 0.6507, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.6286424741627166, |
|
"grad_norm": 2.8567195928058697, |
|
"learning_rate": 3.6446579324310283e-06, |
|
"loss": 0.5622, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.6294195353174294, |
|
"grad_norm": 4.87655399348002, |
|
"learning_rate": 3.6316083165366066e-06, |
|
"loss": 0.6807, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.6301965964721423, |
|
"grad_norm": 3.7014748147970886, |
|
"learning_rate": 3.61856876863238e-06, |
|
"loss": 0.6127, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.6309736576268552, |
|
"grad_norm": 3.9766985471750482, |
|
"learning_rate": 3.6055393846572863e-06, |
|
"loss": 0.6355, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.6317507187815681, |
|
"grad_norm": 5.176163354598203, |
|
"learning_rate": 3.592520260475474e-06, |
|
"loss": 0.5764, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.632527779936281, |
|
"grad_norm": 3.3915897413256273, |
|
"learning_rate": 3.579511491875614e-06, |
|
"loss": 0.5824, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.6333048410909938, |
|
"grad_norm": 2.968301217496569, |
|
"learning_rate": 3.5665131745701796e-06, |
|
"loss": 0.6927, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.6340819022457067, |
|
"grad_norm": 3.4049937558114367, |
|
"learning_rate": 3.5535254041947487e-06, |
|
"loss": 0.6589, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.6348589634004196, |
|
"grad_norm": 3.0490199659476223, |
|
"learning_rate": 3.5405482763073006e-06, |
|
"loss": 0.6264, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.6356360245551325, |
|
"grad_norm": 4.610543482084557, |
|
"learning_rate": 3.5275818863875176e-06, |
|
"loss": 0.6298, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.6364130857098453, |
|
"grad_norm": 3.792284286942197, |
|
"learning_rate": 3.5146263298360676e-06, |
|
"loss": 0.6409, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.6371901468645582, |
|
"grad_norm": 4.791463361046891, |
|
"learning_rate": 3.501681701973917e-06, |
|
"loss": 0.5988, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6379672080192711, |
|
"grad_norm": 2.946227557833364, |
|
"learning_rate": 3.488748098041623e-06, |
|
"loss": 0.56, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.638744269173984, |
|
"grad_norm": 3.9143118513649013, |
|
"learning_rate": 3.4758256131986333e-06, |
|
"loss": 0.6102, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.6395213303286968, |
|
"grad_norm": 7.013871477575305, |
|
"learning_rate": 3.4629143425225893e-06, |
|
"loss": 0.6887, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.6402983914834097, |
|
"grad_norm": 3.771798826744058, |
|
"learning_rate": 3.4500143810086194e-06, |
|
"loss": 0.6373, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.6410754526381226, |
|
"grad_norm": 3.132474576222066, |
|
"learning_rate": 3.437125823568646e-06, |
|
"loss": 0.6452, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.6418525137928355, |
|
"grad_norm": 4.0341361359246, |
|
"learning_rate": 3.4242487650306867e-06, |
|
"loss": 0.65, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.6426295749475484, |
|
"grad_norm": 3.489817034481266, |
|
"learning_rate": 3.4113833001381575e-06, |
|
"loss": 0.6041, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.6434066361022612, |
|
"grad_norm": 4.207948013742414, |
|
"learning_rate": 3.398529523549169e-06, |
|
"loss": 0.6047, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.6441836972569741, |
|
"grad_norm": 3.300977059658827, |
|
"learning_rate": 3.3856875298358365e-06, |
|
"loss": 0.6619, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.644960758411687, |
|
"grad_norm": 3.8241041070180413, |
|
"learning_rate": 3.3728574134835846e-06, |
|
"loss": 0.6198, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.6457378195663999, |
|
"grad_norm": 3.875014176616493, |
|
"learning_rate": 3.360039268890446e-06, |
|
"loss": 0.6003, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.6465148807211127, |
|
"grad_norm": 3.2752573740495556, |
|
"learning_rate": 3.347233190366375e-06, |
|
"loss": 0.6101, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.6472919418758256, |
|
"grad_norm": 3.8745882003993177, |
|
"learning_rate": 3.3344392721325458e-06, |
|
"loss": 0.6248, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.6480690030305385, |
|
"grad_norm": 2.942894246587158, |
|
"learning_rate": 3.3216576083206637e-06, |
|
"loss": 0.6087, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.6488460641852514, |
|
"grad_norm": 2.990495379975504, |
|
"learning_rate": 3.308888292972273e-06, |
|
"loss": 0.5888, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.6496231253399642, |
|
"grad_norm": 3.376642101090337, |
|
"learning_rate": 3.2961314200380616e-06, |
|
"loss": 0.637, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.6504001864946771, |
|
"grad_norm": 3.4092448553804156, |
|
"learning_rate": 3.2833870833771753e-06, |
|
"loss": 0.6105, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.65117724764939, |
|
"grad_norm": 5.292717322884515, |
|
"learning_rate": 3.270655376756521e-06, |
|
"loss": 0.579, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.6519543088041029, |
|
"grad_norm": 3.7225346348995982, |
|
"learning_rate": 3.25793639385008e-06, |
|
"loss": 0.6072, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.6527313699588158, |
|
"grad_norm": 3.656912994279593, |
|
"learning_rate": 3.2452302282382185e-06, |
|
"loss": 0.5656, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.6535084311135286, |
|
"grad_norm": 5.191851471827204, |
|
"learning_rate": 3.232536973407e-06, |
|
"loss": 0.6353, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.6542854922682415, |
|
"grad_norm": 4.5342622406097135, |
|
"learning_rate": 3.2198567227474954e-06, |
|
"loss": 0.6239, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.6550625534229544, |
|
"grad_norm": 3.2997906214128507, |
|
"learning_rate": 3.207189569555096e-06, |
|
"loss": 0.6493, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.6558396145776673, |
|
"grad_norm": 3.7417655823104092, |
|
"learning_rate": 3.194535607028832e-06, |
|
"loss": 0.5765, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.6566166757323801, |
|
"grad_norm": 4.1174225350073685, |
|
"learning_rate": 3.1818949282706764e-06, |
|
"loss": 0.584, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.657393736887093, |
|
"grad_norm": 5.288074659352862, |
|
"learning_rate": 3.1692676262848732e-06, |
|
"loss": 0.5846, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.6581707980418059, |
|
"grad_norm": 6.8794935144127285, |
|
"learning_rate": 3.1566537939772433e-06, |
|
"loss": 0.6164, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.6589478591965188, |
|
"grad_norm": 3.369610724208555, |
|
"learning_rate": 3.1440535241545035e-06, |
|
"loss": 0.5667, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.6597249203512316, |
|
"grad_norm": 2.700055960128087, |
|
"learning_rate": 3.131466909523582e-06, |
|
"loss": 0.5729, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.6605019815059445, |
|
"grad_norm": 4.481552377327523, |
|
"learning_rate": 3.118894042690945e-06, |
|
"loss": 0.5639, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6612790426606574, |
|
"grad_norm": 5.130216388568981, |
|
"learning_rate": 3.1063350161619025e-06, |
|
"loss": 0.5904, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.6620561038153703, |
|
"grad_norm": 4.00502225199317, |
|
"learning_rate": 3.093789922339936e-06, |
|
"loss": 0.5998, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.6628331649700832, |
|
"grad_norm": 3.774461462354705, |
|
"learning_rate": 3.081258853526018e-06, |
|
"loss": 0.5886, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.663610226124796, |
|
"grad_norm": 2.821168583180078, |
|
"learning_rate": 3.0687419019179285e-06, |
|
"loss": 0.6011, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.6643872872795089, |
|
"grad_norm": 4.63573425963788, |
|
"learning_rate": 3.0562391596095833e-06, |
|
"loss": 0.61, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.6651643484342218, |
|
"grad_norm": 4.151701829585363, |
|
"learning_rate": 3.0437507185903516e-06, |
|
"loss": 0.6334, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.6659414095889347, |
|
"grad_norm": 3.1823244853803097, |
|
"learning_rate": 3.0312766707443784e-06, |
|
"loss": 0.6492, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.6667184707436475, |
|
"grad_norm": 3.494168616800063, |
|
"learning_rate": 3.0188171078499117e-06, |
|
"loss": 0.6293, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.6674955318983604, |
|
"grad_norm": 3.007455561802234, |
|
"learning_rate": 3.0063721215786274e-06, |
|
"loss": 0.6125, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.6682725930530733, |
|
"grad_norm": 4.328591303423522, |
|
"learning_rate": 2.99394180349495e-06, |
|
"loss": 0.6152, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.6690496542077862, |
|
"grad_norm": 3.0920402812840413, |
|
"learning_rate": 2.981526245055387e-06, |
|
"loss": 0.5768, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.669826715362499, |
|
"grad_norm": 2.9353592413440155, |
|
"learning_rate": 2.9691255376078464e-06, |
|
"loss": 0.542, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.6706037765172119, |
|
"grad_norm": 3.882400088723547, |
|
"learning_rate": 2.9567397723909725e-06, |
|
"loss": 0.519, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.6713808376719248, |
|
"grad_norm": 4.783097703300002, |
|
"learning_rate": 2.944369040533471e-06, |
|
"loss": 0.6396, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.6721578988266377, |
|
"grad_norm": 4.770262430972376, |
|
"learning_rate": 2.9320134330534367e-06, |
|
"loss": 0.6385, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.6729349599813506, |
|
"grad_norm": 3.1574059447890486, |
|
"learning_rate": 2.919673040857693e-06, |
|
"loss": 0.5935, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.6737120211360634, |
|
"grad_norm": 3.945392779400959, |
|
"learning_rate": 2.9073479547411087e-06, |
|
"loss": 0.6041, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.6744890822907763, |
|
"grad_norm": 3.834570241650989, |
|
"learning_rate": 2.89503826538594e-06, |
|
"loss": 0.5603, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.6752661434454892, |
|
"grad_norm": 3.322325574324924, |
|
"learning_rate": 2.882744063361165e-06, |
|
"loss": 0.5839, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.6760432046002021, |
|
"grad_norm": 5.400737978025128, |
|
"learning_rate": 2.870465439121807e-06, |
|
"loss": 0.6, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.6768202657549149, |
|
"grad_norm": 3.7907802256324614, |
|
"learning_rate": 2.8582024830082796e-06, |
|
"loss": 0.6255, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.6775973269096278, |
|
"grad_norm": 3.912677923882123, |
|
"learning_rate": 2.845955285245715e-06, |
|
"loss": 0.5545, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.6783743880643407, |
|
"grad_norm": 4.941243247209147, |
|
"learning_rate": 2.833723935943301e-06, |
|
"loss": 0.5684, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.6791514492190536, |
|
"grad_norm": 3.289971837418658, |
|
"learning_rate": 2.821508525093627e-06, |
|
"loss": 0.6519, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.6799285103737664, |
|
"grad_norm": 3.939920814084507, |
|
"learning_rate": 2.8093091425720097e-06, |
|
"loss": 0.6229, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.6807055715284793, |
|
"grad_norm": 4.336532929599707, |
|
"learning_rate": 2.797125878135837e-06, |
|
"loss": 0.5641, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.6814826326831922, |
|
"grad_norm": 3.322566385669406, |
|
"learning_rate": 2.784958821423907e-06, |
|
"loss": 0.6232, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.6822596938379051, |
|
"grad_norm": 4.200430984375038, |
|
"learning_rate": 2.7728080619557702e-06, |
|
"loss": 0.5977, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.683036754992618, |
|
"grad_norm": 3.740176445426232, |
|
"learning_rate": 2.760673689131068e-06, |
|
"loss": 0.6185, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.6838138161473308, |
|
"grad_norm": 2.1066076609366613, |
|
"learning_rate": 2.7485557922288776e-06, |
|
"loss": 0.6274, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.6845908773020437, |
|
"grad_norm": 2.8053182283923213, |
|
"learning_rate": 2.736454460407055e-06, |
|
"loss": 0.6181, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.6853679384567566, |
|
"grad_norm": 3.437087088984394, |
|
"learning_rate": 2.724369782701578e-06, |
|
"loss": 0.621, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.6861449996114695, |
|
"grad_norm": 3.0623391960294595, |
|
"learning_rate": 2.7123018480258876e-06, |
|
"loss": 0.5441, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.6869220607661823, |
|
"grad_norm": 4.447855889156802, |
|
"learning_rate": 2.7002507451702394e-06, |
|
"loss": 0.5498, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.6876991219208952, |
|
"grad_norm": 3.328238936470799, |
|
"learning_rate": 2.688216562801052e-06, |
|
"loss": 0.5992, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.6884761830756081, |
|
"grad_norm": 4.421506555636393, |
|
"learning_rate": 2.6761993894602444e-06, |
|
"loss": 0.5945, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.689253244230321, |
|
"grad_norm": 5.322591815355897, |
|
"learning_rate": 2.664199313564598e-06, |
|
"loss": 0.5958, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.6900303053850338, |
|
"grad_norm": 3.7611828384663393, |
|
"learning_rate": 2.652216423405093e-06, |
|
"loss": 0.5645, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.6908073665397467, |
|
"grad_norm": 3.3085304945194176, |
|
"learning_rate": 2.6402508071462685e-06, |
|
"loss": 0.5821, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.6915844276944596, |
|
"grad_norm": 4.5103793305482105, |
|
"learning_rate": 2.6283025528255685e-06, |
|
"loss": 0.6111, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.6923614888491725, |
|
"grad_norm": 3.2568624242920623, |
|
"learning_rate": 2.6163717483526953e-06, |
|
"loss": 0.5546, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.6931385500038854, |
|
"grad_norm": 2.973519357151336, |
|
"learning_rate": 2.6044584815089667e-06, |
|
"loss": 0.5685, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.6939156111585982, |
|
"grad_norm": 3.5837020468987166, |
|
"learning_rate": 2.592562839946664e-06, |
|
"loss": 0.5456, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.6946926723133111, |
|
"grad_norm": 4.064184411405787, |
|
"learning_rate": 2.5806849111883913e-06, |
|
"loss": 0.559, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.695469733468024, |
|
"grad_norm": 3.3437426814478406, |
|
"learning_rate": 2.56882478262643e-06, |
|
"loss": 0.5538, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.6962467946227369, |
|
"grad_norm": 3.107677218552789, |
|
"learning_rate": 2.556982541522094e-06, |
|
"loss": 0.5383, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.6970238557774496, |
|
"grad_norm": 2.882272796253547, |
|
"learning_rate": 2.5451582750050896e-06, |
|
"loss": 0.5698, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.6978009169321625, |
|
"grad_norm": 3.2190081599711164, |
|
"learning_rate": 2.5333520700728793e-06, |
|
"loss": 0.5581, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.6985779780868754, |
|
"grad_norm": 4.12751667992376, |
|
"learning_rate": 2.521564013590031e-06, |
|
"loss": 0.5334, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.6993550392415883, |
|
"grad_norm": 4.145588694570731, |
|
"learning_rate": 2.509794192287588e-06, |
|
"loss": 0.561, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.7001321003963011, |
|
"grad_norm": 3.155212860949128, |
|
"learning_rate": 2.498042692762426e-06, |
|
"loss": 0.5418, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.700909161551014, |
|
"grad_norm": 3.2632869764204897, |
|
"learning_rate": 2.4863096014766193e-06, |
|
"loss": 0.5411, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.7016862227057269, |
|
"grad_norm": 4.001715026222935, |
|
"learning_rate": 2.474595004756799e-06, |
|
"loss": 0.5589, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.7024632838604398, |
|
"grad_norm": 3.3415316677677325, |
|
"learning_rate": 2.4628989887935266e-06, |
|
"loss": 0.537, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.7032403450151526, |
|
"grad_norm": 5.797689446433965, |
|
"learning_rate": 2.4512216396406552e-06, |
|
"loss": 0.6243, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.7040174061698655, |
|
"grad_norm": 4.284101589916973, |
|
"learning_rate": 2.4395630432146926e-06, |
|
"loss": 0.5817, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.7047944673245784, |
|
"grad_norm": 3.211724547014886, |
|
"learning_rate": 2.427923285294174e-06, |
|
"loss": 0.5788, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.7055715284792913, |
|
"grad_norm": 3.2055910232947085, |
|
"learning_rate": 2.4163024515190293e-06, |
|
"loss": 0.5311, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.7063485896340042, |
|
"grad_norm": 4.255051995836248, |
|
"learning_rate": 2.4047006273899527e-06, |
|
"loss": 0.5713, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.707125650788717, |
|
"grad_norm": 4.597394692328588, |
|
"learning_rate": 2.393117898267779e-06, |
|
"loss": 0.6031, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.7079027119434299, |
|
"grad_norm": 3.2150862347569933, |
|
"learning_rate": 2.3815543493728454e-06, |
|
"loss": 0.5594, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.7086797730981428, |
|
"grad_norm": 4.683878110698539, |
|
"learning_rate": 2.370010065784372e-06, |
|
"loss": 0.5461, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.7094568342528557, |
|
"grad_norm": 4.033438486304492, |
|
"learning_rate": 2.358485132439831e-06, |
|
"loss": 0.5815, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.7102338954075685, |
|
"grad_norm": 3.3703523652063168, |
|
"learning_rate": 2.3469796341343315e-06, |
|
"loss": 0.5247, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.7110109565622814, |
|
"grad_norm": 4.325956291425198, |
|
"learning_rate": 2.33549365551998e-06, |
|
"loss": 0.5387, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.7117880177169943, |
|
"grad_norm": 2.490947555344077, |
|
"learning_rate": 2.3240272811052738e-06, |
|
"loss": 0.5776, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.7125650788717072, |
|
"grad_norm": 4.949535189967038, |
|
"learning_rate": 2.3125805952544666e-06, |
|
"loss": 0.5842, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.71334214002642, |
|
"grad_norm": 3.670543908233672, |
|
"learning_rate": 2.301153682186954e-06, |
|
"loss": 0.53, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.7141192011811329, |
|
"grad_norm": 4.866130796619525, |
|
"learning_rate": 2.289746625976653e-06, |
|
"loss": 0.5681, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.7148962623358458, |
|
"grad_norm": 3.4112599844471467, |
|
"learning_rate": 2.2783595105513832e-06, |
|
"loss": 0.5575, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.7156733234905587, |
|
"grad_norm": 3.844471466545408, |
|
"learning_rate": 2.266992419692247e-06, |
|
"loss": 0.5716, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.7164503846452716, |
|
"grad_norm": 3.3046961399811474, |
|
"learning_rate": 2.2556454370330195e-06, |
|
"loss": 0.5431, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.7172274457999844, |
|
"grad_norm": 2.960816022759597, |
|
"learning_rate": 2.2443186460595277e-06, |
|
"loss": 0.5502, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.7180045069546973, |
|
"grad_norm": 3.7931643481456794, |
|
"learning_rate": 2.2330121301090362e-06, |
|
"loss": 0.5844, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.7187815681094102, |
|
"grad_norm": 3.4283490865176853, |
|
"learning_rate": 2.221725972369635e-06, |
|
"loss": 0.5568, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.7195586292641231, |
|
"grad_norm": 4.3583902590026895, |
|
"learning_rate": 2.210460255879629e-06, |
|
"loss": 0.5173, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.7203356904188359, |
|
"grad_norm": 3.653581931257441, |
|
"learning_rate": 2.1992150635269233e-06, |
|
"loss": 0.5229, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.7211127515735488, |
|
"grad_norm": 4.770502864647989, |
|
"learning_rate": 2.187990478048423e-06, |
|
"loss": 0.5761, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.7218898127282617, |
|
"grad_norm": 3.878473847618142, |
|
"learning_rate": 2.1767865820294093e-06, |
|
"loss": 0.4937, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.7226668738829746, |
|
"grad_norm": 3.9771101901252157, |
|
"learning_rate": 2.165603457902945e-06, |
|
"loss": 0.5237, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.7234439350376874, |
|
"grad_norm": 3.533717896030411, |
|
"learning_rate": 2.1544411879492597e-06, |
|
"loss": 0.5743, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.7242209961924003, |
|
"grad_norm": 3.65695725762207, |
|
"learning_rate": 2.143299854295149e-06, |
|
"loss": 0.5824, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.7249980573471132, |
|
"grad_norm": 2.492214523438049, |
|
"learning_rate": 2.13217953891337e-06, |
|
"loss": 0.5274, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.7257751185018261, |
|
"grad_norm": 3.386138297909339, |
|
"learning_rate": 2.121080323622038e-06, |
|
"loss": 0.5612, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.726552179656539, |
|
"grad_norm": 3.9436014142777096, |
|
"learning_rate": 2.1100022900840208e-06, |
|
"loss": 0.5317, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.7273292408112518, |
|
"grad_norm": 4.412376927983859, |
|
"learning_rate": 2.0989455198063415e-06, |
|
"loss": 0.574, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.7281063019659647, |
|
"grad_norm": 2.3279248382650737, |
|
"learning_rate": 2.0879100941395787e-06, |
|
"loss": 0.5289, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.7288833631206776, |
|
"grad_norm": 4.637433311164565, |
|
"learning_rate": 2.076896094277265e-06, |
|
"loss": 0.5622, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.7296604242753905, |
|
"grad_norm": 4.904954853760184, |
|
"learning_rate": 2.065903601255297e-06, |
|
"loss": 0.5176, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.7304374854301033, |
|
"grad_norm": 3.729037710128586, |
|
"learning_rate": 2.0549326959513287e-06, |
|
"loss": 0.5315, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.7312145465848162, |
|
"grad_norm": 3.5966860873794966, |
|
"learning_rate": 2.0439834590841833e-06, |
|
"loss": 0.5177, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.7319916077395291, |
|
"grad_norm": 4.464459321144577, |
|
"learning_rate": 2.0330559712132614e-06, |
|
"loss": 0.5484, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.732768668894242, |
|
"grad_norm": 4.589314499941277, |
|
"learning_rate": 2.022150312737939e-06, |
|
"loss": 0.5467, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.7335457300489548, |
|
"grad_norm": 4.017841935745773, |
|
"learning_rate": 2.0112665638969842e-06, |
|
"loss": 0.5266, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.7343227912036677, |
|
"grad_norm": 3.0931816369991703, |
|
"learning_rate": 2.0004048047679624e-06, |
|
"loss": 0.5767, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.7350998523583806, |
|
"grad_norm": 4.495169108132031, |
|
"learning_rate": 1.9895651152666538e-06, |
|
"loss": 0.5613, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.7358769135130935, |
|
"grad_norm": 4.1470825704755, |
|
"learning_rate": 1.978747575146455e-06, |
|
"loss": 0.5111, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.7366539746678064, |
|
"grad_norm": 4.197560473624663, |
|
"learning_rate": 1.967952263997801e-06, |
|
"loss": 0.5538, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.7374310358225192, |
|
"grad_norm": 3.7319528048077246, |
|
"learning_rate": 1.9571792612475747e-06, |
|
"loss": 0.5741, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.7382080969772321, |
|
"grad_norm": 5.01956999231008, |
|
"learning_rate": 1.9464286461585223e-06, |
|
"loss": 0.5357, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.738985158131945, |
|
"grad_norm": 3.7344522235830264, |
|
"learning_rate": 1.9357004978286777e-06, |
|
"loss": 0.5369, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.7397622192866579, |
|
"grad_norm": 5.534900941588667, |
|
"learning_rate": 1.924994895190772e-06, |
|
"loss": 0.547, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.7405392804413707, |
|
"grad_norm": 3.544511900994509, |
|
"learning_rate": 1.9143119170116534e-06, |
|
"loss": 0.5365, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.7413163415960836, |
|
"grad_norm": 3.617025368147638, |
|
"learning_rate": 1.9036516418917128e-06, |
|
"loss": 0.576, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.7420934027507965, |
|
"grad_norm": 2.717825183803928, |
|
"learning_rate": 1.8930141482643005e-06, |
|
"loss": 0.5528, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.7428704639055094, |
|
"grad_norm": 3.8576185713414732, |
|
"learning_rate": 1.88239951439515e-06, |
|
"loss": 0.5505, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.7436475250602222, |
|
"grad_norm": 5.360570148700179, |
|
"learning_rate": 1.8718078183818094e-06, |
|
"loss": 0.547, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.7444245862149351, |
|
"grad_norm": 3.9702986251974126, |
|
"learning_rate": 1.8612391381530548e-06, |
|
"loss": 0.5361, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.745201647369648, |
|
"grad_norm": 4.210077667591901, |
|
"learning_rate": 1.8506935514683244e-06, |
|
"loss": 0.5558, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.7459787085243609, |
|
"grad_norm": 4.27553292233449, |
|
"learning_rate": 1.8401711359171438e-06, |
|
"loss": 0.5406, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.7467557696790738, |
|
"grad_norm": 5.023769063952561, |
|
"learning_rate": 1.82967196891856e-06, |
|
"loss": 0.5345, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.7475328308337866, |
|
"grad_norm": 3.7148918067051353, |
|
"learning_rate": 1.819196127720565e-06, |
|
"loss": 0.5417, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.7483098919884995, |
|
"grad_norm": 4.636272948323283, |
|
"learning_rate": 1.808743689399528e-06, |
|
"loss": 0.5792, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.7490869531432124, |
|
"grad_norm": 3.103713105912325, |
|
"learning_rate": 1.798314730859637e-06, |
|
"loss": 0.5527, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.7498640142979253, |
|
"grad_norm": 3.204765078923141, |
|
"learning_rate": 1.787909328832323e-06, |
|
"loss": 0.5491, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.7506410754526381, |
|
"grad_norm": 4.894522393499138, |
|
"learning_rate": 1.7775275598756974e-06, |
|
"loss": 0.5553, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.751418136607351, |
|
"grad_norm": 3.428628239034369, |
|
"learning_rate": 1.7671695003739935e-06, |
|
"loss": 0.5143, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.7521951977620639, |
|
"grad_norm": 4.535044446134579, |
|
"learning_rate": 1.7568352265369987e-06, |
|
"loss": 0.5291, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.7529722589167768, |
|
"grad_norm": 4.546057980769502, |
|
"learning_rate": 1.7465248143995011e-06, |
|
"loss": 0.5271, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.7537493200714896, |
|
"grad_norm": 3.6725535134363785, |
|
"learning_rate": 1.7362383398207189e-06, |
|
"loss": 0.5665, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.7545263812262025, |
|
"grad_norm": 3.3515951674477793, |
|
"learning_rate": 1.725975878483757e-06, |
|
"loss": 0.5282, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.7553034423809154, |
|
"grad_norm": 4.187132180488078, |
|
"learning_rate": 1.7157375058950349e-06, |
|
"loss": 0.5572, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.7560805035356283, |
|
"grad_norm": 3.013413844455128, |
|
"learning_rate": 1.705523297383741e-06, |
|
"loss": 0.5502, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.7568575646903412, |
|
"grad_norm": 4.034990404281864, |
|
"learning_rate": 1.6953333281012745e-06, |
|
"loss": 0.5557, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.757634625845054, |
|
"grad_norm": 4.2869070311052475, |
|
"learning_rate": 1.6851676730206978e-06, |
|
"loss": 0.5067, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.7584116869997669, |
|
"grad_norm": 2.55851587794808, |
|
"learning_rate": 1.6750264069361755e-06, |
|
"loss": 0.521, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.7591887481544798, |
|
"grad_norm": 3.860783467248806, |
|
"learning_rate": 1.664909604462432e-06, |
|
"loss": 0.5162, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.7599658093091927, |
|
"grad_norm": 2.964535685167722, |
|
"learning_rate": 1.6548173400341988e-06, |
|
"loss": 0.4662, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.7607428704639055, |
|
"grad_norm": 4.5148211810505, |
|
"learning_rate": 1.6447496879056667e-06, |
|
"loss": 0.5326, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.7615199316186184, |
|
"grad_norm": 2.9731810276505595, |
|
"learning_rate": 1.6347067221499441e-06, |
|
"loss": 0.5221, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.7622969927733313, |
|
"grad_norm": 4.225015592243322, |
|
"learning_rate": 1.6246885166585081e-06, |
|
"loss": 0.5404, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.7630740539280442, |
|
"grad_norm": 4.195775975703309, |
|
"learning_rate": 1.6146951451406583e-06, |
|
"loss": 0.4837, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.763851115082757, |
|
"grad_norm": 2.77408092127348, |
|
"learning_rate": 1.604726681122979e-06, |
|
"loss": 0.4849, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.7646281762374699, |
|
"grad_norm": 4.215861830136612, |
|
"learning_rate": 1.5947831979487966e-06, |
|
"loss": 0.5925, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.7654052373921828, |
|
"grad_norm": 3.1030479659610393, |
|
"learning_rate": 1.5848647687776397e-06, |
|
"loss": 0.5019, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.7661822985468957, |
|
"grad_norm": 3.926045471634979, |
|
"learning_rate": 1.574971466584701e-06, |
|
"loss": 0.5124, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.7669593597016086, |
|
"grad_norm": 4.015070211236076, |
|
"learning_rate": 1.5651033641603041e-06, |
|
"loss": 0.5314, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.7677364208563214, |
|
"grad_norm": 3.649601860518483, |
|
"learning_rate": 1.555260534109359e-06, |
|
"loss": 0.5089, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.7685134820110343, |
|
"grad_norm": 3.604893647217938, |
|
"learning_rate": 1.5454430488508359e-06, |
|
"loss": 0.5472, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.7692905431657472, |
|
"grad_norm": 3.3095117069291624, |
|
"learning_rate": 1.5356509806172315e-06, |
|
"loss": 0.5168, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.7700676043204601, |
|
"grad_norm": 3.8970071625899445, |
|
"learning_rate": 1.525884401454033e-06, |
|
"loss": 0.5485, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.7708446654751729, |
|
"grad_norm": 2.80658001169654, |
|
"learning_rate": 1.5161433832191902e-06, |
|
"loss": 0.5044, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.7716217266298858, |
|
"grad_norm": 3.1868297865512214, |
|
"learning_rate": 1.5064279975825923e-06, |
|
"loss": 0.4934, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.7723987877845987, |
|
"grad_norm": 3.0425811492999366, |
|
"learning_rate": 1.4967383160255316e-06, |
|
"loss": 0.5183, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.7731758489393116, |
|
"grad_norm": 4.54933754793044, |
|
"learning_rate": 1.4870744098401819e-06, |
|
"loss": 0.5306, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.7739529100940244, |
|
"grad_norm": 3.931701576666515, |
|
"learning_rate": 1.4774363501290755e-06, |
|
"loss": 0.5415, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.7747299712487373, |
|
"grad_norm": 3.282020379585411, |
|
"learning_rate": 1.4678242078045756e-06, |
|
"loss": 0.5421, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.7755070324034502, |
|
"grad_norm": 3.2735246508623366, |
|
"learning_rate": 1.4582380535883622e-06, |
|
"loss": 0.5452, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.7762840935581631, |
|
"grad_norm": 3.2961538894269067, |
|
"learning_rate": 1.4486779580109012e-06, |
|
"loss": 0.5254, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.777061154712876, |
|
"grad_norm": 4.499334024075413, |
|
"learning_rate": 1.4391439914109367e-06, |
|
"loss": 0.4899, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.777061154712876, |
|
"eval_loss": 0.5171714425086975, |
|
"eval_runtime": 472.4039, |
|
"eval_samples_per_second": 22.94, |
|
"eval_steps_per_second": 2.868, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7778382158675888, |
|
"grad_norm": 4.012283871593952, |
|
"learning_rate": 1.429636223934963e-06, |
|
"loss": 0.4927, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.7786152770223017, |
|
"grad_norm": 3.483797094263642, |
|
"learning_rate": 1.4201547255367165e-06, |
|
"loss": 0.5085, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.7793923381770145, |
|
"grad_norm": 4.75329332254169, |
|
"learning_rate": 1.4106995659766547e-06, |
|
"loss": 0.5058, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.7801693993317274, |
|
"grad_norm": 3.68815778033119, |
|
"learning_rate": 1.4012708148214522e-06, |
|
"loss": 0.5265, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.7809464604864402, |
|
"grad_norm": 3.4635761925286306, |
|
"learning_rate": 1.3918685414434763e-06, |
|
"loss": 0.4623, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.7817235216411531, |
|
"grad_norm": 4.024245798823526, |
|
"learning_rate": 1.3824928150202866e-06, |
|
"loss": 0.4865, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.782500582795866, |
|
"grad_norm": 3.876558527294442, |
|
"learning_rate": 1.3731437045341218e-06, |
|
"loss": 0.5297, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.7832776439505789, |
|
"grad_norm": 4.13041441043086, |
|
"learning_rate": 1.363821278771391e-06, |
|
"loss": 0.5588, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.7840547051052917, |
|
"grad_norm": 4.828512693632229, |
|
"learning_rate": 1.3545256063221745e-06, |
|
"loss": 0.5241, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.7848317662600046, |
|
"grad_norm": 3.330489049598463, |
|
"learning_rate": 1.3452567555797085e-06, |
|
"loss": 0.5351, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.7856088274147175, |
|
"grad_norm": 3.577340154782965, |
|
"learning_rate": 1.3360147947398927e-06, |
|
"loss": 0.4874, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.7863858885694304, |
|
"grad_norm": 4.201117799816586, |
|
"learning_rate": 1.3267997918007792e-06, |
|
"loss": 0.5148, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.7871629497241432, |
|
"grad_norm": 2.5965256135200643, |
|
"learning_rate": 1.3176118145620775e-06, |
|
"loss": 0.4988, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.7879400108788561, |
|
"grad_norm": 2.397365078889302, |
|
"learning_rate": 1.3084509306246562e-06, |
|
"loss": 0.4687, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.788717072033569, |
|
"grad_norm": 5.5016070521496, |
|
"learning_rate": 1.29931720739004e-06, |
|
"loss": 0.518, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.7894941331882819, |
|
"grad_norm": 4.9408112199928444, |
|
"learning_rate": 1.2902107120599249e-06, |
|
"loss": 0.5312, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.7902711943429948, |
|
"grad_norm": 3.557763106103323, |
|
"learning_rate": 1.2811315116356698e-06, |
|
"loss": 0.5196, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.7910482554977076, |
|
"grad_norm": 4.192138798834655, |
|
"learning_rate": 1.2720796729178115e-06, |
|
"loss": 0.527, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.7918253166524205, |
|
"grad_norm": 3.586108157059095, |
|
"learning_rate": 1.2630552625055763e-06, |
|
"loss": 0.5347, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.7926023778071334, |
|
"grad_norm": 3.9368756234903195, |
|
"learning_rate": 1.2540583467963817e-06, |
|
"loss": 0.4811, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.7933794389618463, |
|
"grad_norm": 4.518574036325759, |
|
"learning_rate": 1.245088991985352e-06, |
|
"loss": 0.5086, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.7941565001165591, |
|
"grad_norm": 3.850061816242949, |
|
"learning_rate": 1.2361472640648347e-06, |
|
"loss": 0.4862, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.794933561271272, |
|
"grad_norm": 3.5644700141713064, |
|
"learning_rate": 1.227233228823908e-06, |
|
"loss": 0.5303, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.7957106224259849, |
|
"grad_norm": 2.1351987055036985, |
|
"learning_rate": 1.2183469518479018e-06, |
|
"loss": 0.5179, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.7964876835806978, |
|
"grad_norm": 5.25048528063306, |
|
"learning_rate": 1.2094884985179117e-06, |
|
"loss": 0.5318, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.7972647447354106, |
|
"grad_norm": 2.1615227439546745, |
|
"learning_rate": 1.200657934010323e-06, |
|
"loss": 0.4547, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.7980418058901235, |
|
"grad_norm": 2.6751655695167154, |
|
"learning_rate": 1.1918553232963237e-06, |
|
"loss": 0.5134, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.7988188670448364, |
|
"grad_norm": 4.159654861888376, |
|
"learning_rate": 1.1830807311414355e-06, |
|
"loss": 0.524, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.7995959281995493, |
|
"grad_norm": 3.6944240100922214, |
|
"learning_rate": 1.1743342221050314e-06, |
|
"loss": 0.5175, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.8003729893542622, |
|
"grad_norm": 4.133885672495875, |
|
"learning_rate": 1.1656158605398599e-06, |
|
"loss": 0.4854, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.801150050508975, |
|
"grad_norm": 4.0354219471053305, |
|
"learning_rate": 1.1569257105915743e-06, |
|
"loss": 0.5293, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.8019271116636879, |
|
"grad_norm": 4.987229671719538, |
|
"learning_rate": 1.1482638361982595e-06, |
|
"loss": 0.5067, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.8027041728184008, |
|
"grad_norm": 4.060534061900532, |
|
"learning_rate": 1.1396303010899623e-06, |
|
"loss": 0.5031, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.8034812339731137, |
|
"grad_norm": 3.8027639891295615, |
|
"learning_rate": 1.131025168788225e-06, |
|
"loss": 0.5339, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.8042582951278265, |
|
"grad_norm": 4.5696870186179215, |
|
"learning_rate": 1.122448502605611e-06, |
|
"loss": 0.5187, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.8050353562825394, |
|
"grad_norm": 3.4544068898990257, |
|
"learning_rate": 1.1139003656452451e-06, |
|
"loss": 0.5012, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.8058124174372523, |
|
"grad_norm": 4.024795478219517, |
|
"learning_rate": 1.1053808208003463e-06, |
|
"loss": 0.5039, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.8065894785919652, |
|
"grad_norm": 3.451121303154774, |
|
"learning_rate": 1.0968899307537688e-06, |
|
"loss": 0.5096, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.807366539746678, |
|
"grad_norm": 3.5430435341751374, |
|
"learning_rate": 1.088427757977535e-06, |
|
"loss": 0.4995, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.8081436009013909, |
|
"grad_norm": 3.3568799457193315, |
|
"learning_rate": 1.0799943647323823e-06, |
|
"loss": 0.4896, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.8089206620561038, |
|
"grad_norm": 2.7324998256576265, |
|
"learning_rate": 1.071589813067298e-06, |
|
"loss": 0.4757, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.8096977232108167, |
|
"grad_norm": 3.114681260826415, |
|
"learning_rate": 1.0632141648190685e-06, |
|
"loss": 0.5033, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.8104747843655296, |
|
"grad_norm": 3.7347524196800856, |
|
"learning_rate": 1.054867481611822e-06, |
|
"loss": 0.4849, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.8112518455202424, |
|
"grad_norm": 2.4431545580868423, |
|
"learning_rate": 1.046549824856574e-06, |
|
"loss": 0.4344, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.8120289066749553, |
|
"grad_norm": 3.370757705323888, |
|
"learning_rate": 1.038261255750781e-06, |
|
"loss": 0.4419, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.8128059678296682, |
|
"grad_norm": 4.176509993840626, |
|
"learning_rate": 1.0300018352778817e-06, |
|
"loss": 0.4905, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.8135830289843811, |
|
"grad_norm": 4.2860515845724505, |
|
"learning_rate": 1.0217716242068525e-06, |
|
"loss": 0.4989, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.8143600901390939, |
|
"grad_norm": 2.914685646542763, |
|
"learning_rate": 1.0135706830917663e-06, |
|
"loss": 0.4527, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.8151371512938068, |
|
"grad_norm": 4.781204814322438, |
|
"learning_rate": 1.0053990722713347e-06, |
|
"loss": 0.5185, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.8159142124485197, |
|
"grad_norm": 4.336551191079965, |
|
"learning_rate": 9.97256851868474e-07, |
|
"loss": 0.5453, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.8166912736032326, |
|
"grad_norm": 3.980153258528895, |
|
"learning_rate": 9.891440817898569e-07, |
|
"loss": 0.4476, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.8174683347579454, |
|
"grad_norm": 4.803099851628047, |
|
"learning_rate": 9.810608217254785e-07, |
|
"loss": 0.4535, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.8182453959126583, |
|
"grad_norm": 5.434746877487003, |
|
"learning_rate": 9.730071311482104e-07, |
|
"loss": 0.5266, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.8190224570673712, |
|
"grad_norm": 4.132134349770947, |
|
"learning_rate": 9.649830693133649e-07, |
|
"loss": 0.4794, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.8197995182220841, |
|
"grad_norm": 3.8042895258614657, |
|
"learning_rate": 9.569886952582613e-07, |
|
"loss": 0.4857, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.820576579376797, |
|
"grad_norm": 4.505324473871432, |
|
"learning_rate": 9.49024067801787e-07, |
|
"loss": 0.4773, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.8213536405315098, |
|
"grad_norm": 4.085373275991255, |
|
"learning_rate": 9.410892455439724e-07, |
|
"loss": 0.5123, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.8221307016862227, |
|
"grad_norm": 2.8077333631243047, |
|
"learning_rate": 9.331842868655538e-07, |
|
"loss": 0.4766, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.8229077628409356, |
|
"grad_norm": 4.995807097173484, |
|
"learning_rate": 9.253092499275435e-07, |
|
"loss": 0.5059, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.8236848239956485, |
|
"grad_norm": 3.0312698428527085, |
|
"learning_rate": 9.174641926708028e-07, |
|
"loss": 0.5072, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.8244618851503613, |
|
"grad_norm": 3.6228940116700166, |
|
"learning_rate": 9.096491728156187e-07, |
|
"loss": 0.5157, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.8252389463050742, |
|
"grad_norm": 4.4841778480785885, |
|
"learning_rate": 9.018642478612755e-07, |
|
"loss": 0.5325, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.8260160074597871, |
|
"grad_norm": 3.7081609263257596, |
|
"learning_rate": 8.941094750856349e-07, |
|
"loss": 0.5225, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.8267930686145, |
|
"grad_norm": 2.9403067849013493, |
|
"learning_rate": 8.863849115447121e-07, |
|
"loss": 0.4859, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.8275701297692128, |
|
"grad_norm": 3.9121829857836925, |
|
"learning_rate": 8.786906140722551e-07, |
|
"loss": 0.4704, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.8283471909239257, |
|
"grad_norm": 3.7718616897098234, |
|
"learning_rate": 8.710266392793293e-07, |
|
"loss": 0.5054, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.8291242520786386, |
|
"grad_norm": 3.108303958961309, |
|
"learning_rate": 8.633930435539023e-07, |
|
"loss": 0.5006, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.8299013132333515, |
|
"grad_norm": 2.5549313563071725, |
|
"learning_rate": 8.557898830604239e-07, |
|
"loss": 0.4795, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.8306783743880644, |
|
"grad_norm": 3.459144570766454, |
|
"learning_rate": 8.48217213739414e-07, |
|
"loss": 0.5052, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.8314554355427772, |
|
"grad_norm": 3.8583077857999992, |
|
"learning_rate": 8.406750913070582e-07, |
|
"loss": 0.5121, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.8322324966974901, |
|
"grad_norm": 3.963740775603707, |
|
"learning_rate": 8.33163571254787e-07, |
|
"loss": 0.4949, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.833009557852203, |
|
"grad_norm": 4.576071555267779, |
|
"learning_rate": 8.256827088488756e-07, |
|
"loss": 0.488, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.8337866190069159, |
|
"grad_norm": 4.018939367025651, |
|
"learning_rate": 8.182325591300333e-07, |
|
"loss": 0.4584, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.8345636801616287, |
|
"grad_norm": 5.537702555635495, |
|
"learning_rate": 8.10813176912999e-07, |
|
"loss": 0.5078, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.8353407413163416, |
|
"grad_norm": 4.521346564196193, |
|
"learning_rate": 8.03424616786142e-07, |
|
"loss": 0.5017, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.8361178024710545, |
|
"grad_norm": 4.426790844413774, |
|
"learning_rate": 7.960669331110521e-07, |
|
"loss": 0.4832, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.8368948636257674, |
|
"grad_norm": 4.986892159186973, |
|
"learning_rate": 7.887401800221495e-07, |
|
"loss": 0.5278, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.8376719247804802, |
|
"grad_norm": 3.034636301392233, |
|
"learning_rate": 7.814444114262786e-07, |
|
"loss": 0.4996, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.8384489859351931, |
|
"grad_norm": 2.63148766912681, |
|
"learning_rate": 7.741796810023139e-07, |
|
"loss": 0.4839, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.839226047089906, |
|
"grad_norm": 4.33674902614418, |
|
"learning_rate": 7.669460422007657e-07, |
|
"loss": 0.439, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.8400031082446189, |
|
"grad_norm": 4.048856363638596, |
|
"learning_rate": 7.597435482433896e-07, |
|
"loss": 0.4783, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.8407801693993318, |
|
"grad_norm": 3.925372203600619, |
|
"learning_rate": 7.525722521227885e-07, |
|
"loss": 0.5017, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.8415572305540446, |
|
"grad_norm": 2.3654265887367054, |
|
"learning_rate": 7.45432206602027e-07, |
|
"loss": 0.5123, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.8423342917087575, |
|
"grad_norm": 3.754610906804235, |
|
"learning_rate": 7.383234642142422e-07, |
|
"loss": 0.4907, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.8431113528634704, |
|
"grad_norm": 4.1554282145692625, |
|
"learning_rate": 7.312460772622565e-07, |
|
"loss": 0.5107, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.8438884140181833, |
|
"grad_norm": 3.319418655291393, |
|
"learning_rate": 7.242000978181963e-07, |
|
"loss": 0.5048, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.8446654751728961, |
|
"grad_norm": 4.374110046424012, |
|
"learning_rate": 7.171855777231058e-07, |
|
"loss": 0.4617, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.845442536327609, |
|
"grad_norm": 4.441680587693151, |
|
"learning_rate": 7.102025685865622e-07, |
|
"loss": 0.4959, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.8462195974823219, |
|
"grad_norm": 2.8350312541634803, |
|
"learning_rate": 7.032511217863031e-07, |
|
"loss": 0.4677, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.8469966586370348, |
|
"grad_norm": 3.982485022264907, |
|
"learning_rate": 6.963312884678441e-07, |
|
"loss": 0.4954, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.8477737197917476, |
|
"grad_norm": 4.590377956407083, |
|
"learning_rate": 6.894431195441037e-07, |
|
"loss": 0.5297, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.8485507809464605, |
|
"grad_norm": 2.408789067882966, |
|
"learning_rate": 6.825866656950264e-07, |
|
"loss": 0.445, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.8493278421011734, |
|
"grad_norm": 4.694687311202965, |
|
"learning_rate": 6.757619773672169e-07, |
|
"loss": 0.493, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.8501049032558863, |
|
"grad_norm": 4.491758478617379, |
|
"learning_rate": 6.689691047735597e-07, |
|
"loss": 0.5153, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.8508819644105992, |
|
"grad_norm": 3.959513693411194, |
|
"learning_rate": 6.62208097892853e-07, |
|
"loss": 0.4797, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.851659025565312, |
|
"grad_norm": 5.339647237399662, |
|
"learning_rate": 6.554790064694471e-07, |
|
"loss": 0.4897, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.8524360867200249, |
|
"grad_norm": 4.541122198536199, |
|
"learning_rate": 6.487818800128692e-07, |
|
"loss": 0.4698, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.8532131478747378, |
|
"grad_norm": 4.7468681798060395, |
|
"learning_rate": 6.421167677974622e-07, |
|
"loss": 0.5016, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.8539902090294507, |
|
"grad_norm": 4.381332344102587, |
|
"learning_rate": 6.354837188620278e-07, |
|
"loss": 0.51, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.8547672701841635, |
|
"grad_norm": 4.1592821906223705, |
|
"learning_rate": 6.288827820094562e-07, |
|
"loss": 0.4875, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.8555443313388764, |
|
"grad_norm": 5.029800475729443, |
|
"learning_rate": 6.223140058063737e-07, |
|
"loss": 0.4549, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.8563213924935893, |
|
"grad_norm": 3.254886843193101, |
|
"learning_rate": 6.157774385827847e-07, |
|
"loss": 0.4314, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.8570984536483022, |
|
"grad_norm": 3.434364877703452, |
|
"learning_rate": 6.092731284317111e-07, |
|
"loss": 0.4654, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.857875514803015, |
|
"grad_norm": 4.488825872633713, |
|
"learning_rate": 6.028011232088471e-07, |
|
"loss": 0.482, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.8586525759577279, |
|
"grad_norm": 3.0602137297514638, |
|
"learning_rate": 5.963614705321996e-07, |
|
"loss": 0.4618, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.8594296371124408, |
|
"grad_norm": 4.827196277112413, |
|
"learning_rate": 5.899542177817413e-07, |
|
"loss": 0.4525, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.8602066982671537, |
|
"grad_norm": 4.39228489153871, |
|
"learning_rate": 5.835794120990607e-07, |
|
"loss": 0.5458, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.8609837594218664, |
|
"grad_norm": 4.013851924684146, |
|
"learning_rate": 5.772371003870147e-07, |
|
"loss": 0.521, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.8617608205765793, |
|
"grad_norm": 4.599909020480007, |
|
"learning_rate": 5.709273293093865e-07, |
|
"loss": 0.4641, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.8625378817312922, |
|
"grad_norm": 3.522635100581711, |
|
"learning_rate": 5.646501452905406e-07, |
|
"loss": 0.4613, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.8633149428860051, |
|
"grad_norm": 4.170720600102606, |
|
"learning_rate": 5.584055945150807e-07, |
|
"loss": 0.4533, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.864092004040718, |
|
"grad_norm": 5.0485560375944365, |
|
"learning_rate": 5.521937229275087e-07, |
|
"loss": 0.4584, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.8648690651954308, |
|
"grad_norm": 4.0298286961319105, |
|
"learning_rate": 5.460145762318903e-07, |
|
"loss": 0.5072, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.8656461263501437, |
|
"grad_norm": 3.963316318056793, |
|
"learning_rate": 5.398681998915145e-07, |
|
"loss": 0.454, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.8664231875048566, |
|
"grad_norm": 2.2989684529089076, |
|
"learning_rate": 5.337546391285647e-07, |
|
"loss": 0.4753, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.8672002486595695, |
|
"grad_norm": 4.488811638369375, |
|
"learning_rate": 5.276739389237778e-07, |
|
"loss": 0.452, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.8679773098142823, |
|
"grad_norm": 4.7387272438267605, |
|
"learning_rate": 5.216261440161236e-07, |
|
"loss": 0.4891, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.8687543709689952, |
|
"grad_norm": 5.278573940043423, |
|
"learning_rate": 5.156112989024653e-07, |
|
"loss": 0.477, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.8695314321237081, |
|
"grad_norm": 3.9270271390134828, |
|
"learning_rate": 5.096294478372382e-07, |
|
"loss": 0.465, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.870308493278421, |
|
"grad_norm": 3.2023556593268427, |
|
"learning_rate": 5.036806348321238e-07, |
|
"loss": 0.4654, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.8710855544331338, |
|
"grad_norm": 4.204967484017854, |
|
"learning_rate": 4.977649036557225e-07, |
|
"loss": 0.4933, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.8718626155878467, |
|
"grad_norm": 3.8562465627781743, |
|
"learning_rate": 4.918822978332377e-07, |
|
"loss": 0.4487, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.8726396767425596, |
|
"grad_norm": 2.7494815741242484, |
|
"learning_rate": 4.860328606461485e-07, |
|
"loss": 0.4637, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.8734167378972725, |
|
"grad_norm": 2.1088033052796895, |
|
"learning_rate": 4.802166351318965e-07, |
|
"loss": 0.4899, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.8741937990519854, |
|
"grad_norm": 5.113207022204942, |
|
"learning_rate": 4.7443366408356673e-07, |
|
"loss": 0.5035, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.8749708602066982, |
|
"grad_norm": 3.993509884814402, |
|
"learning_rate": 4.6868399004957266e-07, |
|
"loss": 0.4983, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.8757479213614111, |
|
"grad_norm": 6.019062769443196, |
|
"learning_rate": 4.6296765533334345e-07, |
|
"loss": 0.5127, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.876524982516124, |
|
"grad_norm": 3.42141410170646, |
|
"learning_rate": 4.57284701993016e-07, |
|
"loss": 0.4686, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.8773020436708369, |
|
"grad_norm": 4.401665485132851, |
|
"learning_rate": 4.5163517184111885e-07, |
|
"loss": 0.4423, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.8780791048255497, |
|
"grad_norm": 1.7965008908739462, |
|
"learning_rate": 4.460191064442704e-07, |
|
"loss": 0.5013, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.8788561659802626, |
|
"grad_norm": 4.038506349330642, |
|
"learning_rate": 4.4043654712287e-07, |
|
"loss": 0.4681, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.8796332271349755, |
|
"grad_norm": 2.6713825342303084, |
|
"learning_rate": 4.348875349507953e-07, |
|
"loss": 0.4723, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.8804102882896884, |
|
"grad_norm": 2.5242881927131493, |
|
"learning_rate": 4.293721107551002e-07, |
|
"loss": 0.4948, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.8811873494444012, |
|
"grad_norm": 3.089605520005084, |
|
"learning_rate": 4.23890315115712e-07, |
|
"loss": 0.4837, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.8819644105991141, |
|
"grad_norm": 4.640356219725602, |
|
"learning_rate": 4.184421883651374e-07, |
|
"loss": 0.4594, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.882741471753827, |
|
"grad_norm": 4.452516441213523, |
|
"learning_rate": 4.1302777058816136e-07, |
|
"loss": 0.5087, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.8835185329085399, |
|
"grad_norm": 3.2814252714146903, |
|
"learning_rate": 4.076471016215533e-07, |
|
"loss": 0.4585, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.8842955940632528, |
|
"grad_norm": 4.17360304036643, |
|
"learning_rate": 4.023002210537763e-07, |
|
"loss": 0.4808, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.8850726552179656, |
|
"grad_norm": 3.4710617417209897, |
|
"learning_rate": 3.9698716822469175e-07, |
|
"loss": 0.4764, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.8858497163726785, |
|
"grad_norm": 4.94630365171049, |
|
"learning_rate": 3.917079822252756e-07, |
|
"loss": 0.4676, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.8866267775273914, |
|
"grad_norm": 3.9963020658849295, |
|
"learning_rate": 3.864627018973244e-07, |
|
"loss": 0.4594, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.8874038386821043, |
|
"grad_norm": 4.149575936577817, |
|
"learning_rate": 3.8125136583317404e-07, |
|
"loss": 0.4408, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.8881808998368171, |
|
"grad_norm": 4.0908393768408535, |
|
"learning_rate": 3.760740123754125e-07, |
|
"loss": 0.4906, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.88895796099153, |
|
"grad_norm": 3.2442681217314413, |
|
"learning_rate": 3.709306796166029e-07, |
|
"loss": 0.4602, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.8897350221462429, |
|
"grad_norm": 3.2062024108356786, |
|
"learning_rate": 3.658214053989967e-07, |
|
"loss": 0.4291, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.8905120833009558, |
|
"grad_norm": 3.070354137183584, |
|
"learning_rate": 3.6074622731426036e-07, |
|
"loss": 0.4704, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.8912891444556686, |
|
"grad_norm": 3.7959986708913136, |
|
"learning_rate": 3.557051827031954e-07, |
|
"loss": 0.4694, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.8920662056103815, |
|
"grad_norm": 4.3724752517742145, |
|
"learning_rate": 3.506983086554666e-07, |
|
"loss": 0.4679, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.8928432667650944, |
|
"grad_norm": 4.7403654025736035, |
|
"learning_rate": 3.4572564200932634e-07, |
|
"loss": 0.5283, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.8936203279198073, |
|
"grad_norm": 4.243101118629279, |
|
"learning_rate": 3.4078721935134397e-07, |
|
"loss": 0.5125, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.8943973890745202, |
|
"grad_norm": 4.475859170580614, |
|
"learning_rate": 3.3588307701614144e-07, |
|
"loss": 0.4869, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.895174450229233, |
|
"grad_norm": 4.052974333086782, |
|
"learning_rate": 3.310132510861169e-07, |
|
"loss": 0.497, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.8959515113839459, |
|
"grad_norm": 3.373865018498319, |
|
"learning_rate": 3.2617777739118894e-07, |
|
"loss": 0.4441, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.8967285725386588, |
|
"grad_norm": 3.276175321494806, |
|
"learning_rate": 3.213766915085248e-07, |
|
"loss": 0.4451, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.8975056336933717, |
|
"grad_norm": 3.908380664561767, |
|
"learning_rate": 3.1661002876228473e-07, |
|
"loss": 0.4243, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.8982826948480845, |
|
"grad_norm": 2.6868106053772003, |
|
"learning_rate": 3.118778242233572e-07, |
|
"loss": 0.4427, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.8990597560027974, |
|
"grad_norm": 3.3557801815767285, |
|
"learning_rate": 3.0718011270910455e-07, |
|
"loss": 0.4702, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.8998368171575103, |
|
"grad_norm": 3.473766818324853, |
|
"learning_rate": 3.02516928783107e-07, |
|
"loss": 0.4744, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.9006138783122232, |
|
"grad_norm": 3.8754395433857503, |
|
"learning_rate": 2.978883067549032e-07, |
|
"loss": 0.4519, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.901390939466936, |
|
"grad_norm": 4.145319857126792, |
|
"learning_rate": 2.9329428067974454e-07, |
|
"loss": 0.4612, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.9021680006216489, |
|
"grad_norm": 3.4197421104899424, |
|
"learning_rate": 2.8873488435833983e-07, |
|
"loss": 0.46, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.9029450617763618, |
|
"grad_norm": 5.689929153660378, |
|
"learning_rate": 2.8421015133660856e-07, |
|
"loss": 0.4345, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.9037221229310747, |
|
"grad_norm": 2.292957288599791, |
|
"learning_rate": 2.797201149054335e-07, |
|
"loss": 0.4454, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.9044991840857876, |
|
"grad_norm": 4.486223577334596, |
|
"learning_rate": 2.752648081004183e-07, |
|
"loss": 0.4593, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.9052762452405004, |
|
"grad_norm": 3.8405561325920745, |
|
"learning_rate": 2.7084426370163954e-07, |
|
"loss": 0.4888, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.9060533063952133, |
|
"grad_norm": 3.406878245329023, |
|
"learning_rate": 2.6645851423340806e-07, |
|
"loss": 0.4558, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.9068303675499262, |
|
"grad_norm": 4.950678382840644, |
|
"learning_rate": 2.621075919640309e-07, |
|
"loss": 0.4762, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.9076074287046391, |
|
"grad_norm": 3.322238216032584, |
|
"learning_rate": 2.577915289055727e-07, |
|
"loss": 0.4759, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.9083844898593519, |
|
"grad_norm": 3.3945486166885006, |
|
"learning_rate": 2.535103568136205e-07, |
|
"loss": 0.4955, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.9091615510140648, |
|
"grad_norm": 3.8694072275201945, |
|
"learning_rate": 2.492641071870489e-07, |
|
"loss": 0.5166, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.9099386121687777, |
|
"grad_norm": 4.7651096314002865, |
|
"learning_rate": 2.450528112677886e-07, |
|
"loss": 0.4971, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.9107156733234906, |
|
"grad_norm": 4.469927022538459, |
|
"learning_rate": 2.408765000406005e-07, |
|
"loss": 0.4796, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.9114927344782034, |
|
"grad_norm": 4.519223313466715, |
|
"learning_rate": 2.367352042328408e-07, |
|
"loss": 0.4685, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.9122697956329163, |
|
"grad_norm": 3.963061942219626, |
|
"learning_rate": 2.3262895431424015e-07, |
|
"loss": 0.4851, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.9130468567876292, |
|
"grad_norm": 2.4524133862796313, |
|
"learning_rate": 2.2855778049667653e-07, |
|
"loss": 0.4534, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.9138239179423421, |
|
"grad_norm": 2.834722369254088, |
|
"learning_rate": 2.2452171273395716e-07, |
|
"loss": 0.4548, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.914600979097055, |
|
"grad_norm": 3.662017876045297, |
|
"learning_rate": 2.2052078072159143e-07, |
|
"loss": 0.4596, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.9153780402517678, |
|
"grad_norm": 4.021945589966396, |
|
"learning_rate": 2.1655501389657941e-07, |
|
"loss": 0.4744, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.9161551014064807, |
|
"grad_norm": 3.251036017263966, |
|
"learning_rate": 2.126244414371903e-07, |
|
"loss": 0.4575, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.9169321625611936, |
|
"grad_norm": 3.351594261133528, |
|
"learning_rate": 2.087290922627494e-07, |
|
"loss": 0.4722, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.9177092237159065, |
|
"grad_norm": 2.9100443321260645, |
|
"learning_rate": 2.0486899503342595e-07, |
|
"loss": 0.4781, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.9184862848706193, |
|
"grad_norm": 5.769177396129288, |
|
"learning_rate": 2.010441781500233e-07, |
|
"loss": 0.4561, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.9192633460253322, |
|
"grad_norm": 3.6257554055271703, |
|
"learning_rate": 1.9725466975376585e-07, |
|
"loss": 0.4628, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.9200404071800451, |
|
"grad_norm": 5.698219899736846, |
|
"learning_rate": 1.9350049772609568e-07, |
|
"loss": 0.4849, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.920817468334758, |
|
"grad_norm": 5.10283696189389, |
|
"learning_rate": 1.8978168968846632e-07, |
|
"loss": 0.4584, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.9215945294894708, |
|
"grad_norm": 2.4057166233933107, |
|
"learning_rate": 1.8609827300213877e-07, |
|
"loss": 0.4575, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.9223715906441837, |
|
"grad_norm": 4.039902041938024, |
|
"learning_rate": 1.8245027476798295e-07, |
|
"loss": 0.4237, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.9231486517988966, |
|
"grad_norm": 5.0104310640190155, |
|
"learning_rate": 1.7883772182627378e-07, |
|
"loss": 0.4609, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.9239257129536095, |
|
"grad_norm": 3.235199066685605, |
|
"learning_rate": 1.7526064075649718e-07, |
|
"loss": 0.4725, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.9247027741083224, |
|
"grad_norm": 5.7000179030429, |
|
"learning_rate": 1.7171905787715436e-07, |
|
"loss": 0.4844, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.9254798352630352, |
|
"grad_norm": 4.833515226751012, |
|
"learning_rate": 1.6821299924556557e-07, |
|
"loss": 0.4711, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.9262568964177481, |
|
"grad_norm": 4.541973195325704, |
|
"learning_rate": 1.647424906576811e-07, |
|
"loss": 0.4536, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.927033957572461, |
|
"grad_norm": 3.1471929054096464, |
|
"learning_rate": 1.613075576478923e-07, |
|
"loss": 0.461, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.9278110187271739, |
|
"grad_norm": 5.155810640275875, |
|
"learning_rate": 1.5790822548883921e-07, |
|
"loss": 0.4619, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.9285880798818867, |
|
"grad_norm": 4.815168413187984, |
|
"learning_rate": 1.545445191912287e-07, |
|
"loss": 0.4811, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.9293651410365996, |
|
"grad_norm": 4.039603939657306, |
|
"learning_rate": 1.5121646350364784e-07, |
|
"loss": 0.4677, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.9301422021913125, |
|
"grad_norm": 3.0484480106622565, |
|
"learning_rate": 1.4792408291238514e-07, |
|
"loss": 0.4621, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.9309192633460254, |
|
"grad_norm": 3.30445623378334, |
|
"learning_rate": 1.4466740164124582e-07, |
|
"loss": 0.423, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.9316963245007382, |
|
"grad_norm": 5.507483370884143, |
|
"learning_rate": 1.4144644365137906e-07, |
|
"loss": 0.4395, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.9324733856554511, |
|
"grad_norm": 4.472623280485502, |
|
"learning_rate": 1.382612326410959e-07, |
|
"loss": 0.4407, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.933250446810164, |
|
"grad_norm": 4.43958885227866, |
|
"learning_rate": 1.3511179204570014e-07, |
|
"loss": 0.4594, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 0.9340275079648769, |
|
"grad_norm": 4.219831856666021, |
|
"learning_rate": 1.3199814503731144e-07, |
|
"loss": 0.4935, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.9348045691195898, |
|
"grad_norm": 3.6973725388649887, |
|
"learning_rate": 1.289203145246981e-07, |
|
"loss": 0.4163, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 0.9355816302743026, |
|
"grad_norm": 3.7631001641207087, |
|
"learning_rate": 1.258783231531069e-07, |
|
"loss": 0.4795, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.9363586914290155, |
|
"grad_norm": 4.91235572426644, |
|
"learning_rate": 1.2287219330409716e-07, |
|
"loss": 0.4763, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.9371357525837284, |
|
"grad_norm": 5.162011591962256, |
|
"learning_rate": 1.1990194709537496e-07, |
|
"loss": 0.4663, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.9379128137384413, |
|
"grad_norm": 4.847494906904684, |
|
"learning_rate": 1.1696760638063243e-07, |
|
"loss": 0.4638, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 0.9386898748931541, |
|
"grad_norm": 3.4104319145126203, |
|
"learning_rate": 1.1406919274938477e-07, |
|
"loss": 0.5046, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.939466936047867, |
|
"grad_norm": 3.5111768971357793, |
|
"learning_rate": 1.112067275268125e-07, |
|
"loss": 0.4713, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.9402439972025799, |
|
"grad_norm": 2.618733293064988, |
|
"learning_rate": 1.083802317736049e-07, |
|
"loss": 0.4698, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.9410210583572928, |
|
"grad_norm": 3.9733552104692333, |
|
"learning_rate": 1.0558972628580522e-07, |
|
"loss": 0.5037, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 0.9417981195120056, |
|
"grad_norm": 3.876212810601272, |
|
"learning_rate": 1.0283523159465514e-07, |
|
"loss": 0.4538, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.9425751806667185, |
|
"grad_norm": 3.2929997252443193, |
|
"learning_rate": 1.0011676796644776e-07, |
|
"loss": 0.4606, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 0.9433522418214313, |
|
"grad_norm": 4.672469430214036, |
|
"learning_rate": 9.743435540237433e-08, |
|
"loss": 0.4695, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.9441293029761442, |
|
"grad_norm": 3.462179429994501, |
|
"learning_rate": 9.478801363838052e-08, |
|
"loss": 0.448, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.944906364130857, |
|
"grad_norm": 4.83408662775341, |
|
"learning_rate": 9.217776214501984e-08, |
|
"loss": 0.484, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.9456834252855699, |
|
"grad_norm": 4.438140209760804, |
|
"learning_rate": 8.960362012730983e-08, |
|
"loss": 0.4603, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 0.9464604864402828, |
|
"grad_norm": 3.4973998750156543, |
|
"learning_rate": 8.706560652459062e-08, |
|
"loss": 0.4249, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.9472375475949957, |
|
"grad_norm": 4.089297315615882, |
|
"learning_rate": 8.456374001038769e-08, |
|
"loss": 0.4491, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 0.9480146087497086, |
|
"grad_norm": 4.084120402865338, |
|
"learning_rate": 8.209803899227209e-08, |
|
"loss": 0.4535, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.9487916699044214, |
|
"grad_norm": 3.941907831396277, |
|
"learning_rate": 7.966852161172711e-08, |
|
"loss": 0.4496, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 0.9495687310591343, |
|
"grad_norm": 2.833823469782505, |
|
"learning_rate": 7.727520574401127e-08, |
|
"loss": 0.4243, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.9503457922138472, |
|
"grad_norm": 4.551822537590359, |
|
"learning_rate": 7.49181089980322e-08, |
|
"loss": 0.4582, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 0.9511228533685601, |
|
"grad_norm": 3.043933176817138, |
|
"learning_rate": 7.259724871621188e-08, |
|
"loss": 0.5034, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.9518999145232729, |
|
"grad_norm": 3.4621240444267665, |
|
"learning_rate": 7.031264197436161e-08, |
|
"loss": 0.4268, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.9526769756779858, |
|
"grad_norm": 4.056375247941382, |
|
"learning_rate": 6.806430558155719e-08, |
|
"loss": 0.4745, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.9534540368326987, |
|
"grad_norm": 4.535857419133766, |
|
"learning_rate": 6.585225608001178e-08, |
|
"loss": 0.4308, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 0.9542310979874116, |
|
"grad_norm": 2.4310495050933816, |
|
"learning_rate": 6.367650974495875e-08, |
|
"loss": 0.4222, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.9550081591421244, |
|
"grad_norm": 1.8480746534853145, |
|
"learning_rate": 6.153708258452851e-08, |
|
"loss": 0.4637, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 0.9557852202968373, |
|
"grad_norm": 4.469852603004664, |
|
"learning_rate": 5.943399033963182e-08, |
|
"loss": 0.4771, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.9565622814515502, |
|
"grad_norm": 3.2674434265539745, |
|
"learning_rate": 5.7367248483845005e-08, |
|
"loss": 0.4866, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 0.9573393426062631, |
|
"grad_norm": 2.005250278061698, |
|
"learning_rate": 5.533687222329332e-08, |
|
"loss": 0.4144, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.958116403760976, |
|
"grad_norm": 2.6745479068375824, |
|
"learning_rate": 5.3342876496542126e-08, |
|
"loss": 0.4685, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 0.9588934649156888, |
|
"grad_norm": 2.9539394159745815, |
|
"learning_rate": 5.138527597448595e-08, |
|
"loss": 0.4639, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.9596705260704017, |
|
"grad_norm": 4.017786152412138, |
|
"learning_rate": 4.946408506023958e-08, |
|
"loss": 0.442, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.9604475872251146, |
|
"grad_norm": 3.850870480799147, |
|
"learning_rate": 4.757931788903325e-08, |
|
"loss": 0.4304, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.9612246483798275, |
|
"grad_norm": 3.0544561131913586, |
|
"learning_rate": 4.573098832810818e-08, |
|
"loss": 0.4478, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 0.9620017095345403, |
|
"grad_norm": 5.80094396671801, |
|
"learning_rate": 4.391910997661397e-08, |
|
"loss": 0.4821, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.9627787706892532, |
|
"grad_norm": 3.5119994742694773, |
|
"learning_rate": 4.214369616550973e-08, |
|
"loss": 0.4362, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 0.9635558318439661, |
|
"grad_norm": 4.914214488501594, |
|
"learning_rate": 4.040475995746529e-08, |
|
"loss": 0.4375, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.964332892998679, |
|
"grad_norm": 3.0958335114663322, |
|
"learning_rate": 3.8702314146766284e-08, |
|
"loss": 0.4565, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 0.9651099541533918, |
|
"grad_norm": 3.156460394460856, |
|
"learning_rate": 3.7036371259216994e-08, |
|
"loss": 0.4625, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.9658870153081047, |
|
"grad_norm": 3.411808395407994, |
|
"learning_rate": 3.540694355205099e-08, |
|
"loss": 0.4403, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 0.9666640764628176, |
|
"grad_norm": 3.823904951701004, |
|
"learning_rate": 3.381404301384117e-08, |
|
"loss": 0.4446, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.9674411376175305, |
|
"grad_norm": 3.762167967184466, |
|
"learning_rate": 3.225768136440821e-08, |
|
"loss": 0.4588, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.9682181987722434, |
|
"grad_norm": 3.0475232787033835, |
|
"learning_rate": 3.0737870054739496e-08, |
|
"loss": 0.4643, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.9689952599269562, |
|
"grad_norm": 3.768638648169802, |
|
"learning_rate": 2.925462026689918e-08, |
|
"loss": 0.4438, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 0.9697723210816691, |
|
"grad_norm": 3.1658559982961942, |
|
"learning_rate": 2.7807942913950504e-08, |
|
"loss": 0.4872, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.970549382236382, |
|
"grad_norm": 4.351267552340424, |
|
"learning_rate": 2.6397848639874156e-08, |
|
"loss": 0.4828, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 0.9713264433910949, |
|
"grad_norm": 4.485145700676859, |
|
"learning_rate": 2.502434781948726e-08, |
|
"loss": 0.4754, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.9721035045458077, |
|
"grad_norm": 3.772459688285439, |
|
"learning_rate": 2.3687450558370627e-08, |
|
"loss": 0.4425, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 0.9728805657005206, |
|
"grad_norm": 2.3671995254376474, |
|
"learning_rate": 2.2387166692794392e-08, |
|
"loss": 0.4698, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.9736576268552335, |
|
"grad_norm": 4.4933944619724, |
|
"learning_rate": 2.1123505789642507e-08, |
|
"loss": 0.4746, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 0.9744346880099464, |
|
"grad_norm": 4.072967347229249, |
|
"learning_rate": 1.989647714634446e-08, |
|
"loss": 0.4646, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.9752117491646592, |
|
"grad_norm": 3.8176450930369965, |
|
"learning_rate": 1.8706089790807014e-08, |
|
"loss": 0.4885, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.9759888103193721, |
|
"grad_norm": 5.406894035256226, |
|
"learning_rate": 1.7552352481347013e-08, |
|
"loss": 0.4495, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.976765871474085, |
|
"grad_norm": 2.91507715459867, |
|
"learning_rate": 1.6435273706627564e-08, |
|
"loss": 0.4498, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 0.9775429326287979, |
|
"grad_norm": 3.455836019853387, |
|
"learning_rate": 1.5354861685595855e-08, |
|
"loss": 0.4679, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.9783199937835108, |
|
"grad_norm": 3.065621924437169, |
|
"learning_rate": 1.4311124367420992e-08, |
|
"loss": 0.424, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 0.9790970549382236, |
|
"grad_norm": 5.408364243129198, |
|
"learning_rate": 1.3304069431437362e-08, |
|
"loss": 0.4582, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.9798741160929365, |
|
"grad_norm": 3.9623851369922485, |
|
"learning_rate": 1.2333704287087467e-08, |
|
"loss": 0.4733, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 0.9806511772476494, |
|
"grad_norm": 3.6951264488478976, |
|
"learning_rate": 1.1400036073866416e-08, |
|
"loss": 0.46, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.9814282384023623, |
|
"grad_norm": 2.8637927854551233, |
|
"learning_rate": 1.0503071661271957e-08, |
|
"loss": 0.4449, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 0.9822052995570751, |
|
"grad_norm": 3.2568596741604523, |
|
"learning_rate": 9.642817648750636e-09, |
|
"loss": 0.4644, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.982982360711788, |
|
"grad_norm": 4.000380462168666, |
|
"learning_rate": 8.819280365652827e-09, |
|
"loss": 0.4525, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.9837594218665009, |
|
"grad_norm": 4.048475764438385, |
|
"learning_rate": 8.032465871182227e-09, |
|
"loss": 0.4586, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.9845364830212138, |
|
"grad_norm": 3.2880203159325307, |
|
"learning_rate": 7.282379954354768e-09, |
|
"loss": 0.4334, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 0.9853135441759266, |
|
"grad_norm": 4.0643620339312605, |
|
"learning_rate": 6.569028133954214e-09, |
|
"loss": 0.4458, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.9860906053306395, |
|
"grad_norm": 4.549795834627539, |
|
"learning_rate": 5.892415658491634e-09, |
|
"loss": 0.4554, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 0.9868676664853524, |
|
"grad_norm": 2.533413360663321, |
|
"learning_rate": 5.252547506167105e-09, |
|
"loss": 0.4535, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.9876447276400653, |
|
"grad_norm": 3.539581600293753, |
|
"learning_rate": 4.649428384833065e-09, |
|
"loss": 0.4591, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.9884217887947782, |
|
"grad_norm": 3.392398736378723, |
|
"learning_rate": 4.083062731960463e-09, |
|
"loss": 0.4609, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.989198849949491, |
|
"grad_norm": 3.8523386314806305, |
|
"learning_rate": 3.5534547146043318e-09, |
|
"loss": 0.4601, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 0.9899759111042039, |
|
"grad_norm": 4.270954545588355, |
|
"learning_rate": 3.060608229373818e-09, |
|
"loss": 0.4578, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.9907529722589168, |
|
"grad_norm": 3.428519580605601, |
|
"learning_rate": 2.6045269024049802e-09, |
|
"loss": 0.4564, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.9915300334136297, |
|
"grad_norm": 3.2452332555408683, |
|
"learning_rate": 2.1852140893319218e-09, |
|
"loss": 0.4291, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.9923070945683425, |
|
"grad_norm": 2.788077697667321, |
|
"learning_rate": 1.8026728752634781e-09, |
|
"loss": 0.4726, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 0.9930841557230554, |
|
"grad_norm": 4.551637132581418, |
|
"learning_rate": 1.4569060747610109e-09, |
|
"loss": 0.4655, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.9938612168777683, |
|
"grad_norm": 3.8202797170955614, |
|
"learning_rate": 1.1479162318150939e-09, |
|
"loss": 0.4136, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 0.9946382780324812, |
|
"grad_norm": 4.028638686891394, |
|
"learning_rate": 8.757056198294145e-10, |
|
"loss": 0.4866, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.995415339187194, |
|
"grad_norm": 3.652876594672518, |
|
"learning_rate": 6.402762416035657e-10, |
|
"loss": 0.4361, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 0.9961924003419069, |
|
"grad_norm": 3.900658925525932, |
|
"learning_rate": 4.4162982931750255e-10, |
|
"loss": 0.4366, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.9969694614966198, |
|
"grad_norm": 4.0222498355179, |
|
"learning_rate": 2.7976784451877457e-10, |
|
"loss": 0.5075, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 0.9977465226513327, |
|
"grad_norm": 3.2583568617059995, |
|
"learning_rate": 1.5469147811308926e-10, |
|
"loss": 0.438, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.9985235838060456, |
|
"grad_norm": 3.7370678626951936, |
|
"learning_rate": 6.640165035431967e-11, |
|
"loss": 0.4643, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.9993006449607584, |
|
"grad_norm": 4.275291482479352, |
|
"learning_rate": 1.4899010837288174e-11, |
|
"loss": 0.4825, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 12869, |
|
"total_flos": 626183508787200.0, |
|
"train_loss": 0.7687553066651168, |
|
"train_runtime": 40960.0959, |
|
"train_samples_per_second": 5.027, |
|
"train_steps_per_second": 0.314 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 12869, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 626183508787200.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|