|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 87450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005717552887364208, |
|
"grad_norm": 9.984894752502441, |
|
"learning_rate": 9.999969012132217e-07, |
|
"loss": 1.852, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.011435105774728416, |
|
"grad_norm": 6.384902000427246, |
|
"learning_rate": 9.999873506403478e-07, |
|
"loss": 1.1916, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.017152658662092625, |
|
"grad_norm": 5.624429225921631, |
|
"learning_rate": 9.999713471140152e-07, |
|
"loss": 1.1066, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.022870211549456832, |
|
"grad_norm": 5.947996616363525, |
|
"learning_rate": 9.99948890840769e-07, |
|
"loss": 1.0538, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02858776443682104, |
|
"grad_norm": 5.026174068450928, |
|
"learning_rate": 9.999199821104353e-07, |
|
"loss": 1.0143, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03430531732418525, |
|
"grad_norm": 6.598933696746826, |
|
"learning_rate": 9.99884621296117e-07, |
|
"loss": 0.9908, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.040022870211549454, |
|
"grad_norm": 4.971883296966553, |
|
"learning_rate": 9.99842808854189e-07, |
|
"loss": 0.9857, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.045740423098913664, |
|
"grad_norm": 5.7355570793151855, |
|
"learning_rate": 9.997945453242922e-07, |
|
"loss": 0.9578, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.051457975986277875, |
|
"grad_norm": 5.731710910797119, |
|
"learning_rate": 9.997398313293272e-07, |
|
"loss": 0.9344, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05717552887364208, |
|
"grad_norm": 6.716394901275635, |
|
"learning_rate": 9.996786675754455e-07, |
|
"loss": 0.9511, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06289308176100629, |
|
"grad_norm": 7.5202178955078125, |
|
"learning_rate": 9.996110548520408e-07, |
|
"loss": 0.9319, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0686106346483705, |
|
"grad_norm": 6.257505416870117, |
|
"learning_rate": 9.995369940317388e-07, |
|
"loss": 0.9303, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.07432818753573471, |
|
"grad_norm": 8.514854431152344, |
|
"learning_rate": 9.994564860703857e-07, |
|
"loss": 0.9233, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.08004574042309891, |
|
"grad_norm": 7.62768030166626, |
|
"learning_rate": 9.993695320070358e-07, |
|
"loss": 0.9061, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.08576329331046312, |
|
"grad_norm": 7.282722473144531, |
|
"learning_rate": 9.992761329639389e-07, |
|
"loss": 0.9147, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09148084619782733, |
|
"grad_norm": 6.965019226074219, |
|
"learning_rate": 9.991762901465247e-07, |
|
"loss": 0.9013, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.09719839908519154, |
|
"grad_norm": 8.01344108581543, |
|
"learning_rate": 9.990700048433879e-07, |
|
"loss": 0.8811, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.10291595197255575, |
|
"grad_norm": 7.747630596160889, |
|
"learning_rate": 9.989572784262714e-07, |
|
"loss": 0.8737, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.10863350485991996, |
|
"grad_norm": 6.288477897644043, |
|
"learning_rate": 9.988381123500485e-07, |
|
"loss": 0.8984, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.11435105774728416, |
|
"grad_norm": 9.441058158874512, |
|
"learning_rate": 9.987125081527047e-07, |
|
"loss": 0.8659, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12006861063464837, |
|
"grad_norm": 7.924680233001709, |
|
"learning_rate": 9.98580467455317e-07, |
|
"loss": 0.884, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.12578616352201258, |
|
"grad_norm": 7.8524322509765625, |
|
"learning_rate": 9.984419919620333e-07, |
|
"loss": 0.8653, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.13150371640937678, |
|
"grad_norm": 7.980896949768066, |
|
"learning_rate": 9.982970834600508e-07, |
|
"loss": 0.8732, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.137221269296741, |
|
"grad_norm": 9.265905380249023, |
|
"learning_rate": 9.981457438195925e-07, |
|
"loss": 0.8934, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.1429388221841052, |
|
"grad_norm": 7.617094039916992, |
|
"learning_rate": 9.97987974993883e-07, |
|
"loss": 0.8902, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.14865637507146942, |
|
"grad_norm": 9.14963436126709, |
|
"learning_rate": 9.978237790191236e-07, |
|
"loss": 0.887, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.15437392795883362, |
|
"grad_norm": 8.893723487854004, |
|
"learning_rate": 9.97653158014466e-07, |
|
"loss": 0.8648, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.16009148084619781, |
|
"grad_norm": 9.325992584228516, |
|
"learning_rate": 9.974761141819848e-07, |
|
"loss": 0.8782, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.16580903373356204, |
|
"grad_norm": 9.336038589477539, |
|
"learning_rate": 9.972926498066484e-07, |
|
"loss": 0.8657, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.17152658662092624, |
|
"grad_norm": 9.833565711975098, |
|
"learning_rate": 9.971027672562918e-07, |
|
"loss": 0.8674, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.17724413950829046, |
|
"grad_norm": 8.262983322143555, |
|
"learning_rate": 9.969064689815828e-07, |
|
"loss": 0.8531, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.18296169239565466, |
|
"grad_norm": 8.745858192443848, |
|
"learning_rate": 9.967037575159929e-07, |
|
"loss": 0.9019, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.18867924528301888, |
|
"grad_norm": 9.545065879821777, |
|
"learning_rate": 9.964946354757638e-07, |
|
"loss": 0.8313, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.19439679817038308, |
|
"grad_norm": 10.856668472290039, |
|
"learning_rate": 9.962791055598731e-07, |
|
"loss": 0.8339, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.20011435105774728, |
|
"grad_norm": 8.230608940124512, |
|
"learning_rate": 9.960571705500005e-07, |
|
"loss": 0.8456, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2058319039451115, |
|
"grad_norm": 10.353604316711426, |
|
"learning_rate": 9.958288333104907e-07, |
|
"loss": 0.8514, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.2115494568324757, |
|
"grad_norm": 9.70090389251709, |
|
"learning_rate": 9.95594096788318e-07, |
|
"loss": 0.872, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.21726700971983992, |
|
"grad_norm": 8.226770401000977, |
|
"learning_rate": 9.953529640130459e-07, |
|
"loss": 0.8532, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.22298456260720412, |
|
"grad_norm": 10.802261352539062, |
|
"learning_rate": 9.95105438096791e-07, |
|
"loss": 0.8183, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.22870211549456831, |
|
"grad_norm": 9.883532524108887, |
|
"learning_rate": 9.948515222341802e-07, |
|
"loss": 0.8244, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.23441966838193254, |
|
"grad_norm": 9.516398429870605, |
|
"learning_rate": 9.94591219702311e-07, |
|
"loss": 0.8464, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.24013722126929674, |
|
"grad_norm": 9.146637916564941, |
|
"learning_rate": 9.943245338607086e-07, |
|
"loss": 0.838, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.24585477415666096, |
|
"grad_norm": 9.489537239074707, |
|
"learning_rate": 9.94051468151283e-07, |
|
"loss": 0.8245, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.25157232704402516, |
|
"grad_norm": 9.833827018737793, |
|
"learning_rate": 9.937720260982834e-07, |
|
"loss": 0.8361, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.25728987993138935, |
|
"grad_norm": 13.346197128295898, |
|
"learning_rate": 9.934862113082547e-07, |
|
"loss": 0.8464, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.26300743281875355, |
|
"grad_norm": 8.923548698425293, |
|
"learning_rate": 9.93194027469989e-07, |
|
"loss": 0.8166, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.2687249857061178, |
|
"grad_norm": 8.640154838562012, |
|
"learning_rate": 9.928954783544794e-07, |
|
"loss": 0.8253, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.274442538593482, |
|
"grad_norm": 12.083985328674316, |
|
"learning_rate": 9.9259056781487e-07, |
|
"loss": 0.8319, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.2801600914808462, |
|
"grad_norm": 8.075922012329102, |
|
"learning_rate": 9.92279299786408e-07, |
|
"loss": 0.8371, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.2858776443682104, |
|
"grad_norm": 8.545024871826172, |
|
"learning_rate": 9.919616782863908e-07, |
|
"loss": 0.8319, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2915951972555746, |
|
"grad_norm": 8.820647239685059, |
|
"learning_rate": 9.916377074141157e-07, |
|
"loss": 0.8474, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.29731275014293884, |
|
"grad_norm": 8.804427146911621, |
|
"learning_rate": 9.913073913508266e-07, |
|
"loss": 0.8183, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.30303030303030304, |
|
"grad_norm": 8.17628002166748, |
|
"learning_rate": 9.909707343596596e-07, |
|
"loss": 0.8122, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.30874785591766724, |
|
"grad_norm": 9.780845642089844, |
|
"learning_rate": 9.906277407855883e-07, |
|
"loss": 0.8329, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.31446540880503143, |
|
"grad_norm": 9.164664268493652, |
|
"learning_rate": 9.902784150553678e-07, |
|
"loss": 0.805, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.32018296169239563, |
|
"grad_norm": 10.00500774383545, |
|
"learning_rate": 9.899227616774776e-07, |
|
"loss": 0.823, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.3259005145797599, |
|
"grad_norm": 10.257847785949707, |
|
"learning_rate": 9.895607852420636e-07, |
|
"loss": 0.8209, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.3316180674671241, |
|
"grad_norm": 12.896175384521484, |
|
"learning_rate": 9.891924904208774e-07, |
|
"loss": 0.815, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.3373356203544883, |
|
"grad_norm": 12.980055809020996, |
|
"learning_rate": 9.888178819672186e-07, |
|
"loss": 0.8061, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.34305317324185247, |
|
"grad_norm": 9.492401123046875, |
|
"learning_rate": 9.884369647158711e-07, |
|
"loss": 0.8187, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.34877072612921667, |
|
"grad_norm": 10.031133651733398, |
|
"learning_rate": 9.880497435830418e-07, |
|
"loss": 0.8317, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.3544882790165809, |
|
"grad_norm": 10.923727989196777, |
|
"learning_rate": 9.87656223566297e-07, |
|
"loss": 0.8279, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.3602058319039451, |
|
"grad_norm": 11.254301071166992, |
|
"learning_rate": 9.872564097444981e-07, |
|
"loss": 0.8274, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.3659233847913093, |
|
"grad_norm": 11.593939781188965, |
|
"learning_rate": 9.868503072777356e-07, |
|
"loss": 0.8018, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.3716409376786735, |
|
"grad_norm": 12.787039756774902, |
|
"learning_rate": 9.864379214072626e-07, |
|
"loss": 0.8114, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.37735849056603776, |
|
"grad_norm": 9.75014877319336, |
|
"learning_rate": 9.860192574554274e-07, |
|
"loss": 0.8398, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.38307604345340196, |
|
"grad_norm": 9.99585247039795, |
|
"learning_rate": 9.855943208256046e-07, |
|
"loss": 0.8166, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.38879359634076616, |
|
"grad_norm": 9.503119468688965, |
|
"learning_rate": 9.851631170021257e-07, |
|
"loss": 0.7923, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.39451114922813035, |
|
"grad_norm": 9.30905818939209, |
|
"learning_rate": 9.84725651550208e-07, |
|
"loss": 0.8128, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.40022870211549455, |
|
"grad_norm": 12.592491149902344, |
|
"learning_rate": 9.842819301158825e-07, |
|
"loss": 0.8064, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4059462550028588, |
|
"grad_norm": 11.191341400146484, |
|
"learning_rate": 9.838319584259217e-07, |
|
"loss": 0.7924, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.411663807890223, |
|
"grad_norm": 9.108357429504395, |
|
"learning_rate": 9.833757422877653e-07, |
|
"loss": 0.7678, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.4173813607775872, |
|
"grad_norm": 9.786158561706543, |
|
"learning_rate": 9.829132875894453e-07, |
|
"loss": 0.8139, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.4230989136649514, |
|
"grad_norm": 9.721033096313477, |
|
"learning_rate": 9.8244460029951e-07, |
|
"loss": 0.8294, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.4288164665523156, |
|
"grad_norm": 12.101911544799805, |
|
"learning_rate": 9.819696864669466e-07, |
|
"loss": 0.8122, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.43453401943967984, |
|
"grad_norm": 10.104898452758789, |
|
"learning_rate": 9.814885522211044e-07, |
|
"loss": 0.7911, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.44025157232704404, |
|
"grad_norm": 10.600286483764648, |
|
"learning_rate": 9.810012037716142e-07, |
|
"loss": 0.8108, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.44596912521440824, |
|
"grad_norm": 9.540319442749023, |
|
"learning_rate": 9.805076474083085e-07, |
|
"loss": 0.8296, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.45168667810177243, |
|
"grad_norm": 8.930343627929688, |
|
"learning_rate": 9.800078895011414e-07, |
|
"loss": 0.8172, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.45740423098913663, |
|
"grad_norm": 10.50644302368164, |
|
"learning_rate": 9.795019365001047e-07, |
|
"loss": 0.8063, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4631217838765009, |
|
"grad_norm": 10.2007417678833, |
|
"learning_rate": 9.789897949351463e-07, |
|
"loss": 0.8141, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.4688393367638651, |
|
"grad_norm": 9.180241584777832, |
|
"learning_rate": 9.784714714160844e-07, |
|
"loss": 0.7992, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.4745568896512293, |
|
"grad_norm": 13.358614921569824, |
|
"learning_rate": 9.779469726325235e-07, |
|
"loss": 0.7994, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.48027444253859347, |
|
"grad_norm": 8.851704597473145, |
|
"learning_rate": 9.774163053537675e-07, |
|
"loss": 0.8179, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.48599199542595767, |
|
"grad_norm": 11.237141609191895, |
|
"learning_rate": 9.768794764287319e-07, |
|
"loss": 0.7957, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.4917095483133219, |
|
"grad_norm": 12.960529327392578, |
|
"learning_rate": 9.76336492785856e-07, |
|
"loss": 0.7971, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.4974271012006861, |
|
"grad_norm": 15.1110258102417, |
|
"learning_rate": 9.75787361433014e-07, |
|
"loss": 0.8132, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.5031446540880503, |
|
"grad_norm": 11.033527374267578, |
|
"learning_rate": 9.752320894574232e-07, |
|
"loss": 0.8141, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.5088622069754145, |
|
"grad_norm": 14.08973503112793, |
|
"learning_rate": 9.74670684025553e-07, |
|
"loss": 0.801, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.5145797598627787, |
|
"grad_norm": 11.95508098602295, |
|
"learning_rate": 9.74103152383033e-07, |
|
"loss": 0.8061, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5202973127501429, |
|
"grad_norm": 11.355822563171387, |
|
"learning_rate": 9.73529501854559e-07, |
|
"loss": 0.7923, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.5260148656375071, |
|
"grad_norm": 12.26524829864502, |
|
"learning_rate": 9.729497398437991e-07, |
|
"loss": 0.7977, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.5317324185248714, |
|
"grad_norm": 9.304167747497559, |
|
"learning_rate": 9.723638738332967e-07, |
|
"loss": 0.7951, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.5374499714122356, |
|
"grad_norm": 14.560432434082031, |
|
"learning_rate": 9.71771911384375e-07, |
|
"loss": 0.7921, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.5431675242995998, |
|
"grad_norm": 9.710556983947754, |
|
"learning_rate": 9.711738601370406e-07, |
|
"loss": 0.796, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.548885077186964, |
|
"grad_norm": 11.41674518585205, |
|
"learning_rate": 9.705697278098815e-07, |
|
"loss": 0.8159, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.5546026300743282, |
|
"grad_norm": 10.389808654785156, |
|
"learning_rate": 9.69959522199971e-07, |
|
"loss": 0.8113, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.5603201829616924, |
|
"grad_norm": 10.79917049407959, |
|
"learning_rate": 9.69343251182765e-07, |
|
"loss": 0.7919, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.5660377358490566, |
|
"grad_norm": 13.113627433776855, |
|
"learning_rate": 9.687209227120013e-07, |
|
"loss": 0.7975, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.5717552887364208, |
|
"grad_norm": 13.61053466796875, |
|
"learning_rate": 9.68092544819596e-07, |
|
"loss": 0.8035, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.577472841623785, |
|
"grad_norm": 12.462188720703125, |
|
"learning_rate": 9.674581256155413e-07, |
|
"loss": 0.7696, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.5831903945111492, |
|
"grad_norm": 10.420384407043457, |
|
"learning_rate": 9.668176732877992e-07, |
|
"loss": 0.7682, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.5889079473985135, |
|
"grad_norm": 10.863661766052246, |
|
"learning_rate": 9.661711961021971e-07, |
|
"loss": 0.7512, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.5946255002858777, |
|
"grad_norm": 11.13371753692627, |
|
"learning_rate": 9.655187024023205e-07, |
|
"loss": 0.7939, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.6003430531732419, |
|
"grad_norm": 10.809890747070312, |
|
"learning_rate": 9.648602006094056e-07, |
|
"loss": 0.7964, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 10.5729398727417, |
|
"learning_rate": 9.641956992222297e-07, |
|
"loss": 0.8021, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.6117781589479703, |
|
"grad_norm": 10.581096649169922, |
|
"learning_rate": 9.635252068170032e-07, |
|
"loss": 0.7723, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.6174957118353345, |
|
"grad_norm": 9.770636558532715, |
|
"learning_rate": 9.628487320472575e-07, |
|
"loss": 0.7809, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.6232132647226987, |
|
"grad_norm": 11.409103393554688, |
|
"learning_rate": 9.621662836437339e-07, |
|
"loss": 0.8104, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.6289308176100629, |
|
"grad_norm": 11.642632484436035, |
|
"learning_rate": 9.61477870414271e-07, |
|
"loss": 0.7712, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.6346483704974271, |
|
"grad_norm": 11.671961784362793, |
|
"learning_rate": 9.607835012436903e-07, |
|
"loss": 0.7691, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.6403659233847913, |
|
"grad_norm": 12.849811553955078, |
|
"learning_rate": 9.600831850936823e-07, |
|
"loss": 0.8127, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.6460834762721556, |
|
"grad_norm": 12.369158744812012, |
|
"learning_rate": 9.593769310026914e-07, |
|
"loss": 0.7885, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.6518010291595198, |
|
"grad_norm": 11.423858642578125, |
|
"learning_rate": 9.58664748085797e-07, |
|
"loss": 0.7764, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.657518582046884, |
|
"grad_norm": 13.777689933776855, |
|
"learning_rate": 9.579466455345984e-07, |
|
"loss": 0.7833, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.6632361349342482, |
|
"grad_norm": 12.836466789245605, |
|
"learning_rate": 9.572226326170947e-07, |
|
"loss": 0.7872, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.6689536878216124, |
|
"grad_norm": 11.667618751525879, |
|
"learning_rate": 9.564927186775657e-07, |
|
"loss": 0.7793, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.6746712407089765, |
|
"grad_norm": 11.850467681884766, |
|
"learning_rate": 9.557569131364512e-07, |
|
"loss": 0.7635, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.6803887935963407, |
|
"grad_norm": 11.954856872558594, |
|
"learning_rate": 9.550152254902288e-07, |
|
"loss": 0.7885, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.6861063464837049, |
|
"grad_norm": 12.209726333618164, |
|
"learning_rate": 9.54267665311293e-07, |
|
"loss": 0.8116, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.6918238993710691, |
|
"grad_norm": 15.221282005310059, |
|
"learning_rate": 9.535142422478295e-07, |
|
"loss": 0.7969, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.6975414522584333, |
|
"grad_norm": 13.073972702026367, |
|
"learning_rate": 9.527549660236924e-07, |
|
"loss": 0.775, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.7032590051457976, |
|
"grad_norm": 11.428675651550293, |
|
"learning_rate": 9.519898464382779e-07, |
|
"loss": 0.7997, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.7089765580331618, |
|
"grad_norm": 12.806885719299316, |
|
"learning_rate": 9.512188933663979e-07, |
|
"loss": 0.7469, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.714694110920526, |
|
"grad_norm": 11.91589641571045, |
|
"learning_rate": 9.504421167581529e-07, |
|
"loss": 0.7899, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.7204116638078902, |
|
"grad_norm": 14.660259246826172, |
|
"learning_rate": 9.496595266388027e-07, |
|
"loss": 0.7725, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.7261292166952544, |
|
"grad_norm": 12.04018783569336, |
|
"learning_rate": 9.488711331086387e-07, |
|
"loss": 0.7732, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.7318467695826186, |
|
"grad_norm": 12.394238471984863, |
|
"learning_rate": 9.480769463428513e-07, |
|
"loss": 0.8006, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.7375643224699828, |
|
"grad_norm": 10.931447982788086, |
|
"learning_rate": 9.472769765914003e-07, |
|
"loss": 0.7648, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.743281875357347, |
|
"grad_norm": 12.420381546020508, |
|
"learning_rate": 9.464712341788826e-07, |
|
"loss": 0.772, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.7489994282447112, |
|
"grad_norm": 11.603715896606445, |
|
"learning_rate": 9.456597295043971e-07, |
|
"loss": 0.786, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 11.238115310668945, |
|
"learning_rate": 9.448424730414131e-07, |
|
"loss": 0.7662, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.7604345340194397, |
|
"grad_norm": 13.849480628967285, |
|
"learning_rate": 9.440194753376332e-07, |
|
"loss": 0.7558, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.7661520869068039, |
|
"grad_norm": 13.335151672363281, |
|
"learning_rate": 9.431907470148577e-07, |
|
"loss": 0.7562, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.7718696397941681, |
|
"grad_norm": 13.283151626586914, |
|
"learning_rate": 9.423562987688478e-07, |
|
"loss": 0.7767, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.7775871926815323, |
|
"grad_norm": 11.497271537780762, |
|
"learning_rate": 9.415161413691875e-07, |
|
"loss": 0.7544, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.7833047455688965, |
|
"grad_norm": 13.979777336120605, |
|
"learning_rate": 9.406702856591441e-07, |
|
"loss": 0.7311, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.7890222984562607, |
|
"grad_norm": 13.748780250549316, |
|
"learning_rate": 9.398187425555291e-07, |
|
"loss": 0.7717, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.7947398513436249, |
|
"grad_norm": 11.056880950927734, |
|
"learning_rate": 9.389615230485564e-07, |
|
"loss": 0.7417, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.8004574042309891, |
|
"grad_norm": 11.314404487609863, |
|
"learning_rate": 9.380986382017011e-07, |
|
"loss": 0.7675, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.8061749571183533, |
|
"grad_norm": 10.690199851989746, |
|
"learning_rate": 9.372300991515565e-07, |
|
"loss": 0.7664, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.8118925100057176, |
|
"grad_norm": 14.313260078430176, |
|
"learning_rate": 9.363559171076902e-07, |
|
"loss": 0.7533, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.8176100628930818, |
|
"grad_norm": 13.0926513671875, |
|
"learning_rate": 9.354761033524999e-07, |
|
"loss": 0.7845, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.823327615780446, |
|
"grad_norm": 13.171210289001465, |
|
"learning_rate": 9.345906692410671e-07, |
|
"loss": 0.7785, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.8290451686678102, |
|
"grad_norm": 11.313733100891113, |
|
"learning_rate": 9.336996262010113e-07, |
|
"loss": 0.7562, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.8347627215551744, |
|
"grad_norm": 13.027429580688477, |
|
"learning_rate": 9.328029857323418e-07, |
|
"loss": 0.764, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.8404802744425386, |
|
"grad_norm": 11.618389129638672, |
|
"learning_rate": 9.319007594073099e-07, |
|
"loss": 0.7552, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.8461978273299028, |
|
"grad_norm": 12.712207794189453, |
|
"learning_rate": 9.309929588702592e-07, |
|
"loss": 0.7638, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.851915380217267, |
|
"grad_norm": 14.38819408416748, |
|
"learning_rate": 9.300795958374752e-07, |
|
"loss": 0.7716, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.8576329331046312, |
|
"grad_norm": 13.714140892028809, |
|
"learning_rate": 9.291606820970345e-07, |
|
"loss": 0.778, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.8633504859919954, |
|
"grad_norm": 10.53870964050293, |
|
"learning_rate": 9.282362295086525e-07, |
|
"loss": 0.782, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.8690680388793597, |
|
"grad_norm": 20.9774227142334, |
|
"learning_rate": 9.273062500035296e-07, |
|
"loss": 0.7657, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.8747855917667239, |
|
"grad_norm": 11.294500350952148, |
|
"learning_rate": 9.263707555841989e-07, |
|
"loss": 0.7585, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.8805031446540881, |
|
"grad_norm": 13.51392936706543, |
|
"learning_rate": 9.254297583243695e-07, |
|
"loss": 0.7648, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.8862206975414523, |
|
"grad_norm": 12.258986473083496, |
|
"learning_rate": 9.244832703687718e-07, |
|
"loss": 0.7744, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.8919382504288165, |
|
"grad_norm": 13.734026908874512, |
|
"learning_rate": 9.235313039330001e-07, |
|
"loss": 0.7544, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.8976558033161807, |
|
"grad_norm": 13.782454490661621, |
|
"learning_rate": 9.225738713033555e-07, |
|
"loss": 0.7536, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.9033733562035449, |
|
"grad_norm": 11.396181106567383, |
|
"learning_rate": 9.216109848366872e-07, |
|
"loss": 0.7411, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 11.917641639709473, |
|
"learning_rate": 9.206426569602326e-07, |
|
"loss": 0.7821, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.9148084619782733, |
|
"grad_norm": 12.97021198272705, |
|
"learning_rate": 9.196689001714572e-07, |
|
"loss": 0.7743, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.9205260148656375, |
|
"grad_norm": 16.050718307495117, |
|
"learning_rate": 9.186897270378935e-07, |
|
"loss": 0.774, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.9262435677530018, |
|
"grad_norm": 12.98830795288086, |
|
"learning_rate": 9.177051501969786e-07, |
|
"loss": 0.7544, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.931961120640366, |
|
"grad_norm": 11.758196830749512, |
|
"learning_rate": 9.167151823558908e-07, |
|
"loss": 0.733, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.9376786735277302, |
|
"grad_norm": 14.87873649597168, |
|
"learning_rate": 9.157198362913865e-07, |
|
"loss": 0.7548, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.9433962264150944, |
|
"grad_norm": 11.650275230407715, |
|
"learning_rate": 9.14719124849634e-07, |
|
"loss": 0.7642, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.9491137793024585, |
|
"grad_norm": 12.603800773620605, |
|
"learning_rate": 9.137130609460491e-07, |
|
"loss": 0.7729, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.9548313321898227, |
|
"grad_norm": 13.641478538513184, |
|
"learning_rate": 9.12701657565127e-07, |
|
"loss": 0.7663, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.9605488850771869, |
|
"grad_norm": 12.762767791748047, |
|
"learning_rate": 9.116849277602762e-07, |
|
"loss": 0.7667, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.9662664379645511, |
|
"grad_norm": 15.196799278259277, |
|
"learning_rate": 9.106628846536486e-07, |
|
"loss": 0.7676, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.9719839908519153, |
|
"grad_norm": 13.028931617736816, |
|
"learning_rate": 9.096355414359714e-07, |
|
"loss": 0.7681, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.9777015437392796, |
|
"grad_norm": 15.02116584777832, |
|
"learning_rate": 9.086029113663756e-07, |
|
"loss": 0.7487, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.9834190966266438, |
|
"grad_norm": 12.758607864379883, |
|
"learning_rate": 9.075650077722263e-07, |
|
"loss": 0.7502, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.989136649514008, |
|
"grad_norm": 12.625545501708984, |
|
"learning_rate": 9.065218440489493e-07, |
|
"loss": 0.7363, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.9948542024013722, |
|
"grad_norm": 13.428793907165527, |
|
"learning_rate": 9.054734336598592e-07, |
|
"loss": 0.7604, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.0005717552887363, |
|
"grad_norm": 14.363871574401855, |
|
"learning_rate": 9.044197901359854e-07, |
|
"loss": 0.7669, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.0062893081761006, |
|
"grad_norm": 12.019951820373535, |
|
"learning_rate": 9.033609270758968e-07, |
|
"loss": 0.7553, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.012006861063465, |
|
"grad_norm": 13.016924858093262, |
|
"learning_rate": 9.022968581455275e-07, |
|
"loss": 0.7455, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.017724413950829, |
|
"grad_norm": 12.672630310058594, |
|
"learning_rate": 9.012275970779994e-07, |
|
"loss": 0.77, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.0234419668381933, |
|
"grad_norm": 13.483732223510742, |
|
"learning_rate": 9.001531576734455e-07, |
|
"loss": 0.7572, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.0291595197255574, |
|
"grad_norm": 11.392430305480957, |
|
"learning_rate": 8.990735537988315e-07, |
|
"loss": 0.7518, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.0348770726129217, |
|
"grad_norm": 11.757925033569336, |
|
"learning_rate": 8.979887993877768e-07, |
|
"loss": 0.7558, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.0405946255002858, |
|
"grad_norm": 12.2052583694458, |
|
"learning_rate": 8.968989084403755e-07, |
|
"loss": 0.7539, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.0463121783876501, |
|
"grad_norm": 14.284133911132812, |
|
"learning_rate": 8.958038950230144e-07, |
|
"loss": 0.7254, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.0520297312750142, |
|
"grad_norm": 13.372608184814453, |
|
"learning_rate": 8.947037732681921e-07, |
|
"loss": 0.7109, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.0577472841623785, |
|
"grad_norm": 14.469101905822754, |
|
"learning_rate": 8.93598557374337e-07, |
|
"loss": 0.7479, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.0634648370497426, |
|
"grad_norm": 11.611613273620605, |
|
"learning_rate": 8.924882616056231e-07, |
|
"loss": 0.7355, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.069182389937107, |
|
"grad_norm": 11.983068466186523, |
|
"learning_rate": 8.913729002917872e-07, |
|
"loss": 0.751, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.0748999428244712, |
|
"grad_norm": 12.077164649963379, |
|
"learning_rate": 8.902524878279424e-07, |
|
"loss": 0.7583, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.0806174957118353, |
|
"grad_norm": 11.417193412780762, |
|
"learning_rate": 8.891270386743937e-07, |
|
"loss": 0.7598, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.0863350485991996, |
|
"grad_norm": 14.211779594421387, |
|
"learning_rate": 8.879965673564504e-07, |
|
"loss": 0.7403, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.0920526014865637, |
|
"grad_norm": 12.398454666137695, |
|
"learning_rate": 8.868610884642394e-07, |
|
"loss": 0.7388, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.097770154373928, |
|
"grad_norm": 10.495121002197266, |
|
"learning_rate": 8.857206166525163e-07, |
|
"loss": 0.7328, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.103487707261292, |
|
"grad_norm": 14.72545051574707, |
|
"learning_rate": 8.845751666404764e-07, |
|
"loss": 0.7537, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.1092052601486564, |
|
"grad_norm": 11.759736061096191, |
|
"learning_rate": 8.834247532115651e-07, |
|
"loss": 0.7537, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.1149228130360207, |
|
"grad_norm": 13.681090354919434, |
|
"learning_rate": 8.822693912132865e-07, |
|
"loss": 0.7541, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.1206403659233848, |
|
"grad_norm": 15.426194190979004, |
|
"learning_rate": 8.811090955570126e-07, |
|
"loss": 0.7636, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.126357918810749, |
|
"grad_norm": 13.35466194152832, |
|
"learning_rate": 8.799438812177895e-07, |
|
"loss": 0.7409, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.1320754716981132, |
|
"grad_norm": 16.639293670654297, |
|
"learning_rate": 8.787737632341456e-07, |
|
"loss": 0.7467, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.1377930245854775, |
|
"grad_norm": 13.953729629516602, |
|
"learning_rate": 8.775987567078969e-07, |
|
"loss": 0.7399, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.1435105774728416, |
|
"grad_norm": 11.683821678161621, |
|
"learning_rate": 8.764188768039517e-07, |
|
"loss": 0.7449, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.1492281303602059, |
|
"grad_norm": 15.29350757598877, |
|
"learning_rate": 8.752341387501158e-07, |
|
"loss": 0.7468, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.15494568324757, |
|
"grad_norm": 14.116186141967773, |
|
"learning_rate": 8.740445578368947e-07, |
|
"loss": 0.7351, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.1606632361349343, |
|
"grad_norm": 12.669661521911621, |
|
"learning_rate": 8.728501494172975e-07, |
|
"loss": 0.7454, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.1663807890222984, |
|
"grad_norm": 14.350497245788574, |
|
"learning_rate": 8.71650928906638e-07, |
|
"loss": 0.7339, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.1720983419096627, |
|
"grad_norm": 13.84665584564209, |
|
"learning_rate": 8.704469117823363e-07, |
|
"loss": 0.7398, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.177815894797027, |
|
"grad_norm": 11.469983100891113, |
|
"learning_rate": 8.692381135837181e-07, |
|
"loss": 0.7573, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.183533447684391, |
|
"grad_norm": 18.06913185119629, |
|
"learning_rate": 8.680245499118154e-07, |
|
"loss": 0.7581, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.1892510005717554, |
|
"grad_norm": 14.282899856567383, |
|
"learning_rate": 8.668062364291639e-07, |
|
"loss": 0.73, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.1949685534591195, |
|
"grad_norm": 13.120650291442871, |
|
"learning_rate": 8.655831888596023e-07, |
|
"loss": 0.7204, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.2006861063464838, |
|
"grad_norm": 11.917327880859375, |
|
"learning_rate": 8.643554229880676e-07, |
|
"loss": 0.7479, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.2064036592338478, |
|
"grad_norm": 12.158246994018555, |
|
"learning_rate": 8.631229546603928e-07, |
|
"loss": 0.7578, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"grad_norm": 11.658825874328613, |
|
"learning_rate": 8.618857997831021e-07, |
|
"loss": 0.7443, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.2178387650085762, |
|
"grad_norm": 14.301033973693848, |
|
"learning_rate": 8.606439743232047e-07, |
|
"loss": 0.7212, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.2235563178959405, |
|
"grad_norm": 17.928747177124023, |
|
"learning_rate": 8.593974943079903e-07, |
|
"loss": 0.7565, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.2292738707833046, |
|
"grad_norm": 15.283846855163574, |
|
"learning_rate": 8.581463758248206e-07, |
|
"loss": 0.7541, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.234991423670669, |
|
"grad_norm": 14.58877182006836, |
|
"learning_rate": 8.56890635020923e-07, |
|
"loss": 0.7422, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.2407089765580332, |
|
"grad_norm": 12.548053741455078, |
|
"learning_rate": 8.556302881031813e-07, |
|
"loss": 0.7326, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.2464265294453973, |
|
"grad_norm": 17.030057907104492, |
|
"learning_rate": 8.543653513379272e-07, |
|
"loss": 0.7404, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.2521440823327616, |
|
"grad_norm": 12.616168975830078, |
|
"learning_rate": 8.530958410507296e-07, |
|
"loss": 0.7262, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.2578616352201257, |
|
"grad_norm": 12.804983139038086, |
|
"learning_rate": 8.518217736261848e-07, |
|
"loss": 0.7273, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.26357918810749, |
|
"grad_norm": 16.434965133666992, |
|
"learning_rate": 8.505431655077045e-07, |
|
"loss": 0.7326, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.2692967409948541, |
|
"grad_norm": 15.001092910766602, |
|
"learning_rate": 8.492600331973032e-07, |
|
"loss": 0.7413, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.2750142938822184, |
|
"grad_norm": 12.592192649841309, |
|
"learning_rate": 8.479723932553864e-07, |
|
"loss": 0.7229, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.2807318467695827, |
|
"grad_norm": 12.405608177185059, |
|
"learning_rate": 8.466802623005354e-07, |
|
"loss": 0.7575, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.2864493996569468, |
|
"grad_norm": 13.2118501663208, |
|
"learning_rate": 8.453836570092941e-07, |
|
"loss": 0.7368, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.292166952544311, |
|
"grad_norm": 14.132501602172852, |
|
"learning_rate": 8.44082594115953e-07, |
|
"loss": 0.7418, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.2978845054316752, |
|
"grad_norm": 12.394464492797852, |
|
"learning_rate": 8.427770904123336e-07, |
|
"loss": 0.7769, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.3036020583190395, |
|
"grad_norm": 14.223743438720703, |
|
"learning_rate": 8.414671627475716e-07, |
|
"loss": 0.7402, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.3093196112064036, |
|
"grad_norm": 11.712512969970703, |
|
"learning_rate": 8.401528280278987e-07, |
|
"loss": 0.7132, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.315037164093768, |
|
"grad_norm": 14.974958419799805, |
|
"learning_rate": 8.388341032164261e-07, |
|
"loss": 0.7513, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.320754716981132, |
|
"grad_norm": 12.907052993774414, |
|
"learning_rate": 8.375110053329239e-07, |
|
"loss": 0.7286, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.3264722698684963, |
|
"grad_norm": 14.681177139282227, |
|
"learning_rate": 8.36183551453602e-07, |
|
"loss": 0.7272, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.3321898227558604, |
|
"grad_norm": 14.21299934387207, |
|
"learning_rate": 8.348517587108904e-07, |
|
"loss": 0.7421, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.3379073756432247, |
|
"grad_norm": 14.745231628417969, |
|
"learning_rate": 8.335156442932167e-07, |
|
"loss": 0.7193, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.343624928530589, |
|
"grad_norm": 13.499730110168457, |
|
"learning_rate": 8.321752254447858e-07, |
|
"loss": 0.734, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.349342481417953, |
|
"grad_norm": 14.754764556884766, |
|
"learning_rate": 8.308305194653562e-07, |
|
"loss": 0.7312, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.3550600343053174, |
|
"grad_norm": 13.786625862121582, |
|
"learning_rate": 8.294815437100171e-07, |
|
"loss": 0.7445, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.3607775871926815, |
|
"grad_norm": 13.24636459350586, |
|
"learning_rate": 8.281283155889643e-07, |
|
"loss": 0.753, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.3664951400800458, |
|
"grad_norm": 18.36018943786621, |
|
"learning_rate": 8.267708525672763e-07, |
|
"loss": 0.7211, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.3722126929674099, |
|
"grad_norm": 13.684020042419434, |
|
"learning_rate": 8.254091721646872e-07, |
|
"loss": 0.7305, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.3779302458547742, |
|
"grad_norm": 14.666474342346191, |
|
"learning_rate": 8.240432919553624e-07, |
|
"loss": 0.7094, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.3836477987421385, |
|
"grad_norm": 14.115777015686035, |
|
"learning_rate": 8.226732295676708e-07, |
|
"loss": 0.7347, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.3893653516295026, |
|
"grad_norm": 13.236510276794434, |
|
"learning_rate": 8.212990026839571e-07, |
|
"loss": 0.7294, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.3950829045168667, |
|
"grad_norm": 14.913257598876953, |
|
"learning_rate": 8.199206290403146e-07, |
|
"loss": 0.7548, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.400800457404231, |
|
"grad_norm": 13.897913932800293, |
|
"learning_rate": 8.185381264263549e-07, |
|
"loss": 0.7123, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.4065180102915953, |
|
"grad_norm": 11.049424171447754, |
|
"learning_rate": 8.171515126849797e-07, |
|
"loss": 0.7351, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.4122355631789594, |
|
"grad_norm": 14.041512489318848, |
|
"learning_rate": 8.157608057121499e-07, |
|
"loss": 0.7156, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.4179531160663237, |
|
"grad_norm": 14.78030014038086, |
|
"learning_rate": 8.143660234566537e-07, |
|
"loss": 0.7234, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.4236706689536878, |
|
"grad_norm": 14.697408676147461, |
|
"learning_rate": 8.129671839198769e-07, |
|
"loss": 0.7369, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.429388221841052, |
|
"grad_norm": 19.855506896972656, |
|
"learning_rate": 8.115643051555687e-07, |
|
"loss": 0.7242, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.4351057747284162, |
|
"grad_norm": 12.898676872253418, |
|
"learning_rate": 8.101574052696105e-07, |
|
"loss": 0.7337, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.4408233276157805, |
|
"grad_norm": 17.080387115478516, |
|
"learning_rate": 8.087465024197801e-07, |
|
"loss": 0.7563, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.4465408805031448, |
|
"grad_norm": 14.034418106079102, |
|
"learning_rate": 8.073316148155194e-07, |
|
"loss": 0.7053, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.4522584333905089, |
|
"grad_norm": 12.006173133850098, |
|
"learning_rate": 8.059127607176979e-07, |
|
"loss": 0.7416, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.457975986277873, |
|
"grad_norm": 13.194514274597168, |
|
"learning_rate": 8.044899584383776e-07, |
|
"loss": 0.7358, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.4636935391652373, |
|
"grad_norm": 13.047014236450195, |
|
"learning_rate": 8.030632263405772e-07, |
|
"loss": 0.7316, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.4694110920526016, |
|
"grad_norm": 16.628822326660156, |
|
"learning_rate": 8.016325828380342e-07, |
|
"loss": 0.7257, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.4751286449399656, |
|
"grad_norm": 13.5772705078125, |
|
"learning_rate": 8.001980463949672e-07, |
|
"loss": 0.7531, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.48084619782733, |
|
"grad_norm": 11.860750198364258, |
|
"learning_rate": 7.987596355258388e-07, |
|
"loss": 0.7302, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.486563750714694, |
|
"grad_norm": 17.01180648803711, |
|
"learning_rate": 7.973173687951151e-07, |
|
"loss": 0.7251, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.4922813036020584, |
|
"grad_norm": 14.781421661376953, |
|
"learning_rate": 7.958712648170276e-07, |
|
"loss": 0.7333, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.4979988564894224, |
|
"grad_norm": 13.350909233093262, |
|
"learning_rate": 7.944213422553315e-07, |
|
"loss": 0.7062, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.5037164093767867, |
|
"grad_norm": 15.630850791931152, |
|
"learning_rate": 7.92967619823066e-07, |
|
"loss": 0.7294, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.509433962264151, |
|
"grad_norm": 16.793577194213867, |
|
"learning_rate": 7.915101162823119e-07, |
|
"loss": 0.7447, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 16.42220115661621, |
|
"learning_rate": 7.900488504439504e-07, |
|
"loss": 0.7164, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.5208690680388792, |
|
"grad_norm": 15.00322437286377, |
|
"learning_rate": 7.885838411674192e-07, |
|
"loss": 0.7303, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.5265866209262435, |
|
"grad_norm": 11.874865531921387, |
|
"learning_rate": 7.871151073604704e-07, |
|
"loss": 0.739, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.5323041738136078, |
|
"grad_norm": 14.509276390075684, |
|
"learning_rate": 7.856426679789252e-07, |
|
"loss": 0.7344, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.538021726700972, |
|
"grad_norm": 14.377605438232422, |
|
"learning_rate": 7.841665420264299e-07, |
|
"loss": 0.7339, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.5437392795883362, |
|
"grad_norm": 13.096054077148438, |
|
"learning_rate": 7.826867485542106e-07, |
|
"loss": 0.7146, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.5494568324757005, |
|
"grad_norm": 13.225566864013672, |
|
"learning_rate": 7.812033066608272e-07, |
|
"loss": 0.7201, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.5551743853630646, |
|
"grad_norm": 13.55895709991455, |
|
"learning_rate": 7.797162354919272e-07, |
|
"loss": 0.7703, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.5608919382504287, |
|
"grad_norm": 13.646178245544434, |
|
"learning_rate": 7.782255542399983e-07, |
|
"loss": 0.7374, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.566609491137793, |
|
"grad_norm": 14.485557556152344, |
|
"learning_rate": 7.767312821441205e-07, |
|
"loss": 0.7473, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.5723270440251573, |
|
"grad_norm": 13.934147834777832, |
|
"learning_rate": 7.752334384897185e-07, |
|
"loss": 0.7222, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.5780445969125214, |
|
"grad_norm": 17.379066467285156, |
|
"learning_rate": 7.737320426083118e-07, |
|
"loss": 0.7526, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.5837621497998855, |
|
"grad_norm": 14.709135055541992, |
|
"learning_rate": 7.722271138772665e-07, |
|
"loss": 0.7433, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.5894797026872498, |
|
"grad_norm": 14.56612491607666, |
|
"learning_rate": 7.70718671719544e-07, |
|
"loss": 0.731, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.5951972555746141, |
|
"grad_norm": 12.300278663635254, |
|
"learning_rate": 7.692067356034506e-07, |
|
"loss": 0.724, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.6009148084619782, |
|
"grad_norm": 14.767439842224121, |
|
"learning_rate": 7.676913250423873e-07, |
|
"loss": 0.7301, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.6066323613493425, |
|
"grad_norm": 13.967864990234375, |
|
"learning_rate": 7.66172459594596e-07, |
|
"loss": 0.7166, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.6123499142367068, |
|
"grad_norm": 13.520434379577637, |
|
"learning_rate": 7.64650158862909e-07, |
|
"loss": 0.7041, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.618067467124071, |
|
"grad_norm": 15.095074653625488, |
|
"learning_rate": 7.631244424944948e-07, |
|
"loss": 0.7034, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.623785020011435, |
|
"grad_norm": 15.181931495666504, |
|
"learning_rate": 7.615953301806048e-07, |
|
"loss": 0.719, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.6295025728987993, |
|
"grad_norm": 14.636883735656738, |
|
"learning_rate": 7.600628416563199e-07, |
|
"loss": 0.7329, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.6352201257861636, |
|
"grad_norm": 13.246421813964844, |
|
"learning_rate": 7.585269967002946e-07, |
|
"loss": 0.7387, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.6409376786735277, |
|
"grad_norm": 13.783883094787598, |
|
"learning_rate": 7.56987815134502e-07, |
|
"loss": 0.7445, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.6466552315608918, |
|
"grad_norm": 13.172192573547363, |
|
"learning_rate": 7.554453168239793e-07, |
|
"loss": 0.7222, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.6523727844482563, |
|
"grad_norm": 12.598628997802734, |
|
"learning_rate": 7.538995216765693e-07, |
|
"loss": 0.7329, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.6580903373356204, |
|
"grad_norm": 13.531826972961426, |
|
"learning_rate": 7.523504496426651e-07, |
|
"loss": 0.7389, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.6638078902229845, |
|
"grad_norm": 12.720640182495117, |
|
"learning_rate": 7.507981207149523e-07, |
|
"loss": 0.7135, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.6695254431103488, |
|
"grad_norm": 16.165489196777344, |
|
"learning_rate": 7.492425549281499e-07, |
|
"loss": 0.7352, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.675242995997713, |
|
"grad_norm": 15.355998992919922, |
|
"learning_rate": 7.476837723587532e-07, |
|
"loss": 0.7273, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.6809605488850772, |
|
"grad_norm": 15.340593338012695, |
|
"learning_rate": 7.461217931247741e-07, |
|
"loss": 0.7118, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.6866781017724413, |
|
"grad_norm": 11.760583877563477, |
|
"learning_rate": 7.445566373854812e-07, |
|
"loss": 0.7161, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.6923956546598056, |
|
"grad_norm": 14.857872009277344, |
|
"learning_rate": 7.429883253411395e-07, |
|
"loss": 0.7319, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.6981132075471699, |
|
"grad_norm": 13.463951110839844, |
|
"learning_rate": 7.414168772327507e-07, |
|
"loss": 0.7473, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.703830760434534, |
|
"grad_norm": 14.559701919555664, |
|
"learning_rate": 7.398423133417906e-07, |
|
"loss": 0.7039, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.7095483133218983, |
|
"grad_norm": 17.633832931518555, |
|
"learning_rate": 7.382646539899487e-07, |
|
"loss": 0.7348, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.7152658662092626, |
|
"grad_norm": 21.451581954956055, |
|
"learning_rate": 7.366839195388643e-07, |
|
"loss": 0.7153, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.7209834190966267, |
|
"grad_norm": 15.963432312011719, |
|
"learning_rate": 7.351001303898658e-07, |
|
"loss": 0.7109, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.7267009719839908, |
|
"grad_norm": 16.46929168701172, |
|
"learning_rate": 7.335133069837053e-07, |
|
"loss": 0.723, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.732418524871355, |
|
"grad_norm": 14.31004524230957, |
|
"learning_rate": 7.319234698002963e-07, |
|
"loss": 0.7476, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.7381360777587194, |
|
"grad_norm": 15.023375511169434, |
|
"learning_rate": 7.303306393584486e-07, |
|
"loss": 0.7405, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.7438536306460835, |
|
"grad_norm": 13.826839447021484, |
|
"learning_rate": 7.287348362156034e-07, |
|
"loss": 0.7251, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.7495711835334475, |
|
"grad_norm": 14.537930488586426, |
|
"learning_rate": 7.271360809675688e-07, |
|
"loss": 0.731, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.7552887364208118, |
|
"grad_norm": 15.455423355102539, |
|
"learning_rate": 7.255343942482534e-07, |
|
"loss": 0.7238, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.7610062893081762, |
|
"grad_norm": 13.903472900390625, |
|
"learning_rate": 7.239297967293995e-07, |
|
"loss": 0.7282, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.7667238421955402, |
|
"grad_norm": 11.774541854858398, |
|
"learning_rate": 7.223223091203174e-07, |
|
"loss": 0.7391, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.7724413950829045, |
|
"grad_norm": 16.479053497314453, |
|
"learning_rate": 7.207119521676173e-07, |
|
"loss": 0.7151, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.7781589479702689, |
|
"grad_norm": 12.094974517822266, |
|
"learning_rate": 7.190987466549423e-07, |
|
"loss": 0.7351, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.783876500857633, |
|
"grad_norm": 16.851329803466797, |
|
"learning_rate": 7.17482713402699e-07, |
|
"loss": 0.7326, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.789594053744997, |
|
"grad_norm": 15.386981964111328, |
|
"learning_rate": 7.158638732677898e-07, |
|
"loss": 0.7278, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.7953116066323613, |
|
"grad_norm": 21.271484375, |
|
"learning_rate": 7.142422471433435e-07, |
|
"loss": 0.7202, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.8010291595197256, |
|
"grad_norm": 14.556968688964844, |
|
"learning_rate": 7.126178559584453e-07, |
|
"loss": 0.7188, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.8067467124070897, |
|
"grad_norm": 14.779976844787598, |
|
"learning_rate": 7.109907206778672e-07, |
|
"loss": 0.6977, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.8124642652944538, |
|
"grad_norm": 14.60632610321045, |
|
"learning_rate": 7.093608623017965e-07, |
|
"loss": 0.7082, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 11.756601333618164, |
|
"learning_rate": 7.077283018655662e-07, |
|
"loss": 0.7348, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.8238993710691824, |
|
"grad_norm": 14.042952537536621, |
|
"learning_rate": 7.060930604393825e-07, |
|
"loss": 0.7188, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.8296169239565465, |
|
"grad_norm": 12.664731979370117, |
|
"learning_rate": 7.044551591280525e-07, |
|
"loss": 0.7124, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.8353344768439108, |
|
"grad_norm": 12.528229713439941, |
|
"learning_rate": 7.028146190707131e-07, |
|
"loss": 0.715, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.8410520297312751, |
|
"grad_norm": 17.832958221435547, |
|
"learning_rate": 7.011714614405576e-07, |
|
"loss": 0.7424, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.8467695826186392, |
|
"grad_norm": 14.167093276977539, |
|
"learning_rate": 6.995257074445614e-07, |
|
"loss": 0.6857, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.8524871355060033, |
|
"grad_norm": 14.986869812011719, |
|
"learning_rate": 6.978773783232099e-07, |
|
"loss": 0.7286, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.8582046883933676, |
|
"grad_norm": 13.462069511413574, |
|
"learning_rate": 6.962264953502237e-07, |
|
"loss": 0.7278, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.863922241280732, |
|
"grad_norm": 12.811728477478027, |
|
"learning_rate": 6.945730798322835e-07, |
|
"loss": 0.725, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.869639794168096, |
|
"grad_norm": 15.677468299865723, |
|
"learning_rate": 6.929171531087561e-07, |
|
"loss": 0.7358, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.8753573470554603, |
|
"grad_norm": 15.89470386505127, |
|
"learning_rate": 6.912587365514182e-07, |
|
"loss": 0.7404, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.8810748999428246, |
|
"grad_norm": 13.49212646484375, |
|
"learning_rate": 6.895978515641814e-07, |
|
"loss": 0.6961, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 1.8867924528301887, |
|
"grad_norm": 15.530927658081055, |
|
"learning_rate": 6.879345195828145e-07, |
|
"loss": 0.72, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.8925100057175528, |
|
"grad_norm": 15.449943542480469, |
|
"learning_rate": 6.862687620746688e-07, |
|
"loss": 0.7107, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 1.898227558604917, |
|
"grad_norm": 12.777170181274414, |
|
"learning_rate": 6.846006005383992e-07, |
|
"loss": 0.7049, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 1.9039451114922814, |
|
"grad_norm": 15.19930362701416, |
|
"learning_rate": 6.829300565036882e-07, |
|
"loss": 0.6905, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.9096626643796455, |
|
"grad_norm": 14.737263679504395, |
|
"learning_rate": 6.812571515309667e-07, |
|
"loss": 0.7479, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.9153802172670096, |
|
"grad_norm": 17.35552215576172, |
|
"learning_rate": 6.795819072111369e-07, |
|
"loss": 0.7247, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.9210977701543739, |
|
"grad_norm": 15.307523727416992, |
|
"learning_rate": 6.779043451652932e-07, |
|
"loss": 0.7392, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.9268153230417382, |
|
"grad_norm": 12.777303695678711, |
|
"learning_rate": 6.762244870444427e-07, |
|
"loss": 0.7179, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.9325328759291023, |
|
"grad_norm": 13.537027359008789, |
|
"learning_rate": 6.745423545292267e-07, |
|
"loss": 0.7123, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.9382504288164666, |
|
"grad_norm": 14.208198547363281, |
|
"learning_rate": 6.7285796932964e-07, |
|
"loss": 0.7474, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.943967981703831, |
|
"grad_norm": 15.080704689025879, |
|
"learning_rate": 6.711713531847512e-07, |
|
"loss": 0.7285, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.949685534591195, |
|
"grad_norm": 15.996607780456543, |
|
"learning_rate": 6.694825278624219e-07, |
|
"loss": 0.7365, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.955403087478559, |
|
"grad_norm": 19.516117095947266, |
|
"learning_rate": 6.677915151590259e-07, |
|
"loss": 0.7181, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.9611206403659234, |
|
"grad_norm": 13.266532897949219, |
|
"learning_rate": 6.660983368991679e-07, |
|
"loss": 0.7201, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.9668381932532877, |
|
"grad_norm": 15.131275177001953, |
|
"learning_rate": 6.644030149354017e-07, |
|
"loss": 0.7209, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.9725557461406518, |
|
"grad_norm": 13.388816833496094, |
|
"learning_rate": 6.627055711479486e-07, |
|
"loss": 0.7382, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.9782732990280159, |
|
"grad_norm": 13.799139976501465, |
|
"learning_rate": 6.61006027444414e-07, |
|
"loss": 0.707, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.9839908519153804, |
|
"grad_norm": 13.762521743774414, |
|
"learning_rate": 6.593044057595059e-07, |
|
"loss": 0.7365, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.9897084048027445, |
|
"grad_norm": 15.104240417480469, |
|
"learning_rate": 6.576007280547509e-07, |
|
"loss": 0.7561, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.9954259576901086, |
|
"grad_norm": 19.18963623046875, |
|
"learning_rate": 6.558950163182111e-07, |
|
"loss": 0.7286, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 2.0011435105774726, |
|
"grad_norm": 13.76323413848877, |
|
"learning_rate": 6.541872925642004e-07, |
|
"loss": 0.7234, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.006861063464837, |
|
"grad_norm": 16.3411808013916, |
|
"learning_rate": 6.52477578833e-07, |
|
"loss": 0.7251, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 2.0125786163522013, |
|
"grad_norm": 13.213664054870605, |
|
"learning_rate": 6.507658971905746e-07, |
|
"loss": 0.7245, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 2.0182961692395653, |
|
"grad_norm": 14.91565227508545, |
|
"learning_rate": 6.490522697282872e-07, |
|
"loss": 0.6989, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 2.02401372212693, |
|
"grad_norm": 16.91631507873535, |
|
"learning_rate": 6.473367185626134e-07, |
|
"loss": 0.7126, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 2.029731275014294, |
|
"grad_norm": 16.931249618530273, |
|
"learning_rate": 6.456192658348573e-07, |
|
"loss": 0.6899, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.035448827901658, |
|
"grad_norm": 15.521553993225098, |
|
"learning_rate": 6.438999337108647e-07, |
|
"loss": 0.7277, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 2.041166380789022, |
|
"grad_norm": 17.19545555114746, |
|
"learning_rate": 6.421787443807371e-07, |
|
"loss": 0.7121, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 2.0468839336763867, |
|
"grad_norm": 18.010568618774414, |
|
"learning_rate": 6.404557200585463e-07, |
|
"loss": 0.6942, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 2.0526014865637507, |
|
"grad_norm": 15.198273658752441, |
|
"learning_rate": 6.387308829820459e-07, |
|
"loss": 0.7072, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 2.058319039451115, |
|
"grad_norm": 14.581058502197266, |
|
"learning_rate": 6.370042554123859e-07, |
|
"loss": 0.681, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.0640365923384794, |
|
"grad_norm": 13.858463287353516, |
|
"learning_rate": 6.352758596338249e-07, |
|
"loss": 0.7045, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 2.0697541452258434, |
|
"grad_norm": 15.719825744628906, |
|
"learning_rate": 6.335457179534422e-07, |
|
"loss": 0.7093, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 2.0754716981132075, |
|
"grad_norm": 16.084144592285156, |
|
"learning_rate": 6.318138527008503e-07, |
|
"loss": 0.703, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 2.0811892510005716, |
|
"grad_norm": 15.696854591369629, |
|
"learning_rate": 6.300802862279063e-07, |
|
"loss": 0.7312, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 2.086906803887936, |
|
"grad_norm": 15.007280349731445, |
|
"learning_rate": 6.283450409084237e-07, |
|
"loss": 0.7069, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.0926243567753002, |
|
"grad_norm": 18.342792510986328, |
|
"learning_rate": 6.266081391378838e-07, |
|
"loss": 0.6863, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 2.0983419096626643, |
|
"grad_norm": 16.63268280029297, |
|
"learning_rate": 6.248696033331463e-07, |
|
"loss": 0.6961, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 2.1040594625500284, |
|
"grad_norm": 15.034991264343262, |
|
"learning_rate": 6.231294559321599e-07, |
|
"loss": 0.7175, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 2.109777015437393, |
|
"grad_norm": 17.893192291259766, |
|
"learning_rate": 6.213877193936734e-07, |
|
"loss": 0.7064, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 2.115494568324757, |
|
"grad_norm": 22.963558197021484, |
|
"learning_rate": 6.19644416196945e-07, |
|
"loss": 0.7146, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.121212121212121, |
|
"grad_norm": 16.66696548461914, |
|
"learning_rate": 6.178995688414529e-07, |
|
"loss": 0.7199, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 2.126929674099485, |
|
"grad_norm": 17.46118927001953, |
|
"learning_rate": 6.161531998466041e-07, |
|
"loss": 0.6935, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 2.1326472269868497, |
|
"grad_norm": 15.085572242736816, |
|
"learning_rate": 6.144053317514446e-07, |
|
"loss": 0.6958, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 2.138364779874214, |
|
"grad_norm": 12.658440589904785, |
|
"learning_rate": 6.126559871143681e-07, |
|
"loss": 0.7337, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 2.144082332761578, |
|
"grad_norm": 14.13125228881836, |
|
"learning_rate": 6.109051885128248e-07, |
|
"loss": 0.7458, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.1497998856489424, |
|
"grad_norm": 13.70114517211914, |
|
"learning_rate": 6.091529585430301e-07, |
|
"loss": 0.6969, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 2.1555174385363065, |
|
"grad_norm": 14.79975414276123, |
|
"learning_rate": 6.07399319819673e-07, |
|
"loss": 0.7079, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 2.1612349914236706, |
|
"grad_norm": 16.53200912475586, |
|
"learning_rate": 6.056442949756242e-07, |
|
"loss": 0.693, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 2.1669525443110347, |
|
"grad_norm": 16.908708572387695, |
|
"learning_rate": 6.038879066616441e-07, |
|
"loss": 0.7188, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 2.172670097198399, |
|
"grad_norm": 14.79818058013916, |
|
"learning_rate": 6.021301775460903e-07, |
|
"loss": 0.6782, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.1783876500857633, |
|
"grad_norm": 16.381088256835938, |
|
"learning_rate": 6.003711303146249e-07, |
|
"loss": 0.729, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 2.1841052029731274, |
|
"grad_norm": 14.344415664672852, |
|
"learning_rate": 5.986107876699221e-07, |
|
"loss": 0.707, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 2.189822755860492, |
|
"grad_norm": 14.784208297729492, |
|
"learning_rate": 5.968491723313753e-07, |
|
"loss": 0.6842, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 2.195540308747856, |
|
"grad_norm": 14.890427589416504, |
|
"learning_rate": 5.950863070348029e-07, |
|
"loss": 0.6967, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 2.20125786163522, |
|
"grad_norm": 17.429248809814453, |
|
"learning_rate": 5.933222145321561e-07, |
|
"loss": 0.701, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.206975414522584, |
|
"grad_norm": 16.934354782104492, |
|
"learning_rate": 5.915569175912244e-07, |
|
"loss": 0.7107, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 2.2126929674099487, |
|
"grad_norm": 16.012706756591797, |
|
"learning_rate": 5.897904389953423e-07, |
|
"loss": 0.7052, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 2.218410520297313, |
|
"grad_norm": 14.395899772644043, |
|
"learning_rate": 5.880228015430948e-07, |
|
"loss": 0.7291, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 2.224128073184677, |
|
"grad_norm": 15.9004545211792, |
|
"learning_rate": 5.862540280480237e-07, |
|
"loss": 0.7147, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 2.2298456260720414, |
|
"grad_norm": 16.330738067626953, |
|
"learning_rate": 5.844841413383324e-07, |
|
"loss": 0.7343, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.2355631789594055, |
|
"grad_norm": 15.460098266601562, |
|
"learning_rate": 5.82713164256592e-07, |
|
"loss": 0.7122, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 2.2412807318467696, |
|
"grad_norm": 16.06856918334961, |
|
"learning_rate": 5.809411196594462e-07, |
|
"loss": 0.7078, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 2.2469982847341337, |
|
"grad_norm": 14.644042015075684, |
|
"learning_rate": 5.791680304173158e-07, |
|
"loss": 0.7013, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 2.252715837621498, |
|
"grad_norm": 14.263148307800293, |
|
"learning_rate": 5.773939194141051e-07, |
|
"loss": 0.7021, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 2.2584333905088623, |
|
"grad_norm": 14.338480949401855, |
|
"learning_rate": 5.756188095469043e-07, |
|
"loss": 0.7025, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.2641509433962264, |
|
"grad_norm": 15.25626277923584, |
|
"learning_rate": 5.738427237256959e-07, |
|
"loss": 0.6988, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 2.2698684962835904, |
|
"grad_norm": 13.902752876281738, |
|
"learning_rate": 5.720656848730582e-07, |
|
"loss": 0.7105, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 2.275586049170955, |
|
"grad_norm": 16.74924659729004, |
|
"learning_rate": 5.702877159238692e-07, |
|
"loss": 0.7226, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 2.281303602058319, |
|
"grad_norm": 17.56850814819336, |
|
"learning_rate": 5.685088398250113e-07, |
|
"loss": 0.7081, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 2.287021154945683, |
|
"grad_norm": 13.788533210754395, |
|
"learning_rate": 5.66729079535075e-07, |
|
"loss": 0.7198, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.2927387078330472, |
|
"grad_norm": 15.556499481201172, |
|
"learning_rate": 5.649484580240616e-07, |
|
"loss": 0.7314, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 2.2984562607204118, |
|
"grad_norm": 15.630646705627441, |
|
"learning_rate": 5.63166998273088e-07, |
|
"loss": 0.6885, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 2.304173813607776, |
|
"grad_norm": 17.20891571044922, |
|
"learning_rate": 5.613847232740897e-07, |
|
"loss": 0.6971, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 2.30989136649514, |
|
"grad_norm": 21.685649871826172, |
|
"learning_rate": 5.596016560295241e-07, |
|
"loss": 0.6983, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 2.3156089193825045, |
|
"grad_norm": 15.817377090454102, |
|
"learning_rate": 5.578178195520728e-07, |
|
"loss": 0.7057, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.3213264722698685, |
|
"grad_norm": 15.319486618041992, |
|
"learning_rate": 5.560332368643462e-07, |
|
"loss": 0.6795, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 2.3270440251572326, |
|
"grad_norm": 14.907487869262695, |
|
"learning_rate": 5.542479309985849e-07, |
|
"loss": 0.6917, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 2.3327615780445967, |
|
"grad_norm": 16.08393096923828, |
|
"learning_rate": 5.52461924996363e-07, |
|
"loss": 0.677, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 2.3384791309319612, |
|
"grad_norm": 15.164365768432617, |
|
"learning_rate": 5.506752419082911e-07, |
|
"loss": 0.6969, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 2.3441966838193253, |
|
"grad_norm": 16.06779670715332, |
|
"learning_rate": 5.48887904793718e-07, |
|
"loss": 0.7075, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.3499142367066894, |
|
"grad_norm": 16.33745002746582, |
|
"learning_rate": 5.470999367204338e-07, |
|
"loss": 0.7135, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 2.355631789594054, |
|
"grad_norm": 17.381423950195312, |
|
"learning_rate": 5.453113607643719e-07, |
|
"loss": 0.7374, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 2.361349342481418, |
|
"grad_norm": 14.613143920898438, |
|
"learning_rate": 5.435222000093109e-07, |
|
"loss": 0.7104, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 2.367066895368782, |
|
"grad_norm": 14.394537925720215, |
|
"learning_rate": 5.417324775465773e-07, |
|
"loss": 0.7123, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 2.372784448256146, |
|
"grad_norm": 14.066143035888672, |
|
"learning_rate": 5.399422164747469e-07, |
|
"loss": 0.7207, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.3785020011435107, |
|
"grad_norm": 16.41046714782715, |
|
"learning_rate": 5.381514398993471e-07, |
|
"loss": 0.7217, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 2.384219554030875, |
|
"grad_norm": 17.5543270111084, |
|
"learning_rate": 5.363601709325584e-07, |
|
"loss": 0.7061, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 2.389937106918239, |
|
"grad_norm": 16.649717330932617, |
|
"learning_rate": 5.345684326929159e-07, |
|
"loss": 0.718, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 2.3956546598056034, |
|
"grad_norm": 17.401193618774414, |
|
"learning_rate": 5.327762483050121e-07, |
|
"loss": 0.726, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 2.4013722126929675, |
|
"grad_norm": 17.420589447021484, |
|
"learning_rate": 5.309836408991965e-07, |
|
"loss": 0.714, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.4070897655803316, |
|
"grad_norm": 17.177000045776367, |
|
"learning_rate": 5.291906336112793e-07, |
|
"loss": 0.7106, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 2.4128073184676957, |
|
"grad_norm": 14.774796485900879, |
|
"learning_rate": 5.273972495822304e-07, |
|
"loss": 0.7013, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 2.4185248713550602, |
|
"grad_norm": 16.312162399291992, |
|
"learning_rate": 5.256035119578833e-07, |
|
"loss": 0.7074, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 2.4242424242424243, |
|
"grad_norm": 17.404407501220703, |
|
"learning_rate": 5.238094438886344e-07, |
|
"loss": 0.6916, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 2.4299599771297884, |
|
"grad_norm": 15.71784782409668, |
|
"learning_rate": 5.220150685291445e-07, |
|
"loss": 0.6817, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.4356775300171525, |
|
"grad_norm": 15.496809005737305, |
|
"learning_rate": 5.202204090380416e-07, |
|
"loss": 0.7052, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 2.441395082904517, |
|
"grad_norm": 13.677003860473633, |
|
"learning_rate": 5.184254885776195e-07, |
|
"loss": 0.7252, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 2.447112635791881, |
|
"grad_norm": 15.640375137329102, |
|
"learning_rate": 5.166303303135408e-07, |
|
"loss": 0.6826, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 2.452830188679245, |
|
"grad_norm": 18.025440216064453, |
|
"learning_rate": 5.14834957414537e-07, |
|
"loss": 0.6858, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 2.4585477415666093, |
|
"grad_norm": 13.545587539672852, |
|
"learning_rate": 5.1303939305211e-07, |
|
"loss": 0.6925, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.464265294453974, |
|
"grad_norm": 16.854537963867188, |
|
"learning_rate": 5.112436604002324e-07, |
|
"loss": 0.7173, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 2.469982847341338, |
|
"grad_norm": 15.970281600952148, |
|
"learning_rate": 5.094477826350491e-07, |
|
"loss": 0.7095, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 2.475700400228702, |
|
"grad_norm": 15.472623825073242, |
|
"learning_rate": 5.076517829345777e-07, |
|
"loss": 0.7186, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 2.4814179531160665, |
|
"grad_norm": 19.989017486572266, |
|
"learning_rate": 5.058556844784098e-07, |
|
"loss": 0.6928, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 2.4871355060034306, |
|
"grad_norm": 19.59756851196289, |
|
"learning_rate": 5.04059510447411e-07, |
|
"loss": 0.709, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.4928530588907947, |
|
"grad_norm": 16.710494995117188, |
|
"learning_rate": 5.022632840234227e-07, |
|
"loss": 0.7031, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 2.4985706117781588, |
|
"grad_norm": 16.303966522216797, |
|
"learning_rate": 5.004670283889626e-07, |
|
"loss": 0.6951, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 2.5042881646655233, |
|
"grad_norm": 16.28445053100586, |
|
"learning_rate": 4.986707667269252e-07, |
|
"loss": 0.7017, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 2.5100057175528874, |
|
"grad_norm": 16.038339614868164, |
|
"learning_rate": 4.968745222202824e-07, |
|
"loss": 0.7348, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 2.5157232704402515, |
|
"grad_norm": 15.450057983398438, |
|
"learning_rate": 4.950783180517855e-07, |
|
"loss": 0.696, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.521440823327616, |
|
"grad_norm": 15.47207260131836, |
|
"learning_rate": 4.932821774036647e-07, |
|
"loss": 0.7014, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 2.52715837621498, |
|
"grad_norm": 18.941865921020508, |
|
"learning_rate": 4.914861234573305e-07, |
|
"loss": 0.6922, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 2.532875929102344, |
|
"grad_norm": 17.57488441467285, |
|
"learning_rate": 4.896901793930745e-07, |
|
"loss": 0.7082, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 2.5385934819897082, |
|
"grad_norm": 15.198995590209961, |
|
"learning_rate": 4.878943683897696e-07, |
|
"loss": 0.7016, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 2.5443110348770728, |
|
"grad_norm": 18.978601455688477, |
|
"learning_rate": 4.860987136245723e-07, |
|
"loss": 0.708, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.550028587764437, |
|
"grad_norm": 15.620323181152344, |
|
"learning_rate": 4.843032382726217e-07, |
|
"loss": 0.6936, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 2.555746140651801, |
|
"grad_norm": 15.971785545349121, |
|
"learning_rate": 4.82507965506742e-07, |
|
"loss": 0.7204, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 2.5614636935391655, |
|
"grad_norm": 17.761568069458008, |
|
"learning_rate": 4.807129184971428e-07, |
|
"loss": 0.6965, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 2.5671812464265296, |
|
"grad_norm": 16.433975219726562, |
|
"learning_rate": 4.789181204111195e-07, |
|
"loss": 0.6959, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 2.5728987993138936, |
|
"grad_norm": 14.489997863769531, |
|
"learning_rate": 4.771235944127554e-07, |
|
"loss": 0.6675, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.5786163522012577, |
|
"grad_norm": 17.199810028076172, |
|
"learning_rate": 4.753293636626217e-07, |
|
"loss": 0.6907, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 2.584333905088622, |
|
"grad_norm": 14.820212364196777, |
|
"learning_rate": 4.735354513174794e-07, |
|
"loss": 0.6955, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 2.5900514579759863, |
|
"grad_norm": 15.466897964477539, |
|
"learning_rate": 4.717418805299801e-07, |
|
"loss": 0.727, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 2.5957690108633504, |
|
"grad_norm": 13.702413558959961, |
|
"learning_rate": 4.6994867444836684e-07, |
|
"loss": 0.6923, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 2.601486563750715, |
|
"grad_norm": 16.696453094482422, |
|
"learning_rate": 4.681558562161759e-07, |
|
"loss": 0.7204, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.607204116638079, |
|
"grad_norm": 12.730810165405273, |
|
"learning_rate": 4.6636344897193824e-07, |
|
"loss": 0.7209, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 2.612921669525443, |
|
"grad_norm": 18.060680389404297, |
|
"learning_rate": 4.645714758488797e-07, |
|
"loss": 0.6942, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 2.618639222412807, |
|
"grad_norm": 13.605435371398926, |
|
"learning_rate": 4.627799599746238e-07, |
|
"loss": 0.6714, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 2.6243567753001713, |
|
"grad_norm": 16.512327194213867, |
|
"learning_rate": 4.6098892447089274e-07, |
|
"loss": 0.7124, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 2.630074328187536, |
|
"grad_norm": 17.260540008544922, |
|
"learning_rate": 4.5919839245320876e-07, |
|
"loss": 0.6999, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.6357918810749, |
|
"grad_norm": 19.792367935180664, |
|
"learning_rate": 4.574083870305961e-07, |
|
"loss": 0.7026, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 2.641509433962264, |
|
"grad_norm": 15.92784309387207, |
|
"learning_rate": 4.556189313052824e-07, |
|
"loss": 0.7325, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 2.6472269868496285, |
|
"grad_norm": 13.952754020690918, |
|
"learning_rate": 4.5383004837240137e-07, |
|
"loss": 0.7057, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 2.6529445397369926, |
|
"grad_norm": 14.797870635986328, |
|
"learning_rate": 4.5204176131969316e-07, |
|
"loss": 0.7064, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 2.6586620926243567, |
|
"grad_norm": 14.710382461547852, |
|
"learning_rate": 4.502540932272082e-07, |
|
"loss": 0.6909, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.664379645511721, |
|
"grad_norm": 14.503265380859375, |
|
"learning_rate": 4.4846706716700816e-07, |
|
"loss": 0.6937, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 2.6700971983990853, |
|
"grad_norm": 17.75704002380371, |
|
"learning_rate": 4.466807062028685e-07, |
|
"loss": 0.7037, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 2.6758147512864494, |
|
"grad_norm": 19.279897689819336, |
|
"learning_rate": 4.4489503338998085e-07, |
|
"loss": 0.7048, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 2.6815323041738135, |
|
"grad_norm": 18.44795036315918, |
|
"learning_rate": 4.431100717746554e-07, |
|
"loss": 0.7152, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 2.687249857061178, |
|
"grad_norm": 14.337074279785156, |
|
"learning_rate": 4.4132584439402343e-07, |
|
"loss": 0.7076, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.692967409948542, |
|
"grad_norm": 17.55514144897461, |
|
"learning_rate": 4.3954237427573994e-07, |
|
"loss": 0.6757, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 2.698684962835906, |
|
"grad_norm": 16.805578231811523, |
|
"learning_rate": 4.377596844376864e-07, |
|
"loss": 0.7015, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 2.7044025157232703, |
|
"grad_norm": 13.724994659423828, |
|
"learning_rate": 4.359777978876742e-07, |
|
"loss": 0.7132, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 2.710120068610635, |
|
"grad_norm": 15.69048023223877, |
|
"learning_rate": 4.341967376231471e-07, |
|
"loss": 0.6707, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 2.715837621497999, |
|
"grad_norm": 15.279611587524414, |
|
"learning_rate": 4.324165266308846e-07, |
|
"loss": 0.7217, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.721555174385363, |
|
"grad_norm": 14.593955039978027, |
|
"learning_rate": 4.3063718788670523e-07, |
|
"loss": 0.6932, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 17.884376525878906, |
|
"learning_rate": 4.288587443551705e-07, |
|
"loss": 0.7203, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 2.7329902801600916, |
|
"grad_norm": 14.831436157226562, |
|
"learning_rate": 4.270812189892873e-07, |
|
"loss": 0.6927, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 2.7387078330474557, |
|
"grad_norm": 13.828678131103516, |
|
"learning_rate": 4.253046347302133e-07, |
|
"loss": 0.726, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 2.7444253859348198, |
|
"grad_norm": 16.05724334716797, |
|
"learning_rate": 4.235290145069594e-07, |
|
"loss": 0.6964, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.750142938822184, |
|
"grad_norm": 12.63790512084961, |
|
"learning_rate": 4.2175438123609475e-07, |
|
"loss": 0.6944, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 2.7558604917095484, |
|
"grad_norm": 16.55657386779785, |
|
"learning_rate": 4.199807578214506e-07, |
|
"loss": 0.6944, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 2.7615780445969125, |
|
"grad_norm": 16.512378692626953, |
|
"learning_rate": 4.182081671538248e-07, |
|
"loss": 0.6999, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 2.767295597484277, |
|
"grad_norm": 16.38055992126465, |
|
"learning_rate": 4.1643663211068645e-07, |
|
"loss": 0.6956, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 2.773013150371641, |
|
"grad_norm": 19.406944274902344, |
|
"learning_rate": 4.1466617555588e-07, |
|
"loss": 0.6961, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.778730703259005, |
|
"grad_norm": 15.556472778320312, |
|
"learning_rate": 4.1289682033933114e-07, |
|
"loss": 0.7167, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 2.7844482561463693, |
|
"grad_norm": 19.273683547973633, |
|
"learning_rate": 4.1112858929675145e-07, |
|
"loss": 0.7002, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 2.7901658090337333, |
|
"grad_norm": 16.052841186523438, |
|
"learning_rate": 4.093615052493433e-07, |
|
"loss": 0.7183, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 2.795883361921098, |
|
"grad_norm": 21.037002563476562, |
|
"learning_rate": 4.0759559100350605e-07, |
|
"loss": 0.685, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 2.801600914808462, |
|
"grad_norm": 14.775420188903809, |
|
"learning_rate": 4.0583086935054136e-07, |
|
"loss": 0.706, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.807318467695826, |
|
"grad_norm": 14.18565559387207, |
|
"learning_rate": 4.040673630663583e-07, |
|
"loss": 0.6957, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 2.8130360205831906, |
|
"grad_norm": 17.286367416381836, |
|
"learning_rate": 4.023050949111809e-07, |
|
"loss": 0.7291, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 2.8187535734705547, |
|
"grad_norm": 14.268885612487793, |
|
"learning_rate": 4.0054408762925343e-07, |
|
"loss": 0.7156, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 2.8244711263579187, |
|
"grad_norm": 16.985570907592773, |
|
"learning_rate": 3.9878436394854685e-07, |
|
"loss": 0.6913, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 2.830188679245283, |
|
"grad_norm": 19.517696380615234, |
|
"learning_rate": 3.970259465804658e-07, |
|
"loss": 0.7179, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.8359062321326474, |
|
"grad_norm": 20.2652530670166, |
|
"learning_rate": 3.952688582195553e-07, |
|
"loss": 0.7018, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 2.8416237850200115, |
|
"grad_norm": 19.40163803100586, |
|
"learning_rate": 3.9351312154320787e-07, |
|
"loss": 0.6981, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 2.8473413379073755, |
|
"grad_norm": 17.079002380371094, |
|
"learning_rate": 3.9175875921137094e-07, |
|
"loss": 0.6888, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 2.85305889079474, |
|
"grad_norm": 16.94386100769043, |
|
"learning_rate": 3.90005793866254e-07, |
|
"loss": 0.6734, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 2.858776443682104, |
|
"grad_norm": 18.46063804626465, |
|
"learning_rate": 3.882542481320373e-07, |
|
"loss": 0.7051, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.8644939965694682, |
|
"grad_norm": 18.862567901611328, |
|
"learning_rate": 3.865041446145788e-07, |
|
"loss": 0.7099, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 2.8702115494568323, |
|
"grad_norm": 16.764009475708008, |
|
"learning_rate": 3.847555059011231e-07, |
|
"loss": 0.6957, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 2.8759291023441964, |
|
"grad_norm": 18.46530532836914, |
|
"learning_rate": 3.830083545600097e-07, |
|
"loss": 0.6949, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 2.881646655231561, |
|
"grad_norm": 14.511909484863281, |
|
"learning_rate": 3.812627131403815e-07, |
|
"loss": 0.6842, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 2.887364208118925, |
|
"grad_norm": 14.872234344482422, |
|
"learning_rate": 3.795186041718941e-07, |
|
"loss": 0.6798, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.8930817610062896, |
|
"grad_norm": 13.427173614501953, |
|
"learning_rate": 3.7777605016442514e-07, |
|
"loss": 0.7098, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 2.8987993138936536, |
|
"grad_norm": 17.797651290893555, |
|
"learning_rate": 3.7603507360778324e-07, |
|
"loss": 0.6901, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 2.9045168667810177, |
|
"grad_norm": 18.58063507080078, |
|
"learning_rate": 3.7429569697141827e-07, |
|
"loss": 0.6859, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 2.910234419668382, |
|
"grad_norm": 16.973247528076172, |
|
"learning_rate": 3.7255794270413123e-07, |
|
"loss": 0.6969, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 2.915951972555746, |
|
"grad_norm": 15.047221183776855, |
|
"learning_rate": 3.708218332337841e-07, |
|
"loss": 0.6913, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.9216695254431104, |
|
"grad_norm": 16.692338943481445, |
|
"learning_rate": 3.6908739096701145e-07, |
|
"loss": 0.695, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 2.9273870783304745, |
|
"grad_norm": 15.817334175109863, |
|
"learning_rate": 3.6735463828892956e-07, |
|
"loss": 0.6864, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 2.933104631217839, |
|
"grad_norm": 16.537561416625977, |
|
"learning_rate": 3.65623597562849e-07, |
|
"loss": 0.6994, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 2.938822184105203, |
|
"grad_norm": 17.43041229248047, |
|
"learning_rate": 3.6389429112998574e-07, |
|
"loss": 0.7183, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 2.944539736992567, |
|
"grad_norm": 14.238208770751953, |
|
"learning_rate": 3.62166741309172e-07, |
|
"loss": 0.711, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.9502572898799313, |
|
"grad_norm": 18.8483829498291, |
|
"learning_rate": 3.6044097039656917e-07, |
|
"loss": 0.6999, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 2.9559748427672954, |
|
"grad_norm": 18.139741897583008, |
|
"learning_rate": 3.587170006653794e-07, |
|
"loss": 0.7067, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 2.96169239565466, |
|
"grad_norm": 16.7657470703125, |
|
"learning_rate": 3.569948543655588e-07, |
|
"loss": 0.7044, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 2.967409948542024, |
|
"grad_norm": 15.640189170837402, |
|
"learning_rate": 3.55274553723529e-07, |
|
"loss": 0.7125, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 2.973127501429388, |
|
"grad_norm": 17.32538414001465, |
|
"learning_rate": 3.535561209418918e-07, |
|
"loss": 0.6741, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.9788450543167526, |
|
"grad_norm": 16.29108428955078, |
|
"learning_rate": 3.51839578199142e-07, |
|
"loss": 0.6876, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 2.9845626072041167, |
|
"grad_norm": 16.50261116027832, |
|
"learning_rate": 3.5012494764938095e-07, |
|
"loss": 0.6672, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 2.990280160091481, |
|
"grad_norm": 18.512115478515625, |
|
"learning_rate": 3.4841225142203045e-07, |
|
"loss": 0.6906, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 2.995997712978845, |
|
"grad_norm": 17.311607360839844, |
|
"learning_rate": 3.4670151162154825e-07, |
|
"loss": 0.7176, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 3.0017152658662094, |
|
"grad_norm": 17.8248348236084, |
|
"learning_rate": 3.4499275032714116e-07, |
|
"loss": 0.7088, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 3.0074328187535735, |
|
"grad_norm": 17.059553146362305, |
|
"learning_rate": 3.4328598959248134e-07, |
|
"loss": 0.6826, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 3.0131503716409376, |
|
"grad_norm": 18.721872329711914, |
|
"learning_rate": 3.415812514454215e-07, |
|
"loss": 0.6721, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 3.018867924528302, |
|
"grad_norm": 16.92786979675293, |
|
"learning_rate": 3.398785578877098e-07, |
|
"loss": 0.709, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 3.024585477415666, |
|
"grad_norm": 16.176301956176758, |
|
"learning_rate": 3.38177930894707e-07, |
|
"loss": 0.6862, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 3.0303030303030303, |
|
"grad_norm": 16.669635772705078, |
|
"learning_rate": 3.36479392415102e-07, |
|
"loss": 0.7259, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 3.0360205831903944, |
|
"grad_norm": 18.89837074279785, |
|
"learning_rate": 3.3478296437062905e-07, |
|
"loss": 0.6883, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 3.041738136077759, |
|
"grad_norm": 18.024032592773438, |
|
"learning_rate": 3.330886686557841e-07, |
|
"loss": 0.6985, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 3.047455688965123, |
|
"grad_norm": 16.023338317871094, |
|
"learning_rate": 3.313965271375433e-07, |
|
"loss": 0.7058, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 3.053173241852487, |
|
"grad_norm": 16.61005401611328, |
|
"learning_rate": 3.2970656165508017e-07, |
|
"loss": 0.6842, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 3.058890794739851, |
|
"grad_norm": 19.768972396850586, |
|
"learning_rate": 3.280187940194836e-07, |
|
"loss": 0.685, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 3.0646083476272157, |
|
"grad_norm": 18.239892959594727, |
|
"learning_rate": 3.263332460134767e-07, |
|
"loss": 0.694, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 3.0703259005145798, |
|
"grad_norm": 20.76768684387207, |
|
"learning_rate": 3.246499393911356e-07, |
|
"loss": 0.6915, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 3.076043453401944, |
|
"grad_norm": 15.302906036376953, |
|
"learning_rate": 3.229688958776086e-07, |
|
"loss": 0.6968, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 3.0817610062893084, |
|
"grad_norm": 17.009716033935547, |
|
"learning_rate": 3.212901371688353e-07, |
|
"loss": 0.6893, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 3.0874785591766725, |
|
"grad_norm": 16.54390525817871, |
|
"learning_rate": 3.1961368493126784e-07, |
|
"loss": 0.6914, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 3.0931961120640366, |
|
"grad_norm": 16.515085220336914, |
|
"learning_rate": 3.179395608015898e-07, |
|
"loss": 0.6964, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 3.0989136649514006, |
|
"grad_norm": 17.82352638244629, |
|
"learning_rate": 3.1626778638643816e-07, |
|
"loss": 0.6875, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 3.104631217838765, |
|
"grad_norm": 26.154184341430664, |
|
"learning_rate": 3.1459838326212375e-07, |
|
"loss": 0.6829, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 3.1103487707261293, |
|
"grad_norm": 15.472237586975098, |
|
"learning_rate": 3.1293137297435294e-07, |
|
"loss": 0.6771, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 3.1160663236134933, |
|
"grad_norm": 19.009233474731445, |
|
"learning_rate": 3.1126677703794995e-07, |
|
"loss": 0.6897, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 3.1217838765008574, |
|
"grad_norm": 19.676128387451172, |
|
"learning_rate": 3.09604616936578e-07, |
|
"loss": 0.6954, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 3.127501429388222, |
|
"grad_norm": 15.318999290466309, |
|
"learning_rate": 3.079449141224636e-07, |
|
"loss": 0.683, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 3.133218982275586, |
|
"grad_norm": 16.714824676513672, |
|
"learning_rate": 3.0628769001611873e-07, |
|
"loss": 0.6736, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 3.13893653516295, |
|
"grad_norm": 19.067108154296875, |
|
"learning_rate": 3.0463296600606434e-07, |
|
"loss": 0.691, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 3.1446540880503147, |
|
"grad_norm": 17.589868545532227, |
|
"learning_rate": 3.029807634485551e-07, |
|
"loss": 0.6967, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 3.1503716409376787, |
|
"grad_norm": 15.109379768371582, |
|
"learning_rate": 3.013311036673025e-07, |
|
"loss": 0.6896, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 3.156089193825043, |
|
"grad_norm": 16.637935638427734, |
|
"learning_rate": 2.9968400795320125e-07, |
|
"loss": 0.6745, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 3.161806746712407, |
|
"grad_norm": 15.541582107543945, |
|
"learning_rate": 2.9803949756405254e-07, |
|
"loss": 0.6814, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 3.1675242995997714, |
|
"grad_norm": 15.901352882385254, |
|
"learning_rate": 2.9639759372429166e-07, |
|
"loss": 0.6652, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 3.1732418524871355, |
|
"grad_norm": 15.980581283569336, |
|
"learning_rate": 2.947583176247128e-07, |
|
"loss": 0.6916, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 3.1789594053744996, |
|
"grad_norm": 20.715810775756836, |
|
"learning_rate": 2.93121690422196e-07, |
|
"loss": 0.7056, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 3.184676958261864, |
|
"grad_norm": 15.07290267944336, |
|
"learning_rate": 2.914877332394339e-07, |
|
"loss": 0.6879, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 3.1903945111492282, |
|
"grad_norm": 16.82330322265625, |
|
"learning_rate": 2.8985646716465915e-07, |
|
"loss": 0.6915, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 3.1961120640365923, |
|
"grad_norm": 18.356937408447266, |
|
"learning_rate": 2.882279132513731e-07, |
|
"loss": 0.6875, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 3.2018296169239564, |
|
"grad_norm": 15.063898086547852, |
|
"learning_rate": 2.8660209251807177e-07, |
|
"loss": 0.6848, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 3.207547169811321, |
|
"grad_norm": 18.487459182739258, |
|
"learning_rate": 2.8497902594797785e-07, |
|
"loss": 0.6903, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 3.213264722698685, |
|
"grad_norm": 14.276288986206055, |
|
"learning_rate": 2.83358734488767e-07, |
|
"loss": 0.6849, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 3.218982275586049, |
|
"grad_norm": 16.27110481262207, |
|
"learning_rate": 2.8174123905229897e-07, |
|
"loss": 0.6939, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 3.224699828473413, |
|
"grad_norm": 17.376869201660156, |
|
"learning_rate": 2.8012656051434776e-07, |
|
"loss": 0.6865, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 3.2304173813607777, |
|
"grad_norm": 18.101781845092773, |
|
"learning_rate": 2.785147197143312e-07, |
|
"loss": 0.6857, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 3.236134934248142, |
|
"grad_norm": 19.654117584228516, |
|
"learning_rate": 2.7690573745504304e-07, |
|
"loss": 0.6939, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 3.241852487135506, |
|
"grad_norm": 16.06427001953125, |
|
"learning_rate": 2.752996345023836e-07, |
|
"loss": 0.6955, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 3.2475700400228704, |
|
"grad_norm": 16.123897552490234, |
|
"learning_rate": 2.7369643158509303e-07, |
|
"loss": 0.7136, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 3.2532875929102345, |
|
"grad_norm": 17.64776611328125, |
|
"learning_rate": 2.72096149394482e-07, |
|
"loss": 0.6974, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 3.2590051457975986, |
|
"grad_norm": 15.063745498657227, |
|
"learning_rate": 2.704988085841661e-07, |
|
"loss": 0.681, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 3.2647226986849627, |
|
"grad_norm": 16.657075881958008, |
|
"learning_rate": 2.689044297697988e-07, |
|
"loss": 0.7049, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 3.270440251572327, |
|
"grad_norm": 15.873147964477539, |
|
"learning_rate": 2.6731303352880523e-07, |
|
"loss": 0.6895, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 3.2761578044596913, |
|
"grad_norm": 17.87798500061035, |
|
"learning_rate": 2.6572464040011674e-07, |
|
"loss": 0.6888, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 3.2818753573470554, |
|
"grad_norm": 15.981317520141602, |
|
"learning_rate": 2.6413927088390554e-07, |
|
"loss": 0.6955, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 3.2875929102344195, |
|
"grad_norm": 17.505796432495117, |
|
"learning_rate": 2.625569454413212e-07, |
|
"loss": 0.6938, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 3.293310463121784, |
|
"grad_norm": 15.487998962402344, |
|
"learning_rate": 2.6097768449422473e-07, |
|
"loss": 0.6791, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 3.299028016009148, |
|
"grad_norm": 13.96382999420166, |
|
"learning_rate": 2.59401508424927e-07, |
|
"loss": 0.6829, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 3.304745568896512, |
|
"grad_norm": 17.845163345336914, |
|
"learning_rate": 2.578284375759241e-07, |
|
"loss": 0.6899, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 3.3104631217838767, |
|
"grad_norm": 16.549325942993164, |
|
"learning_rate": 2.5625849224963557e-07, |
|
"loss": 0.6748, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 3.316180674671241, |
|
"grad_norm": 17.144500732421875, |
|
"learning_rate": 2.5469169270814226e-07, |
|
"loss": 0.6887, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 3.321898227558605, |
|
"grad_norm": 16.58336067199707, |
|
"learning_rate": 2.531280591729247e-07, |
|
"loss": 0.6905, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 3.327615780445969, |
|
"grad_norm": 19.05868911743164, |
|
"learning_rate": 2.515676118246025e-07, |
|
"loss": 0.6878, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 15.123451232910156, |
|
"learning_rate": 2.5001037080267316e-07, |
|
"loss": 0.6896, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 3.3390508862206976, |
|
"grad_norm": 14.63602352142334, |
|
"learning_rate": 2.484563562052532e-07, |
|
"loss": 0.7065, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 3.3447684391080617, |
|
"grad_norm": 17.01044273376465, |
|
"learning_rate": 2.4690558808881745e-07, |
|
"loss": 0.6992, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 3.350485991995426, |
|
"grad_norm": 14.306794166564941, |
|
"learning_rate": 2.4535808646794125e-07, |
|
"loss": 0.7112, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 3.3562035448827903, |
|
"grad_norm": 14.732017517089844, |
|
"learning_rate": 2.438138713150418e-07, |
|
"loss": 0.6792, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 3.3619210977701544, |
|
"grad_norm": 16.027528762817383, |
|
"learning_rate": 2.4227296256012013e-07, |
|
"loss": 0.681, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 3.3676386506575184, |
|
"grad_norm": 19.412641525268555, |
|
"learning_rate": 2.407353800905047e-07, |
|
"loss": 0.7146, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 3.373356203544883, |
|
"grad_norm": 17.197065353393555, |
|
"learning_rate": 2.3920114375059336e-07, |
|
"loss": 0.6998, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 3.379073756432247, |
|
"grad_norm": 19.924795150756836, |
|
"learning_rate": 2.3767027334159883e-07, |
|
"loss": 0.6898, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 3.384791309319611, |
|
"grad_norm": 18.47333335876465, |
|
"learning_rate": 2.3614278862129168e-07, |
|
"loss": 0.708, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 3.3905088622069752, |
|
"grad_norm": 19.336467742919922, |
|
"learning_rate": 2.3461870930374667e-07, |
|
"loss": 0.6849, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 3.3962264150943398, |
|
"grad_norm": 17.760784149169922, |
|
"learning_rate": 2.3309805505908664e-07, |
|
"loss": 0.6956, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 3.401943967981704, |
|
"grad_norm": 16.625778198242188, |
|
"learning_rate": 2.3158084551323064e-07, |
|
"loss": 0.69, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 3.407661520869068, |
|
"grad_norm": 16.27397346496582, |
|
"learning_rate": 2.300671002476392e-07, |
|
"loss": 0.6804, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 3.413379073756432, |
|
"grad_norm": 17.837446212768555, |
|
"learning_rate": 2.2855683879906184e-07, |
|
"loss": 0.6639, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 3.4190966266437965, |
|
"grad_norm": 13.911274909973145, |
|
"learning_rate": 2.2705008065928567e-07, |
|
"loss": 0.6826, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 3.4248141795311606, |
|
"grad_norm": 14.893251419067383, |
|
"learning_rate": 2.2554684527488278e-07, |
|
"loss": 0.6715, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 3.4305317324185247, |
|
"grad_norm": 15.816070556640625, |
|
"learning_rate": 2.2404715204695995e-07, |
|
"loss": 0.6893, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.4362492853058892, |
|
"grad_norm": 14.913555145263672, |
|
"learning_rate": 2.2255102033090794e-07, |
|
"loss": 0.6882, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 3.4419668381932533, |
|
"grad_norm": 18.053449630737305, |
|
"learning_rate": 2.210584694361522e-07, |
|
"loss": 0.6733, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 3.4476843910806174, |
|
"grad_norm": 17.731204986572266, |
|
"learning_rate": 2.195695186259028e-07, |
|
"loss": 0.6832, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 3.4534019439679815, |
|
"grad_norm": 18.586484909057617, |
|
"learning_rate": 2.1808418711690635e-07, |
|
"loss": 0.6723, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 3.459119496855346, |
|
"grad_norm": 16.404525756835938, |
|
"learning_rate": 2.1660249407919828e-07, |
|
"loss": 0.6914, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 3.46483704974271, |
|
"grad_norm": 17.5385684967041, |
|
"learning_rate": 2.151244586358547e-07, |
|
"loss": 0.6789, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 3.470554602630074, |
|
"grad_norm": 14.214513778686523, |
|
"learning_rate": 2.1365009986274607e-07, |
|
"loss": 0.7167, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 3.4762721555174387, |
|
"grad_norm": 16.127033233642578, |
|
"learning_rate": 2.1217943678829063e-07, |
|
"loss": 0.6924, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 3.481989708404803, |
|
"grad_norm": 18.360797882080078, |
|
"learning_rate": 2.1071248839320975e-07, |
|
"loss": 0.6815, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 3.487707261292167, |
|
"grad_norm": 15.16903018951416, |
|
"learning_rate": 2.0924927361028145e-07, |
|
"loss": 0.6873, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 3.493424814179531, |
|
"grad_norm": 15.084009170532227, |
|
"learning_rate": 2.0778981132409756e-07, |
|
"loss": 0.7026, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 3.4991423670668955, |
|
"grad_norm": 17.53474235534668, |
|
"learning_rate": 2.0633412037081894e-07, |
|
"loss": 0.6972, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 3.5048599199542596, |
|
"grad_norm": 14.676159858703613, |
|
"learning_rate": 2.0488221953793278e-07, |
|
"loss": 0.6897, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 3.5105774728416237, |
|
"grad_norm": 15.6426420211792, |
|
"learning_rate": 2.0343412756401023e-07, |
|
"loss": 0.6925, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 3.516295025728988, |
|
"grad_norm": 17.42006492614746, |
|
"learning_rate": 2.0198986313846406e-07, |
|
"loss": 0.67, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 3.5220125786163523, |
|
"grad_norm": 17.58439826965332, |
|
"learning_rate": 2.0054944490130844e-07, |
|
"loss": 0.7124, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 3.5277301315037164, |
|
"grad_norm": 16.255352020263672, |
|
"learning_rate": 1.9911289144291704e-07, |
|
"loss": 0.6698, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 3.5334476843910805, |
|
"grad_norm": 19.97213363647461, |
|
"learning_rate": 1.9768022130378454e-07, |
|
"loss": 0.6892, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 3.5391652372784446, |
|
"grad_norm": 18.21723747253418, |
|
"learning_rate": 1.962514529742859e-07, |
|
"loss": 0.6773, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 3.544882790165809, |
|
"grad_norm": 16.968833923339844, |
|
"learning_rate": 1.9482660489443876e-07, |
|
"loss": 0.6759, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 3.550600343053173, |
|
"grad_norm": 22.698190689086914, |
|
"learning_rate": 1.9340569545366487e-07, |
|
"loss": 0.6938, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 3.5563178959405377, |
|
"grad_norm": 16.91656494140625, |
|
"learning_rate": 1.9198874299055345e-07, |
|
"loss": 0.6757, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 3.562035448827902, |
|
"grad_norm": 14.955994606018066, |
|
"learning_rate": 1.9057576579262362e-07, |
|
"loss": 0.6737, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 3.567753001715266, |
|
"grad_norm": 13.794143676757812, |
|
"learning_rate": 1.891667820960887e-07, |
|
"loss": 0.7063, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 3.57347055460263, |
|
"grad_norm": 18.18063735961914, |
|
"learning_rate": 1.8776181008562165e-07, |
|
"loss": 0.6624, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 3.579188107489994, |
|
"grad_norm": 18.60443878173828, |
|
"learning_rate": 1.863608678941187e-07, |
|
"loss": 0.7291, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 3.5849056603773586, |
|
"grad_norm": 20.440473556518555, |
|
"learning_rate": 1.8496397360246735e-07, |
|
"loss": 0.6804, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 3.5906232132647227, |
|
"grad_norm": 18.79050064086914, |
|
"learning_rate": 1.8357114523931078e-07, |
|
"loss": 0.681, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 3.5963407661520868, |
|
"grad_norm": 17.849271774291992, |
|
"learning_rate": 1.8218240078081737e-07, |
|
"loss": 0.715, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 3.6020583190394513, |
|
"grad_norm": 16.68122673034668, |
|
"learning_rate": 1.8079775815044724e-07, |
|
"loss": 0.6771, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 3.6077758719268154, |
|
"grad_norm": 21.269062042236328, |
|
"learning_rate": 1.7941723521872114e-07, |
|
"loss": 0.6802, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 3.6134934248141795, |
|
"grad_norm": 16.997730255126953, |
|
"learning_rate": 1.780408498029906e-07, |
|
"loss": 0.6978, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 3.6192109777015435, |
|
"grad_norm": 16.483427047729492, |
|
"learning_rate": 1.7666861966720698e-07, |
|
"loss": 0.6705, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 3.624928530588908, |
|
"grad_norm": 18.589954376220703, |
|
"learning_rate": 1.7530056252169274e-07, |
|
"loss": 0.6788, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 3.630646083476272, |
|
"grad_norm": 16.00810432434082, |
|
"learning_rate": 1.7393669602291244e-07, |
|
"loss": 0.6839, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 18.412017822265625, |
|
"learning_rate": 1.7257703777324595e-07, |
|
"loss": 0.7016, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 3.6420811892510008, |
|
"grad_norm": 18.97647476196289, |
|
"learning_rate": 1.712216053207597e-07, |
|
"loss": 0.6791, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 3.647798742138365, |
|
"grad_norm": 16.424823760986328, |
|
"learning_rate": 1.6987041615898152e-07, |
|
"loss": 0.6766, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 3.653516295025729, |
|
"grad_norm": 16.81319808959961, |
|
"learning_rate": 1.6852348772667406e-07, |
|
"loss": 0.7148, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 3.659233847913093, |
|
"grad_norm": 18.625141143798828, |
|
"learning_rate": 1.6718083740761013e-07, |
|
"loss": 0.681, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.6649514008004576, |
|
"grad_norm": 20.720613479614258, |
|
"learning_rate": 1.6584248253034804e-07, |
|
"loss": 0.6653, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 3.6706689536878216, |
|
"grad_norm": 19.35886573791504, |
|
"learning_rate": 1.6450844036800814e-07, |
|
"loss": 0.7049, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 3.6763865065751857, |
|
"grad_norm": 15.835514068603516, |
|
"learning_rate": 1.6317872813805033e-07, |
|
"loss": 0.679, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 3.6821040594625503, |
|
"grad_norm": 17.737224578857422, |
|
"learning_rate": 1.6185336300205073e-07, |
|
"loss": 0.6774, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 3.6878216123499143, |
|
"grad_norm": 15.835929870605469, |
|
"learning_rate": 1.6053236206548143e-07, |
|
"loss": 0.6842, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 3.6935391652372784, |
|
"grad_norm": 16.491268157958984, |
|
"learning_rate": 1.5921574237748868e-07, |
|
"loss": 0.7032, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 3.6992567181246425, |
|
"grad_norm": 19.976850509643555, |
|
"learning_rate": 1.5790352093067365e-07, |
|
"loss": 0.6972, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 3.7049742710120066, |
|
"grad_norm": 17.204557418823242, |
|
"learning_rate": 1.5659571466087253e-07, |
|
"loss": 0.6979, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 3.710691823899371, |
|
"grad_norm": 16.28580093383789, |
|
"learning_rate": 1.5529234044693818e-07, |
|
"loss": 0.672, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 3.716409376786735, |
|
"grad_norm": 16.73383903503418, |
|
"learning_rate": 1.5399341511052278e-07, |
|
"loss": 0.6713, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 3.7221269296740997, |
|
"grad_norm": 16.786861419677734, |
|
"learning_rate": 1.5269895541585965e-07, |
|
"loss": 0.6799, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 3.727844482561464, |
|
"grad_norm": 16.983375549316406, |
|
"learning_rate": 1.51408978069548e-07, |
|
"loss": 0.6916, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 3.733562035448828, |
|
"grad_norm": 15.858549118041992, |
|
"learning_rate": 1.5012349972033634e-07, |
|
"loss": 0.6709, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 3.739279588336192, |
|
"grad_norm": 19.247562408447266, |
|
"learning_rate": 1.488425369589087e-07, |
|
"loss": 0.7158, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 3.744997141223556, |
|
"grad_norm": 15.711267471313477, |
|
"learning_rate": 1.4756610631766896e-07, |
|
"loss": 0.6888, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 3.7507146941109206, |
|
"grad_norm": 19.656904220581055, |
|
"learning_rate": 1.4629422427052934e-07, |
|
"loss": 0.6687, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 3.7564322469982847, |
|
"grad_norm": 16.6679630279541, |
|
"learning_rate": 1.4502690723269633e-07, |
|
"loss": 0.6728, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 3.762149799885649, |
|
"grad_norm": 15.8047456741333, |
|
"learning_rate": 1.4376417156045923e-07, |
|
"loss": 0.6943, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 3.7678673527730133, |
|
"grad_norm": 17.548009872436523, |
|
"learning_rate": 1.425060335509798e-07, |
|
"loss": 0.6843, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 3.7735849056603774, |
|
"grad_norm": 18.866233825683594, |
|
"learning_rate": 1.4125250944208039e-07, |
|
"loss": 0.6661, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 3.7793024585477415, |
|
"grad_norm": 16.393108367919922, |
|
"learning_rate": 1.4000361541203637e-07, |
|
"loss": 0.6903, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 3.7850200114351056, |
|
"grad_norm": 19.90047836303711, |
|
"learning_rate": 1.3875936757936496e-07, |
|
"loss": 0.7255, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 3.79073756432247, |
|
"grad_norm": 17.67254638671875, |
|
"learning_rate": 1.3751978200261954e-07, |
|
"loss": 0.686, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 3.796455117209834, |
|
"grad_norm": 18.827899932861328, |
|
"learning_rate": 1.3628487468018085e-07, |
|
"loss": 0.6793, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 3.8021726700971983, |
|
"grad_norm": 20.450260162353516, |
|
"learning_rate": 1.3505466155005092e-07, |
|
"loss": 0.6828, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 3.807890222984563, |
|
"grad_norm": 18.889638900756836, |
|
"learning_rate": 1.3382915848964777e-07, |
|
"loss": 0.7031, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 3.813607775871927, |
|
"grad_norm": 19.9688663482666, |
|
"learning_rate": 1.3260838131559987e-07, |
|
"loss": 0.6785, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 3.819325328759291, |
|
"grad_norm": 17.95281982421875, |
|
"learning_rate": 1.3139234578354235e-07, |
|
"loss": 0.7063, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 3.825042881646655, |
|
"grad_norm": 15.684403419494629, |
|
"learning_rate": 1.3018106758791358e-07, |
|
"loss": 0.6715, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 3.8307604345340196, |
|
"grad_norm": 17.38626480102539, |
|
"learning_rate": 1.2897456236175298e-07, |
|
"loss": 0.701, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.8364779874213837, |
|
"grad_norm": 16.06013298034668, |
|
"learning_rate": 1.2777284567649825e-07, |
|
"loss": 0.6707, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 3.8421955403087478, |
|
"grad_norm": 16.085487365722656, |
|
"learning_rate": 1.2657593304178583e-07, |
|
"loss": 0.6858, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 3.8479130931961123, |
|
"grad_norm": 18.255733489990234, |
|
"learning_rate": 1.2538383990524937e-07, |
|
"loss": 0.686, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 3.8536306460834764, |
|
"grad_norm": 20.442323684692383, |
|
"learning_rate": 1.2419658165232122e-07, |
|
"loss": 0.6726, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 3.8593481989708405, |
|
"grad_norm": 16.613813400268555, |
|
"learning_rate": 1.230141736060335e-07, |
|
"loss": 0.6884, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 3.8650657518582046, |
|
"grad_norm": 16.380739212036133, |
|
"learning_rate": 1.2183663102682029e-07, |
|
"loss": 0.6694, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 3.8707833047455686, |
|
"grad_norm": 17.78510093688965, |
|
"learning_rate": 1.206639691123213e-07, |
|
"loss": 0.6988, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 3.876500857632933, |
|
"grad_norm": 16.328201293945312, |
|
"learning_rate": 1.1949620299718467e-07, |
|
"loss": 0.6882, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 3.8822184105202973, |
|
"grad_norm": 17.736520767211914, |
|
"learning_rate": 1.1833334775287273e-07, |
|
"loss": 0.6861, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 3.887935963407662, |
|
"grad_norm": 18.87068748474121, |
|
"learning_rate": 1.1717541838746659e-07, |
|
"loss": 0.6827, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.893653516295026, |
|
"grad_norm": 21.540847778320312, |
|
"learning_rate": 1.160224298454729e-07, |
|
"loss": 0.7021, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 3.89937106918239, |
|
"grad_norm": 15.944188117980957, |
|
"learning_rate": 1.148743970076309e-07, |
|
"loss": 0.7025, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 3.905088622069754, |
|
"grad_norm": 17.067222595214844, |
|
"learning_rate": 1.1373133469072033e-07, |
|
"loss": 0.6799, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 3.910806174957118, |
|
"grad_norm": 17.428482055664062, |
|
"learning_rate": 1.1259325764737049e-07, |
|
"loss": 0.6897, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 3.9165237278444827, |
|
"grad_norm": 15.801222801208496, |
|
"learning_rate": 1.1146018056586903e-07, |
|
"loss": 0.6857, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 3.9222412807318467, |
|
"grad_norm": 19.24358558654785, |
|
"learning_rate": 1.1033211806997367e-07, |
|
"loss": 0.6919, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 3.927958833619211, |
|
"grad_norm": 17.26407814025879, |
|
"learning_rate": 1.0920908471872192e-07, |
|
"loss": 0.6943, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 3.9336763865065754, |
|
"grad_norm": 17.91105842590332, |
|
"learning_rate": 1.0809109500624486e-07, |
|
"loss": 0.6927, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 3.9393939393939394, |
|
"grad_norm": 21.539033889770508, |
|
"learning_rate": 1.0697816336157805e-07, |
|
"loss": 0.6827, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 3.9451114922813035, |
|
"grad_norm": 15.918169021606445, |
|
"learning_rate": 1.0587030414847753e-07, |
|
"loss": 0.6805, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.9508290451686676, |
|
"grad_norm": 18.38698959350586, |
|
"learning_rate": 1.0476753166523278e-07, |
|
"loss": 0.7088, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 3.956546598056032, |
|
"grad_norm": 16.94473648071289, |
|
"learning_rate": 1.0366986014448276e-07, |
|
"loss": 0.6869, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 3.9622641509433962, |
|
"grad_norm": 16.196962356567383, |
|
"learning_rate": 1.0257730375303264e-07, |
|
"loss": 0.6828, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 3.9679817038307603, |
|
"grad_norm": 19.227867126464844, |
|
"learning_rate": 1.0148987659166986e-07, |
|
"loss": 0.6786, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 3.973699256718125, |
|
"grad_norm": 15.71776294708252, |
|
"learning_rate": 1.0040759269498373e-07, |
|
"loss": 0.6987, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 3.979416809605489, |
|
"grad_norm": 15.973411560058594, |
|
"learning_rate": 9.933046603118229e-08, |
|
"loss": 0.6943, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 3.985134362492853, |
|
"grad_norm": 18.862775802612305, |
|
"learning_rate": 9.825851050191402e-08, |
|
"loss": 0.6896, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 3.990851915380217, |
|
"grad_norm": 15.70366382598877, |
|
"learning_rate": 9.719173994208718e-08, |
|
"loss": 0.7044, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 3.9965694682675816, |
|
"grad_norm": 17.324993133544922, |
|
"learning_rate": 9.613016811969144e-08, |
|
"loss": 0.6801, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 4.002287021154945, |
|
"grad_norm": 16.205799102783203, |
|
"learning_rate": 9.507380873562082e-08, |
|
"loss": 0.6951, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 4.00800457404231, |
|
"grad_norm": 17.790599822998047, |
|
"learning_rate": 9.402267542349595e-08, |
|
"loss": 0.6811, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 4.013722126929674, |
|
"grad_norm": 15.449918746948242, |
|
"learning_rate": 9.297678174948875e-08, |
|
"loss": 0.6645, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 4.019439679817038, |
|
"grad_norm": 24.729137420654297, |
|
"learning_rate": 9.193614121214699e-08, |
|
"loss": 0.668, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 4.0251572327044025, |
|
"grad_norm": 18.29893684387207, |
|
"learning_rate": 9.090076724222063e-08, |
|
"loss": 0.7008, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 4.030874785591767, |
|
"grad_norm": 17.138538360595703, |
|
"learning_rate": 8.987067320248753e-08, |
|
"loss": 0.6688, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 4.036592338479131, |
|
"grad_norm": 18.376750946044922, |
|
"learning_rate": 8.884587238758207e-08, |
|
"loss": 0.6829, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 4.042309891366495, |
|
"grad_norm": 17.92237663269043, |
|
"learning_rate": 8.78263780238227e-08, |
|
"loss": 0.7007, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 4.04802744425386, |
|
"grad_norm": 16.868654251098633, |
|
"learning_rate": 8.681220326904165e-08, |
|
"loss": 0.6996, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 4.053744997141224, |
|
"grad_norm": 17.588733673095703, |
|
"learning_rate": 8.5803361212415e-08, |
|
"loss": 0.6894, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 4.059462550028588, |
|
"grad_norm": 18.68837547302246, |
|
"learning_rate": 8.47998648742937e-08, |
|
"loss": 0.6716, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 4.065180102915952, |
|
"grad_norm": 19.368438720703125, |
|
"learning_rate": 8.380172720603601e-08, |
|
"loss": 0.6771, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 4.070897655803316, |
|
"grad_norm": 18.050640106201172, |
|
"learning_rate": 8.280896108983943e-08, |
|
"loss": 0.6803, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 4.07661520869068, |
|
"grad_norm": 16.39789581298828, |
|
"learning_rate": 8.182157933857543e-08, |
|
"loss": 0.6801, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 4.082332761578044, |
|
"grad_norm": 19.105905532836914, |
|
"learning_rate": 8.083959469562324e-08, |
|
"loss": 0.6868, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 4.088050314465409, |
|
"grad_norm": 18.83084487915039, |
|
"learning_rate": 7.986301983470595e-08, |
|
"loss": 0.6655, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 4.093767867352773, |
|
"grad_norm": 15.07455062866211, |
|
"learning_rate": 7.889186735972647e-08, |
|
"loss": 0.6782, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 4.099485420240137, |
|
"grad_norm": 22.17110252380371, |
|
"learning_rate": 7.79261498046056e-08, |
|
"loss": 0.668, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 4.1052029731275015, |
|
"grad_norm": 15.451409339904785, |
|
"learning_rate": 7.69658796331194e-08, |
|
"loss": 0.6651, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 4.110920526014866, |
|
"grad_norm": 17.61263656616211, |
|
"learning_rate": 7.60110692387388e-08, |
|
"loss": 0.695, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 4.11663807890223, |
|
"grad_norm": 15.308425903320312, |
|
"learning_rate": 7.506173094446982e-08, |
|
"loss": 0.6937, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 4.122355631789594, |
|
"grad_norm": 20.97743034362793, |
|
"learning_rate": 7.41178770026939e-08, |
|
"loss": 0.6808, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 4.128073184676959, |
|
"grad_norm": 18.173051834106445, |
|
"learning_rate": 7.317951959501056e-08, |
|
"loss": 0.6914, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 4.133790737564323, |
|
"grad_norm": 20.474937438964844, |
|
"learning_rate": 7.224667083207925e-08, |
|
"loss": 0.6714, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 4.139508290451687, |
|
"grad_norm": 17.615968704223633, |
|
"learning_rate": 7.131934275346408e-08, |
|
"loss": 0.6595, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 4.145225843339051, |
|
"grad_norm": 18.85563087463379, |
|
"learning_rate": 7.039754732747766e-08, |
|
"loss": 0.6724, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 4.150943396226415, |
|
"grad_norm": 22.671489715576172, |
|
"learning_rate": 6.948129645102674e-08, |
|
"loss": 0.6994, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 4.156660949113779, |
|
"grad_norm": 18.428152084350586, |
|
"learning_rate": 6.857060194945924e-08, |
|
"loss": 0.6705, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 4.162378502001143, |
|
"grad_norm": 16.202274322509766, |
|
"learning_rate": 6.766547557641084e-08, |
|
"loss": 0.6793, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 4.168096054888507, |
|
"grad_norm": 16.106399536132812, |
|
"learning_rate": 6.676592901365408e-08, |
|
"loss": 0.6542, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 4.173813607775872, |
|
"grad_norm": 22.502248764038086, |
|
"learning_rate": 6.587197387094645e-08, |
|
"loss": 0.7127, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 4.179531160663236, |
|
"grad_norm": 20.502391815185547, |
|
"learning_rate": 6.4983621685882e-08, |
|
"loss": 0.7128, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 4.1852487135506005, |
|
"grad_norm": 21.65897560119629, |
|
"learning_rate": 6.410088392374119e-08, |
|
"loss": 0.6901, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 4.1909662664379645, |
|
"grad_norm": 16.635141372680664, |
|
"learning_rate": 6.322377197734379e-08, |
|
"loss": 0.6806, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 4.196683819325329, |
|
"grad_norm": 20.783342361450195, |
|
"learning_rate": 6.235229716690132e-08, |
|
"loss": 0.6817, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 4.202401372212693, |
|
"grad_norm": 16.46611785888672, |
|
"learning_rate": 6.148647073987113e-08, |
|
"loss": 0.686, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 4.208118925100057, |
|
"grad_norm": 17.907161712646484, |
|
"learning_rate": 6.062630387081102e-08, |
|
"loss": 0.6759, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 4.213836477987422, |
|
"grad_norm": 17.944095611572266, |
|
"learning_rate": 5.977180766123547e-08, |
|
"loss": 0.7087, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 4.219554030874786, |
|
"grad_norm": 15.839191436767578, |
|
"learning_rate": 5.892299313947213e-08, |
|
"loss": 0.6782, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 4.22527158376215, |
|
"grad_norm": 21.773887634277344, |
|
"learning_rate": 5.807987126051922e-08, |
|
"loss": 0.6756, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 4.230989136649514, |
|
"grad_norm": 16.483123779296875, |
|
"learning_rate": 5.7242452905904625e-08, |
|
"loss": 0.6674, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 4.236706689536878, |
|
"grad_norm": 20.822376251220703, |
|
"learning_rate": 5.6410748883545137e-08, |
|
"loss": 0.7021, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 4.242424242424242, |
|
"grad_norm": 21.443439483642578, |
|
"learning_rate": 5.558476992760686e-08, |
|
"loss": 0.6816, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 4.248141795311606, |
|
"grad_norm": 20.461912155151367, |
|
"learning_rate": 5.476452669836712e-08, |
|
"loss": 0.6659, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 4.25385934819897, |
|
"grad_norm": 18.985849380493164, |
|
"learning_rate": 5.3950029782076356e-08, |
|
"loss": 0.7025, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 4.259576901086335, |
|
"grad_norm": 17.57728385925293, |
|
"learning_rate": 5.31412896908221e-08, |
|
"loss": 0.6988, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 4.265294453973699, |
|
"grad_norm": 18.530332565307617, |
|
"learning_rate": 5.2338316862392596e-08, |
|
"loss": 0.68, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 4.2710120068610635, |
|
"grad_norm": 21.527196884155273, |
|
"learning_rate": 5.154112166014274e-08, |
|
"loss": 0.6725, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 4.276729559748428, |
|
"grad_norm": 21.66538429260254, |
|
"learning_rate": 5.0749714372859744e-08, |
|
"loss": 0.6753, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 4.282447112635792, |
|
"grad_norm": 23.227149963378906, |
|
"learning_rate": 4.9964105214631016e-08, |
|
"loss": 0.6791, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 4.288164665523156, |
|
"grad_norm": 17.627233505249023, |
|
"learning_rate": 4.9184304324711433e-08, |
|
"loss": 0.6875, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 4.29388221841052, |
|
"grad_norm": 16.310253143310547, |
|
"learning_rate": 4.841032176739363e-08, |
|
"loss": 0.6664, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 4.299599771297885, |
|
"grad_norm": 17.00972557067871, |
|
"learning_rate": 4.764216753187694e-08, |
|
"loss": 0.6977, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 4.305317324185249, |
|
"grad_norm": 20.538524627685547, |
|
"learning_rate": 4.6879851532139194e-08, |
|
"loss": 0.6803, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 4.311034877072613, |
|
"grad_norm": 16.814311981201172, |
|
"learning_rate": 4.61233836068089e-08, |
|
"loss": 0.6922, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 4.316752429959977, |
|
"grad_norm": 23.158313751220703, |
|
"learning_rate": 4.537277351903734e-08, |
|
"loss": 0.6774, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 4.322469982847341, |
|
"grad_norm": 14.462933540344238, |
|
"learning_rate": 4.4628030956373966e-08, |
|
"loss": 0.6544, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 4.328187535734705, |
|
"grad_norm": 21.14085578918457, |
|
"learning_rate": 4.388916553063965e-08, |
|
"loss": 0.6752, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 4.333905088622069, |
|
"grad_norm": 21.701650619506836, |
|
"learning_rate": 4.315618677780436e-08, |
|
"loss": 0.6853, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 4.339622641509434, |
|
"grad_norm": 17.62489128112793, |
|
"learning_rate": 4.242910415786288e-08, |
|
"loss": 0.702, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 4.345340194396798, |
|
"grad_norm": 18.198381423950195, |
|
"learning_rate": 4.170792705471315e-08, |
|
"loss": 0.6784, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 4.3510577472841625, |
|
"grad_norm": 19.804584503173828, |
|
"learning_rate": 4.0992664776035414e-08, |
|
"loss": 0.7006, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 4.356775300171527, |
|
"grad_norm": 17.359596252441406, |
|
"learning_rate": 4.0283326553171346e-08, |
|
"loss": 0.6784, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 4.362492853058891, |
|
"grad_norm": 15.833115577697754, |
|
"learning_rate": 3.9579921541005925e-08, |
|
"loss": 0.6723, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 4.368210405946255, |
|
"grad_norm": 16.290063858032227, |
|
"learning_rate": 3.888245881784813e-08, |
|
"loss": 0.6656, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 4.373927958833619, |
|
"grad_norm": 16.540603637695312, |
|
"learning_rate": 3.8190947385314865e-08, |
|
"loss": 0.6761, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 4.379645511720984, |
|
"grad_norm": 15.980658531188965, |
|
"learning_rate": 3.750539616821402e-08, |
|
"loss": 0.6656, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 4.385363064608348, |
|
"grad_norm": 16.78759002685547, |
|
"learning_rate": 3.682581401442969e-08, |
|
"loss": 0.6996, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 4.391080617495712, |
|
"grad_norm": 14.298684120178223, |
|
"learning_rate": 3.6152209694807755e-08, |
|
"loss": 0.6911, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 4.396798170383076, |
|
"grad_norm": 16.176973342895508, |
|
"learning_rate": 3.548459190304276e-08, |
|
"loss": 0.6678, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 4.40251572327044, |
|
"grad_norm": 16.40897560119629, |
|
"learning_rate": 3.4822969255565934e-08, |
|
"loss": 0.6915, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 4.408233276157804, |
|
"grad_norm": 23.246614456176758, |
|
"learning_rate": 3.41673502914333e-08, |
|
"loss": 0.6919, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 4.413950829045168, |
|
"grad_norm": 18.167871475219727, |
|
"learning_rate": 3.351774347221653e-08, |
|
"loss": 0.7224, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 4.419668381932533, |
|
"grad_norm": 19.697547912597656, |
|
"learning_rate": 3.287415718189268e-08, |
|
"loss": 0.711, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 4.425385934819897, |
|
"grad_norm": 17.16083335876465, |
|
"learning_rate": 3.223659972673681e-08, |
|
"loss": 0.6496, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 4.4311034877072615, |
|
"grad_norm": 15.293824195861816, |
|
"learning_rate": 3.160507933521422e-08, |
|
"loss": 0.6649, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 4.436821040594626, |
|
"grad_norm": 16.652938842773438, |
|
"learning_rate": 3.097960415787443e-08, |
|
"loss": 0.6901, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 4.44253859348199, |
|
"grad_norm": 19.576251983642578, |
|
"learning_rate": 3.036018226724607e-08, |
|
"loss": 0.6654, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 4.448256146369354, |
|
"grad_norm": 18.951457977294922, |
|
"learning_rate": 2.9746821657732678e-08, |
|
"loss": 0.6804, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 4.453973699256718, |
|
"grad_norm": 20.799861907958984, |
|
"learning_rate": 2.9139530245509526e-08, |
|
"loss": 0.6934, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 4.459691252144083, |
|
"grad_norm": 14.459927558898926, |
|
"learning_rate": 2.8538315868421203e-08, |
|
"loss": 0.6595, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 4.465408805031447, |
|
"grad_norm": 16.431798934936523, |
|
"learning_rate": 2.794318628588094e-08, |
|
"loss": 0.6773, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 4.471126357918811, |
|
"grad_norm": 15.203049659729004, |
|
"learning_rate": 2.7354149178769936e-08, |
|
"loss": 0.7064, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 4.476843910806175, |
|
"grad_norm": 22.0187931060791, |
|
"learning_rate": 2.6771212149338807e-08, |
|
"loss": 0.6787, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 4.482561463693539, |
|
"grad_norm": 17.56171226501465, |
|
"learning_rate": 2.619438272110863e-08, |
|
"loss": 0.6941, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 4.488279016580903, |
|
"grad_norm": 20.990859985351562, |
|
"learning_rate": 2.5623668338774885e-08, |
|
"loss": 0.7069, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 4.493996569468267, |
|
"grad_norm": 21.19357681274414, |
|
"learning_rate": 2.5059076368110444e-08, |
|
"loss": 0.6983, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 4.499714122355631, |
|
"grad_norm": 16.244356155395508, |
|
"learning_rate": 2.4500614095871174e-08, |
|
"loss": 0.6788, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 4.505431675242996, |
|
"grad_norm": 18.686336517333984, |
|
"learning_rate": 2.3948288729701506e-08, |
|
"loss": 0.6861, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 4.5111492281303605, |
|
"grad_norm": 18.47435760498047, |
|
"learning_rate": 2.3402107398041492e-08, |
|
"loss": 0.6557, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 4.5168667810177245, |
|
"grad_norm": 17.009096145629883, |
|
"learning_rate": 2.286207715003502e-08, |
|
"loss": 0.6917, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 4.522584333905089, |
|
"grad_norm": 17.560272216796875, |
|
"learning_rate": 2.2328204955438357e-08, |
|
"loss": 0.705, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 4.528301886792453, |
|
"grad_norm": 20.05234146118164, |
|
"learning_rate": 2.180049770453085e-08, |
|
"loss": 0.6759, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 4.534019439679817, |
|
"grad_norm": 19.148618698120117, |
|
"learning_rate": 2.1278962208025332e-08, |
|
"loss": 0.668, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 4.539736992567181, |
|
"grad_norm": 18.14238929748535, |
|
"learning_rate": 2.0763605196980615e-08, |
|
"loss": 0.6781, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 4.545454545454545, |
|
"grad_norm": 18.119190216064453, |
|
"learning_rate": 2.0254433322714758e-08, |
|
"loss": 0.6533, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 4.55117209834191, |
|
"grad_norm": 16.649980545043945, |
|
"learning_rate": 1.9751453156718666e-08, |
|
"loss": 0.6872, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 4.556889651229274, |
|
"grad_norm": 15.812389373779297, |
|
"learning_rate": 1.9254671190571948e-08, |
|
"loss": 0.6863, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 4.562607204116638, |
|
"grad_norm": 14.970600128173828, |
|
"learning_rate": 1.876409383585842e-08, |
|
"loss": 0.6896, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 4.568324757004002, |
|
"grad_norm": 18.37346649169922, |
|
"learning_rate": 1.827972742408407e-08, |
|
"loss": 0.6693, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 4.574042309891366, |
|
"grad_norm": 20.24564552307129, |
|
"learning_rate": 1.7801578206594725e-08, |
|
"loss": 0.6849, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 4.57975986277873, |
|
"grad_norm": 19.068187713623047, |
|
"learning_rate": 1.7329652354496016e-08, |
|
"loss": 0.6902, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 4.5854774156660945, |
|
"grad_norm": 18.084623336791992, |
|
"learning_rate": 1.686395595857304e-08, |
|
"loss": 0.6784, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 4.591194968553459, |
|
"grad_norm": 16.805601119995117, |
|
"learning_rate": 1.640449502921215e-08, |
|
"loss": 0.6657, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 4.5969125214408235, |
|
"grad_norm": 16.83079719543457, |
|
"learning_rate": 1.595127549632347e-08, |
|
"loss": 0.6951, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 4.602630074328188, |
|
"grad_norm": 18.01809310913086, |
|
"learning_rate": 1.5504303209263935e-08, |
|
"loss": 0.6749, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 4.608347627215552, |
|
"grad_norm": 16.533803939819336, |
|
"learning_rate": 1.5063583936762325e-08, |
|
"loss": 0.6836, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 4.614065180102916, |
|
"grad_norm": 17.195920944213867, |
|
"learning_rate": 1.4629123366844354e-08, |
|
"loss": 0.6721, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 4.61978273299028, |
|
"grad_norm": 17.525428771972656, |
|
"learning_rate": 1.420092710675963e-08, |
|
"loss": 0.688, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 4.625500285877644, |
|
"grad_norm": 17.868335723876953, |
|
"learning_rate": 1.3779000682909103e-08, |
|
"loss": 0.6875, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 4.631217838765009, |
|
"grad_norm": 14.877182006835938, |
|
"learning_rate": 1.3363349540773561e-08, |
|
"loss": 0.6591, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 4.636935391652373, |
|
"grad_norm": 22.093046188354492, |
|
"learning_rate": 1.2953979044843633e-08, |
|
"loss": 0.6894, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 4.642652944539737, |
|
"grad_norm": 19.40646743774414, |
|
"learning_rate": 1.2550894478550678e-08, |
|
"loss": 0.6662, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 4.648370497427101, |
|
"grad_norm": 17.37119483947754, |
|
"learning_rate": 1.2154101044198118e-08, |
|
"loss": 0.6887, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 4.654088050314465, |
|
"grad_norm": 17.71015739440918, |
|
"learning_rate": 1.1763603862894544e-08, |
|
"loss": 0.6801, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 4.659805603201829, |
|
"grad_norm": 17.027597427368164, |
|
"learning_rate": 1.137940797448783e-08, |
|
"loss": 0.6865, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 4.665523156089193, |
|
"grad_norm": 24.06705665588379, |
|
"learning_rate": 1.1001518337499793e-08, |
|
"loss": 0.6884, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 4.671240708976558, |
|
"grad_norm": 18.24313735961914, |
|
"learning_rate": 1.0629939829062351e-08, |
|
"loss": 0.693, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 4.6769582618639225, |
|
"grad_norm": 19.492963790893555, |
|
"learning_rate": 1.0264677244854303e-08, |
|
"loss": 0.6848, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 4.682675814751287, |
|
"grad_norm": 18.08428955078125, |
|
"learning_rate": 9.90573529903993e-09, |
|
"loss": 0.6924, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 4.688393367638651, |
|
"grad_norm": 14.725008964538574, |
|
"learning_rate": 9.553118624207824e-09, |
|
"loss": 0.7114, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 4.694110920526015, |
|
"grad_norm": 20.670289993286133, |
|
"learning_rate": 9.206831771310986e-09, |
|
"loss": 0.6478, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 4.699828473413379, |
|
"grad_norm": 16.0545597076416, |
|
"learning_rate": 8.866879209608436e-09, |
|
"loss": 0.7017, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 4.705546026300743, |
|
"grad_norm": 18.441730499267578, |
|
"learning_rate": 8.533265326607253e-09, |
|
"loss": 0.6937, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 4.711263579188108, |
|
"grad_norm": 22.546123504638672, |
|
"learning_rate": 8.20599442800618e-09, |
|
"loss": 0.6759, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 4.716981132075472, |
|
"grad_norm": 16.173694610595703, |
|
"learning_rate": 7.88507073763972e-09, |
|
"loss": 0.6623, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 4.722698684962836, |
|
"grad_norm": 19.89776039123535, |
|
"learning_rate": 7.570498397424018e-09, |
|
"loss": 0.695, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 4.7284162378502, |
|
"grad_norm": 19.8604736328125, |
|
"learning_rate": 7.262281467303011e-09, |
|
"loss": 0.6925, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 4.734133790737564, |
|
"grad_norm": 19.60188865661621, |
|
"learning_rate": 6.960423925196468e-09, |
|
"loss": 0.6604, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 4.739851343624928, |
|
"grad_norm": 16.95119857788086, |
|
"learning_rate": 6.66492966694826e-09, |
|
"loss": 0.6576, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 4.745568896512292, |
|
"grad_norm": 26.87150001525879, |
|
"learning_rate": 6.3758025062761736e-09, |
|
"loss": 0.6714, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 4.751286449399657, |
|
"grad_norm": 15.25060749053955, |
|
"learning_rate": 6.093046174723004e-09, |
|
"loss": 0.6928, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 4.7570040022870215, |
|
"grad_norm": 19.705732345581055, |
|
"learning_rate": 5.816664321607767e-09, |
|
"loss": 0.6735, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 4.762721555174386, |
|
"grad_norm": 14.579095840454102, |
|
"learning_rate": 5.54666051397934e-09, |
|
"loss": 0.6814, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 4.76843910806175, |
|
"grad_norm": 19.118043899536133, |
|
"learning_rate": 5.283038236569837e-09, |
|
"loss": 0.6787, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 4.774156660949114, |
|
"grad_norm": 18.051437377929688, |
|
"learning_rate": 5.025800891749865e-09, |
|
"loss": 0.6772, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 4.779874213836478, |
|
"grad_norm": 17.763010025024414, |
|
"learning_rate": 4.774951799484672e-09, |
|
"loss": 0.6716, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 4.785591766723842, |
|
"grad_norm": 18.0462646484375, |
|
"learning_rate": 4.530494197291179e-09, |
|
"loss": 0.6761, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 4.791309319611207, |
|
"grad_norm": 16.567672729492188, |
|
"learning_rate": 4.292431240196403e-09, |
|
"loss": 0.6927, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 4.797026872498571, |
|
"grad_norm": 15.077381134033203, |
|
"learning_rate": 4.060766000696214e-09, |
|
"loss": 0.6906, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 4.802744425385935, |
|
"grad_norm": 18.41817855834961, |
|
"learning_rate": 3.8355014687164175e-09, |
|
"loss": 0.6884, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 4.808461978273299, |
|
"grad_norm": 17.96348762512207, |
|
"learning_rate": 3.6166405515735686e-09, |
|
"loss": 0.6593, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 4.814179531160663, |
|
"grad_norm": 16.820146560668945, |
|
"learning_rate": 3.404186073937776e-09, |
|
"loss": 0.6994, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 4.819897084048027, |
|
"grad_norm": 16.160144805908203, |
|
"learning_rate": 3.1981407777961767e-09, |
|
"loss": 0.6702, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 4.825614636935391, |
|
"grad_norm": 16.64735221862793, |
|
"learning_rate": 2.998507322417465e-09, |
|
"loss": 0.6854, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 4.8313321898227555, |
|
"grad_norm": 18.906169891357422, |
|
"learning_rate": 2.805288284317475e-09, |
|
"loss": 0.6773, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 4.8370497427101204, |
|
"grad_norm": 15.96215534210205, |
|
"learning_rate": 2.618486157226374e-09, |
|
"loss": 0.6826, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 4.8427672955974845, |
|
"grad_norm": 17.746631622314453, |
|
"learning_rate": 2.4381033520559648e-09, |
|
"loss": 0.6786, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 4.848484848484849, |
|
"grad_norm": 15.410120964050293, |
|
"learning_rate": 2.264142196868768e-09, |
|
"loss": 0.6698, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 4.854202401372213, |
|
"grad_norm": 20.618608474731445, |
|
"learning_rate": 2.0966049368481566e-09, |
|
"loss": 0.722, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 4.859919954259577, |
|
"grad_norm": 16.922340393066406, |
|
"learning_rate": 1.9354937342690446e-09, |
|
"loss": 0.7022, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 4.865637507146941, |
|
"grad_norm": 17.488685607910156, |
|
"learning_rate": 1.7808106684703005e-09, |
|
"loss": 0.6848, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 4.871355060034305, |
|
"grad_norm": 15.6874418258667, |
|
"learning_rate": 1.6325577358276e-09, |
|
"loss": 0.6674, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 4.877072612921669, |
|
"grad_norm": 17.592735290527344, |
|
"learning_rate": 1.4907368497279471e-09, |
|
"loss": 0.6863, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 4.882790165809034, |
|
"grad_norm": 18.772600173950195, |
|
"learning_rate": 1.355349840544806e-09, |
|
"loss": 0.6809, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 4.888507718696398, |
|
"grad_norm": 19.88614273071289, |
|
"learning_rate": 1.226398455614508e-09, |
|
"loss": 0.6594, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 4.894225271583762, |
|
"grad_norm": 18.379648208618164, |
|
"learning_rate": 1.1038843592137136e-09, |
|
"loss": 0.6746, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 4.899942824471126, |
|
"grad_norm": 16.48954963684082, |
|
"learning_rate": 9.878091325379311e-10, |
|
"loss": 0.6912, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 4.90566037735849, |
|
"grad_norm": 17.712480545043945, |
|
"learning_rate": 8.781742736811426e-10, |
|
"loss": 0.6921, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 4.9113779302458545, |
|
"grad_norm": 17.105844497680664, |
|
"learning_rate": 7.749811976164866e-10, |
|
"loss": 0.6805, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 4.9170954831332185, |
|
"grad_norm": 19.131834030151367, |
|
"learning_rate": 6.782312361778286e-10, |
|
"loss": 0.6824, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 4.9228130360205835, |
|
"grad_norm": 17.468740463256836, |
|
"learning_rate": 5.879256380427744e-10, |
|
"loss": 0.6737, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 4.928530588907948, |
|
"grad_norm": 21.079586029052734, |
|
"learning_rate": 5.040655687164608e-10, |
|
"loss": 0.6573, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 4.934248141795312, |
|
"grad_norm": 18.417434692382812, |
|
"learning_rate": 4.2665211051651216e-10, |
|
"loss": 0.6848, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 4.939965694682676, |
|
"grad_norm": 20.74665069580078, |
|
"learning_rate": 3.5568626255910726e-10, |
|
"loss": 0.6718, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 4.94568324757004, |
|
"grad_norm": 19.413671493530273, |
|
"learning_rate": 2.911689407459894e-10, |
|
"loss": 0.676, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 4.951400800457404, |
|
"grad_norm": 17.327878952026367, |
|
"learning_rate": 2.3310097775280926e-10, |
|
"loss": 0.676, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 4.957118353344768, |
|
"grad_norm": 16.513090133666992, |
|
"learning_rate": 1.8148312301830006e-10, |
|
"loss": 0.6781, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 4.962835906232133, |
|
"grad_norm": 20.207876205444336, |
|
"learning_rate": 1.363160427344523e-10, |
|
"loss": 0.6768, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 4.968553459119497, |
|
"grad_norm": 15.743894577026367, |
|
"learning_rate": 9.760031983824246e-11, |
|
"loss": 0.6985, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 4.974271012006861, |
|
"grad_norm": 19.615123748779297, |
|
"learning_rate": 6.533645400375044e-11, |
|
"loss": 0.6948, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 4.979988564894225, |
|
"grad_norm": 16.00938606262207, |
|
"learning_rate": 3.952486163594226e-11, |
|
"loss": 0.6806, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 4.985706117781589, |
|
"grad_norm": 19.39284324645996, |
|
"learning_rate": 2.016587586534113e-11, |
|
"loss": 0.6762, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 4.991423670668953, |
|
"grad_norm": 18.533754348754883, |
|
"learning_rate": 7.259746543530898e-12, |
|
"loss": 0.6853, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 4.9971412235563175, |
|
"grad_norm": 17.494525909423828, |
|
"learning_rate": 8.066402399364846e-13, |
|
"loss": 0.6858, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 87450, |
|
"total_flos": 1.0778410613996421e+19, |
|
"train_loss": 0.7259524769071445, |
|
"train_runtime": 192674.39, |
|
"train_samples_per_second": 1.815, |
|
"train_steps_per_second": 0.454 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 87450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0778410613996421e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|